11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/kernel/exit.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 1991, 1992 Linus Torvalds 51da177e4SLinus Torvalds */ 61da177e4SLinus Torvalds 71da177e4SLinus Torvalds #include <linux/mm.h> 81da177e4SLinus Torvalds #include <linux/slab.h> 91da177e4SLinus Torvalds #include <linux/interrupt.h> 101da177e4SLinus Torvalds #include <linux/module.h> 11c59ede7bSRandy.Dunlap #include <linux/capability.h> 121da177e4SLinus Torvalds #include <linux/completion.h> 131da177e4SLinus Torvalds #include <linux/personality.h> 141da177e4SLinus Torvalds #include <linux/tty.h> 15da9cbc87SJens Axboe #include <linux/iocontext.h> 161da177e4SLinus Torvalds #include <linux/key.h> 171da177e4SLinus Torvalds #include <linux/security.h> 181da177e4SLinus Torvalds #include <linux/cpu.h> 191da177e4SLinus Torvalds #include <linux/acct.h> 208f0ab514SJay Lan #include <linux/tsacct_kern.h> 211da177e4SLinus Torvalds #include <linux/file.h> 229f3acc31SAl Viro #include <linux/fdtable.h> 2380d26af8SMandeep Singh Baines #include <linux/freezer.h> 241da177e4SLinus Torvalds #include <linux/binfmts.h> 25ab516013SSerge E. Hallyn #include <linux/nsproxy.h> 2684d73786SSukadev Bhattiprolu #include <linux/pid_namespace.h> 271da177e4SLinus Torvalds #include <linux/ptrace.h> 281da177e4SLinus Torvalds #include <linux/profile.h> 291da177e4SLinus Torvalds #include <linux/mount.h> 301da177e4SLinus Torvalds #include <linux/proc_fs.h> 3149d769d5SEric W. 
Biederman #include <linux/kthread.h> 321da177e4SLinus Torvalds #include <linux/mempolicy.h> 33c757249aSShailabh Nagar #include <linux/taskstats_kern.h> 34ca74e92bSShailabh Nagar #include <linux/delayacct.h> 35b4f48b63SPaul Menage #include <linux/cgroup.h> 361da177e4SLinus Torvalds #include <linux/syscalls.h> 377ed20e1aSJesper Juhl #include <linux/signal.h> 386a14c5c9SOleg Nesterov #include <linux/posix-timers.h> 399f46080cSMatt Helsley #include <linux/cn_proc.h> 40de5097c2SIngo Molnar #include <linux/mutex.h> 410771dfefSIngo Molnar #include <linux/futex.h> 42b92ce558SJens Axboe #include <linux/pipe_fs_i.h> 43fa84cb93SAl Viro #include <linux/audit.h> /* for audit_free() */ 4483cc5ed3SAdrian Bunk #include <linux/resource.h> 450d67a46dSDavid Howells #include <linux/blkdev.h> 466eaeeabaSEric Dumazet #include <linux/task_io_accounting_ops.h> 4730199f5aSRoland McGrath #include <linux/tracehook.h> 485ad4e53bSAl Viro #include <linux/fs_struct.h> 49d84f4f99SDavid Howells #include <linux/init_task.h> 50cdd6c482SIngo Molnar #include <linux/perf_event.h> 51ad8d75ffSSteven Rostedt #include <trace/events/sched.h> 5224f1e32cSFrederic Weisbecker #include <linux/hw_breakpoint.h> 533d5992d2SYing Han #include <linux/oom.h> 5454848d73SWu Fengguang #include <linux/writeback.h> 5540401530SAl Viro #include <linux/shm.h> 565c9a8750SDmitry Vyukov #include <linux/kcov.h> 571da177e4SLinus Torvalds 581da177e4SLinus Torvalds #include <asm/uaccess.h> 591da177e4SLinus Torvalds #include <asm/unistd.h> 601da177e4SLinus Torvalds #include <asm/pgtable.h> 611da177e4SLinus Torvalds #include <asm/mmu_context.h> 621da177e4SLinus Torvalds 63d40e48e0SOleg Nesterov static void __unhash_process(struct task_struct *p, bool group_dead) 641da177e4SLinus Torvalds { 651da177e4SLinus Torvalds nr_threads--; 6650d75f8dSOleg Nesterov detach_pid(p, PIDTYPE_PID); 67d40e48e0SOleg Nesterov if (group_dead) { 681da177e4SLinus Torvalds detach_pid(p, PIDTYPE_PGID); 691da177e4SLinus Torvalds detach_pid(p, PIDTYPE_SID); 
70c97d9893SOleg Nesterov 715e85d4abSEric W. Biederman list_del_rcu(&p->tasks); 729cd80bbbSOleg Nesterov list_del_init(&p->sibling); 73909ea964SChristoph Lameter __this_cpu_dec(process_counts); 746347e900SEric W. Biederman } 7547e65328SOleg Nesterov list_del_rcu(&p->thread_group); 760c740d0aSOleg Nesterov list_del_rcu(&p->thread_node); 771da177e4SLinus Torvalds } 781da177e4SLinus Torvalds 796a14c5c9SOleg Nesterov /* 806a14c5c9SOleg Nesterov * This function expects the tasklist_lock write-locked. 816a14c5c9SOleg Nesterov */ 826a14c5c9SOleg Nesterov static void __exit_signal(struct task_struct *tsk) 836a14c5c9SOleg Nesterov { 846a14c5c9SOleg Nesterov struct signal_struct *sig = tsk->signal; 85d40e48e0SOleg Nesterov bool group_dead = thread_group_leader(tsk); 866a14c5c9SOleg Nesterov struct sighand_struct *sighand; 874ada856fSOleg Nesterov struct tty_struct *uninitialized_var(tty); 886fac4829SFrederic Weisbecker cputime_t utime, stime; 896a14c5c9SOleg Nesterov 90d11c563dSPaul E. McKenney sighand = rcu_dereference_check(tsk->sighand, 91db1466b3SPaul E. McKenney lockdep_tasklist_lock_is_held()); 926a14c5c9SOleg Nesterov spin_lock(&sighand->siglock); 936a14c5c9SOleg Nesterov 946a14c5c9SOleg Nesterov posix_cpu_timers_exit(tsk); 95d40e48e0SOleg Nesterov if (group_dead) { 966a14c5c9SOleg Nesterov posix_cpu_timers_exit_group(tsk); 974ada856fSOleg Nesterov tty = sig->tty; 984ada856fSOleg Nesterov sig->tty = NULL; 994a599942SOleg Nesterov } else { 1006a14c5c9SOleg Nesterov /* 101e0a70217SOleg Nesterov * This can only happen if the caller is de_thread(). 102e0a70217SOleg Nesterov * FIXME: this is the temporary hack, we should teach 103e0a70217SOleg Nesterov * posix-cpu-timers to handle this case correctly. 
104e0a70217SOleg Nesterov */ 105e0a70217SOleg Nesterov if (unlikely(has_group_leader_pid(tsk))) 106e0a70217SOleg Nesterov posix_cpu_timers_exit_group(tsk); 107e0a70217SOleg Nesterov 108e0a70217SOleg Nesterov /* 1096a14c5c9SOleg Nesterov * If there is any task waiting for the group exit 1106a14c5c9SOleg Nesterov * then notify it: 1116a14c5c9SOleg Nesterov */ 112d344193aSOleg Nesterov if (sig->notify_count > 0 && !--sig->notify_count) 1136a14c5c9SOleg Nesterov wake_up_process(sig->group_exit_task); 1146db840faSOleg Nesterov 1156a14c5c9SOleg Nesterov if (tsk == sig->curr_target) 1166a14c5c9SOleg Nesterov sig->curr_target = next_thread(tsk); 11790ed9cbeSRik van Riel } 11890ed9cbeSRik van Riel 1196a14c5c9SOleg Nesterov /* 12026e75b5cSOleg Nesterov * Accumulate here the counters for all threads as they die. We could 12126e75b5cSOleg Nesterov * skip the group leader because it is the last user of signal_struct, 12226e75b5cSOleg Nesterov * but we want to avoid the race with thread_group_cputime() which can 12326e75b5cSOleg Nesterov * see the empty ->thread_head list. 
1246a14c5c9SOleg Nesterov */ 1256fac4829SFrederic Weisbecker task_cputime(tsk, &utime, &stime); 126e78c3496SRik van Riel write_seqlock(&sig->stats_lock); 1276fac4829SFrederic Weisbecker sig->utime += utime; 1286fac4829SFrederic Weisbecker sig->stime += stime; 1296fac4829SFrederic Weisbecker sig->gtime += task_gtime(tsk); 1306a14c5c9SOleg Nesterov sig->min_flt += tsk->min_flt; 1316a14c5c9SOleg Nesterov sig->maj_flt += tsk->maj_flt; 1326a14c5c9SOleg Nesterov sig->nvcsw += tsk->nvcsw; 1336a14c5c9SOleg Nesterov sig->nivcsw += tsk->nivcsw; 1346eaeeabaSEric Dumazet sig->inblock += task_io_get_inblock(tsk); 1356eaeeabaSEric Dumazet sig->oublock += task_io_get_oublock(tsk); 1365995477aSAndrea Righi task_io_accounting_add(&sig->ioac, &tsk->ioac); 13732bd671dSPeter Zijlstra sig->sum_sched_runtime += tsk->se.sum_exec_runtime; 138b3ac022cSOleg Nesterov sig->nr_threads--; 139d40e48e0SOleg Nesterov __unhash_process(tsk, group_dead); 140e78c3496SRik van Riel write_sequnlock(&sig->stats_lock); 1415876700cSOleg Nesterov 142da7978b0SOleg Nesterov /* 143da7978b0SOleg Nesterov * Do this under ->siglock, we can race with another thread 144da7978b0SOleg Nesterov * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. 145da7978b0SOleg Nesterov */ 146da7978b0SOleg Nesterov flush_sigqueue(&tsk->pending); 147a7e5328aSOleg Nesterov tsk->sighand = NULL; 1486a14c5c9SOleg Nesterov spin_unlock(&sighand->siglock); 1496a14c5c9SOleg Nesterov 150a7e5328aSOleg Nesterov __cleanup_sighand(sighand); 1516a14c5c9SOleg Nesterov clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 152d40e48e0SOleg Nesterov if (group_dead) { 1536a14c5c9SOleg Nesterov flush_sigqueue(&sig->shared_pending); 1544ada856fSOleg Nesterov tty_kref_put(tty); 1556a14c5c9SOleg Nesterov } 1566a14c5c9SOleg Nesterov } 1576a14c5c9SOleg Nesterov 1588c7904a0SEric W. Biederman static void delayed_put_task_struct(struct rcu_head *rhp) 1598c7904a0SEric W. 
Biederman { 1600a16b607SMathieu Desnoyers struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); 1610a16b607SMathieu Desnoyers 1624e231c79SPeter Zijlstra perf_event_delayed_put(tsk); 1630a16b607SMathieu Desnoyers trace_sched_process_free(tsk); 1640a16b607SMathieu Desnoyers put_task_struct(tsk); 1658c7904a0SEric W. Biederman } 1668c7904a0SEric W. Biederman 167f470021aSRoland McGrath 1681da177e4SLinus Torvalds void release_task(struct task_struct *p) 1691da177e4SLinus Torvalds { 17036c8b586SIngo Molnar struct task_struct *leader; 1711da177e4SLinus Torvalds int zap_leader; 1721da177e4SLinus Torvalds repeat: 173c69e8d9cSDavid Howells /* don't need to get the RCU readlock here - the process is dead and 174d11c563dSPaul E. McKenney * can't be modifying its own credentials. But shut RCU-lockdep up */ 175d11c563dSPaul E. McKenney rcu_read_lock(); 176c69e8d9cSDavid Howells atomic_dec(&__task_cred(p)->user->processes); 177d11c563dSPaul E. McKenney rcu_read_unlock(); 178c69e8d9cSDavid Howells 17960347f67SPavel Emelyanov proc_flush_task(p); 1800203026bSIngo Molnar 1811da177e4SLinus Torvalds write_lock_irq(&tasklist_lock); 182a288eeccSTejun Heo ptrace_release_task(p); 1831da177e4SLinus Torvalds __exit_signal(p); 18435f5cad8SOleg Nesterov 1851da177e4SLinus Torvalds /* 1861da177e4SLinus Torvalds * If we are the last non-leader member of the thread 1871da177e4SLinus Torvalds * group, and the leader is zombie, then notify the 1881da177e4SLinus Torvalds * group leader's parent process. (if it wants notification.) 
1891da177e4SLinus Torvalds */ 1901da177e4SLinus Torvalds zap_leader = 0; 1911da177e4SLinus Torvalds leader = p->group_leader; 192a0be55deSIonut Alexa if (leader != p && thread_group_empty(leader) 193a0be55deSIonut Alexa && leader->exit_state == EXIT_ZOMBIE) { 1941da177e4SLinus Torvalds /* 1951da177e4SLinus Torvalds * If we were the last child thread and the leader has 1961da177e4SLinus Torvalds * exited already, and the leader's parent ignores SIGCHLD, 1971da177e4SLinus Torvalds * then we are the one who should release the leader. 1981da177e4SLinus Torvalds */ 19986773473SOleg Nesterov zap_leader = do_notify_parent(leader, leader->exit_signal); 200dae33574SRoland McGrath if (zap_leader) 201dae33574SRoland McGrath leader->exit_state = EXIT_DEAD; 2021da177e4SLinus Torvalds } 2031da177e4SLinus Torvalds 2041da177e4SLinus Torvalds write_unlock_irq(&tasklist_lock); 2051da177e4SLinus Torvalds release_thread(p); 2068c7904a0SEric W. Biederman call_rcu(&p->rcu, delayed_put_task_struct); 2071da177e4SLinus Torvalds 2081da177e4SLinus Torvalds p = leader; 2091da177e4SLinus Torvalds if (unlikely(zap_leader)) 2101da177e4SLinus Torvalds goto repeat; 2111da177e4SLinus Torvalds } 2121da177e4SLinus Torvalds 2131da177e4SLinus Torvalds /* 2141da177e4SLinus Torvalds * Determine if a process group is "orphaned", according to the POSIX 2151da177e4SLinus Torvalds * definition in 2.2.2.52. Orphaned process groups are not to be affected 2161da177e4SLinus Torvalds * by terminal-generated stop signals. Newly orphaned process groups are 2171da177e4SLinus Torvalds * to receive a SIGHUP and a SIGCONT. 2181da177e4SLinus Torvalds * 2191da177e4SLinus Torvalds * "I ask you, have you ever known what it is to be an orphan?" 
2201da177e4SLinus Torvalds */ 221a0be55deSIonut Alexa static int will_become_orphaned_pgrp(struct pid *pgrp, 222a0be55deSIonut Alexa struct task_struct *ignored_task) 2231da177e4SLinus Torvalds { 2241da177e4SLinus Torvalds struct task_struct *p; 2251da177e4SLinus Torvalds 2260475ac08SEric W. Biederman do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 22705e83df6SOleg Nesterov if ((p == ignored_task) || 22805e83df6SOleg Nesterov (p->exit_state && thread_group_empty(p)) || 22905e83df6SOleg Nesterov is_global_init(p->real_parent)) 2301da177e4SLinus Torvalds continue; 23105e83df6SOleg Nesterov 2320475ac08SEric W. Biederman if (task_pgrp(p->real_parent) != pgrp && 23305e83df6SOleg Nesterov task_session(p->real_parent) == task_session(p)) 23405e83df6SOleg Nesterov return 0; 2350475ac08SEric W. Biederman } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 23605e83df6SOleg Nesterov 23705e83df6SOleg Nesterov return 1; 2381da177e4SLinus Torvalds } 2391da177e4SLinus Torvalds 2403e7cd6c4SEric W. Biederman int is_current_pgrp_orphaned(void) 2411da177e4SLinus Torvalds { 2421da177e4SLinus Torvalds int retval; 2431da177e4SLinus Torvalds 2441da177e4SLinus Torvalds read_lock(&tasklist_lock); 2453e7cd6c4SEric W. Biederman retval = will_become_orphaned_pgrp(task_pgrp(current), NULL); 2461da177e4SLinus Torvalds read_unlock(&tasklist_lock); 2471da177e4SLinus Torvalds 2481da177e4SLinus Torvalds return retval; 2491da177e4SLinus Torvalds } 2501da177e4SLinus Torvalds 251961c4675SOleg Nesterov static bool has_stopped_jobs(struct pid *pgrp) 2521da177e4SLinus Torvalds { 2531da177e4SLinus Torvalds struct task_struct *p; 2541da177e4SLinus Torvalds 2550475ac08SEric W. Biederman do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 256961c4675SOleg Nesterov if (p->signal->flags & SIGNAL_STOP_STOPPED) 257961c4675SOleg Nesterov return true; 2580475ac08SEric W. 
Biederman } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 259961c4675SOleg Nesterov 260961c4675SOleg Nesterov return false; 2611da177e4SLinus Torvalds } 2621da177e4SLinus Torvalds 263f49ee505SOleg Nesterov /* 264f49ee505SOleg Nesterov * Check to see if any process groups have become orphaned as 265f49ee505SOleg Nesterov * a result of our exiting, and if they have any stopped jobs, 266f49ee505SOleg Nesterov * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) 267f49ee505SOleg Nesterov */ 268f49ee505SOleg Nesterov static void 269f49ee505SOleg Nesterov kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) 270f49ee505SOleg Nesterov { 271f49ee505SOleg Nesterov struct pid *pgrp = task_pgrp(tsk); 272f49ee505SOleg Nesterov struct task_struct *ignored_task = tsk; 273f49ee505SOleg Nesterov 274f49ee505SOleg Nesterov if (!parent) 275f49ee505SOleg Nesterov /* exit: our father is in a different pgrp than 276f49ee505SOleg Nesterov * we are and we were the only connection outside. 277f49ee505SOleg Nesterov */ 278f49ee505SOleg Nesterov parent = tsk->real_parent; 279f49ee505SOleg Nesterov else 280f49ee505SOleg Nesterov /* reparent: our child is in a different pgrp than 281f49ee505SOleg Nesterov * we are, and it was the only connection outside. 282f49ee505SOleg Nesterov */ 283f49ee505SOleg Nesterov ignored_task = NULL; 284f49ee505SOleg Nesterov 285f49ee505SOleg Nesterov if (task_pgrp(parent) != pgrp && 286f49ee505SOleg Nesterov task_session(parent) == task_session(tsk) && 287f49ee505SOleg Nesterov will_become_orphaned_pgrp(pgrp, ignored_task) && 288f49ee505SOleg Nesterov has_stopped_jobs(pgrp)) { 289f49ee505SOleg Nesterov __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); 290f49ee505SOleg Nesterov __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); 291f49ee505SOleg Nesterov } 292f49ee505SOleg Nesterov } 293f49ee505SOleg Nesterov 294f98bafa0SOleg Nesterov #ifdef CONFIG_MEMCG 295cf475ad2SBalbir Singh /* 296733eda7aSKAMEZAWA Hiroyuki * A task is exiting. 
If it owned this mm, find a new owner for the mm. 297cf475ad2SBalbir Singh */ 298cf475ad2SBalbir Singh void mm_update_next_owner(struct mm_struct *mm) 299cf475ad2SBalbir Singh { 300cf475ad2SBalbir Singh struct task_struct *c, *g, *p = current; 301cf475ad2SBalbir Singh 302cf475ad2SBalbir Singh retry: 303733eda7aSKAMEZAWA Hiroyuki /* 304733eda7aSKAMEZAWA Hiroyuki * If the exiting or execing task is not the owner, it's 305733eda7aSKAMEZAWA Hiroyuki * someone else's problem. 306733eda7aSKAMEZAWA Hiroyuki */ 307733eda7aSKAMEZAWA Hiroyuki if (mm->owner != p) 308cf475ad2SBalbir Singh return; 309733eda7aSKAMEZAWA Hiroyuki /* 310733eda7aSKAMEZAWA Hiroyuki * The current owner is exiting/execing and there are no other 311733eda7aSKAMEZAWA Hiroyuki * candidates. Do not leave the mm pointing to a possibly 312733eda7aSKAMEZAWA Hiroyuki * freed task structure. 313733eda7aSKAMEZAWA Hiroyuki */ 314733eda7aSKAMEZAWA Hiroyuki if (atomic_read(&mm->mm_users) <= 1) { 315733eda7aSKAMEZAWA Hiroyuki mm->owner = NULL; 316733eda7aSKAMEZAWA Hiroyuki return; 317733eda7aSKAMEZAWA Hiroyuki } 318cf475ad2SBalbir Singh 319cf475ad2SBalbir Singh read_lock(&tasklist_lock); 320cf475ad2SBalbir Singh /* 321cf475ad2SBalbir Singh * Search in the children 322cf475ad2SBalbir Singh */ 323cf475ad2SBalbir Singh list_for_each_entry(c, &p->children, sibling) { 324cf475ad2SBalbir Singh if (c->mm == mm) 325cf475ad2SBalbir Singh goto assign_new_owner; 326cf475ad2SBalbir Singh } 327cf475ad2SBalbir Singh 328cf475ad2SBalbir Singh /* 329cf475ad2SBalbir Singh * Search in the siblings 330cf475ad2SBalbir Singh */ 331dea33cfdSOleg Nesterov list_for_each_entry(c, &p->real_parent->children, sibling) { 332cf475ad2SBalbir Singh if (c->mm == mm) 333cf475ad2SBalbir Singh goto assign_new_owner; 334cf475ad2SBalbir Singh } 335cf475ad2SBalbir Singh 336cf475ad2SBalbir Singh /* 337f87fb599SOleg Nesterov * Search through everything else, we should not get here often. 
338cf475ad2SBalbir Singh */ 33939af1765SOleg Nesterov for_each_process(g) { 34039af1765SOleg Nesterov if (g->flags & PF_KTHREAD) 34139af1765SOleg Nesterov continue; 34239af1765SOleg Nesterov for_each_thread(g, c) { 34339af1765SOleg Nesterov if (c->mm == mm) 344cf475ad2SBalbir Singh goto assign_new_owner; 34539af1765SOleg Nesterov if (c->mm) 34639af1765SOleg Nesterov break; 34739af1765SOleg Nesterov } 348f87fb599SOleg Nesterov } 349cf475ad2SBalbir Singh read_unlock(&tasklist_lock); 35031a78f23SBalbir Singh /* 35131a78f23SBalbir Singh * We found no owner yet mm_users > 1: this implies that we are 35231a78f23SBalbir Singh * most likely racing with swapoff (try_to_unuse()) or /proc or 353e5991371SHugh Dickins * ptrace or page migration (get_task_mm()). Mark owner as NULL. 35431a78f23SBalbir Singh */ 35531a78f23SBalbir Singh mm->owner = NULL; 356cf475ad2SBalbir Singh return; 357cf475ad2SBalbir Singh 358cf475ad2SBalbir Singh assign_new_owner: 359cf475ad2SBalbir Singh BUG_ON(c == p); 360cf475ad2SBalbir Singh get_task_struct(c); 361cf475ad2SBalbir Singh /* 362cf475ad2SBalbir Singh * The task_lock protects c->mm from changing. 
363cf475ad2SBalbir Singh * We always want mm->owner->mm == mm 364cf475ad2SBalbir Singh */ 365cf475ad2SBalbir Singh task_lock(c); 366e5991371SHugh Dickins /* 367e5991371SHugh Dickins * Delay read_unlock() till we have the task_lock() 368e5991371SHugh Dickins * to ensure that c does not slip away underneath us 369e5991371SHugh Dickins */ 370e5991371SHugh Dickins read_unlock(&tasklist_lock); 371cf475ad2SBalbir Singh if (c->mm != mm) { 372cf475ad2SBalbir Singh task_unlock(c); 373cf475ad2SBalbir Singh put_task_struct(c); 374cf475ad2SBalbir Singh goto retry; 375cf475ad2SBalbir Singh } 376cf475ad2SBalbir Singh mm->owner = c; 377cf475ad2SBalbir Singh task_unlock(c); 378cf475ad2SBalbir Singh put_task_struct(c); 379cf475ad2SBalbir Singh } 380f98bafa0SOleg Nesterov #endif /* CONFIG_MEMCG */ 381cf475ad2SBalbir Singh 3821da177e4SLinus Torvalds /* 3831da177e4SLinus Torvalds * Turn us into a lazy TLB process if we 3841da177e4SLinus Torvalds * aren't already.. 3851da177e4SLinus Torvalds */ 386408b664aSAdrian Bunk static void exit_mm(struct task_struct *tsk) 3871da177e4SLinus Torvalds { 3881da177e4SLinus Torvalds struct mm_struct *mm = tsk->mm; 389b564daf8SOleg Nesterov struct core_state *core_state; 3901da177e4SLinus Torvalds 39148d212a2SLinus Torvalds mm_release(tsk, mm); 3921da177e4SLinus Torvalds if (!mm) 3931da177e4SLinus Torvalds return; 3944fe7efdbSKonstantin Khlebnikov sync_mm_rss(mm); 3951da177e4SLinus Torvalds /* 3961da177e4SLinus Torvalds * Serialize with any possible pending coredump. 397999d9fc1SOleg Nesterov * We must hold mmap_sem around checking core_state 3981da177e4SLinus Torvalds * and clearing tsk->mm. The core-inducing thread 399999d9fc1SOleg Nesterov * will increment ->nr_threads for each thread in the 4001da177e4SLinus Torvalds * group with ->mm != NULL. 
4011da177e4SLinus Torvalds */ 4021da177e4SLinus Torvalds down_read(&mm->mmap_sem); 403b564daf8SOleg Nesterov core_state = mm->core_state; 404b564daf8SOleg Nesterov if (core_state) { 405b564daf8SOleg Nesterov struct core_thread self; 406a0be55deSIonut Alexa 4071da177e4SLinus Torvalds up_read(&mm->mmap_sem); 4081da177e4SLinus Torvalds 409b564daf8SOleg Nesterov self.task = tsk; 410b564daf8SOleg Nesterov self.next = xchg(&core_state->dumper.next, &self); 411b564daf8SOleg Nesterov /* 412b564daf8SOleg Nesterov * Implies mb(), the result of xchg() must be visible 413b564daf8SOleg Nesterov * to core_state->dumper. 414b564daf8SOleg Nesterov */ 415b564daf8SOleg Nesterov if (atomic_dec_and_test(&core_state->nr_threads)) 416b564daf8SOleg Nesterov complete(&core_state->startup); 4171da177e4SLinus Torvalds 418a94e2d40SOleg Nesterov for (;;) { 419a94e2d40SOleg Nesterov set_task_state(tsk, TASK_UNINTERRUPTIBLE); 420a94e2d40SOleg Nesterov if (!self.task) /* see coredump_finish() */ 421a94e2d40SOleg Nesterov break; 42280d26af8SMandeep Singh Baines freezable_schedule(); 423a94e2d40SOleg Nesterov } 424a94e2d40SOleg Nesterov __set_task_state(tsk, TASK_RUNNING); 4251da177e4SLinus Torvalds down_read(&mm->mmap_sem); 4261da177e4SLinus Torvalds } 4271da177e4SLinus Torvalds atomic_inc(&mm->mm_count); 428125e1874SEric Sesterhenn BUG_ON(mm != tsk->active_mm); 4291da177e4SLinus Torvalds /* more a memory barrier than a real lock */ 4301da177e4SLinus Torvalds task_lock(tsk); 4311da177e4SLinus Torvalds tsk->mm = NULL; 4321da177e4SLinus Torvalds up_read(&mm->mmap_sem); 4331da177e4SLinus Torvalds enter_lazy_tlb(mm, current); 4341da177e4SLinus Torvalds task_unlock(tsk); 435cf475ad2SBalbir Singh mm_update_next_owner(mm); 4361da177e4SLinus Torvalds mmput(mm); 437c32b3cbeSMichal Hocko if (test_thread_flag(TIF_MEMDIE)) 43836324a99SMichal Hocko exit_oom_victim(tsk); 4391da177e4SLinus Torvalds } 4401da177e4SLinus Torvalds 441c9dc05bfSOleg Nesterov static struct task_struct *find_alive_thread(struct 
task_struct *p) 442c9dc05bfSOleg Nesterov { 443c9dc05bfSOleg Nesterov struct task_struct *t; 444c9dc05bfSOleg Nesterov 445c9dc05bfSOleg Nesterov for_each_thread(p, t) { 446c9dc05bfSOleg Nesterov if (!(t->flags & PF_EXITING)) 447c9dc05bfSOleg Nesterov return t; 448c9dc05bfSOleg Nesterov } 449c9dc05bfSOleg Nesterov return NULL; 450c9dc05bfSOleg Nesterov } 451c9dc05bfSOleg Nesterov 4521109909cSOleg Nesterov static struct task_struct *find_child_reaper(struct task_struct *father) 4531109909cSOleg Nesterov __releases(&tasklist_lock) 4541109909cSOleg Nesterov __acquires(&tasklist_lock) 4551109909cSOleg Nesterov { 4561109909cSOleg Nesterov struct pid_namespace *pid_ns = task_active_pid_ns(father); 4571109909cSOleg Nesterov struct task_struct *reaper = pid_ns->child_reaper; 4581109909cSOleg Nesterov 4591109909cSOleg Nesterov if (likely(reaper != father)) 4601109909cSOleg Nesterov return reaper; 4611109909cSOleg Nesterov 462c9dc05bfSOleg Nesterov reaper = find_alive_thread(father); 463c9dc05bfSOleg Nesterov if (reaper) { 4641109909cSOleg Nesterov pid_ns->child_reaper = reaper; 4651109909cSOleg Nesterov return reaper; 4661109909cSOleg Nesterov } 4671109909cSOleg Nesterov 4681109909cSOleg Nesterov write_unlock_irq(&tasklist_lock); 4691109909cSOleg Nesterov if (unlikely(pid_ns == &init_pid_ns)) { 4701109909cSOleg Nesterov panic("Attempted to kill init! exitcode=0x%08x\n", 4711109909cSOleg Nesterov father->signal->group_exit_code ?: father->exit_code); 4721109909cSOleg Nesterov } 4731109909cSOleg Nesterov zap_pid_ns_processes(pid_ns); 4741109909cSOleg Nesterov write_lock_irq(&tasklist_lock); 4751109909cSOleg Nesterov 4761109909cSOleg Nesterov return father; 4771109909cSOleg Nesterov } 4781109909cSOleg Nesterov 4791da177e4SLinus Torvalds /* 480ebec18a6SLennart Poettering * When we die, we re-parent all our children, and try to: 481ebec18a6SLennart Poettering * 1. give them to another thread in our thread group, if such a member exists 482ebec18a6SLennart Poettering * 2. 
give it to the first ancestor process which prctl'd itself as a 483ebec18a6SLennart Poettering * child_subreaper for its children (like a service manager) 484ebec18a6SLennart Poettering * 3. give it to the init process (PID 1) in our pid namespace 4851da177e4SLinus Torvalds */ 4861109909cSOleg Nesterov static struct task_struct *find_new_reaper(struct task_struct *father, 4871109909cSOleg Nesterov struct task_struct *child_reaper) 488950bbabbSOleg Nesterov { 489c9dc05bfSOleg Nesterov struct task_struct *thread, *reaper; 490950bbabbSOleg Nesterov 491c9dc05bfSOleg Nesterov thread = find_alive_thread(father); 492c9dc05bfSOleg Nesterov if (thread) 493950bbabbSOleg Nesterov return thread; 494950bbabbSOleg Nesterov 4957d24e2dfSOleg Nesterov if (father->signal->has_child_subreaper) { 496ebec18a6SLennart Poettering /* 497175aed3fSOleg Nesterov * Find the first ->is_child_subreaper ancestor in our pid_ns. 498175aed3fSOleg Nesterov * We start from father to ensure we can not look into another 499175aed3fSOleg Nesterov * namespace, this is safe because all its threads are dead. 
500ebec18a6SLennart Poettering */ 5017d24e2dfSOleg Nesterov for (reaper = father; 5021109909cSOleg Nesterov !same_thread_group(reaper, child_reaper); 503ebec18a6SLennart Poettering reaper = reaper->real_parent) { 504175aed3fSOleg Nesterov /* call_usermodehelper() descendants need this check */ 505175aed3fSOleg Nesterov if (reaper == &init_task) 506ebec18a6SLennart Poettering break; 507ebec18a6SLennart Poettering if (!reaper->signal->is_child_subreaper) 508ebec18a6SLennart Poettering continue; 509c9dc05bfSOleg Nesterov thread = find_alive_thread(reaper); 510c9dc05bfSOleg Nesterov if (thread) 5118a1296aeSOleg Nesterov return thread; 5123750ef97SOleg Nesterov } 513ebec18a6SLennart Poettering } 514950bbabbSOleg Nesterov 5151109909cSOleg Nesterov return child_reaper; 516950bbabbSOleg Nesterov } 517950bbabbSOleg Nesterov 5185dfc80beSOleg Nesterov /* 5195dfc80beSOleg Nesterov * Any that need to be release_task'd are put on the @dead list. 5205dfc80beSOleg Nesterov */ 5219cd80bbbSOleg Nesterov static void reparent_leader(struct task_struct *father, struct task_struct *p, 5225dfc80beSOleg Nesterov struct list_head *dead) 5235dfc80beSOleg Nesterov { 5242831096eSOleg Nesterov if (unlikely(p->exit_state == EXIT_DEAD)) 5255dfc80beSOleg Nesterov return; 5265dfc80beSOleg Nesterov 527abd50b39SOleg Nesterov /* We don't want people slaying init. */ 5285dfc80beSOleg Nesterov p->exit_signal = SIGCHLD; 5295dfc80beSOleg Nesterov 5305dfc80beSOleg Nesterov /* If it has exited notify the new parent about this child's death. 
*/ 531d21142ecSTejun Heo if (!p->ptrace && 5325dfc80beSOleg Nesterov p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { 53386773473SOleg Nesterov if (do_notify_parent(p, p->exit_signal)) { 5345dfc80beSOleg Nesterov p->exit_state = EXIT_DEAD; 535dc2fd4b0SOleg Nesterov list_add(&p->ptrace_entry, dead); 5365dfc80beSOleg Nesterov } 5375dfc80beSOleg Nesterov } 5385dfc80beSOleg Nesterov 5395dfc80beSOleg Nesterov kill_orphaned_pgrp(p, father); 5405dfc80beSOleg Nesterov } 5415dfc80beSOleg Nesterov 542482a3767SOleg Nesterov /* 543482a3767SOleg Nesterov * This does two things: 544482a3767SOleg Nesterov * 545482a3767SOleg Nesterov * A. Make init inherit all the child processes 546482a3767SOleg Nesterov * B. Check to see if any process groups have become orphaned 547482a3767SOleg Nesterov * as a result of our exiting, and if they have any stopped 548482a3767SOleg Nesterov * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) 549482a3767SOleg Nesterov */ 550482a3767SOleg Nesterov static void forget_original_parent(struct task_struct *father, 551482a3767SOleg Nesterov struct list_head *dead) 5521da177e4SLinus Torvalds { 553482a3767SOleg Nesterov struct task_struct *p, *t, *reaper; 554762a24beSOleg Nesterov 5557c8bd232SOleg Nesterov if (unlikely(!list_empty(&father->ptraced))) 556482a3767SOleg Nesterov exit_ptrace(father, dead); 557f470021aSRoland McGrath 5587c8bd232SOleg Nesterov /* Can drop and reacquire tasklist_lock */ 5591109909cSOleg Nesterov reaper = find_child_reaper(father); 560ad9e206aSOleg Nesterov if (list_empty(&father->children)) 561482a3767SOleg Nesterov return; 5621109909cSOleg Nesterov 5631109909cSOleg Nesterov reaper = find_new_reaper(father, reaper); 5642831096eSOleg Nesterov list_for_each_entry(p, &father->children, sibling) { 56557a05918SOleg Nesterov for_each_thread(p, t) { 5669cd80bbbSOleg Nesterov t->real_parent = reaper; 56757a05918SOleg Nesterov BUG_ON((!t->ptrace) != (t->parent == father)); 56857a05918SOleg Nesterov if 
(likely(!t->ptrace)) 5699cd80bbbSOleg Nesterov t->parent = t->real_parent; 5709cd80bbbSOleg Nesterov if (t->pdeath_signal) 5719cd80bbbSOleg Nesterov group_send_sig_info(t->pdeath_signal, 5729cd80bbbSOleg Nesterov SEND_SIG_NOINFO, t); 57357a05918SOleg Nesterov } 5742831096eSOleg Nesterov /* 5752831096eSOleg Nesterov * If this is a threaded reparent there is no need to 5762831096eSOleg Nesterov * notify anyone anything has happened. 5772831096eSOleg Nesterov */ 5782831096eSOleg Nesterov if (!same_thread_group(reaper, father)) 579482a3767SOleg Nesterov reparent_leader(father, p, dead); 5801da177e4SLinus Torvalds } 5812831096eSOleg Nesterov list_splice_tail_init(&father->children, &reaper->children); 5821da177e4SLinus Torvalds } 5831da177e4SLinus Torvalds 5841da177e4SLinus Torvalds /* 5851da177e4SLinus Torvalds * Send signals to all our closest relatives so that they know 5861da177e4SLinus Torvalds * to properly mourn us.. 5871da177e4SLinus Torvalds */ 588821c7de7SOleg Nesterov static void exit_notify(struct task_struct *tsk, int group_dead) 5891da177e4SLinus Torvalds { 59053c8f9f1SOleg Nesterov bool autoreap; 591482a3767SOleg Nesterov struct task_struct *p, *n; 592482a3767SOleg Nesterov LIST_HEAD(dead); 5931da177e4SLinus Torvalds 594762a24beSOleg Nesterov write_lock_irq(&tasklist_lock); 595482a3767SOleg Nesterov forget_original_parent(tsk, &dead); 596482a3767SOleg Nesterov 597821c7de7SOleg Nesterov if (group_dead) 598821c7de7SOleg Nesterov kill_orphaned_pgrp(tsk->group_leader, NULL); 5991da177e4SLinus Torvalds 60045cdf5ccSOleg Nesterov if (unlikely(tsk->ptrace)) { 60145cdf5ccSOleg Nesterov int sig = thread_group_leader(tsk) && 60245cdf5ccSOleg Nesterov thread_group_empty(tsk) && 60345cdf5ccSOleg Nesterov !ptrace_reparented(tsk) ? 
60445cdf5ccSOleg Nesterov tsk->exit_signal : SIGCHLD; 60545cdf5ccSOleg Nesterov autoreap = do_notify_parent(tsk, sig); 60645cdf5ccSOleg Nesterov } else if (thread_group_leader(tsk)) { 60745cdf5ccSOleg Nesterov autoreap = thread_group_empty(tsk) && 60845cdf5ccSOleg Nesterov do_notify_parent(tsk, tsk->exit_signal); 60945cdf5ccSOleg Nesterov } else { 61045cdf5ccSOleg Nesterov autoreap = true; 61145cdf5ccSOleg Nesterov } 6121da177e4SLinus Torvalds 61353c8f9f1SOleg Nesterov tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; 6146c66e7dbSOleg Nesterov if (tsk->exit_state == EXIT_DEAD) 6156c66e7dbSOleg Nesterov list_add(&tsk->ptrace_entry, &dead); 6161da177e4SLinus Torvalds 6179c339168SOleg Nesterov /* mt-exec, de_thread() is waiting for group leader */ 6189c339168SOleg Nesterov if (unlikely(tsk->signal->notify_count < 0)) 6196db840faSOleg Nesterov wake_up_process(tsk->signal->group_exit_task); 6201da177e4SLinus Torvalds write_unlock_irq(&tasklist_lock); 6211da177e4SLinus Torvalds 622482a3767SOleg Nesterov list_for_each_entry_safe(p, n, &dead, ptrace_entry) { 623482a3767SOleg Nesterov list_del_init(&p->ptrace_entry); 624482a3767SOleg Nesterov release_task(p); 625482a3767SOleg Nesterov } 6261da177e4SLinus Torvalds } 6271da177e4SLinus Torvalds 628e18eecb8SJeff Dike #ifdef CONFIG_DEBUG_STACK_USAGE 629e18eecb8SJeff Dike static void check_stack_usage(void) 630e18eecb8SJeff Dike { 631e18eecb8SJeff Dike static DEFINE_SPINLOCK(low_water_lock); 632e18eecb8SJeff Dike static int lowest_to_date = THREAD_SIZE; 633e18eecb8SJeff Dike unsigned long free; 634e18eecb8SJeff Dike 6357c9f8861SEric Sandeen free = stack_not_used(current); 636e18eecb8SJeff Dike 637e18eecb8SJeff Dike if (free >= lowest_to_date) 638e18eecb8SJeff Dike return; 639e18eecb8SJeff Dike 640e18eecb8SJeff Dike spin_lock(&low_water_lock); 641e18eecb8SJeff Dike if (free < lowest_to_date) { 642a0be55deSIonut Alexa pr_warn("%s (%d) used greatest stack depth: %lu bytes left\n", 643168eeccbSTim Bird current->comm, 
task_pid_nr(current), free);
		lowest_to_date = free;
	}
	spin_unlock(&low_water_lock);
}
#else
static inline void check_stack_usage(void) {}
#endif

/*
 * do_exit - terminate the current task.
 * @code: exit code, propagated to the parent via wait*().
 *
 * Tears down every per-task resource (mm, files, fs, namespaces,
 * IPC, timers, cgroup/perf state), notifies relatives via
 * exit_notify(), and finally schedules away in TASK_DEAD state so
 * that finish_task_switch() drops the last task_struct reference.
 * Never returns.
 */
void do_exit(long code)
{
	struct task_struct *tsk = current;
	int group_dead;
	TASKS_RCU(int tasks_rcu_i);

	profile_task_exit(tsk);
	kcov_task_exit(tsk);

	WARN_ON(blk_needs_flush_plug(tsk));

	/* exiting from IRQ context or as the idle task is fatal */
	if (unlikely(in_interrupt()))
		panic("Aiee, killing interrupt handler!");
	if (unlikely(!tsk->pid))
		panic("Attempted to kill the idle task!");

	/*
	 * If do_exit is called because this process oopsed, it's possible
	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
	 * continuing. Amongst other possible reasons, this is to prevent
	 * mm_release()->clear_child_tid() from writing to a user-controlled
	 * kernel address.
	 */
	set_fs(USER_DS);

	ptrace_event(PTRACE_EVENT_EXIT, code);

	validate_creds_for_do_exit(tsk);

	/*
	 * We're taking recursive faults here in do_exit. Safest is to just
	 * leave this task alone and wait for reboot.
	 */
	if (unlikely(tsk->flags & PF_EXITING)) {
		pr_alert("Fixing recursive fault but reboot is needed!\n");
		/*
		 * We can do this unlocked here. The futex code uses
		 * this flag just to verify whether the pi state
		 * cleanup has been done or not. In the worst case it
		 * loops once more. We pretend that the cleanup was
		 * done as there is no way to return. Either the
		 * OWNER_DIED bit is set by now or we push the blocked
		 * task into the forever-wait nirvana as well.
		 */
		tsk->flags |= PF_EXITPIDONE;
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule();
	}

	exit_signals(tsk);  /* sets PF_EXITING */
	/*
	 * tsk->flags are checked in the futex code to protect against
	 * an exiting task cleaning up the robust pi futexes.
	 */
	smp_mb();
	raw_spin_unlock_wait(&tsk->pi_lock);

	if (unlikely(in_atomic())) {
		pr_info("note: %s[%d] exited with preempt_count %d\n",
			current->comm, task_pid_nr(current),
			preempt_count());
		preempt_count_set(PREEMPT_ENABLED);
	}

	/* sync mm's RSS info before statistics gathering */
	if (tsk->mm)
		sync_mm_rss(tsk->mm);
	acct_update_integrals(tsk);
	/* group_dead: we are the last live thread of the group */
	group_dead = atomic_dec_and_test(&tsk->signal->live);
	if (group_dead) {
		hrtimer_cancel(&tsk->signal->real_timer);
		exit_itimers(tsk->signal);
		if (tsk->mm)
			setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
	}
	acct_collect(code, group_dead);
	if (group_dead)
		tty_audit_exit();
	audit_free(tsk);

	tsk->exit_code = code;
	taskstats_exit(tsk, group_dead);

	exit_mm(tsk);

	if (group_dead)
		acct_process();
	trace_sched_process_exit(tsk);

	/* release IPC, file-table, fs and tty resources */
	exit_sem(tsk);
	exit_shm(tsk);
	exit_files(tsk);
	exit_fs(tsk);
	if (group_dead)
		disassociate_ctty(1);
	exit_task_namespaces(tsk);
	exit_task_work(tsk);
	exit_thread(tsk);

	/*
	 * Flush inherited counters to the parent - before the parent
	 * gets woken up by child-exit notifications.
	 *
	 * because of cgroup mode, must be called before cgroup_exit()
	 */
	perf_event_exit_task(tsk);

	cgroup_exit(tsk);

	/*
	 * FIXME: do that only when needed, using sched_exit tracepoint
	 */
	flush_ptrace_hw_breakpoint(tsk);

	TASKS_RCU(preempt_disable());
	TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
	TASKS_RCU(preempt_enable());
	exit_notify(tsk, group_dead);
	proc_exit_connector(tsk);
#ifdef CONFIG_NUMA
	task_lock(tsk);
	mpol_put(tsk->mempolicy);
	tsk->mempolicy = NULL;
	task_unlock(tsk);
#endif
#ifdef CONFIG_FUTEX
	if (unlikely(current->pi_state_cache))
		kfree(current->pi_state_cache);
#endif
	/*
	 * Make sure we are holding no locks:
	 */
	debug_check_no_locks_held();
	/*
	 * We can do this unlocked here. The futex code uses this flag
	 * just to verify whether the pi state cleanup has been done
	 * or not. In the worst case it loops once more.
	 */
	tsk->flags |= PF_EXITPIDONE;

	if (tsk->io_context)
		exit_io_context(tsk);

	if (tsk->splice_pipe)
		free_pipe_info(tsk->splice_pipe);

	if (tsk->task_frag.page)
		put_page(tsk->task_frag.page);

	validate_creds_for_do_exit(tsk);

	check_stack_usage();
	preempt_disable();
	if (tsk->nr_dirtied)
		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
	exit_rcu();
	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));

	/*
	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
	 * when the following two conditions become true.
	 *   - There is race condition of mmap_sem (It is acquired by
	 *     exit_mm()), and
	 *   - SMI occurs before setting TASK_RUNNING.
	 *     (or hypervisor of virtual machine switches to other guest)
	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
	 *
	 * To avoid it, we have to wait for releasing tsk->pi_lock which
	 * is held by try_to_wake_up()
	 */
	smp_mb();
	raw_spin_unlock_wait(&tsk->pi_lock);

	/* causes final put_task_struct in finish_task_switch(). */
	tsk->state = TASK_DEAD;
	tsk->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */
	schedule();
	BUG();
	/* Avoid "noreturn function does return". */
	for (;;)
		cpu_relax();	/* For when BUG is null */
}
EXPORT_SYMBOL_GPL(do_exit);

/*
 * Signal @comp (if non-NULL) and then exit with @code.  Used by kernel
 * threads that must tell a waiter they are gone before terminating.
 */
void complete_and_exit(struct completion *comp, long code)
{
	if (comp)
		complete(comp);

	do_exit(code);
}
EXPORT_SYMBOL(complete_and_exit);

/*
 * sys_exit: terminate only the calling thread.  The low byte of
 * error_code is shifted into wait*()'s "exited" status format.
 */
SYSCALL_DEFINE1(exit, int, error_code)
{
	do_exit((error_code&0xff)<<8);
}

/*
 * Take down every thread in the group.  This is called by fatal signals
 * as well as by sys_exit_group (below).
 */
void
do_group_exit(int exit_code)
{
	struct signal_struct *sig = current->signal;

	BUG_ON(exit_code & 0x80); /* core dumps don't get here */

	/* if a group exit is already in progress, adopt its exit code */
	if (signal_group_exit(sig))
		exit_code = sig->group_exit_code;
	else if (!thread_group_empty(current)) {
		struct sighand_struct *const sighand = current->sighand;

		spin_lock_irq(&sighand->siglock);
		if (signal_group_exit(sig))
			/* Another thread got here before we took the lock.  */
			exit_code = sig->group_exit_code;
		else {
			/* we start the group exit: record it and kill peers */
			sig->group_exit_code = exit_code;
			sig->flags = SIGNAL_GROUP_EXIT;
			zap_other_threads(current);
		}
		spin_unlock_irq(&sighand->siglock);
	}

	do_exit(exit_code);
	/* NOTREACHED */
}

/*
 * this kills every thread in the thread group. Note that any externally
 * wait4()-ing process will get the correct exit code - even if this
 * thread is not the thread group leader.
 */
SYSCALL_DEFINE1(exit_group, int, error_code)
{
	do_group_exit((error_code & 0xff) << 8);
	/* NOTREACHED */
	return 0;
}

/*
 * Options for one wait*() invocation, threaded through the do_wait()
 * helpers below.
 */
struct wait_opts {
	enum pid_type		wo_type;	/* PIDTYPE_MAX == wait for any */
	int			wo_flags;	/* W* flags from the caller */
	struct pid		*wo_pid;	/* pid to match (per wo_type) */

	struct siginfo __user	*wo_info;	/* waitid() siginfo, or NULL */
	int __user		*wo_stat;	/* wait4() status word, or NULL */
	struct rusage __user	*wo_rusage;	/* rusage destination, or NULL */

	wait_queue_t		child_wait;	/* entry on parent's wait queue */
	int			notask_error;	/* -ECHILD until an eligible
						 * child is found */
};

/*
 * Return the struct pid of @task for @type.  For non-PID types the id
 * lives on the group leader, so redirect there first.
 */
static inline
struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
{
	if (type != PIDTYPE_PID)
		task = task->group_leader;
	return task->pids[type].pid;
}
/*
 * Does @p match the pid this wait is looking for?
 * PIDTYPE_MAX means "wait for any child".
 */
static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
{
	return	wo->wo_type == PIDTYPE_MAX ||
		task_pid_type(p, wo->wo_type) == wo->wo_pid;
}

/*
 * Full eligibility check for @p: pid match plus the __WALL/__WCLONE
 * rules.  @ptrace is true when we are walking the ptraced-children list.
 */
static int
eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p)
{
	if (!eligible_pid(wo, p))
		return 0;

	/*
	 * Wait for all children (clone and not) if __WALL is set or
	 * if it is traced by us.
	 */
	if (ptrace || (wo->wo_flags & __WALL))
		return 1;

	/*
	 * Otherwise, wait for clone children *only* if __WCLONE is set;
	 * otherwise, wait for non-clone children *only*.
	 *
	 * Note: a "clone" child here is one that reports to its parent
	 * using a signal other than SIGCHLD, or a non-leader thread which
	 * we can only see if it is traced by us.
	 */
	if ((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
		return 0;

	return 1;
}

/*
 * Fill in the caller's rusage and siginfo for a WNOWAIT-style report on
 * @p (which is not reaped).  Consumes the reference on @p.  Returns the
 * pid on success or a -errno from the user-copy helpers.
 */
static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
				pid_t pid, uid_t uid, int why, int status)
{
	struct siginfo __user *infop;
	int retval = wo->wo_rusage
		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

	put_task_struct(p);
	infop = wo->wo_info;
	if (infop) {
		if (!retval)
			retval = put_user(SIGCHLD, &infop->si_signo);
		if (!retval)
			retval = put_user(0, &infop->si_errno);
		if (!retval)
			retval = put_user((short)why, &infop->si_code);
		if (!retval)
			retval = put_user(pid, &infop->si_pid);
		if (!retval)
			retval = put_user(uid, &infop->si_uid);
		if (!retval)
			retval = put_user(status, &infop->si_status);
	}
	if (!retval)
		retval = pid;
	return retval;
}

/*
 * Handle sys_wait4 work for one task in state EXIT_ZOMBIE.  We hold
 * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
 * the lock and this task is uninteresting.
If we return nonzero, we have
 * released the lock and the system call should return.
 */
static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
{
	int state, retval, status;
	pid_t pid = task_pid_vnr(p);
	uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
	struct siginfo __user *infop;

	/* zombies are only interesting to WEXITED waiters */
	if (!likely(wo->wo_flags & WEXITED))
		return 0;

	if (unlikely(wo->wo_flags & WNOWAIT)) {
		/* report the status but leave the zombie for a later wait */
		int exit_code = p->exit_code;
		int why;

		get_task_struct(p);
		read_unlock(&tasklist_lock);
		sched_annotate_sleep();

		if ((exit_code & 0x7f) == 0) {
			why = CLD_EXITED;
			status = exit_code >> 8;
		} else {
			why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
			status = exit_code & 0x7f;
		}
		return wait_noreap_copyout(wo, p, pid, uid, why, status);
	}
	/*
	 * Move the task's state to DEAD/TRACE, only one thread can do this.
	 */
	state = (ptrace_reparented(p) && thread_group_leader(p)) ?
		EXIT_TRACE : EXIT_DEAD;
	if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
		return 0;
	/*
	 * We own this thread, nobody else can reap it.
	 */
	read_unlock(&tasklist_lock);
	sched_annotate_sleep();

	/*
	 * Check thread_group_leader() to exclude the traced sub-threads.
	 */
	if (state == EXIT_DEAD && thread_group_leader(p)) {
		struct signal_struct *sig = p->signal;
		struct signal_struct *psig = current->signal;
		unsigned long maxrss;
		cputime_t tgutime, tgstime;

		/*
		 * The resource counters for the group leader are in its
		 * own task_struct.  Those for dead threads in the group
		 * are in its signal_struct, as are those for the child
		 * processes it has previously reaped.  All these
		 * accumulate in the parent's signal_struct c* fields.
		 *
		 * We don't bother to take a lock here to protect these
		 * p->signal fields because the whole thread group is dead
		 * and nobody can change them.
		 *
		 * psig->stats_lock also protects us from our sub-threads
		 * which can reap other children at the same time. Until
		 * we change k_getrusage()-like users to rely on this lock
		 * we have to take ->siglock as well.
		 *
		 * We use thread_group_cputime_adjusted() to get times for
		 * the thread group, which consolidates times for all threads
		 * in the group including the group leader.
		 */
		thread_group_cputime_adjusted(p, &tgutime, &tgstime);
		spin_lock_irq(&current->sighand->siglock);
		write_seqlock(&psig->stats_lock);
		psig->cutime += tgutime + sig->cutime;
		psig->cstime += tgstime + sig->cstime;
		psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
		psig->cmin_flt +=
			p->min_flt + sig->min_flt + sig->cmin_flt;
		psig->cmaj_flt +=
			p->maj_flt + sig->maj_flt + sig->cmaj_flt;
		psig->cnvcsw +=
			p->nvcsw + sig->nvcsw + sig->cnvcsw;
		psig->cnivcsw +=
			p->nivcsw + sig->nivcsw + sig->cnivcsw;
		psig->cinblock +=
			task_io_get_inblock(p) +
			sig->inblock + sig->cinblock;
		psig->coublock +=
			task_io_get_oublock(p) +
			sig->oublock + sig->coublock;
		maxrss = max(sig->maxrss, sig->cmaxrss);
		if (psig->cmaxrss < maxrss)
			psig->cmaxrss = maxrss;
		task_io_accounting_add(&psig->ioac, &p->ioac);
		task_io_accounting_add(&psig->ioac, &sig->ioac);
		write_sequnlock(&psig->stats_lock);
		spin_unlock_irq(&current->sighand->siglock);
	}

	/* copy status/rusage/siginfo out to userspace */
	retval = wo->wo_rusage
		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
		? p->signal->group_exit_code : p->exit_code;
	if (!retval && wo->wo_stat)
		retval = put_user(status, wo->wo_stat);

	infop = wo->wo_info;
	if (!retval && infop)
		retval = put_user(SIGCHLD, &infop->si_signo);
	if (!retval && infop)
		retval = put_user(0, &infop->si_errno);
	if (!retval && infop) {
		int why;

		if ((status & 0x7f) == 0) {
			why = CLD_EXITED;
			status >>= 8;
		} else {
			why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
			status &= 0x7f;
		}
		retval = put_user((short)why, &infop->si_code);
		if (!retval)
			retval = put_user(status, &infop->si_status);
	}
	if (!retval && infop)
		retval = put_user(pid, &infop->si_pid);
	if (!retval && infop)
		retval = put_user(uid, &infop->si_uid);
	if (!retval)
		retval = pid;

	if (state == EXIT_TRACE) {
		write_lock_irq(&tasklist_lock);
		/* We dropped tasklist, ptracer could die and untrace */
		ptrace_unlink(p);

		/* If parent wants a zombie, don't release it now */
		state = EXIT_ZOMBIE;
		if (do_notify_parent(p, p->exit_signal))
			state = EXIT_DEAD;
		p->exit_state = state;
		write_unlock_irq(&tasklist_lock);
	}
	if (state == EXIT_DEAD)
		release_task(p);

	return retval;
}

/*
 * If @p is stopped in a way this waiter cares about, return a pointer to
 * the relevant exit-code word (ptrace-stop or group-stop), else NULL.
 */
static int *task_stopped_code(struct task_struct *p, bool ptrace)
{
	if (ptrace) {
		if (task_is_traced(p) && !(p->jobctl & JOBCTL_LISTENING))
			return &p->exit_code;
	} else {
		if (p->signal->flags & SIGNAL_STOP_STOPPED)
			return
&p->signal->group_exit_code;
	}
	return NULL;
}

/**
 * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED
 * @wo: wait options
 * @ptrace: is the wait for ptrace
 * @p: task to wait for
 *
 * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.
 *
 * CONTEXT:
 * read_lock(&tasklist_lock), which is released if return value is
 * non-zero.  Also, grabs and releases @p->sighand->siglock.
 *
 * RETURNS:
 * 0 if wait condition didn't exist and search for other wait conditions
 * should continue.  Non-zero return, -errno on failure and @p's pid on
 * success, implies that tasklist_lock is released and wait condition
 * search should terminate.
 */
static int wait_task_stopped(struct wait_opts *wo,
				int ptrace, struct task_struct *p)
{
	struct siginfo __user *infop;
	int retval, exit_code, *p_code, why;
	uid_t uid = 0; /* unneeded, required by compiler */
	pid_t pid;

	/*
	 * Traditionally we see ptrace'd stopped tasks regardless of options.
	 */
	if (!ptrace && !(wo->wo_flags & WUNTRACED))
		return 0;

	/* cheap unlocked pre-check before taking siglock */
	if (!task_stopped_code(p, ptrace))
		return 0;

	exit_code = 0;
	spin_lock_irq(&p->sighand->siglock);

	/* re-evaluate under the lock; the stop may have been resumed */
	p_code = task_stopped_code(p, ptrace);
	if (unlikely(!p_code))
		goto unlock_sig;

	exit_code = *p_code;
	if (!exit_code)
		goto unlock_sig;

	/* consume the stop code unless the caller asked for WNOWAIT */
	if (!unlikely(wo->wo_flags & WNOWAIT))
		*p_code = 0;

	uid = from_kuid_munged(current_user_ns(), task_uid(p));
unlock_sig:
	spin_unlock_irq(&p->sighand->siglock);
	if (!exit_code)
		return 0;

	/*
	 * Now we are pretty sure this task is interesting.
	 * Make sure it doesn't get reaped out from under us while we
	 * give up the lock and then examine it below.  We don't want to
	 * keep holding onto the tasklist_lock while we call getrusage and
	 * possibly take page faults for user memory.
	 */
	get_task_struct(p);
	pid = task_pid_vnr(p);
	why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
	read_unlock(&tasklist_lock);
	sched_annotate_sleep();

	if (unlikely(wo->wo_flags & WNOWAIT))
		return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);

	/* copy status/rusage/siginfo out to userspace */
	retval = wo->wo_rusage
		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
	if (!retval && wo->wo_stat)
		retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);

	infop = wo->wo_info;
	if (!retval && infop)
		retval = put_user(SIGCHLD, &infop->si_signo);
	if (!retval && infop)
		retval = put_user(0, &infop->si_errno);
	if (!retval && infop)
		retval = put_user((short)why, &infop->si_code);
	if (!retval && infop)
		retval = put_user(exit_code, &infop->si_status);
	if (!retval && infop)
		retval = put_user(pid, &infop->si_pid);
	if (!retval && infop)
		retval = put_user(uid, &infop->si_uid);
	if (!retval)
		retval = pid;
	put_task_struct(p);

	BUG_ON(!retval);
	return retval;
}

/*
 * Handle do_wait work for one task in a live, non-stopped state.
 * read_lock(&tasklist_lock) on entry.
If we return zero, we still hold
 * the lock and this task is uninteresting.  If we return nonzero, we have
 * released the lock and the system call should return.
 */
static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
{
	int retval;
	pid_t pid;
	uid_t uid;

	/* only WCONTINUED waiters care about SIGCONT resumption */
	if (!unlikely(wo->wo_flags & WCONTINUED))
		return 0;

	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
		return 0;

	spin_lock_irq(&p->sighand->siglock);
	/* Re-check with the lock held.  */
	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
		spin_unlock_irq(&p->sighand->siglock);
		return 0;
	}
	/* consume the CONTINUED event unless the caller asked for WNOWAIT */
	if (!unlikely(wo->wo_flags & WNOWAIT))
		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
	uid = from_kuid_munged(current_user_ns(), task_uid(p));
	spin_unlock_irq(&p->sighand->siglock);

	pid = task_pid_vnr(p);
	get_task_struct(p);
	read_unlock(&tasklist_lock);
	sched_annotate_sleep();

	if (!wo->wo_info) {
		/* wait4()-style report: 0xffff is the "continued" status */
		retval = wo->wo_rusage
			? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
		put_task_struct(p);
		if (!retval && wo->wo_stat)
			retval = put_user(0xffff, wo->wo_stat);
		if (!retval)
			retval = pid;
	} else {
		/* waitid()-style report via siginfo */
		retval = wait_noreap_copyout(wo, p, pid, uid,
					     CLD_CONTINUED, SIGCONT);
		BUG_ON(retval == 0);
	}

	return retval;
}

/*
 * Consider @p for a wait by @parent.
 *
 * -ECHILD should be in ->notask_error before the first call.
 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
 * Returns zero if the search for a child should continue;
 * then ->notask_error is 0 if @p is an eligible child,
 * or another error from security_task_wait(), or still -ECHILD.
 */
static int wait_consider_task(struct wait_opts *wo, int ptrace,
				struct task_struct *p)
{
	/*
	 * We can race with wait_task_zombie() from another thread.
	 * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition
	 * can't confuse the checks below.
13073245d6acSOleg Nesterov */ 13083245d6acSOleg Nesterov int exit_state = ACCESS_ONCE(p->exit_state); 1309b3ab0316SOleg Nesterov int ret; 1310b3ab0316SOleg Nesterov 13113245d6acSOleg Nesterov if (unlikely(exit_state == EXIT_DEAD)) 1312b3ab0316SOleg Nesterov return 0; 1313b3ab0316SOleg Nesterov 1314bf959931SOleg Nesterov ret = eligible_child(wo, ptrace, p); 131514dd0b81SRoland McGrath if (!ret) 131698abed02SRoland McGrath return ret; 131798abed02SRoland McGrath 1318a2322e1dSOleg Nesterov ret = security_task_wait(p); 131914dd0b81SRoland McGrath if (unlikely(ret < 0)) { 132014dd0b81SRoland McGrath /* 132114dd0b81SRoland McGrath * If we have not yet seen any eligible child, 132214dd0b81SRoland McGrath * then let this error code replace -ECHILD. 132314dd0b81SRoland McGrath * A permission error will give the user a clue 132414dd0b81SRoland McGrath * to look for security policy problems, rather 132514dd0b81SRoland McGrath * than for mysterious wait bugs. 132614dd0b81SRoland McGrath */ 13279e8ae01dSOleg Nesterov if (wo->notask_error) 13289e8ae01dSOleg Nesterov wo->notask_error = ret; 132978a3d9d5SOleg Nesterov return 0; 133014dd0b81SRoland McGrath } 133114dd0b81SRoland McGrath 13323245d6acSOleg Nesterov if (unlikely(exit_state == EXIT_TRACE)) { 133350b8d257SOleg Nesterov /* 1334abd50b39SOleg Nesterov * ptrace == 0 means we are the natural parent. In this case 1335abd50b39SOleg Nesterov * we should clear notask_error, debugger will notify us. 133650b8d257SOleg Nesterov */ 1337abd50b39SOleg Nesterov if (likely(!ptrace)) 133850b8d257SOleg Nesterov wo->notask_error = 0; 1339823b018eSTejun Heo return 0; 134050b8d257SOleg Nesterov } 1341823b018eSTejun Heo 1342377d75daSOleg Nesterov if (likely(!ptrace) && unlikely(p->ptrace)) { 1343377d75daSOleg Nesterov /* 1344377d75daSOleg Nesterov * If it is traced by its real parent's group, just pretend 1345377d75daSOleg Nesterov * the caller is ptrace_do_wait() and reap this child if it 1346377d75daSOleg Nesterov * is zombie. 
1347377d75daSOleg Nesterov * 1348377d75daSOleg Nesterov * This also hides group stop state from real parent; otherwise 1349377d75daSOleg Nesterov * a single stop can be reported twice as group and ptrace stop. 1350377d75daSOleg Nesterov * If a ptracer wants to distinguish these two events for its 1351377d75daSOleg Nesterov * own children it should create a separate process which takes 1352377d75daSOleg Nesterov * the role of real parent. 1353377d75daSOleg Nesterov */ 1354377d75daSOleg Nesterov if (!ptrace_reparented(p)) 1355377d75daSOleg Nesterov ptrace = 1; 1356377d75daSOleg Nesterov } 1357377d75daSOleg Nesterov 135845cb24a1STejun Heo /* slay zombie? */ 13593245d6acSOleg Nesterov if (exit_state == EXIT_ZOMBIE) { 13607c733eb3SOleg Nesterov /* we don't reap group leaders with subthreads */ 13617c733eb3SOleg Nesterov if (!delay_group_leader(p)) { 1362f470021aSRoland McGrath /* 136345cb24a1STejun Heo * A zombie ptracee is only visible to its ptracer. 13647c733eb3SOleg Nesterov * Notification and reaping will be cascaded to the 13657c733eb3SOleg Nesterov * real parent when the ptracer detaches. 1366f470021aSRoland McGrath */ 13677c733eb3SOleg Nesterov if (unlikely(ptrace) || likely(!p->ptrace)) 13689e8ae01dSOleg Nesterov return wait_task_zombie(wo, p); 13697c733eb3SOleg Nesterov } 137098abed02SRoland McGrath 137198abed02SRoland McGrath /* 13729b84cca2STejun Heo * Allow access to stopped/continued state via zombie by 13739b84cca2STejun Heo * falling through. Clearing of notask_error is complex. 13749b84cca2STejun Heo * 13759b84cca2STejun Heo * When !@ptrace: 13769b84cca2STejun Heo * 13779b84cca2STejun Heo * If WEXITED is set, notask_error should naturally be 13789b84cca2STejun Heo * cleared. If not, subset of WSTOPPED|WCONTINUED is set, 13799b84cca2STejun Heo * so, if there are live subthreads, there are events to 13809b84cca2STejun Heo * wait for. 
If all subthreads are dead, it's still safe 13819b84cca2STejun Heo * to clear - this function will be called again in finite 13829b84cca2STejun Heo * amount time once all the subthreads are released and 13839b84cca2STejun Heo * will then return without clearing. 13849b84cca2STejun Heo * 13859b84cca2STejun Heo * When @ptrace: 13869b84cca2STejun Heo * 13879b84cca2STejun Heo * Stopped state is per-task and thus can't change once the 13889b84cca2STejun Heo * target task dies. Only continued and exited can happen. 13899b84cca2STejun Heo * Clear notask_error if WCONTINUED | WEXITED. 13909b84cca2STejun Heo */ 13919b84cca2STejun Heo if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED))) 13929b84cca2STejun Heo wo->notask_error = 0; 13939b84cca2STejun Heo } else { 13949b84cca2STejun Heo /* 13959b84cca2STejun Heo * @p is alive and it's gonna stop, continue or exit, so 13969b84cca2STejun Heo * there always is something to wait for. 139798abed02SRoland McGrath */ 13989e8ae01dSOleg Nesterov wo->notask_error = 0; 13999b84cca2STejun Heo } 140098abed02SRoland McGrath 140145cb24a1STejun Heo /* 140245cb24a1STejun Heo * Wait for stopped. Depending on @ptrace, different stopped state 140345cb24a1STejun Heo * is used and the two don't interact with each other. 140445cb24a1STejun Heo */ 140519e27463STejun Heo ret = wait_task_stopped(wo, ptrace, p); 140619e27463STejun Heo if (ret) 140719e27463STejun Heo return ret; 140898abed02SRoland McGrath 140945cb24a1STejun Heo /* 141045cb24a1STejun Heo * Wait for continued. There's only one continued state and the 141145cb24a1STejun Heo * ptracer can consume it which can confuse the real parent. Don't 141245cb24a1STejun Heo * use WCONTINUED from ptracer. You don't need or want it. 
141345cb24a1STejun Heo */ 14149e8ae01dSOleg Nesterov return wait_task_continued(wo, p); 141598abed02SRoland McGrath } 141698abed02SRoland McGrath 141798abed02SRoland McGrath /* 141898abed02SRoland McGrath * Do the work of do_wait() for one thread in the group, @tsk. 141998abed02SRoland McGrath * 14209e8ae01dSOleg Nesterov * -ECHILD should be in ->notask_error before the first call. 142198abed02SRoland McGrath * Returns nonzero for a final return, when we have unlocked tasklist_lock. 142298abed02SRoland McGrath * Returns zero if the search for a child should continue; then 14239e8ae01dSOleg Nesterov * ->notask_error is 0 if there were any eligible children, 142414dd0b81SRoland McGrath * or another error from security_task_wait(), or still -ECHILD. 142598abed02SRoland McGrath */ 14269e8ae01dSOleg Nesterov static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) 142798abed02SRoland McGrath { 142898abed02SRoland McGrath struct task_struct *p; 142998abed02SRoland McGrath 143098abed02SRoland McGrath list_for_each_entry(p, &tsk->children, sibling) { 1431b6e763f0SOleg Nesterov int ret = wait_consider_task(wo, 0, p); 1432a0be55deSIonut Alexa 143398abed02SRoland McGrath if (ret) 143498abed02SRoland McGrath return ret; 143598abed02SRoland McGrath } 143698abed02SRoland McGrath 143798abed02SRoland McGrath return 0; 143898abed02SRoland McGrath } 143998abed02SRoland McGrath 14409e8ae01dSOleg Nesterov static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) 144198abed02SRoland McGrath { 144298abed02SRoland McGrath struct task_struct *p; 144398abed02SRoland McGrath 1444f470021aSRoland McGrath list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { 1445b6e763f0SOleg Nesterov int ret = wait_consider_task(wo, 1, p); 1446a0be55deSIonut Alexa 1447f470021aSRoland McGrath if (ret) 144898abed02SRoland McGrath return ret; 144998abed02SRoland McGrath } 145098abed02SRoland McGrath 145198abed02SRoland McGrath return 0; 145298abed02SRoland McGrath } 
/*
 * Wake-function installed on ->signal->wait_chldexit.  Filters wakeups so
 * a waiter is only woken for a child (@key) that could actually satisfy
 * its wait criteria, instead of on every child-exit in the group.
 */
static int child_wait_callback(wait_queue_t *wait, unsigned mode,
				int sync, void *key)
{
	struct wait_opts *wo = container_of(wait, struct wait_opts,
						child_wait);
	struct task_struct *p = key;

	/* Child doesn't match the pid/type this waiter asked for. */
	if (!eligible_pid(wo, p))
		return 0;

	/* __WNOTHREAD: only the child's own parent thread may be woken. */
	if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)
		return 0;

	return default_wake_function(wait, mode, sync, key);
}

/* Wake up @parent's waiters (if any) for a state change of child @p. */
void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
{
	__wake_up_sync_key(&parent->signal->wait_chldexit,
				TASK_INTERRUPTIBLE, 1, p);
}

/*
 * Core of the wait*() family: scan our children (and ptracees) for a task
 * matching @wo, sleeping on ->wait_chldexit between scans until one shows
 * up, WNOHANG applies, or a signal interrupts us.
 */
static long do_wait(struct wait_opts *wo)
{
	struct task_struct *tsk;
	int retval;

	trace_sched_process_wait(wo->wo_pid);

	/* Register ourselves so __wake_up_parent() can find/filter us. */
	init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
	wo->child_wait.private = current;
	add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
repeat:
	/*
	 * If there is nothing that can match our criteria, just get out.
	 * We will clear ->notask_error to zero if we see any child that
	 * might later match our criteria, even if we are not able to reap
	 * it yet.
	 */
	wo->notask_error = -ECHILD;
	if ((wo->wo_type < PIDTYPE_MAX) &&
	   (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
		goto notask;

	/*
	 * Set state before scanning: a wakeup between the scan and
	 * schedule() then just makes schedule() return immediately.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		retval = do_wait_thread(wo, tsk);
		if (retval)
			goto end;

		retval = ptrace_do_wait(wo, tsk);
		if (retval)
			goto end;

		/* __WNOTHREAD: only look at the caller's own children. */
		if (wo->wo_flags & __WNOTHREAD)
			break;
	} while_each_thread(current, tsk);
	read_unlock(&tasklist_lock);

notask:
	retval = wo->notask_error;
	if (!retval && !(wo->wo_flags & WNOHANG)) {
		retval = -ERESTARTSYS;
		if (!signal_pending(current)) {
			schedule();
			goto repeat;
		}
	}
end:
	/* NOTE: on the "goto end" paths tasklist_lock was dropped by the callee. */
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
	return retval;
}
153017da2bd9SHeiko Carstens SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, 153117da2bd9SHeiko Carstens infop, int, options, struct rusage __user *, ru) 15321da177e4SLinus Torvalds { 15339e8ae01dSOleg Nesterov struct wait_opts wo; 1534161550d7SEric W. Biederman struct pid *pid = NULL; 1535161550d7SEric W. Biederman enum pid_type type; 15361da177e4SLinus Torvalds long ret; 15371da177e4SLinus Torvalds 1538*91c4e8eaSOleg Nesterov if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED| 1539*91c4e8eaSOleg Nesterov __WNOTHREAD|__WCLONE|__WALL)) 15401da177e4SLinus Torvalds return -EINVAL; 15411da177e4SLinus Torvalds if (!(options & (WEXITED|WSTOPPED|WCONTINUED))) 15421da177e4SLinus Torvalds return -EINVAL; 15431da177e4SLinus Torvalds 15441da177e4SLinus Torvalds switch (which) { 15451da177e4SLinus Torvalds case P_ALL: 1546161550d7SEric W. Biederman type = PIDTYPE_MAX; 15471da177e4SLinus Torvalds break; 15481da177e4SLinus Torvalds case P_PID: 1549161550d7SEric W. Biederman type = PIDTYPE_PID; 1550161550d7SEric W. Biederman if (upid <= 0) 15511da177e4SLinus Torvalds return -EINVAL; 15521da177e4SLinus Torvalds break; 15531da177e4SLinus Torvalds case P_PGID: 1554161550d7SEric W. Biederman type = PIDTYPE_PGID; 1555161550d7SEric W. Biederman if (upid <= 0) 15561da177e4SLinus Torvalds return -EINVAL; 15571da177e4SLinus Torvalds break; 15581da177e4SLinus Torvalds default: 15591da177e4SLinus Torvalds return -EINVAL; 15601da177e4SLinus Torvalds } 15611da177e4SLinus Torvalds 1562161550d7SEric W. Biederman if (type < PIDTYPE_MAX) 1563161550d7SEric W. 
Biederman pid = find_get_pid(upid); 15649e8ae01dSOleg Nesterov 15659e8ae01dSOleg Nesterov wo.wo_type = type; 15669e8ae01dSOleg Nesterov wo.wo_pid = pid; 15679e8ae01dSOleg Nesterov wo.wo_flags = options; 15689e8ae01dSOleg Nesterov wo.wo_info = infop; 15699e8ae01dSOleg Nesterov wo.wo_stat = NULL; 15709e8ae01dSOleg Nesterov wo.wo_rusage = ru; 15719e8ae01dSOleg Nesterov ret = do_wait(&wo); 1572dfe16dfaSVitaly Mayatskikh 1573dfe16dfaSVitaly Mayatskikh if (ret > 0) { 1574dfe16dfaSVitaly Mayatskikh ret = 0; 1575dfe16dfaSVitaly Mayatskikh } else if (infop) { 1576dfe16dfaSVitaly Mayatskikh /* 1577dfe16dfaSVitaly Mayatskikh * For a WNOHANG return, clear out all the fields 1578dfe16dfaSVitaly Mayatskikh * we would set so the user can easily tell the 1579dfe16dfaSVitaly Mayatskikh * difference. 1580dfe16dfaSVitaly Mayatskikh */ 1581dfe16dfaSVitaly Mayatskikh if (!ret) 1582dfe16dfaSVitaly Mayatskikh ret = put_user(0, &infop->si_signo); 1583dfe16dfaSVitaly Mayatskikh if (!ret) 1584dfe16dfaSVitaly Mayatskikh ret = put_user(0, &infop->si_errno); 1585dfe16dfaSVitaly Mayatskikh if (!ret) 1586dfe16dfaSVitaly Mayatskikh ret = put_user(0, &infop->si_code); 1587dfe16dfaSVitaly Mayatskikh if (!ret) 1588dfe16dfaSVitaly Mayatskikh ret = put_user(0, &infop->si_pid); 1589dfe16dfaSVitaly Mayatskikh if (!ret) 1590dfe16dfaSVitaly Mayatskikh ret = put_user(0, &infop->si_uid); 1591dfe16dfaSVitaly Mayatskikh if (!ret) 1592dfe16dfaSVitaly Mayatskikh ret = put_user(0, &infop->si_status); 1593dfe16dfaSVitaly Mayatskikh } 1594dfe16dfaSVitaly Mayatskikh 1595161550d7SEric W. Biederman put_pid(pid); 15961da177e4SLinus Torvalds return ret; 15971da177e4SLinus Torvalds } 15981da177e4SLinus Torvalds 1599754fe8d2SHeiko Carstens SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, 1600754fe8d2SHeiko Carstens int, options, struct rusage __user *, ru) 16011da177e4SLinus Torvalds { 16029e8ae01dSOleg Nesterov struct wait_opts wo; 1603161550d7SEric W. Biederman struct pid *pid = NULL; 1604161550d7SEric W. 
Biederman enum pid_type type; 16051da177e4SLinus Torvalds long ret; 16061da177e4SLinus Torvalds 16071da177e4SLinus Torvalds if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| 16081da177e4SLinus Torvalds __WNOTHREAD|__WCLONE|__WALL)) 16091da177e4SLinus Torvalds return -EINVAL; 1610161550d7SEric W. Biederman 1611161550d7SEric W. Biederman if (upid == -1) 1612161550d7SEric W. Biederman type = PIDTYPE_MAX; 1613161550d7SEric W. Biederman else if (upid < 0) { 1614161550d7SEric W. Biederman type = PIDTYPE_PGID; 1615161550d7SEric W. Biederman pid = find_get_pid(-upid); 1616161550d7SEric W. Biederman } else if (upid == 0) { 1617161550d7SEric W. Biederman type = PIDTYPE_PGID; 16182ae448efSOleg Nesterov pid = get_task_pid(current, PIDTYPE_PGID); 1619161550d7SEric W. Biederman } else /* upid > 0 */ { 1620161550d7SEric W. Biederman type = PIDTYPE_PID; 1621161550d7SEric W. Biederman pid = find_get_pid(upid); 1622161550d7SEric W. Biederman } 1623161550d7SEric W. Biederman 16249e8ae01dSOleg Nesterov wo.wo_type = type; 16259e8ae01dSOleg Nesterov wo.wo_pid = pid; 16269e8ae01dSOleg Nesterov wo.wo_flags = options | WEXITED; 16279e8ae01dSOleg Nesterov wo.wo_info = NULL; 16289e8ae01dSOleg Nesterov wo.wo_stat = stat_addr; 16299e8ae01dSOleg Nesterov wo.wo_rusage = ru; 16309e8ae01dSOleg Nesterov ret = do_wait(&wo); 1631161550d7SEric W. Biederman put_pid(pid); 16321da177e4SLinus Torvalds 16331da177e4SLinus Torvalds return ret; 16341da177e4SLinus Torvalds } 16351da177e4SLinus Torvalds 16361da177e4SLinus Torvalds #ifdef __ARCH_WANT_SYS_WAITPID 16371da177e4SLinus Torvalds 16381da177e4SLinus Torvalds /* 16391da177e4SLinus Torvalds * sys_waitpid() remains for compatibility. waitpid() should be 16401da177e4SLinus Torvalds * implemented by calling sys_wait4() from libc.a. 
16411da177e4SLinus Torvalds */ 164217da2bd9SHeiko Carstens SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) 16431da177e4SLinus Torvalds { 16441da177e4SLinus Torvalds return sys_wait4(pid, stat_addr, options, NULL); 16451da177e4SLinus Torvalds } 16461da177e4SLinus Torvalds 16471da177e4SLinus Torvalds #endif 1648