xref: /linux-6.15/kernel/pid.c (revision 52ee2dfd)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * Generic pidhash and scalable, time-bounded PID allocator
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * (C) 2002-2003 William Irwin, IBM
51da177e4SLinus Torvalds  * (C) 2004 William Irwin, Oracle
61da177e4SLinus Torvalds  * (C) 2002-2004 Ingo Molnar, Red Hat
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  * pid-structures are backing objects for tasks sharing a given ID to chain
91da177e4SLinus Torvalds  * against. There is very little to them aside from hashing them and
101da177e4SLinus Torvalds  * parking tasks using given IDs on a list.
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  * The hash is only ever changed with the pidmap_lock held (and interrupts
131da177e4SLinus Torvalds  * disabled), and it is only traversed under rcu_read_lock(), so the RCU
141da177e4SLinus Torvalds  * hlist primitives provide all the SMP protection that is needed here.
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  * We have a list of bitmap pages whose bitmaps represent the PID space.
171da177e4SLinus Torvalds  * Allocating and freeing PIDs is completely lockless. In the worst case,
181da177e4SLinus Torvalds  * when all but one of the roughly 1 million possible PIDs are already
191da177e4SLinus Torvalds  * allocated, an allocation scans 32 list entries and at most PAGE_SIZE
201da177e4SLinus Torvalds  * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
2130e49c26SPavel Emelyanov  *
2230e49c26SPavel Emelyanov  * Pid namespaces:
2330e49c26SPavel Emelyanov  *    (C) 2007 Pavel Emelyanov <[email protected]>, OpenVZ, SWsoft Inc.
2430e49c26SPavel Emelyanov  *    (C) 2007 Sukadev Bhattiprolu <[email protected]>, IBM
2530e49c26SPavel Emelyanov  *     Many thanks to Oleg Nesterov for comments and help
2630e49c26SPavel Emelyanov  *
271da177e4SLinus Torvalds  */
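
/*
 * Illustrative sketch (not part of kernel/pid.c): a minimal user-space
 * model of the scheme described above -- allocation is normally one
 * successful test-and-set in a bitmap, freeing is one clear-bit.  It
 * ignores locking, namespaces and RESERVED_PIDS, and every name in it
 * (DEMO_PID_SPACE, demo_*) is invented for the illustration only.
 */
#if 0
#include <limits.h>

#define DEMO_PID_SPACE	4096
#define DEMO_WORD_BITS	(CHAR_BIT * sizeof(unsigned long))

static unsigned long demo_map[DEMO_PID_SPACE / DEMO_WORD_BITS];

/* Non-atomic test-and-set; the kernel uses the atomic test_and_set_bit(). */
static int demo_test_and_set(int bit)
{
	unsigned long mask = 1UL << (bit % DEMO_WORD_BITS);
	unsigned long *word = &demo_map[bit / DEMO_WORD_BITS];
	int was_set = (*word & mask) != 0;

	*word |= mask;
	return was_set;
}

static int demo_alloc_pid(void)
{
	static int demo_last = -1;	/* crude analogue of pid_ns->last_pid */
	int i, bit;

	for (i = 0; i < DEMO_PID_SPACE; i++) {
		bit = (demo_last + 1 + i) % DEMO_PID_SPACE;
		if (!demo_test_and_set(bit)) {
			demo_last = bit;
			return bit;	/* fastpath: usually the first probe */
		}
	}
	return -1;			/* PID space exhausted */
}

static void demo_free_pid(int bit)
{
	/* Freeing is O(1): just clear the bit again. */
	demo_map[bit / DEMO_WORD_BITS] &= ~(1UL << (bit % DEMO_WORD_BITS));
}
#endif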
281da177e4SLinus Torvalds 
291da177e4SLinus Torvalds #include <linux/mm.h>
301da177e4SLinus Torvalds #include <linux/module.h>
311da177e4SLinus Torvalds #include <linux/slab.h>
321da177e4SLinus Torvalds #include <linux/init.h>
3382524746SFranck Bui-Huu #include <linux/rculist.h>
341da177e4SLinus Torvalds #include <linux/bootmem.h>
351da177e4SLinus Torvalds #include <linux/hash.h>
3661a58c6cSSukadev Bhattiprolu #include <linux/pid_namespace.h>
37820e45dbSSukadev Bhattiprolu #include <linux/init_task.h>
383eb07c8cSSukadev Bhattiprolu #include <linux/syscalls.h>
391da177e4SLinus Torvalds 
408ef047aaSPavel Emelyanov #define pid_hashfn(nr, ns)	\
418ef047aaSPavel Emelyanov 	hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
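/*
 * Illustrative note: pid_hashfn() mixes the pid number with the
 * namespace pointer, so the same numeric pid used in two different
 * namespaces normally lands in two different hash buckets; find_pid_ns()
 * below still compares both nr and ns before declaring a match.
 */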
4292476d7fSEric W. Biederman static struct hlist_head *pid_hash;
431da177e4SLinus Torvalds static int pidhash_shift;
44820e45dbSSukadev Bhattiprolu struct pid init_struct_pid = INIT_STRUCT_PID;
451da177e4SLinus Torvalds 
461da177e4SLinus Torvalds int pid_max = PID_MAX_DEFAULT;
471da177e4SLinus Torvalds 
481da177e4SLinus Torvalds #define RESERVED_PIDS		300
491da177e4SLinus Torvalds 
501da177e4SLinus Torvalds int pid_max_min = RESERVED_PIDS + 1;
511da177e4SLinus Torvalds int pid_max_max = PID_MAX_LIMIT;
521da177e4SLinus Torvalds 
531da177e4SLinus Torvalds #define BITS_PER_PAGE		(PAGE_SIZE*8)
541da177e4SLinus Torvalds #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
553fbc9648SSukadev Bhattiprolu 
5661a58c6cSSukadev Bhattiprolu static inline int mk_pid(struct pid_namespace *pid_ns,
5761a58c6cSSukadev Bhattiprolu 		struct pidmap *map, int off)
583fbc9648SSukadev Bhattiprolu {
5961a58c6cSSukadev Bhattiprolu 	return (map - pid_ns->pidmap)*BITS_PER_PAGE + off;
603fbc9648SSukadev Bhattiprolu }
613fbc9648SSukadev Bhattiprolu 
621da177e4SLinus Torvalds #define find_next_offset(map, off)					\
631da177e4SLinus Torvalds 		find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
641da177e4SLinus Torvalds 
651da177e4SLinus Torvalds /*
661da177e4SLinus Torvalds  * PID-map pages start out as NULL, they get allocated upon
671da177e4SLinus Torvalds  * first use and are never deallocated. This way a low pid_max
681da177e4SLinus Torvalds  * value does not cause lots of bitmaps to be allocated, but
691da177e4SLinus Torvalds  * the scheme scales up to 4 million PIDs at runtime.
701da177e4SLinus Torvalds  */
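/*
 * Illustrative arithmetic: with 4 KB pages each pidmap block covers
 * BITS_PER_PAGE = 4096 * 8 = 32768 PIDs, so the PID_MAX_LIMIT of
 * 4 million PIDs needs 4194304 / 32768 = 128 bitmap pages
 * (PIDMAP_ENTRIES), i.e. at most 512 KB, and only if the whole space
 * is ever used.
 */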
7161a58c6cSSukadev Bhattiprolu struct pid_namespace init_pid_ns = {
729a575a92SCedric Le Goater 	.kref = {
739a575a92SCedric Le Goater 		.refcount       = ATOMIC_INIT(2),
749a575a92SCedric Le Goater 	},
753fbc9648SSukadev Bhattiprolu 	.pidmap = {
763fbc9648SSukadev Bhattiprolu 		[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
773fbc9648SSukadev Bhattiprolu 	},
7884d73786SSukadev Bhattiprolu 	.last_pid = 0,
79faacbfd3SPavel Emelyanov 	.level = 0,
80faacbfd3SPavel Emelyanov 	.child_reaper = &init_task,
813fbc9648SSukadev Bhattiprolu };
82198fe21bSPavel Emelyanov EXPORT_SYMBOL_GPL(init_pid_ns);
831da177e4SLinus Torvalds 
84b461cc03SPavel Emelyanov int is_container_init(struct task_struct *tsk)
85b460cbc5SSerge E. Hallyn {
86b461cc03SPavel Emelyanov 	int ret = 0;
87b461cc03SPavel Emelyanov 	struct pid *pid;
88b461cc03SPavel Emelyanov 
89b461cc03SPavel Emelyanov 	rcu_read_lock();
90b461cc03SPavel Emelyanov 	pid = task_pid(tsk);
91b461cc03SPavel Emelyanov 	if (pid != NULL && pid->numbers[pid->level].nr == 1)
92b461cc03SPavel Emelyanov 		ret = 1;
93b461cc03SPavel Emelyanov 	rcu_read_unlock();
94b461cc03SPavel Emelyanov 
95b461cc03SPavel Emelyanov 	return ret;
96b460cbc5SSerge E. Hallyn }
97b461cc03SPavel Emelyanov EXPORT_SYMBOL(is_container_init);
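
/*
 * Illustrative note: pid->numbers[pid->level] is the task's id as seen
 * from its own, deepest namespace, so the check above asks "is this
 * task pid 1 (init) of the namespace it lives in?".
 */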
98b460cbc5SSerge E. Hallyn 
9992476d7fSEric W. Biederman /*
10092476d7fSEric W. Biederman  * Note: disable interrupts while the pidmap_lock is held as an
10192476d7fSEric W. Biederman  * interrupt might come in and do read_lock(&tasklist_lock).
10292476d7fSEric W. Biederman  *
10392476d7fSEric W. Biederman  * If we don't disable interrupts there is a nasty deadlock between
10492476d7fSEric W. Biederman  * detach_pid()->free_pid() and another cpu that does
10592476d7fSEric W. Biederman  * spin_lock(&pidmap_lock) followed by an interrupt routine that does
10692476d7fSEric W. Biederman  * read_lock(&tasklist_lock);
10792476d7fSEric W. Biederman  *
10892476d7fSEric W. Biederman  * After we clean up the tasklist_lock and know there are no
10992476d7fSEric W. Biederman  * irq handlers that take it we can leave the interrupts enabled.
11092476d7fSEric W. Biederman  * For now it is easier to be safe than to prove it can't happen.
11192476d7fSEric W. Biederman  */
1123fbc9648SSukadev Bhattiprolu 
1131da177e4SLinus Torvalds static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
1141da177e4SLinus Torvalds 
115b7127aa4SOleg Nesterov static void free_pidmap(struct upid *upid)
1161da177e4SLinus Torvalds {
117b7127aa4SOleg Nesterov 	int nr = upid->nr;
118b7127aa4SOleg Nesterov 	struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE;
119b7127aa4SOleg Nesterov 	int offset = nr & BITS_PER_PAGE_MASK;
1201da177e4SLinus Torvalds 
1211da177e4SLinus Torvalds 	clear_bit(offset, map->page);
1221da177e4SLinus Torvalds 	atomic_inc(&map->nr_free);
1231da177e4SLinus Torvalds }
1241da177e4SLinus Torvalds 
12561a58c6cSSukadev Bhattiprolu static int alloc_pidmap(struct pid_namespace *pid_ns)
1261da177e4SLinus Torvalds {
12761a58c6cSSukadev Bhattiprolu 	int i, offset, max_scan, pid, last = pid_ns->last_pid;
1286a1f3b84SSukadev Bhattiprolu 	struct pidmap *map;
1291da177e4SLinus Torvalds 
1301da177e4SLinus Torvalds 	pid = last + 1;
1311da177e4SLinus Torvalds 	if (pid >= pid_max)
1321da177e4SLinus Torvalds 		pid = RESERVED_PIDS;
1331da177e4SLinus Torvalds 	offset = pid & BITS_PER_PAGE_MASK;
13461a58c6cSSukadev Bhattiprolu 	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
1351da177e4SLinus Torvalds 	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
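	/*
	 * Illustrative note: the search starts at last_pid + 1 and, once
	 * pid_max is reached, wraps back to RESERVED_PIDS rather than 0 so
	 * that pids below RESERVED_PIDS are not recycled after a wraparound.
	 * max_scan bounds how many further bitmap blocks the loop may move
	 * on to; the "- !offset" drops one pass when we start at the very
	 * beginning of a block, because a wrap-around pass would then have
	 * nothing left to revisit in that block.
	 */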
1361da177e4SLinus Torvalds 	for (i = 0; i <= max_scan; ++i) {
1371da177e4SLinus Torvalds 		if (unlikely(!map->page)) {
1383fbc9648SSukadev Bhattiprolu 			void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
1391da177e4SLinus Torvalds 			/*
1401da177e4SLinus Torvalds 			 * Free the page if someone raced with us
1411da177e4SLinus Torvalds 			 * installing it:
1421da177e4SLinus Torvalds 			 */
14392476d7fSEric W. Biederman 			spin_lock_irq(&pidmap_lock);
1441da177e4SLinus Torvalds 			if (map->page)
1453fbc9648SSukadev Bhattiprolu 				kfree(page);
1461da177e4SLinus Torvalds 			else
1473fbc9648SSukadev Bhattiprolu 				map->page = page;
14892476d7fSEric W. Biederman 			spin_unlock_irq(&pidmap_lock);
1491da177e4SLinus Torvalds 			if (unlikely(!map->page))
1501da177e4SLinus Torvalds 				break;
1511da177e4SLinus Torvalds 		}
1521da177e4SLinus Torvalds 		if (likely(atomic_read(&map->nr_free))) {
1531da177e4SLinus Torvalds 			do {
1541da177e4SLinus Torvalds 				if (!test_and_set_bit(offset, map->page)) {
1551da177e4SLinus Torvalds 					atomic_dec(&map->nr_free);
15661a58c6cSSukadev Bhattiprolu 					pid_ns->last_pid = pid;
1571da177e4SLinus Torvalds 					return pid;
1581da177e4SLinus Torvalds 				}
1591da177e4SLinus Torvalds 				offset = find_next_offset(map, offset);
16061a58c6cSSukadev Bhattiprolu 				pid = mk_pid(pid_ns, map, offset);
1611da177e4SLinus Torvalds 			/*
1621da177e4SLinus Torvalds 			 * find_next_offset() found a bit, the pid from it
1631da177e4SLinus Torvalds 			 * is in-bounds, and if we fell back to the last
1641da177e4SLinus Torvalds 			 * bitmap block and the final block was the same
1651da177e4SLinus Torvalds 			 * as the starting point, pid is before last_pid.
1661da177e4SLinus Torvalds 			 */
1671da177e4SLinus Torvalds 			} while (offset < BITS_PER_PAGE && pid < pid_max &&
1681da177e4SLinus Torvalds 					(i != max_scan || pid < last ||
1691da177e4SLinus Torvalds 					    !((last+1) & BITS_PER_PAGE_MASK)));
1701da177e4SLinus Torvalds 		}
17161a58c6cSSukadev Bhattiprolu 		if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
1721da177e4SLinus Torvalds 			++map;
1731da177e4SLinus Torvalds 			offset = 0;
1741da177e4SLinus Torvalds 		} else {
17561a58c6cSSukadev Bhattiprolu 			map = &pid_ns->pidmap[0];
1761da177e4SLinus Torvalds 			offset = RESERVED_PIDS;
1771da177e4SLinus Torvalds 			if (unlikely(last == offset))
1781da177e4SLinus Torvalds 				break;
1791da177e4SLinus Torvalds 		}
18061a58c6cSSukadev Bhattiprolu 		pid = mk_pid(pid_ns, map, offset);
1811da177e4SLinus Torvalds 	}
1821da177e4SLinus Torvalds 	return -1;
1831da177e4SLinus Torvalds }
1841da177e4SLinus Torvalds 
18574bd59bbSPavel Emelyanov int next_pidmap(struct pid_namespace *pid_ns, int last)
1860804ef4bSEric W. Biederman {
1870804ef4bSEric W. Biederman 	int offset;
188f40f50d3SEric W. Biederman 	struct pidmap *map, *end;
1890804ef4bSEric W. Biederman 
1900804ef4bSEric W. Biederman 	offset = (last + 1) & BITS_PER_PAGE_MASK;
19161a58c6cSSukadev Bhattiprolu 	map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
19261a58c6cSSukadev Bhattiprolu 	end = &pid_ns->pidmap[PIDMAP_ENTRIES];
193f40f50d3SEric W. Biederman 	for (; map < end; map++, offset = 0) {
1940804ef4bSEric W. Biederman 		if (unlikely(!map->page))
1950804ef4bSEric W. Biederman 			continue;
1960804ef4bSEric W. Biederman 		offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
1970804ef4bSEric W. Biederman 		if (offset < BITS_PER_PAGE)
19861a58c6cSSukadev Bhattiprolu 			return mk_pid(pid_ns, map, offset);
1990804ef4bSEric W. Biederman 	}
2000804ef4bSEric W. Biederman 	return -1;
2010804ef4bSEric W. Biederman }
2020804ef4bSEric W. Biederman 
2037ad5b3a5SHarvey Harrison void put_pid(struct pid *pid)
20492476d7fSEric W. Biederman {
205baf8f0f8SPavel Emelianov 	struct pid_namespace *ns;
206baf8f0f8SPavel Emelianov 
20792476d7fSEric W. Biederman 	if (!pid)
20892476d7fSEric W. Biederman 		return;
209baf8f0f8SPavel Emelianov 
2108ef047aaSPavel Emelyanov 	ns = pid->numbers[pid->level].ns;
21192476d7fSEric W. Biederman 	if ((atomic_read(&pid->count) == 1) ||
2128ef047aaSPavel Emelyanov 	     atomic_dec_and_test(&pid->count)) {
213baf8f0f8SPavel Emelianov 		kmem_cache_free(ns->pid_cachep, pid);
2148ef047aaSPavel Emelyanov 		put_pid_ns(ns);
2158ef047aaSPavel Emelyanov 	}
21692476d7fSEric W. Biederman }
217bbf73147SEric W. Biederman EXPORT_SYMBOL_GPL(put_pid);
21892476d7fSEric W. Biederman 
21992476d7fSEric W. Biederman static void delayed_put_pid(struct rcu_head *rhp)
22092476d7fSEric W. Biederman {
22192476d7fSEric W. Biederman 	struct pid *pid = container_of(rhp, struct pid, rcu);
22292476d7fSEric W. Biederman 	put_pid(pid);
22392476d7fSEric W. Biederman }
22492476d7fSEric W. Biederman 
2257ad5b3a5SHarvey Harrison void free_pid(struct pid *pid)
22692476d7fSEric W. Biederman {
22792476d7fSEric W. Biederman 	/* We can be called with write_lock_irq(&tasklist_lock) held */
2288ef047aaSPavel Emelyanov 	int i;
22992476d7fSEric W. Biederman 	unsigned long flags;
23092476d7fSEric W. Biederman 
23192476d7fSEric W. Biederman 	spin_lock_irqsave(&pidmap_lock, flags);
232198fe21bSPavel Emelyanov 	for (i = 0; i <= pid->level; i++)
233198fe21bSPavel Emelyanov 		hlist_del_rcu(&pid->numbers[i].pid_chain);
23492476d7fSEric W. Biederman 	spin_unlock_irqrestore(&pidmap_lock, flags);
23592476d7fSEric W. Biederman 
2368ef047aaSPavel Emelyanov 	for (i = 0; i <= pid->level; i++)
237b7127aa4SOleg Nesterov 		free_pidmap(pid->numbers + i);
2388ef047aaSPavel Emelyanov 
23992476d7fSEric W. Biederman 	call_rcu(&pid->rcu, delayed_put_pid);
24092476d7fSEric W. Biederman }
24192476d7fSEric W. Biederman 
2428ef047aaSPavel Emelyanov struct pid *alloc_pid(struct pid_namespace *ns)
24392476d7fSEric W. Biederman {
24492476d7fSEric W. Biederman 	struct pid *pid;
24592476d7fSEric W. Biederman 	enum pid_type type;
2468ef047aaSPavel Emelyanov 	int i, nr;
2478ef047aaSPavel Emelyanov 	struct pid_namespace *tmp;
248198fe21bSPavel Emelyanov 	struct upid *upid;
24992476d7fSEric W. Biederman 
250baf8f0f8SPavel Emelianov 	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
25192476d7fSEric W. Biederman 	if (!pid)
25292476d7fSEric W. Biederman 		goto out;
25392476d7fSEric W. Biederman 
2548ef047aaSPavel Emelyanov 	tmp = ns;
2558ef047aaSPavel Emelyanov 	for (i = ns->level; i >= 0; i--) {
2568ef047aaSPavel Emelyanov 		nr = alloc_pidmap(tmp);
25792476d7fSEric W. Biederman 		if (nr < 0)
25892476d7fSEric W. Biederman 			goto out_free;
25992476d7fSEric W. Biederman 
2608ef047aaSPavel Emelyanov 		pid->numbers[i].nr = nr;
2618ef047aaSPavel Emelyanov 		pid->numbers[i].ns = tmp;
2628ef047aaSPavel Emelyanov 		tmp = tmp->parent;
2638ef047aaSPavel Emelyanov 	}
2648ef047aaSPavel Emelyanov 
2658ef047aaSPavel Emelyanov 	get_pid_ns(ns);
2668ef047aaSPavel Emelyanov 	pid->level = ns->level;
26792476d7fSEric W. Biederman 	atomic_set(&pid->count, 1);
26892476d7fSEric W. Biederman 	for (type = 0; type < PIDTYPE_MAX; ++type)
26992476d7fSEric W. Biederman 		INIT_HLIST_HEAD(&pid->tasks[type]);
27092476d7fSEric W. Biederman 
27192476d7fSEric W. Biederman 	spin_lock_irq(&pidmap_lock);
272198fe21bSPavel Emelyanov 	for (i = ns->level; i >= 0; i--) {
273198fe21bSPavel Emelyanov 		upid = &pid->numbers[i];
274198fe21bSPavel Emelyanov 		hlist_add_head_rcu(&upid->pid_chain,
275198fe21bSPavel Emelyanov 				&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
276198fe21bSPavel Emelyanov 	}
27792476d7fSEric W. Biederman 	spin_unlock_irq(&pidmap_lock);
27892476d7fSEric W. Biederman 
27992476d7fSEric W. Biederman out:
28092476d7fSEric W. Biederman 	return pid;
28192476d7fSEric W. Biederman 
28292476d7fSEric W. Biederman out_free:
283b7127aa4SOleg Nesterov 	while (++i <= ns->level)
284b7127aa4SOleg Nesterov 		free_pidmap(pid->numbers + i);
2858ef047aaSPavel Emelyanov 
286baf8f0f8SPavel Emelianov 	kmem_cache_free(ns->pid_cachep, pid);
28792476d7fSEric W. Biederman 	pid = NULL;
28892476d7fSEric W. Biederman 	goto out;
28992476d7fSEric W. Biederman }
29092476d7fSEric W. Biederman 
2917ad5b3a5SHarvey Harrison struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
2921da177e4SLinus Torvalds {
2931da177e4SLinus Torvalds 	struct hlist_node *elem;
294198fe21bSPavel Emelyanov 	struct upid *pnr;
2951da177e4SLinus Torvalds 
296198fe21bSPavel Emelyanov 	hlist_for_each_entry_rcu(pnr, elem,
297198fe21bSPavel Emelyanov 			&pid_hash[pid_hashfn(nr, ns)], pid_chain)
298198fe21bSPavel Emelyanov 		if (pnr->nr == nr && pnr->ns == ns)
299198fe21bSPavel Emelyanov 			return container_of(pnr, struct pid,
300198fe21bSPavel Emelyanov 					numbers[ns->level]);
301198fe21bSPavel Emelyanov 
3021da177e4SLinus Torvalds 	return NULL;
3031da177e4SLinus Torvalds }
304198fe21bSPavel Emelyanov EXPORT_SYMBOL_GPL(find_pid_ns);
3051da177e4SLinus Torvalds 
3068990571eSPavel Emelyanov struct pid *find_vpid(int nr)
3078990571eSPavel Emelyanov {
3088990571eSPavel Emelyanov 	return find_pid_ns(nr, current->nsproxy->pid_ns);
3098990571eSPavel Emelyanov }
3108990571eSPavel Emelyanov EXPORT_SYMBOL_GPL(find_vpid);
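
/*
 * Illustrative sketch: how callers typically combine the helpers in this
 * file to resolve a userspace-supplied pid number in the caller's own
 * namespace.  demo_find_get_task() is an invented name used only here.
 */
#if 0
static struct task_struct *demo_find_get_task(pid_t nr)
{
	struct task_struct *task;

	rcu_read_lock();
	task = pid_task(find_vpid(nr), PIDTYPE_PID);
	if (task)
		get_task_struct(task);
	rcu_read_unlock();

	return task;		/* caller releases it with put_task_struct() */
}
#endif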
3118990571eSPavel Emelyanov 
312e713d0daSSukadev Bhattiprolu /*
313e713d0daSSukadev Bhattiprolu  * attach_pid() must be called with the tasklist_lock write-held.
314e713d0daSSukadev Bhattiprolu  */
31524336eaeSOleg Nesterov void attach_pid(struct task_struct *task, enum pid_type type,
316e713d0daSSukadev Bhattiprolu 		struct pid *pid)
3171da177e4SLinus Torvalds {
31892476d7fSEric W. Biederman 	struct pid_link *link;
3191da177e4SLinus Torvalds 
32092476d7fSEric W. Biederman 	link = &task->pids[type];
321e713d0daSSukadev Bhattiprolu 	link->pid = pid;
32292476d7fSEric W. Biederman 	hlist_add_head_rcu(&link->node, &pid->tasks[type]);
3231da177e4SLinus Torvalds }
3241da177e4SLinus Torvalds 
32524336eaeSOleg Nesterov static void __change_pid(struct task_struct *task, enum pid_type type,
32624336eaeSOleg Nesterov 			struct pid *new)
3271da177e4SLinus Torvalds {
32892476d7fSEric W. Biederman 	struct pid_link *link;
32992476d7fSEric W. Biederman 	struct pid *pid;
33092476d7fSEric W. Biederman 	int tmp;
3311da177e4SLinus Torvalds 
33292476d7fSEric W. Biederman 	link = &task->pids[type];
33392476d7fSEric W. Biederman 	pid = link->pid;
33492476d7fSEric W. Biederman 
33592476d7fSEric W. Biederman 	hlist_del_rcu(&link->node);
33624336eaeSOleg Nesterov 	link->pid = new;
3371da177e4SLinus Torvalds 
3381da177e4SLinus Torvalds 	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
33992476d7fSEric W. Biederman 		if (!hlist_empty(&pid->tasks[tmp]))
3401da177e4SLinus Torvalds 			return;
3411da177e4SLinus Torvalds 
34292476d7fSEric W. Biederman 	free_pid(pid);
3431da177e4SLinus Torvalds }
3441da177e4SLinus Torvalds 
34524336eaeSOleg Nesterov void detach_pid(struct task_struct *task, enum pid_type type)
34624336eaeSOleg Nesterov {
34724336eaeSOleg Nesterov 	__change_pid(task, type, NULL);
34824336eaeSOleg Nesterov }
34924336eaeSOleg Nesterov 
35024336eaeSOleg Nesterov void change_pid(struct task_struct *task, enum pid_type type,
35124336eaeSOleg Nesterov 		struct pid *pid)
35224336eaeSOleg Nesterov {
35324336eaeSOleg Nesterov 	__change_pid(task, type, pid);
35424336eaeSOleg Nesterov 	attach_pid(task, type, pid);
35524336eaeSOleg Nesterov }
35624336eaeSOleg Nesterov 
357c18258c6SEric W. Biederman /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
3587ad5b3a5SHarvey Harrison void transfer_pid(struct task_struct *old, struct task_struct *new,
359c18258c6SEric W. Biederman 			   enum pid_type type)
360c18258c6SEric W. Biederman {
361c18258c6SEric W. Biederman 	new->pids[type].pid = old->pids[type].pid;
362c18258c6SEric W. Biederman 	hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node);
363c18258c6SEric W. Biederman }
364c18258c6SEric W. Biederman 
3657ad5b3a5SHarvey Harrison struct task_struct *pid_task(struct pid *pid, enum pid_type type)
36692476d7fSEric W. Biederman {
36792476d7fSEric W. Biederman 	struct task_struct *result = NULL;
36892476d7fSEric W. Biederman 	if (pid) {
36992476d7fSEric W. Biederman 		struct hlist_node *first;
37092476d7fSEric W. Biederman 		first = rcu_dereference(pid->tasks[type].first);
37192476d7fSEric W. Biederman 		if (first)
37292476d7fSEric W. Biederman 			result = hlist_entry(first, struct task_struct, pids[(type)].node);
37392476d7fSEric W. Biederman 	}
37492476d7fSEric W. Biederman 	return result;
37592476d7fSEric W. Biederman }
376eccba068SPavel Emelyanov EXPORT_SYMBOL(pid_task);
37792476d7fSEric W. Biederman 
37892476d7fSEric W. Biederman /*
37992476d7fSEric W. Biederman  * Must be called under rcu_read_lock() or with tasklist_lock read-held.
38092476d7fSEric W. Biederman  */
381198fe21bSPavel Emelyanov struct task_struct *find_task_by_pid_type_ns(int type, int nr,
382198fe21bSPavel Emelyanov 		struct pid_namespace *ns)
3831da177e4SLinus Torvalds {
384198fe21bSPavel Emelyanov 	return pid_task(find_pid_ns(nr, ns), type);
3851da177e4SLinus Torvalds }
3861da177e4SLinus Torvalds 
387198fe21bSPavel Emelyanov EXPORT_SYMBOL(find_task_by_pid_type_ns);
3881da177e4SLinus Torvalds 
389228ebcbeSPavel Emelyanov struct task_struct *find_task_by_vpid(pid_t vnr)
390228ebcbeSPavel Emelyanov {
391228ebcbeSPavel Emelyanov 	return find_task_by_pid_type_ns(PIDTYPE_PID, vnr,
392228ebcbeSPavel Emelyanov 			current->nsproxy->pid_ns);
393228ebcbeSPavel Emelyanov }
394228ebcbeSPavel Emelyanov EXPORT_SYMBOL(find_task_by_vpid);
395228ebcbeSPavel Emelyanov 
396228ebcbeSPavel Emelyanov struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
397228ebcbeSPavel Emelyanov {
398228ebcbeSPavel Emelyanov 	return find_task_by_pid_type_ns(PIDTYPE_PID, nr, ns);
399228ebcbeSPavel Emelyanov }
400228ebcbeSPavel Emelyanov EXPORT_SYMBOL(find_task_by_pid_ns);
401228ebcbeSPavel Emelyanov 
4021a657f78SOleg Nesterov struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
4031a657f78SOleg Nesterov {
4041a657f78SOleg Nesterov 	struct pid *pid;
4051a657f78SOleg Nesterov 	rcu_read_lock();
4062ae448efSOleg Nesterov 	if (type != PIDTYPE_PID)
4072ae448efSOleg Nesterov 		task = task->group_leader;
4081a657f78SOleg Nesterov 	pid = get_pid(task->pids[type].pid);
4091a657f78SOleg Nesterov 	rcu_read_unlock();
4101a657f78SOleg Nesterov 	return pid;
4111a657f78SOleg Nesterov }
4121a657f78SOleg Nesterov 
4137ad5b3a5SHarvey Harrison struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
41492476d7fSEric W. Biederman {
41592476d7fSEric W. Biederman 	struct task_struct *result;
41692476d7fSEric W. Biederman 	rcu_read_lock();
41792476d7fSEric W. Biederman 	result = pid_task(pid, type);
41892476d7fSEric W. Biederman 	if (result)
41992476d7fSEric W. Biederman 		get_task_struct(result);
42092476d7fSEric W. Biederman 	rcu_read_unlock();
42192476d7fSEric W. Biederman 	return result;
42292476d7fSEric W. Biederman }
42392476d7fSEric W. Biederman 
42492476d7fSEric W. Biederman struct pid *find_get_pid(pid_t nr)
42592476d7fSEric W. Biederman {
42692476d7fSEric W. Biederman 	struct pid *pid;
42792476d7fSEric W. Biederman 
42892476d7fSEric W. Biederman 	rcu_read_lock();
429198fe21bSPavel Emelyanov 	pid = get_pid(find_vpid(nr));
43092476d7fSEric W. Biederman 	rcu_read_unlock();
43192476d7fSEric W. Biederman 
43292476d7fSEric W. Biederman 	return pid;
43392476d7fSEric W. Biederman }
434339caf2aSDavid Sterba EXPORT_SYMBOL_GPL(find_get_pid);
43592476d7fSEric W. Biederman 
4367af57294SPavel Emelyanov pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
4377af57294SPavel Emelyanov {
4387af57294SPavel Emelyanov 	struct upid *upid;
4397af57294SPavel Emelyanov 	pid_t nr = 0;
4407af57294SPavel Emelyanov 
4417af57294SPavel Emelyanov 	if (pid && ns->level <= pid->level) {
4427af57294SPavel Emelyanov 		upid = &pid->numbers[ns->level];
4437af57294SPavel Emelyanov 		if (upid->ns == ns)
4447af57294SPavel Emelyanov 			nr = upid->nr;
4457af57294SPavel Emelyanov 	}
4467af57294SPavel Emelyanov 	return nr;
4477af57294SPavel Emelyanov }
4487af57294SPavel Emelyanov 
44944c4e1b2SEric W. Biederman pid_t pid_vnr(struct pid *pid)
45044c4e1b2SEric W. Biederman {
45144c4e1b2SEric W. Biederman 	return pid_nr_ns(pid, current->nsproxy->pid_ns);
45244c4e1b2SEric W. Biederman }
45344c4e1b2SEric W. Biederman EXPORT_SYMBOL_GPL(pid_vnr);
45444c4e1b2SEric W. Biederman 
455*52ee2dfdSOleg Nesterov pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
456*52ee2dfdSOleg Nesterov 			struct pid_namespace *ns)
4572f2a3a46SPavel Emelyanov {
458*52ee2dfdSOleg Nesterov 	pid_t nr = 0;
459*52ee2dfdSOleg Nesterov 
460*52ee2dfdSOleg Nesterov 	rcu_read_lock();
461*52ee2dfdSOleg Nesterov 	if (!ns)
462*52ee2dfdSOleg Nesterov 		ns = current->nsproxy->pid_ns;
463*52ee2dfdSOleg Nesterov 	if (likely(pid_alive(task))) {
464*52ee2dfdSOleg Nesterov 		if (type != PIDTYPE_PID)
465*52ee2dfdSOleg Nesterov 			task = task->group_leader;
466*52ee2dfdSOleg Nesterov 		nr = pid_nr_ns(task->pids[type].pid, ns);
4672f2a3a46SPavel Emelyanov 	}
468*52ee2dfdSOleg Nesterov 	rcu_read_unlock();
469*52ee2dfdSOleg Nesterov 
470*52ee2dfdSOleg Nesterov 	return nr;
471*52ee2dfdSOleg Nesterov }
472*52ee2dfdSOleg Nesterov EXPORT_SYMBOL(__task_pid_nr_ns);
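
/*
 * Illustrative note: after this change the static inline wrappers in
 * include/linux/pid.h (task_pid_nr_ns(), task_pid_vnr() and friends) are
 * intended to route through __task_pid_nr_ns() with the appropriate
 * pid_type and namespace, so the rcu_read_lock()/pid_alive() checks above
 * cover those callers as well.
 */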
4732f2a3a46SPavel Emelyanov 
4742f2a3a46SPavel Emelyanov pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
4752f2a3a46SPavel Emelyanov {
4762f2a3a46SPavel Emelyanov 	return pid_nr_ns(task_tgid(tsk), ns);
4772f2a3a46SPavel Emelyanov }
4782f2a3a46SPavel Emelyanov EXPORT_SYMBOL(task_tgid_nr_ns);
4792f2a3a46SPavel Emelyanov 
48061bce0f1SEric W. Biederman struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
48161bce0f1SEric W. Biederman {
48261bce0f1SEric W. Biederman 	return ns_of_pid(task_pid(tsk));
48361bce0f1SEric W. Biederman }
48461bce0f1SEric W. Biederman EXPORT_SYMBOL_GPL(task_active_pid_ns);
48561bce0f1SEric W. Biederman 
4861da177e4SLinus Torvalds /*
487025dfdafSFrederik Schwarzer  * Used by proc to find the first pid that is greater than or equal to nr.
4880804ef4bSEric W. Biederman  *
489e49859e7SPavel Emelyanov  * If there is a pid at nr this function is exactly the same as find_pid_ns.
4900804ef4bSEric W. Biederman  */
491198fe21bSPavel Emelyanov struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
4920804ef4bSEric W. Biederman {
4930804ef4bSEric W. Biederman 	struct pid *pid;
4940804ef4bSEric W. Biederman 
4950804ef4bSEric W. Biederman 	do {
496198fe21bSPavel Emelyanov 		pid = find_pid_ns(nr, ns);
4970804ef4bSEric W. Biederman 		if (pid)
4980804ef4bSEric W. Biederman 			break;
499198fe21bSPavel Emelyanov 		nr = next_pidmap(ns, nr);
5000804ef4bSEric W. Biederman 	} while (nr > 0);
5010804ef4bSEric W. Biederman 
5020804ef4bSEric W. Biederman 	return pid;
5030804ef4bSEric W. Biederman }
5040804ef4bSEric W. Biederman 
5050804ef4bSEric W. Biederman /*
5061da177e4SLinus Torvalds  * The pid hash table is scaled according to the amount of memory in the
5071da177e4SLinus Torvalds  * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
5081da177e4SLinus Torvalds  * more.
5091da177e4SLinus Torvalds  */
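/*
 * Illustrative arithmetic: with about 1 GB of kernel pages, megabytes is
 * 1024, fls(1024 * 4) = 13 is clamped to the [4, 12] shift range, giving
 * 1 << 12 = 4096 buckets; a 16 MB machine gets fls(64) = 7, i.e. 128
 * buckets.
 */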
5101da177e4SLinus Torvalds void __init pidhash_init(void)
5111da177e4SLinus Torvalds {
51292476d7fSEric W. Biederman 	int i, pidhash_size;
5131da177e4SLinus Torvalds 	unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
5141da177e4SLinus Torvalds 
5151da177e4SLinus Torvalds 	pidhash_shift = max(4, fls(megabytes * 4));
5161da177e4SLinus Torvalds 	pidhash_shift = min(12, pidhash_shift);
5171da177e4SLinus Torvalds 	pidhash_size = 1 << pidhash_shift;
5181da177e4SLinus Torvalds 
5191da177e4SLinus Torvalds 	printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
5201da177e4SLinus Torvalds 		pidhash_size, pidhash_shift,
52192476d7fSEric W. Biederman 		pidhash_size * sizeof(struct hlist_head));
5221da177e4SLinus Torvalds 
52392476d7fSEric W. Biederman 	pid_hash = alloc_bootmem(pidhash_size *	sizeof(*(pid_hash)));
52492476d7fSEric W. Biederman 	if (!pid_hash)
5251da177e4SLinus Torvalds 		panic("Could not alloc pidhash!\n");
52692476d7fSEric W. Biederman 	for (i = 0; i < pidhash_size; i++)
52792476d7fSEric W. Biederman 		INIT_HLIST_HEAD(&pid_hash[i]);
5281da177e4SLinus Torvalds }
5291da177e4SLinus Torvalds 
5301da177e4SLinus Torvalds void __init pidmap_init(void)
5311da177e4SLinus Torvalds {
53261a58c6cSSukadev Bhattiprolu 	init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
53373b9ebfeSOleg Nesterov 	/* Reserve PID 0. We never call free_pidmap(0) */
53461a58c6cSSukadev Bhattiprolu 	set_bit(0, init_pid_ns.pidmap[0].page);
53561a58c6cSSukadev Bhattiprolu 	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
53692476d7fSEric W. Biederman 
53774bd59bbSPavel Emelyanov 	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
53874bd59bbSPavel Emelyanov 			SLAB_HWCACHE_ALIGN | SLAB_PANIC);
5391da177e4SLinus Torvalds }
540