/*
 * Generic pidhash and scalable, time-bounded PID allocator
 *
 * (C) 2002-2003 William Irwin, IBM
 * (C) 2004 William Irwin, Oracle
 * (C) 2002-2004 Ingo Molnar, Red Hat
 *
 * pid-structures are backing objects for tasks sharing a given ID to chain
 * against. There is very little to them aside from hashing them and
 * parking tasks using given ID's on a list.
 *
 * The hash is always changed with the tasklist_lock write-acquired,
 * and the hash is only accessed with the tasklist_lock at least
 * read-acquired, so there's no additional SMP locking needed here.
 *
 * We have a list of bitmap pages, which bitmaps represent the PID space.
 * Allocating and freeing PIDs is completely lockless. The worst-case
 * allocation scenario when all but one out of 1 million PIDs possible are
 * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
 *
 * Pid namespaces:
 *    (C) 2007 Pavel Emelyanov <[email protected]>, OpenVZ, SWsoft Inc.
 *    (C) 2007 Sukadev Bhattiprolu <[email protected]>, IBM
 *    Many thanks to Oleg Nesterov for comments and help
 *
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/rculist.h>
#include <linux/bootmem.h>
#include <linux/hash.h>
#include <linux/pid_namespace.h>
#include <linux/init_task.h>
#include <linux/syscalls.h>

#define pid_hashfn(nr, ns)	\
	hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
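/*
 * A single hash table holds the upids of every namespace level; the
 * namespace pointer is mixed into the hash above so that equal pid
 * numbers from different namespaces land in different buckets.
 */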
static struct hlist_head *pid_hash;
static int pidhash_shift;
struct pid init_struct_pid = INIT_STRUCT_PID;

int pid_max = PID_MAX_DEFAULT;

#define RESERVED_PIDS		300

int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;

#define BITS_PER_PAGE		(PAGE_SIZE*8)
#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)

static inline int mk_pid(struct pid_namespace *pid_ns,
		struct pidmap *map, int off)
{
	return (map - pid_ns->pidmap)*BITS_PER_PAGE + off;
}

#define find_next_offset(map, off)					\
		find_next_zero_bit((map)->page, BITS_PER_PAGE, off)

/*
 * PID-map pages start out as NULL, they get allocated upon
 * first use and are never deallocated. This way a low pid_max
 * value does not cause lots of bitmaps to be allocated, but
 * the scheme scales to up to 4 million PIDs, runtime.
 */
struct pid_namespace init_pid_ns = {
	.kref = {
		.refcount	= ATOMIC_INIT(2),
	},
	.pidmap = {
		[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
	},
	.last_pid = 0,
	.level = 0,
	.child_reaper = &init_task,
};
EXPORT_SYMBOL_GPL(init_pid_ns);

int is_container_init(struct task_struct *tsk)
{
	int ret = 0;
	struct pid *pid;

	rcu_read_lock();
	pid = task_pid(tsk);
	if (pid != NULL && pid->numbers[pid->level].nr == 1)
		ret = 1;
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(is_container_init);

/*
 * Note: disable interrupts while the pidmap_lock is held as an
 * interrupt might come in and do read_lock(&tasklist_lock).
 *
 * If we don't disable interrupts there is a nasty deadlock between
 * detach_pid()->free_pid() and another cpu that does
 * spin_lock(&pidmap_lock) followed by an interrupt routine that does
 * read_lock(&tasklist_lock);
 *
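 * (Concretely: free_pid() runs with the tasklist_lock write-held and spins
 * on pidmap_lock, while the other cpu already holds pidmap_lock with
 * interrupts enabled and its interrupt handler spins on
 * read_lock(&tasklist_lock); neither side can make progress.)
 *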
 * After we clean up the tasklist_lock and know there are no
 * irq handlers that take it we can leave the interrupts enabled.
 * For now it is easier to be safe than to prove it can't happen.
 */

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);

static void free_pidmap(struct upid *upid)
{
	int nr = upid->nr;
	struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE;
	int offset = nr & BITS_PER_PAGE_MASK;

	clear_bit(offset, map->page);
	atomic_inc(&map->nr_free);
}

static int alloc_pidmap(struct pid_namespace *pid_ns)
{
	int i, offset, max_scan, pid, last = pid_ns->last_pid;
	struct pidmap *map;

	pid = last + 1;
	if (pid >= pid_max)
		pid = RESERVED_PIDS;
	offset = pid & BITS_PER_PAGE_MASK;
	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
	for (i = 0; i <= max_scan; ++i) {
		if (unlikely(!map->page)) {
			void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
			/*
			 * Free the page if someone raced with us
			 * installing it:
			 */
			spin_lock_irq(&pidmap_lock);
			if (map->page)
				kfree(page);
			else
				map->page = page;
			spin_unlock_irq(&pidmap_lock);
			if (unlikely(!map->page))
				break;
		}
		if (likely(atomic_read(&map->nr_free))) {
			do {
				if (!test_and_set_bit(offset, map->page)) {
					atomic_dec(&map->nr_free);
					pid_ns->last_pid = pid;
					return pid;
				}
				offset = find_next_offset(map, offset);
				pid = mk_pid(pid_ns, map, offset);
			/*
			 * find_next_offset() found a bit, the pid from it
			 * is in-bounds, and if we fell back to the last
			 * bitmap block and the final block was the same
			 * as the starting point, pid is before last_pid.
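			 * The while() below therefore stops the scan once
			 * the page or pid_max is exhausted, or, on the final
			 * wrapped-around pass, once it reaches the original
			 * last_pid again (unless the scan started exactly on
			 * a bitmap-page boundary).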
			 */
			} while (offset < BITS_PER_PAGE && pid < pid_max &&
					(i != max_scan || pid < last ||
					    !((last+1) & BITS_PER_PAGE_MASK)));
		}
		if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
			++map;
			offset = 0;
		} else {
			map = &pid_ns->pidmap[0];
			offset = RESERVED_PIDS;
			if (unlikely(last == offset))
				break;
		}
		pid = mk_pid(pid_ns, map, offset);
	}
	return -1;
}

int next_pidmap(struct pid_namespace *pid_ns, int last)
{
	int offset;
	struct pidmap *map, *end;

	offset = (last + 1) & BITS_PER_PAGE_MASK;
	map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
	end = &pid_ns->pidmap[PIDMAP_ENTRIES];
	for (; map < end; map++, offset = 0) {
		if (unlikely(!map->page))
			continue;
		offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
		if (offset < BITS_PER_PAGE)
			return mk_pid(pid_ns, map, offset);
	}
	return -1;
}

void put_pid(struct pid *pid)
{
	struct pid_namespace *ns;

	if (!pid)
		return;

	ns = pid->numbers[pid->level].ns;
	if ((atomic_read(&pid->count) == 1) ||
	     atomic_dec_and_test(&pid->count)) {
		kmem_cache_free(ns->pid_cachep, pid);
		put_pid_ns(ns);
	}
}
EXPORT_SYMBOL_GPL(put_pid);

static void delayed_put_pid(struct rcu_head *rhp)
{
	struct pid *pid = container_of(rhp, struct pid, rcu);
	put_pid(pid);
}

void free_pid(struct pid *pid)
{
	/* We can be called with write_lock_irq(&tasklist_lock) held */
	int i;
	unsigned long flags;

	spin_lock_irqsave(&pidmap_lock, flags);
	for (i = 0; i <= pid->level; i++)
		hlist_del_rcu(&pid->numbers[i].pid_chain);
	spin_unlock_irqrestore(&pidmap_lock, flags);

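	/*
	 * The pid is now unhashed at every level; releasing the bitmap bits
	 * needs no lock, free_pidmap() only uses atomic operations.
	 */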
	for (i = 0; i <= pid->level; i++)
		free_pidmap(pid->numbers + i);

	call_rcu(&pid->rcu, delayed_put_pid);
}

struct pid *alloc_pid(struct pid_namespace *ns)
{
	struct pid *pid;
	enum pid_type type;
	int i, nr;
	struct pid_namespace *tmp;
	struct upid *upid;

	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
	if (!pid)
		goto out;

	tmp = ns;
	for (i = ns->level; i >= 0; i--) {
		nr = alloc_pidmap(tmp);
		if (nr < 0)
			goto out_free;

		pid->numbers[i].nr = nr;
		pid->numbers[i].ns = tmp;
		tmp = tmp->parent;
	}

	get_pid_ns(ns);
	pid->level = ns->level;
	atomic_set(&pid->count, 1);
	for (type = 0; type < PIDTYPE_MAX; ++type)
		INIT_HLIST_HEAD(&pid->tasks[type]);

	spin_lock_irq(&pidmap_lock);
	for (i = ns->level; i >= 0; i--) {
		upid = &pid->numbers[i];
		hlist_add_head_rcu(&upid->pid_chain,
				&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
	}
	spin_unlock_irq(&pidmap_lock);

out:
	return pid;

out_free:
	while (++i <= ns->level)
		free_pidmap(pid->numbers + i);

	kmem_cache_free(ns->pid_cachep, pid);
	pid = NULL;
	goto out;
}

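/*
 * find_pid_ns() walks the hash bucket for (nr, ns) and maps a matching
 * upid back to its owning struct pid.  Callers must be under
 * rcu_read_lock() or hold the tasklist_lock so the entry cannot go away
 * while it is being examined.
 */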
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
	struct hlist_node *elem;
	struct upid *pnr;

	hlist_for_each_entry_rcu(pnr, elem,
			&pid_hash[pid_hashfn(nr, ns)], pid_chain)
		if (pnr->nr == nr && pnr->ns == ns)
			return container_of(pnr, struct pid,
					numbers[ns->level]);

	return NULL;
}
EXPORT_SYMBOL_GPL(find_pid_ns);

struct pid *find_vpid(int nr)
{
	return find_pid_ns(nr, current->nsproxy->pid_ns);
}
EXPORT_SYMBOL_GPL(find_vpid);

/*
 * attach_pid() must be called with the tasklist_lock write-held.
 */
void attach_pid(struct task_struct *task, enum pid_type type,
		struct pid *pid)
{
	struct pid_link *link;

	link = &task->pids[type];
	link->pid = pid;
	hlist_add_head_rcu(&link->node, &pid->tasks[type]);
}

static void __change_pid(struct task_struct *task, enum pid_type type,
			struct pid *new)
{
	struct pid_link *link;
	struct pid *pid;
	int tmp;

	link = &task->pids[type];
	pid = link->pid;

	hlist_del_rcu(&link->node);
	link->pid = new;

	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
		if (!hlist_empty(&pid->tasks[tmp]))
			return;

	free_pid(pid);
}

void detach_pid(struct task_struct *task, enum pid_type type)
{
	__change_pid(task, type, NULL);
}

void change_pid(struct task_struct *task, enum pid_type type,
		struct pid *pid)
{
	__change_pid(task, type, pid);
	attach_pid(task, type, pid);
}

/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
void transfer_pid(struct task_struct *old, struct task_struct *new,
			   enum pid_type type)
{
	new->pids[type].pid = old->pids[type].pid;
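	/*
	 * Splice new into old's slot on the pid's tasks[type] list in one
	 * step, so concurrent RCU walkers see either the old task or the
	 * new one, never neither.
	 */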
	hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node);
}

struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
	struct task_struct *result = NULL;
	if (pid) {
		struct hlist_node *first;
		first = rcu_dereference(pid->tasks[type].first);
		if (first)
			result = hlist_entry(first, struct task_struct, pids[(type)].node);
	}
	return result;
}
EXPORT_SYMBOL(pid_task);

/*
 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
 */
struct task_struct *find_task_by_pid_type_ns(int type, int nr,
		struct pid_namespace *ns)
{
	return pid_task(find_pid_ns(nr, ns), type);
}

EXPORT_SYMBOL(find_task_by_pid_type_ns);

struct task_struct *find_task_by_vpid(pid_t vnr)
{
	return find_task_by_pid_type_ns(PIDTYPE_PID, vnr,
			current->nsproxy->pid_ns);
}
EXPORT_SYMBOL(find_task_by_vpid);

struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
{
	return find_task_by_pid_type_ns(PIDTYPE_PID, nr, ns);
}
EXPORT_SYMBOL(find_task_by_pid_ns);

struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
{
	struct pid *pid;
	rcu_read_lock();
	if (type != PIDTYPE_PID)
		task = task->group_leader;
	pid = get_pid(task->pids[type].pid);
	rcu_read_unlock();
	return pid;
}

struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
{
	struct task_struct *result;
	rcu_read_lock();
	result = pid_task(pid, type);
	if (result)
		get_task_struct(result);
	rcu_read_unlock();
	return result;
}

struct pid *find_get_pid(pid_t nr)
{
	struct pid *pid;

	rcu_read_lock();
	pid = get_pid(find_vpid(nr));
	rcu_read_unlock();

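	/* the reference taken above keeps the pid valid past the RCU section */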
	return pid;
}
EXPORT_SYMBOL_GPL(find_get_pid);

pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
{
	struct upid *upid;
	pid_t nr = 0;

	if (pid && ns->level <= pid->level) {
		upid = &pid->numbers[ns->level];
		if (upid->ns == ns)
			nr = upid->nr;
	}
	return nr;
}

pid_t pid_vnr(struct pid *pid)
{
	return pid_nr_ns(pid, current->nsproxy->pid_ns);
}
EXPORT_SYMBOL_GPL(pid_vnr);

pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
			struct pid_namespace *ns)
{
	pid_t nr = 0;

	rcu_read_lock();
	if (!ns)
		ns = current->nsproxy->pid_ns;
	if (likely(pid_alive(task))) {
		if (type != PIDTYPE_PID)
			task = task->group_leader;
		nr = pid_nr_ns(task->pids[type].pid, ns);
	}
	rcu_read_unlock();

	return nr;
}
EXPORT_SYMBOL(__task_pid_nr_ns);

pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
	return pid_nr_ns(task_tgid(tsk), ns);
}
EXPORT_SYMBOL(task_tgid_nr_ns);

struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
	return ns_of_pid(task_pid(tsk));
}
EXPORT_SYMBOL_GPL(task_active_pid_ns);

/*
 * Used by proc to find the first pid that is greater than or equal to nr.
 *
 * If there is a pid at nr this function is exactly the same as find_pid_ns.
 */
struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
{
	struct pid *pid;

	do {
		pid = find_pid_ns(nr, ns);
		if (pid)
			break;
		nr = next_pidmap(ns, nr);
	} while (nr > 0);

	return pid;
}

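/*
 * Boot-time set-up: pidhash_init() sizes and allocates the global hash
 * table, pidmap_init() reserves PID 0 in the initial namespace and
 * creates the slab cache backing struct pid.
 */
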
/*
 * The pid hash table is scaled according to the amount of memory in the
 * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
 * more.
 */
void __init pidhash_init(void)
{
	int i, pidhash_size;
	unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);

	pidhash_shift = max(4, fls(megabytes * 4));
	pidhash_shift = min(12, pidhash_shift);
	pidhash_size = 1 << pidhash_shift;

	printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
		pidhash_size, pidhash_shift,
		pidhash_size * sizeof(struct hlist_head));

	pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
	if (!pid_hash)
		panic("Could not alloc pidhash!\n");
	for (i = 0; i < pidhash_size; i++)
		INIT_HLIST_HEAD(&pid_hash[i]);
}

void __init pidmap_init(void)
{
	init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	/* Reserve PID 0. We never call free_pidmap(0) */
	set_bit(0, init_pid_ns.pidmap[0].page);
	atomic_dec(&init_pid_ns.pidmap[0].nr_free);

	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC);
}