1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2006 IBM Corporation 4 * 5 * Author: Serge Hallyn <[email protected]> 6 * 7 * Jun 2006 - namespaces support 8 * OpenVZ, SWsoft Inc. 9 * Pavel Emelianov <[email protected]> 10 */ 11 12 #include <linux/slab.h> 13 #include <linux/export.h> 14 #include <linux/nsproxy.h> 15 #include <linux/init_task.h> 16 #include <linux/mnt_namespace.h> 17 #include <linux/utsname.h> 18 #include <linux/pid_namespace.h> 19 #include <net/net_namespace.h> 20 #include <linux/ipc_namespace.h> 21 #include <linux/time_namespace.h> 22 #include <linux/fs_struct.h> 23 #include <linux/proc_ns.h> 24 #include <linux/file.h> 25 #include <linux/syscalls.h> 26 #include <linux/cgroup.h> 27 #include <linux/perf_event.h> 28 29 static struct kmem_cache *nsproxy_cachep; 30 31 struct nsproxy init_nsproxy = { 32 .count = ATOMIC_INIT(1), 33 .uts_ns = &init_uts_ns, 34 #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) 35 .ipc_ns = &init_ipc_ns, 36 #endif 37 .mnt_ns = NULL, 38 .pid_ns_for_children = &init_pid_ns, 39 #ifdef CONFIG_NET 40 .net_ns = &init_net, 41 #endif 42 #ifdef CONFIG_CGROUPS 43 .cgroup_ns = &init_cgroup_ns, 44 #endif 45 #ifdef CONFIG_TIME_NS 46 .time_ns = &init_time_ns, 47 .time_ns_for_children = &init_time_ns, 48 #endif 49 }; 50 51 static inline struct nsproxy *create_nsproxy(void) 52 { 53 struct nsproxy *nsproxy; 54 55 nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); 56 if (nsproxy) 57 atomic_set(&nsproxy->count, 1); 58 return nsproxy; 59 } 60 61 /* 62 * Create new nsproxy and all of its the associated namespaces. 63 * Return the newly created nsproxy. Do not attach this to the task, 64 * leave it to the caller to do proper locking and attach it to task. 65 */ 66 static struct nsproxy *create_new_namespaces(unsigned long flags, 67 struct task_struct *tsk, struct user_namespace *user_ns, 68 struct fs_struct *new_fs) 69 { 70 struct nsproxy *new_nsp; 71 int err; 72 73 new_nsp = create_nsproxy(); 74 if (!new_nsp) 75 return ERR_PTR(-ENOMEM); 76 77 new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs); 78 if (IS_ERR(new_nsp->mnt_ns)) { 79 err = PTR_ERR(new_nsp->mnt_ns); 80 goto out_ns; 81 } 82 83 new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns); 84 if (IS_ERR(new_nsp->uts_ns)) { 85 err = PTR_ERR(new_nsp->uts_ns); 86 goto out_uts; 87 } 88 89 new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns); 90 if (IS_ERR(new_nsp->ipc_ns)) { 91 err = PTR_ERR(new_nsp->ipc_ns); 92 goto out_ipc; 93 } 94 95 new_nsp->pid_ns_for_children = 96 copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children); 97 if (IS_ERR(new_nsp->pid_ns_for_children)) { 98 err = PTR_ERR(new_nsp->pid_ns_for_children); 99 goto out_pid; 100 } 101 102 new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns, 103 tsk->nsproxy->cgroup_ns); 104 if (IS_ERR(new_nsp->cgroup_ns)) { 105 err = PTR_ERR(new_nsp->cgroup_ns); 106 goto out_cgroup; 107 } 108 109 new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns); 110 if (IS_ERR(new_nsp->net_ns)) { 111 err = PTR_ERR(new_nsp->net_ns); 112 goto out_net; 113 } 114 115 new_nsp->time_ns_for_children = copy_time_ns(flags, user_ns, 116 tsk->nsproxy->time_ns_for_children); 117 if (IS_ERR(new_nsp->time_ns_for_children)) { 118 err = PTR_ERR(new_nsp->time_ns_for_children); 119 goto out_time; 120 } 121 new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns); 122 123 return new_nsp; 124 125 out_time: 126 put_net(new_nsp->net_ns); 127 out_net: 128 put_cgroup_ns(new_nsp->cgroup_ns); 129 out_cgroup: 130 if (new_nsp->pid_ns_for_children) 131 put_pid_ns(new_nsp->pid_ns_for_children); 132 out_pid: 133 if (new_nsp->ipc_ns) 134 put_ipc_ns(new_nsp->ipc_ns); 135 out_ipc: 136 if (new_nsp->uts_ns) 137 put_uts_ns(new_nsp->uts_ns); 138 out_uts: 139 if (new_nsp->mnt_ns) 140 put_mnt_ns(new_nsp->mnt_ns); 141 out_ns: 142 kmem_cache_free(nsproxy_cachep, new_nsp); 143 return ERR_PTR(err); 144 } 145 146 /* 147 * called from clone. This now handles copy for nsproxy and all 148 * namespaces therein. 149 */ 150 int copy_namespaces(unsigned long flags, struct task_struct *tsk) 151 { 152 struct nsproxy *old_ns = tsk->nsproxy; 153 struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns); 154 struct nsproxy *new_ns; 155 int ret; 156 157 if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 158 CLONE_NEWPID | CLONE_NEWNET | 159 CLONE_NEWCGROUP | CLONE_NEWTIME)))) { 160 if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) { 161 get_nsproxy(old_ns); 162 return 0; 163 } 164 } else if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 165 return -EPERM; 166 167 /* 168 * CLONE_NEWIPC must detach from the undolist: after switching 169 * to a new ipc namespace, the semaphore arrays from the old 170 * namespace are unreachable. In clone parlance, CLONE_SYSVSEM 171 * means share undolist with parent, so we must forbid using 172 * it along with CLONE_NEWIPC. 173 */ 174 if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) == 175 (CLONE_NEWIPC | CLONE_SYSVSEM)) 176 return -EINVAL; 177 178 new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs); 179 if (IS_ERR(new_ns)) 180 return PTR_ERR(new_ns); 181 182 ret = timens_on_fork(new_ns, tsk); 183 if (ret) { 184 free_nsproxy(new_ns); 185 return ret; 186 } 187 188 tsk->nsproxy = new_ns; 189 return 0; 190 } 191 192 void free_nsproxy(struct nsproxy *ns) 193 { 194 if (ns->mnt_ns) 195 put_mnt_ns(ns->mnt_ns); 196 if (ns->uts_ns) 197 put_uts_ns(ns->uts_ns); 198 if (ns->ipc_ns) 199 put_ipc_ns(ns->ipc_ns); 200 if (ns->pid_ns_for_children) 201 put_pid_ns(ns->pid_ns_for_children); 202 if (ns->time_ns) 203 put_time_ns(ns->time_ns); 204 if (ns->time_ns_for_children) 205 put_time_ns(ns->time_ns_for_children); 206 put_cgroup_ns(ns->cgroup_ns); 207 put_net(ns->net_ns); 208 kmem_cache_free(nsproxy_cachep, ns); 209 } 210 211 /* 212 * Called from unshare. Unshare all the namespaces part of nsproxy. 213 * On success, returns the new nsproxy. 214 */ 215 int unshare_nsproxy_namespaces(unsigned long unshare_flags, 216 struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs) 217 { 218 struct user_namespace *user_ns; 219 int err = 0; 220 221 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 222 CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP | 223 CLONE_NEWTIME))) 224 return 0; 225 226 user_ns = new_cred ? new_cred->user_ns : current_user_ns(); 227 if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 228 return -EPERM; 229 230 *new_nsp = create_new_namespaces(unshare_flags, current, user_ns, 231 new_fs ? new_fs : current->fs); 232 if (IS_ERR(*new_nsp)) { 233 err = PTR_ERR(*new_nsp); 234 goto out; 235 } 236 237 out: 238 return err; 239 } 240 241 void switch_task_namespaces(struct task_struct *p, struct nsproxy *new) 242 { 243 struct nsproxy *ns; 244 245 might_sleep(); 246 247 task_lock(p); 248 ns = p->nsproxy; 249 p->nsproxy = new; 250 task_unlock(p); 251 252 if (ns && atomic_dec_and_test(&ns->count)) 253 free_nsproxy(ns); 254 } 255 256 void exit_task_namespaces(struct task_struct *p) 257 { 258 switch_task_namespaces(p, NULL); 259 } 260 261 static void put_nsset(struct nsset *nsset) 262 { 263 unsigned flags = nsset->flags; 264 265 if (flags & CLONE_NEWUSER) 266 put_cred(nsset_cred(nsset)); 267 if (nsset->nsproxy) 268 free_nsproxy(nsset->nsproxy); 269 } 270 271 static int prepare_nsset(int nstype, struct nsset *nsset) 272 { 273 struct task_struct *me = current; 274 275 nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs); 276 if (IS_ERR(nsset->nsproxy)) 277 return PTR_ERR(nsset->nsproxy); 278 279 if (nstype == CLONE_NEWUSER) 280 nsset->cred = prepare_creds(); 281 else 282 nsset->cred = current_cred(); 283 if (!nsset->cred) 284 goto out; 285 286 if (nstype == CLONE_NEWNS) 287 nsset->fs = me->fs; 288 289 nsset->flags = nstype; 290 return 0; 291 292 out: 293 put_nsset(nsset); 294 return -ENOMEM; 295 } 296 297 /* 298 * This is the point of no return. There are just a few namespaces 299 * that do some actual work here and it's sufficiently minimal that 300 * a separate ns_common operation seems unnecessary for now. 301 * Unshare is doing the same thing. If we'll end up needing to do 302 * more in a given namespace or a helper here is ultimately not 303 * exported anymore a simple commit handler for each namespace 304 * should be added to ns_common. 305 */ 306 static void commit_nsset(struct nsset *nsset) 307 { 308 unsigned flags = nsset->flags; 309 struct task_struct *me = current; 310 311 #ifdef CONFIG_USER_NS 312 if (flags & CLONE_NEWUSER) { 313 /* transfer ownership */ 314 commit_creds(nsset_cred(nsset)); 315 nsset->cred = NULL; 316 } 317 #endif 318 319 #ifdef CONFIG_IPC_NS 320 if (flags & CLONE_NEWIPC) 321 exit_sem(me); 322 #endif 323 324 /* transfer ownership */ 325 switch_task_namespaces(me, nsset->nsproxy); 326 nsset->nsproxy = NULL; 327 } 328 329 SYSCALL_DEFINE2(setns, int, fd, int, nstype) 330 { 331 struct file *file; 332 struct ns_common *ns; 333 struct nsset nsset = {}; 334 int err; 335 336 file = proc_ns_fget(fd); 337 if (IS_ERR(file)) 338 return PTR_ERR(file); 339 340 err = -EINVAL; 341 ns = get_proc_ns(file_inode(file)); 342 if (nstype && (ns->ops->type != nstype)) 343 goto out; 344 345 err = prepare_nsset(ns->ops->type, &nsset); 346 if (err) 347 goto out; 348 349 err = ns->ops->install(&nsset, ns); 350 if (!err) { 351 commit_nsset(&nsset); 352 perf_event_namespaces(current); 353 } 354 put_nsset(&nsset); 355 out: 356 fput(file); 357 return err; 358 } 359 360 int __init nsproxy_cache_init(void) 361 { 362 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); 363 return 0; 364 } 365