xref: /linux-6.15/drivers/base/devtmpfs.c (revision 5e6ded2e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * devtmpfs - kernel-maintained tmpfs-based /dev
4  *
5  * Copyright (C) 2009, Kay Sievers <[email protected]>
6  *
7  * During bootup, before any driver core device is registered,
8  * devtmpfs, a tmpfs-based filesystem is created. Every driver-core
9  * device which requests a device node, will add a node in this
10  * filesystem.
11  * By default, all devices are named after the name of the device,
12  * owned by root and have a default mode of 0600. Subsystems can
13  * overwrite the default setting if needed.
14  */
15 
16 #include <linux/kernel.h>
17 #include <linux/syscalls.h>
18 #include <linux/mount.h>
19 #include <linux/device.h>
20 #include <linux/genhd.h>
21 #include <linux/namei.h>
22 #include <linux/fs.h>
23 #include <linux/shmem_fs.h>
24 #include <linux/ramfs.h>
25 #include <linux/sched.h>
26 #include <linux/slab.h>
27 #include <linux/kthread.h>
28 #include <linux/init_syscalls.h>
29 #include <uapi/linux/mount.h>
30 #include "base.h"
31 
/*
 * Mount flags passed to init_mount() for every devtmpfs mount issued from
 * this file (devtmpfs_setup() and devtmpfs_mount()).  With
 * CONFIG_DEVTMPFS_SAFE the mount is additionally MS_NOEXEC | MS_NOSUID.
 */
#ifdef CONFIG_DEVTMPFS_SAFE
#define DEVTMPFS_MFLAGS       (MS_SILENT | MS_NOEXEC | MS_NOSUID)
#else
#define DEVTMPFS_MFLAGS       (MS_SILENT)
#endif
37 
/*
 * The "kdevtmpfs" worker task started by devtmpfs_init(); NULL while no
 * worker is available.  The address of this variable is also stored in
 * inode->i_private as the tag for inodes created by this file.
 */
static struct task_struct *thread;

/*
 * Non-zero if devtmpfs_mount() should mount devtmpfs.  Defaults to
 * CONFIG_DEVTMPFS_MOUNT and can be replaced with "devtmpfs.mount=" on the
 * kernel command line (see mount_param()).
 */
static int __initdata mount_dev = IS_ENABLED(CONFIG_DEVTMPFS_MOUNT);

/* Serializes access to the pending request list ("requests"). */
static DEFINE_SPINLOCK(req_lock);
43 
/*
 * One node creation/removal request handed to the worker thread.
 *
 * Requests live on the submitter's stack (see devtmpfs_create_node() and
 * devtmpfs_delete_node()); the submitter blocks on ->done until the worker
 * has filled in ->err.  "requests" is the head of the pending list; new
 * entries are pushed at the head under req_lock.
 */
static struct req {
	struct req *next;	/* next pending request, NULL at the tail */
	struct completion done;	/* completed by the worker after handle() */
	int err;		/* result of handle() for this request */
	const char *name;	/* node path passed to handle() */
	umode_t mode;	/* 0 => delete */
	kuid_t uid;		/* owner applied to a created node */
	kgid_t gid;		/* group applied to a created node */
	struct device *dev;	/* device the node belongs to */
} *requests;
54 
55 static int __init mount_param(char *str)
56 {
57 	mount_dev = simple_strtoul(str, NULL, 0);
58 	return 1;
59 }
60 __setup("devtmpfs.mount=", mount_param);
61 
/*
 * Kernel-internal devtmpfs instance created by devtmpfs_init();
 * public_dev_mount() hands out the superblock of this mount.
 */
static struct vfsmount *mnt;
63 
64 static struct dentry *public_dev_mount(struct file_system_type *fs_type, int flags,
65 		      const char *dev_name, void *data)
66 {
67 	struct super_block *s = mnt->mnt_sb;
68 	atomic_inc(&s->s_active);
69 	down_write(&s->s_umount);
70 	return dget(s->s_root);
71 }
72 
/*
 * Filesystem type behind the kernel-internal instance that devtmpfs_init()
 * creates with vfs_kern_mount().  It is backed by shmem when CONFIG_TMPFS
 * is enabled and by ramfs otherwise.  devtmpfs_init() does not pass this
 * type to register_filesystem(); only dev_fs_type is registered there.
 */
static struct file_system_type internal_fs_type = {
	.name = "devtmpfs",
#ifdef CONFIG_TMPFS
	.init_fs_context = shmem_init_fs_context,
	.parameters	= shmem_fs_parameters,
#else
	.init_fs_context = ramfs_init_fs_context,
	.parameters	= ramfs_fs_parameters,
#endif
	.kill_sb = kill_litter_super,
};
84 
/*
 * The "devtmpfs" type that devtmpfs_init() registers with
 * register_filesystem().  Its ->mount callback (public_dev_mount) returns
 * the root of the already existing internal instance.
 */
static struct file_system_type dev_fs_type = {
	.name = "devtmpfs",
	.mount = public_dev_mount,
};
89 
/*
 * Report whether @dev belongs to the block class.
 *
 * Returns non-zero if dev->class is block_class, 0 otherwise.  Without
 * CONFIG_BLOCK the answer is always 0.
 */
static inline int is_blockdev(struct device *dev)
{
#ifdef CONFIG_BLOCK
	return dev->class == &block_class;
#else
	return 0;
#endif
}
98 
99 static int devtmpfs_submit_req(struct req *req, const char *tmp)
100 {
101 	init_completion(&req->done);
102 
103 	spin_lock(&req_lock);
104 	req->next = requests;
105 	requests = req;
106 	spin_unlock(&req_lock);
107 
108 	wake_up_process(thread);
109 	wait_for_completion(&req->done);
110 
111 	kfree(tmp);
112 
113 	return req->err;
114 }
115 
116 int devtmpfs_create_node(struct device *dev)
117 {
118 	const char *tmp = NULL;
119 	struct req req;
120 
121 	if (!thread)
122 		return 0;
123 
124 	req.mode = 0;
125 	req.uid = GLOBAL_ROOT_UID;
126 	req.gid = GLOBAL_ROOT_GID;
127 	req.name = device_get_devnode(dev, &req.mode, &req.uid, &req.gid, &tmp);
128 	if (!req.name)
129 		return -ENOMEM;
130 
131 	if (req.mode == 0)
132 		req.mode = 0600;
133 	if (is_blockdev(dev))
134 		req.mode |= S_IFBLK;
135 	else
136 		req.mode |= S_IFCHR;
137 
138 	req.dev = dev;
139 
140 	return devtmpfs_submit_req(&req, tmp);
141 }
142 
143 int devtmpfs_delete_node(struct device *dev)
144 {
145 	const char *tmp = NULL;
146 	struct req req;
147 
148 	if (!thread)
149 		return 0;
150 
151 	req.name = device_get_devnode(dev, NULL, NULL, NULL, &tmp);
152 	if (!req.name)
153 		return -ENOMEM;
154 
155 	req.mode = 0;
156 	req.dev = dev;
157 
158 	return devtmpfs_submit_req(&req, tmp);
159 }
160 
161 static int dev_mkdir(const char *name, umode_t mode)
162 {
163 	struct dentry *dentry;
164 	struct path path;
165 	int err;
166 
167 	dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
168 	if (IS_ERR(dentry))
169 		return PTR_ERR(dentry);
170 
171 	err = vfs_mkdir(&init_user_ns, d_inode(path.dentry), dentry, mode);
172 	if (!err)
173 		/* mark as kernel-created inode */
174 		d_inode(dentry)->i_private = &thread;
175 	done_path_create(&path, dentry);
176 	return err;
177 }
178 
179 static int create_path(const char *nodepath)
180 {
181 	char *path;
182 	char *s;
183 	int err = 0;
184 
185 	/* parent directories do not exist, create them */
186 	path = kstrdup(nodepath, GFP_KERNEL);
187 	if (!path)
188 		return -ENOMEM;
189 
190 	s = path;
191 	for (;;) {
192 		s = strchr(s, '/');
193 		if (!s)
194 			break;
195 		s[0] = '\0';
196 		err = dev_mkdir(path, 0755);
197 		if (err && err != -EEXIST)
198 			break;
199 		s[0] = '/';
200 		s++;
201 	}
202 	kfree(path);
203 	return err;
204 }
205 
206 static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
207 			 kgid_t gid, struct device *dev)
208 {
209 	struct dentry *dentry;
210 	struct path path;
211 	int err;
212 
213 	dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);
214 	if (dentry == ERR_PTR(-ENOENT)) {
215 		create_path(nodename);
216 		dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);
217 	}
218 	if (IS_ERR(dentry))
219 		return PTR_ERR(dentry);
220 
221 	err = vfs_mknod(&init_user_ns, d_inode(path.dentry), dentry, mode,
222 			dev->devt);
223 	if (!err) {
224 		struct iattr newattrs;
225 
226 		newattrs.ia_mode = mode;
227 		newattrs.ia_uid = uid;
228 		newattrs.ia_gid = gid;
229 		newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
230 		inode_lock(d_inode(dentry));
231 		notify_change(&init_user_ns, dentry, &newattrs, NULL);
232 		inode_unlock(d_inode(dentry));
233 
234 		/* mark as kernel-created inode */
235 		d_inode(dentry)->i_private = &thread;
236 	}
237 	done_path_create(&path, dentry);
238 	return err;
239 }
240 
241 static int dev_rmdir(const char *name)
242 {
243 	struct path parent;
244 	struct dentry *dentry;
245 	int err;
246 
247 	dentry = kern_path_locked(name, &parent);
248 	if (IS_ERR(dentry))
249 		return PTR_ERR(dentry);
250 	if (d_really_is_positive(dentry)) {
251 		if (d_inode(dentry)->i_private == &thread)
252 			err = vfs_rmdir(&init_user_ns, d_inode(parent.dentry),
253 					dentry);
254 		else
255 			err = -EPERM;
256 	} else {
257 		err = -ENOENT;
258 	}
259 	dput(dentry);
260 	inode_unlock(d_inode(parent.dentry));
261 	path_put(&parent);
262 	return err;
263 }
264 
265 static int delete_path(const char *nodepath)
266 {
267 	char *path;
268 	int err = 0;
269 
270 	path = kstrdup(nodepath, GFP_KERNEL);
271 	if (!path)
272 		return -ENOMEM;
273 
274 	for (;;) {
275 		char *base;
276 
277 		base = strrchr(path, '/');
278 		if (!base)
279 			break;
280 		base[0] = '\0';
281 		err = dev_rmdir(path);
282 		if (err)
283 			break;
284 	}
285 
286 	kfree(path);
287 	return err;
288 }
289 
290 static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat)
291 {
292 	/* did we create it */
293 	if (inode->i_private != &thread)
294 		return 0;
295 
296 	/* does the dev_t match */
297 	if (is_blockdev(dev)) {
298 		if (!S_ISBLK(stat->mode))
299 			return 0;
300 	} else {
301 		if (!S_ISCHR(stat->mode))
302 			return 0;
303 	}
304 	if (stat->rdev != dev->devt)
305 		return 0;
306 
307 	/* ours */
308 	return 1;
309 }
310 
/*
 * Worker-side implementation of a remove request.
 *
 * @nodename: path of the node to remove
 * @dev:      device the node is expected to belong to
 *
 * The node is only unlinked if dev_mynode() confirms that it was created
 * here for @dev.  After a successful unlink of a node that lives in a
 * subdirectory, the now possibly empty parent directories are removed
 * as well (result of delete_path() is ignored).
 *
 * Returns the error from kern_path_locked() if the lookup fails, -ENOENT
 * if nothing exists at @nodename, otherwise the result of vfs_getattr()
 * or vfs_unlink().  Note that a node which exists but is not ours is left
 * in place and yields 0.
 */
static int handle_remove(const char *nodename, struct device *dev)
{
	struct path parent;
	struct dentry *dentry;
	int deleted = 0;
	int err;

	/* on success the parent directory inode is locked until inode_unlock() below */
	dentry = kern_path_locked(nodename, &parent);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	if (d_really_is_positive(dentry)) {
		struct kstat stat;
		struct path p = {.mnt = parent.mnt, .dentry = dentry};
		err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE,
				  AT_STATX_SYNC_AS_STAT);
		if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
			struct iattr newattrs;
			/*
			 * before unlinking this node, reset permissions
			 * of possible references like hardlinks
			 */
			newattrs.ia_uid = GLOBAL_ROOT_UID;
			newattrs.ia_gid = GLOBAL_ROOT_GID;
			/* keep the file type bits, drop all rwx permission bits */
			newattrs.ia_mode = stat.mode & ~0777;
			newattrs.ia_valid =
				ATTR_UID|ATTR_GID|ATTR_MODE;
			inode_lock(d_inode(dentry));
			/* result intentionally not checked; unlink proceeds regardless */
			notify_change(&init_user_ns, dentry, &newattrs, NULL);
			inode_unlock(d_inode(dentry));
			err = vfs_unlink(&init_user_ns, d_inode(parent.dentry),
					 dentry, NULL);
			/* -ENOENT from unlink is treated as "node is gone" too */
			if (!err || err == -ENOENT)
				deleted = 1;
		}
	} else {
		err = -ENOENT;
	}
	dput(dentry);
	inode_unlock(d_inode(parent.dentry));

	path_put(&parent);
	/* only nodes inside a subdirectory have parent directories to clean up */
	if (deleted && strchr(nodename, '/'))
		delete_path(nodename);
	return err;
}
357 
358 /*
359  * If configured, or requested by the commandline, devtmpfs will be
360  * auto-mounted after the kernel mounted the root filesystem.
361  */
362 int __init devtmpfs_mount(void)
363 {
364 	int err;
365 
366 	if (!mount_dev)
367 		return 0;
368 
369 	if (!thread)
370 		return 0;
371 
372 	err = init_mount("devtmpfs", "dev", "devtmpfs", DEVTMPFS_MFLAGS, NULL);
373 	if (err)
374 		printk(KERN_INFO "devtmpfs: error mounting %i\n", err);
375 	else
376 		printk(KERN_INFO "devtmpfs: mounted\n");
377 	return err;
378 }
379 
/*
 * Completed by devtmpfsd() once devtmpfs_setup() has returned;
 * devtmpfs_init() waits on it before looking at the setup result.
 */
static __initdata DECLARE_COMPLETION(setup_done);
381 
382 static int handle(const char *name, umode_t mode, kuid_t uid, kgid_t gid,
383 		  struct device *dev)
384 {
385 	if (mode)
386 		return handle_create(name, mode, uid, gid, dev);
387 	else
388 		return handle_remove(name, dev);
389 }
390 
/*
 * Main loop of the worker thread; never returns.
 *
 * Repeatedly detaches the whole pending list under req_lock, processes the
 * detached requests without the lock held, and goes to sleep once the list
 * is found empty.
 */
static void __noreturn devtmpfs_work_loop(void)
{
	while (1) {
		spin_lock(&req_lock);
		while (requests) {
			/* take over the entire list so the lock can be dropped */
			struct req *req = requests;
			requests = NULL;
			spin_unlock(&req_lock);
			while (req) {
				/*
				 * Read ->next first: the request lives on the
				 * submitter's stack and the submitter may return
				 * as soon as complete() has been called.
				 */
				struct req *next = req->next;
				req->err = handle(req->name, req->mode,
						  req->uid, req->gid, req->dev);
				complete(&req->done);
				req = next;
			}
			/* re-check for requests queued while we were busy */
			spin_lock(&req_lock);
		}
		/*
		 * The state is changed while req_lock is still held, so a
		 * submitter that queues a request after this point calls
		 * wake_up_process() on a task already marked as sleeping.
		 */
		__set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock(&req_lock);
		schedule();
	}
}
413 
414 static noinline int __init devtmpfs_setup(void *p)
415 {
416 	int err;
417 
418 	err = ksys_unshare(CLONE_NEWNS);
419 	if (err)
420 		goto out;
421 	err = init_mount("devtmpfs", "/", "devtmpfs", DEVTMPFS_MFLAGS, NULL);
422 	if (err)
423 		goto out;
424 	init_chdir("/.."); /* will traverse into overmounted root */
425 	init_chroot(".");
426 out:
427 	*(int *)p = err;
428 	return err;
429 }
430 
431 /*
432  * The __ref is because devtmpfs_setup needs to be __init for the routines it
433  * calls.  That call is done while devtmpfs_init, which is marked __init,
434  * synchronously waits for it to complete.
435  */
436 static int __ref devtmpfsd(void *p)
437 {
438 	int err = devtmpfs_setup(p);
439 
440 	complete(&setup_done);
441 	if (err)
442 		return err;
443 	devtmpfs_work_loop();
444 	return 0;
445 }
446 
447 /*
448  * Create devtmpfs instance, driver-core devices will add their device
449  * nodes here.
450  */
451 int __init devtmpfs_init(void)
452 {
453 	char opts[] = "mode=0755";
454 	int err;
455 
456 	mnt = vfs_kern_mount(&internal_fs_type, 0, "devtmpfs", opts);
457 	if (IS_ERR(mnt)) {
458 		printk(KERN_ERR "devtmpfs: unable to create devtmpfs %ld\n",
459 				PTR_ERR(mnt));
460 		return PTR_ERR(mnt);
461 	}
462 	err = register_filesystem(&dev_fs_type);
463 	if (err) {
464 		printk(KERN_ERR "devtmpfs: unable to register devtmpfs "
465 		       "type %i\n", err);
466 		return err;
467 	}
468 
469 	thread = kthread_run(devtmpfsd, &err, "kdevtmpfs");
470 	if (!IS_ERR(thread)) {
471 		wait_for_completion(&setup_done);
472 	} else {
473 		err = PTR_ERR(thread);
474 		thread = NULL;
475 	}
476 
477 	if (err) {
478 		printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err);
479 		unregister_filesystem(&dev_fs_type);
480 		return err;
481 	}
482 
483 	printk(KERN_INFO "devtmpfs: initialized\n");
484 	return 0;
485 }
486