1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <linux/kernel.h>
7 #include <limits.h>
8 #include <stdbool.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <syscall.h>
13 #include <unistd.h>
14 #include <sys/resource.h>
15 #include <linux/close_range.h>
16 
17 #include "../kselftest_harness.h"
18 #include "../clone3/clone3_selftests.h"
19 
20 
21 #ifndef F_LINUX_SPECIFIC_BASE
22 #define F_LINUX_SPECIFIC_BASE 1024
23 #endif
24 
25 #ifndef F_DUPFD_QUERY
26 #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
27 #endif
28 
/*
 * Thin wrapper around the raw close_range system call.
 *
 * @fd:     first syscall argument, the lower end of the descriptor range
 * @max_fd: second syscall argument, the upper end of the descriptor range
 * @flags:  third syscall argument, CLOSE_RANGE_* flag bits (0 for none)
 *
 * Returns the value handed back by syscall(), narrowed to int.
 */
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
				  unsigned int flags)
{
	long rc = syscall(__NR_close_range, fd, max_fd, flags);

	return (int)rc;
}
34 
35 TEST(core_close_range)
36 {
37 	int i, ret;
38 	int open_fds[101];
39 
40 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
41 		int fd;
42 
43 		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
44 		ASSERT_GE(fd, 0) {
45 			if (errno == ENOENT)
46 				SKIP(return, "Skipping test since /dev/null does not exist");
47 		}
48 
49 		open_fds[i] = fd;
50 	}
51 
52 	EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
53 		if (errno == ENOSYS)
54 			SKIP(return, "close_range() syscall not supported");
55 	}
56 
57 	for (i = 0; i < 100; i++) {
58 		ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]);
59 		if (ret < 0) {
60 			EXPECT_EQ(errno, EINVAL);
61 		} else {
62 			EXPECT_EQ(ret, 0);
63 		}
64 	}
65 
66 	EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
67 
68 	for (i = 0; i <= 50; i++)
69 		EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
70 
71 	for (i = 51; i <= 100; i++)
72 		EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
73 
74 	/* create a couple of gaps */
75 	close(57);
76 	close(78);
77 	close(81);
78 	close(82);
79 	close(84);
80 	close(90);
81 
82 	EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0));
83 
84 	for (i = 51; i <= 92; i++)
85 		EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
86 
87 	for (i = 93; i <= 100; i++)
88 		EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
89 
90 	/* test that the kernel caps and still closes all fds */
91 	EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0));
92 
93 	for (i = 93; i <= 99; i++)
94 		EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
95 
96 	EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
97 
98 	EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0));
99 
100 	EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL));
101 }
102 
103 TEST(close_range_unshare)
104 {
105 	int i, ret, status;
106 	pid_t pid;
107 	int open_fds[101];
108 	struct __clone_args args = {
109 		.flags = CLONE_FILES,
110 		.exit_signal = SIGCHLD,
111 	};
112 
113 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
114 		int fd;
115 
116 		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
117 		ASSERT_GE(fd, 0) {
118 			if (errno == ENOENT)
119 				SKIP(return, "Skipping test since /dev/null does not exist");
120 		}
121 
122 		open_fds[i] = fd;
123 	}
124 
125 	pid = sys_clone3(&args, sizeof(args));
126 	ASSERT_GE(pid, 0);
127 
128 	if (pid == 0) {
129 		ret = sys_close_range(open_fds[0], open_fds[50],
130 				      CLOSE_RANGE_UNSHARE);
131 		if (ret)
132 			exit(EXIT_FAILURE);
133 
134 		for (i = 0; i <= 50; i++)
135 			if (fcntl(open_fds[i], F_GETFL) != -1)
136 				exit(EXIT_FAILURE);
137 
138 		for (i = 51; i <= 100; i++)
139 			if (fcntl(open_fds[i], F_GETFL) == -1)
140 				exit(EXIT_FAILURE);
141 
142 		/* create a couple of gaps */
143 		close(57);
144 		close(78);
145 		close(81);
146 		close(82);
147 		close(84);
148 		close(90);
149 
150 		ret = sys_close_range(open_fds[51], open_fds[92],
151 				      CLOSE_RANGE_UNSHARE);
152 		if (ret)
153 			exit(EXIT_FAILURE);
154 
155 		for (i = 51; i <= 92; i++)
156 			if (fcntl(open_fds[i], F_GETFL) != -1)
157 				exit(EXIT_FAILURE);
158 
159 		for (i = 93; i <= 100; i++)
160 			if (fcntl(open_fds[i], F_GETFL) == -1)
161 				exit(EXIT_FAILURE);
162 
163 		/* test that the kernel caps and still closes all fds */
164 		ret = sys_close_range(open_fds[93], open_fds[99],
165 				      CLOSE_RANGE_UNSHARE);
166 		if (ret)
167 			exit(EXIT_FAILURE);
168 
169 		for (i = 93; i <= 99; i++)
170 			if (fcntl(open_fds[i], F_GETFL) != -1)
171 				exit(EXIT_FAILURE);
172 
173 		if (fcntl(open_fds[100], F_GETFL) == -1)
174 			exit(EXIT_FAILURE);
175 
176 		ret = sys_close_range(open_fds[100], open_fds[100],
177 				      CLOSE_RANGE_UNSHARE);
178 		if (ret)
179 			exit(EXIT_FAILURE);
180 
181 		if (fcntl(open_fds[100], F_GETFL) != -1)
182 			exit(EXIT_FAILURE);
183 
184 		exit(EXIT_SUCCESS);
185 	}
186 
187 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
188 	EXPECT_EQ(true, WIFEXITED(status));
189 	EXPECT_EQ(0, WEXITSTATUS(status));
190 }
191 
192 TEST(close_range_unshare_capped)
193 {
194 	int i, ret, status;
195 	pid_t pid;
196 	int open_fds[101];
197 	struct __clone_args args = {
198 		.flags = CLONE_FILES,
199 		.exit_signal = SIGCHLD,
200 	};
201 
202 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
203 		int fd;
204 
205 		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
206 		ASSERT_GE(fd, 0) {
207 			if (errno == ENOENT)
208 				SKIP(return, "Skipping test since /dev/null does not exist");
209 		}
210 
211 		open_fds[i] = fd;
212 	}
213 
214 	pid = sys_clone3(&args, sizeof(args));
215 	ASSERT_GE(pid, 0);
216 
217 	if (pid == 0) {
218 		ret = sys_close_range(open_fds[0], UINT_MAX,
219 				      CLOSE_RANGE_UNSHARE);
220 		if (ret)
221 			exit(EXIT_FAILURE);
222 
223 		for (i = 0; i <= 100; i++)
224 			if (fcntl(open_fds[i], F_GETFL) != -1)
225 				exit(EXIT_FAILURE);
226 
227 		exit(EXIT_SUCCESS);
228 	}
229 
230 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
231 	EXPECT_EQ(true, WIFEXITED(status));
232 	EXPECT_EQ(0, WEXITSTATUS(status));
233 }
234 
235 TEST(close_range_cloexec)
236 {
237 	int i, ret;
238 	int open_fds[101];
239 	struct rlimit rlimit;
240 
241 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
242 		int fd;
243 
244 		fd = open("/dev/null", O_RDONLY);
245 		ASSERT_GE(fd, 0) {
246 			if (errno == ENOENT)
247 				SKIP(return, "Skipping test since /dev/null does not exist");
248 		}
249 
250 		open_fds[i] = fd;
251 	}
252 
253 	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
254 	if (ret < 0) {
255 		if (errno == ENOSYS)
256 			SKIP(return, "close_range() syscall not supported");
257 		if (errno == EINVAL)
258 			SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
259 	}
260 
261 	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
262 	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
263 	rlimit.rlim_cur = 25;
264 	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
265 
266 	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
267 	ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
268 	ASSERT_EQ(0, ret);
269 	ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
270 	ASSERT_EQ(0, ret);
271 
272 	for (i = 0; i <= 50; i++) {
273 		int flags = fcntl(open_fds[i], F_GETFD);
274 
275 		EXPECT_GT(flags, -1);
276 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
277 	}
278 
279 	for (i = 51; i <= 74; i++) {
280 		int flags = fcntl(open_fds[i], F_GETFD);
281 
282 		EXPECT_GT(flags, -1);
283 		EXPECT_EQ(flags & FD_CLOEXEC, 0);
284 	}
285 
286 	for (i = 75; i <= 100; i++) {
287 		int flags = fcntl(open_fds[i], F_GETFD);
288 
289 		EXPECT_GT(flags, -1);
290 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
291 	}
292 
293 	/* Test a common pattern.  */
294 	ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
295 	for (i = 0; i <= 100; i++) {
296 		int flags = fcntl(open_fds[i], F_GETFD);
297 
298 		EXPECT_GT(flags, -1);
299 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
300 	}
301 }
302 
303 TEST(close_range_cloexec_unshare)
304 {
305 	int i, ret;
306 	int open_fds[101];
307 	struct rlimit rlimit;
308 
309 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
310 		int fd;
311 
312 		fd = open("/dev/null", O_RDONLY);
313 		ASSERT_GE(fd, 0) {
314 			if (errno == ENOENT)
315 				SKIP(return, "Skipping test since /dev/null does not exist");
316 		}
317 
318 		open_fds[i] = fd;
319 	}
320 
321 	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
322 	if (ret < 0) {
323 		if (errno == ENOSYS)
324 			SKIP(return, "close_range() syscall not supported");
325 		if (errno == EINVAL)
326 			SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
327 	}
328 
329 	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
330 	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
331 	rlimit.rlim_cur = 25;
332 	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
333 
334 	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
335 	ret = sys_close_range(open_fds[0], open_fds[50],
336 			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
337 	ASSERT_EQ(0, ret);
338 	ret = sys_close_range(open_fds[75], open_fds[100],
339 			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
340 	ASSERT_EQ(0, ret);
341 
342 	for (i = 0; i <= 50; i++) {
343 		int flags = fcntl(open_fds[i], F_GETFD);
344 
345 		EXPECT_GT(flags, -1);
346 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
347 	}
348 
349 	for (i = 51; i <= 74; i++) {
350 		int flags = fcntl(open_fds[i], F_GETFD);
351 
352 		EXPECT_GT(flags, -1);
353 		EXPECT_EQ(flags & FD_CLOEXEC, 0);
354 	}
355 
356 	for (i = 75; i <= 100; i++) {
357 		int flags = fcntl(open_fds[i], F_GETFD);
358 
359 		EXPECT_GT(flags, -1);
360 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
361 	}
362 
363 	/* Test a common pattern.  */
364 	ret = sys_close_range(3, UINT_MAX,
365 			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
366 	for (i = 0; i <= 100; i++) {
367 		int flags = fcntl(open_fds[i], F_GETFD);
368 
369 		EXPECT_GT(flags, -1);
370 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
371 	}
372 }
373 
374 /*
375  * Regression test for [email protected]
376  */
377 TEST(close_range_cloexec_syzbot)
378 {
379 	int fd1, fd2, fd3, fd4, flags, ret, status;
380 	pid_t pid;
381 	struct __clone_args args = {
382 		.flags = CLONE_FILES,
383 		.exit_signal = SIGCHLD,
384 	};
385 
386 	/* Create a huge gap in the fd table. */
387 	fd1 = open("/dev/null", O_RDWR);
388 	EXPECT_GT(fd1, 0);
389 
390 	fd2 = dup2(fd1, 1000);
391 	EXPECT_GT(fd2, 0);
392 
393 	flags = fcntl(fd1, F_DUPFD_QUERY, fd2);
394 	if (flags < 0) {
395 		EXPECT_EQ(errno, EINVAL);
396 	} else {
397 		EXPECT_EQ(flags, 1);
398 	}
399 
400 	pid = sys_clone3(&args, sizeof(args));
401 	ASSERT_GE(pid, 0);
402 
403 	if (pid == 0) {
404 		ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
405 		if (ret)
406 			exit(EXIT_FAILURE);
407 
408 		/*
409 			 * We now have a private file descriptor table and all
410 			 * our open fds should still be open but made
411 			 * close-on-exec.
412 			 */
413 		flags = fcntl(fd1, F_GETFD);
414 		EXPECT_GT(flags, -1);
415 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
416 
417 		flags = fcntl(fd2, F_GETFD);
418 		EXPECT_GT(flags, -1);
419 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
420 
421 		fd3 = dup2(fd1, 42);
422 		EXPECT_GT(fd3, 0);
423 
424 		flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
425 		if (flags < 0) {
426 			EXPECT_EQ(errno, EINVAL);
427 		} else {
428 			EXPECT_EQ(flags, 1);
429 		}
430 
431 
432 
433 		/*
434 			 * Duplicating the file descriptor must remove the
435 			 * FD_CLOEXEC flag.
436 			 */
437 		flags = fcntl(fd3, F_GETFD);
438 		EXPECT_GT(flags, -1);
439 		EXPECT_EQ(flags & FD_CLOEXEC, 0);
440 
441 		exit(EXIT_SUCCESS);
442 	}
443 
444 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
445 	EXPECT_EQ(true, WIFEXITED(status));
446 	EXPECT_EQ(0, WEXITSTATUS(status));
447 
448 	/*
449 	 * We had a shared file descriptor table before along with requesting
450 	 * close-on-exec so the original fds must not be close-on-exec.
451 	 */
452 	flags = fcntl(fd1, F_GETFD);
453 	EXPECT_GT(flags, -1);
454 	EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
455 
456 	flags = fcntl(fd2, F_GETFD);
457 	EXPECT_GT(flags, -1);
458 	EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
459 
460 	fd3 = dup2(fd1, 42);
461 	EXPECT_GT(fd3, 0);
462 
463 	flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
464 	if (flags < 0) {
465 		EXPECT_EQ(errno, EINVAL);
466 	} else {
467 		EXPECT_EQ(flags, 1);
468 	}
469 
470 	fd4 = open("/dev/null", O_RDWR);
471 	EXPECT_GT(fd4, 0);
472 
473 	/* Same inode, different file pointers. */
474 	flags = fcntl(fd1, F_DUPFD_QUERY, fd4);
475 	if (flags < 0) {
476 		EXPECT_EQ(errno, EINVAL);
477 	} else {
478 		EXPECT_EQ(flags, 0);
479 	}
480 
481 	flags = fcntl(fd3, F_GETFD);
482 	EXPECT_GT(flags, -1);
483 	EXPECT_EQ(flags & FD_CLOEXEC, 0);
484 
485 	EXPECT_EQ(close(fd1), 0);
486 	EXPECT_EQ(close(fd2), 0);
487 	EXPECT_EQ(close(fd3), 0);
488 	EXPECT_EQ(close(fd4), 0);
489 }
490 
491 /*
492  * Regression test for [email protected]
493  */
494 TEST(close_range_cloexec_unshare_syzbot)
495 {
496 	int i, fd1, fd2, fd3, flags, ret, status;
497 	pid_t pid;
498 	struct __clone_args args = {
499 		.flags = CLONE_FILES,
500 		.exit_signal = SIGCHLD,
501 	};
502 
503 	/*
504 	 * Create a huge gap in the fd table. When we now call
505 	 * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper
506 	 * bound the kernel will only copy up to fd1 file descriptors into the
507 	 * new fd table. If the kernel is buggy and doesn't handle
508 	 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file
509 	 * descriptors and we will oops!
510 	 *
511 	 * On a buggy kernel this should immediately oops. But let's loop just
512 	 * to be sure.
513 	 */
514 	fd1 = open("/dev/null", O_RDWR);
515 	EXPECT_GT(fd1, 0);
516 
517 	fd2 = dup2(fd1, 1000);
518 	EXPECT_GT(fd2, 0);
519 
520 	for (i = 0; i < 100; i++) {
521 
522 		pid = sys_clone3(&args, sizeof(args));
523 		ASSERT_GE(pid, 0);
524 
525 		if (pid == 0) {
526 			ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE |
527 						      CLOSE_RANGE_CLOEXEC);
528 			if (ret)
529 				exit(EXIT_FAILURE);
530 
531 			/*
532 			 * We now have a private file descriptor table and all
533 			 * our open fds should still be open but made
534 			 * close-on-exec.
535 			 */
536 			flags = fcntl(fd1, F_GETFD);
537 			EXPECT_GT(flags, -1);
538 			EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
539 
540 			flags = fcntl(fd2, F_GETFD);
541 			EXPECT_GT(flags, -1);
542 			EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
543 
544 			fd3 = dup2(fd1, 42);
545 			EXPECT_GT(fd3, 0);
546 
547 			/*
548 			 * Duplicating the file descriptor must remove the
549 			 * FD_CLOEXEC flag.
550 			 */
551 			flags = fcntl(fd3, F_GETFD);
552 			EXPECT_GT(flags, -1);
553 			EXPECT_EQ(flags & FD_CLOEXEC, 0);
554 
555 			EXPECT_EQ(close(fd1), 0);
556 			EXPECT_EQ(close(fd2), 0);
557 			EXPECT_EQ(close(fd3), 0);
558 
559 			exit(EXIT_SUCCESS);
560 		}
561 
562 		EXPECT_EQ(waitpid(pid, &status, 0), pid);
563 		EXPECT_EQ(true, WIFEXITED(status));
564 		EXPECT_EQ(0, WEXITSTATUS(status));
565 	}
566 
567 	/*
568 	 * We created a private file descriptor table before along with
569 	 * requesting close-on-exec so the original fds must not be
570 	 * close-on-exec.
571 	 */
572 	flags = fcntl(fd1, F_GETFD);
573 	EXPECT_GT(flags, -1);
574 	EXPECT_EQ(flags & FD_CLOEXEC, 0);
575 
576 	flags = fcntl(fd2, F_GETFD);
577 	EXPECT_GT(flags, -1);
578 	EXPECT_EQ(flags & FD_CLOEXEC, 0);
579 
580 	fd3 = dup2(fd1, 42);
581 	EXPECT_GT(fd3, 0);
582 
583 	flags = fcntl(fd3, F_GETFD);
584 	EXPECT_GT(flags, -1);
585 	EXPECT_EQ(flags & FD_CLOEXEC, 0);
586 
587 	EXPECT_EQ(close(fd1), 0);
588 	EXPECT_EQ(close(fd2), 0);
589 	EXPECT_EQ(close(fd3), 0);
590 }
591 
592 TEST(close_range_bitmap_corruption)
593 {
594 	pid_t pid;
595 	int status;
596 	struct __clone_args args = {
597 		.flags = CLONE_FILES,
598 		.exit_signal = SIGCHLD,
599 	};
600 
601 	/* get the first 128 descriptors open */
602 	for (int i = 2; i < 128; i++)
603 		EXPECT_GE(dup2(0, i), 0);
604 
605 	/* get descriptor table shared */
606 	pid = sys_clone3(&args, sizeof(args));
607 	ASSERT_GE(pid, 0);
608 
609 	if (pid == 0) {
610 		/* unshare and truncate descriptor table down to 64 */
611 		if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE))
612 			exit(EXIT_FAILURE);
613 
614 		ASSERT_EQ(fcntl(64, F_GETFD), -1);
615 		/* ... and verify that the range 64..127 is not
616 		   stuck "fully used" according to secondary bitmap */
617 		EXPECT_EQ(dup(0), 64)
618 			exit(EXIT_FAILURE);
619 		exit(EXIT_SUCCESS);
620 	}
621 
622 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
623 	EXPECT_EQ(true, WIFEXITED(status));
624 	EXPECT_EQ(0, WEXITSTATUS(status));
625 }
626 
627 TEST_HARNESS_MAIN
628