1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 #include <errno.h> 5 #include <fcntl.h> 6 #include <linux/kernel.h> 7 #include <limits.h> 8 #include <stdbool.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <syscall.h> 13 #include <unistd.h> 14 #include <sys/resource.h> 15 16 #include "../kselftest_harness.h" 17 #include "../clone3/clone3_selftests.h" 18 19 #ifndef __NR_close_range 20 #if defined __alpha__ 21 #define __NR_close_range 546 22 #elif defined _MIPS_SIM 23 #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ 24 #define __NR_close_range (436 + 4000) 25 #endif 26 #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ 27 #define __NR_close_range (436 + 6000) 28 #endif 29 #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ 30 #define __NR_close_range (436 + 5000) 31 #endif 32 #elif defined __ia64__ 33 #define __NR_close_range (436 + 1024) 34 #else 35 #define __NR_close_range 436 36 #endif 37 #endif 38 39 #ifndef CLOSE_RANGE_UNSHARE 40 #define CLOSE_RANGE_UNSHARE (1U << 1) 41 #endif 42 43 #ifndef CLOSE_RANGE_CLOEXEC 44 #define CLOSE_RANGE_CLOEXEC (1U << 2) 45 #endif 46 47 static inline int sys_close_range(unsigned int fd, unsigned int max_fd, 48 unsigned int flags) 49 { 50 return syscall(__NR_close_range, fd, max_fd, flags); 51 } 52 53 #ifndef ARRAY_SIZE 54 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 55 #endif 56 57 TEST(close_range) 58 { 59 int i, ret; 60 int open_fds[101]; 61 62 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 63 int fd; 64 65 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 66 ASSERT_GE(fd, 0) { 67 if (errno == ENOENT) 68 SKIP(return, "Skipping test since /dev/null does not exist"); 69 } 70 71 open_fds[i] = fd; 72 } 73 74 EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { 75 if (errno == ENOSYS) 76 SKIP(return, "close_range() syscall not supported"); 77 } 78 79 EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); 80 81 for (i = 0; i <= 50; i++) 82 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 83 84 for (i = 51; i <= 100; i++) 85 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 86 87 /* create a couple of gaps */ 88 close(57); 89 close(78); 90 close(81); 91 close(82); 92 close(84); 93 close(90); 94 95 EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0)); 96 97 for (i = 51; i <= 92; i++) 98 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 99 100 for (i = 93; i <= 100; i++) 101 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 102 103 /* test that the kernel caps and still closes all fds */ 104 EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0)); 105 106 for (i = 93; i <= 99; i++) 107 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 108 109 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 110 111 EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0)); 112 113 EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL)); 114 } 115 116 TEST(close_range_unshare) 117 { 118 int i, ret, status; 119 pid_t pid; 120 int open_fds[101]; 121 struct __clone_args args = { 122 .flags = CLONE_FILES, 123 .exit_signal = SIGCHLD, 124 }; 125 126 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 127 int fd; 128 129 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 130 ASSERT_GE(fd, 0) { 131 if (errno == ENOENT) 132 SKIP(return, "Skipping test since /dev/null does not exist"); 133 } 134 135 open_fds[i] = fd; 136 } 137 138 pid = sys_clone3(&args, sizeof(args)); 139 ASSERT_GE(pid, 0); 140 141 if (pid == 0) { 142 ret = sys_close_range(open_fds[0], open_fds[50], 143 CLOSE_RANGE_UNSHARE); 144 if (ret) 145 exit(EXIT_FAILURE); 146 147 for (i = 0; i <= 50; i++) 148 if (fcntl(open_fds[i], F_GETFL) != -1) 149 exit(EXIT_FAILURE); 150 151 for (i = 51; i <= 100; i++) 152 if (fcntl(open_fds[i], F_GETFL) == -1) 153 exit(EXIT_FAILURE); 154 155 /* create a couple of gaps */ 156 close(57); 157 close(78); 158 close(81); 159 close(82); 160 close(84); 161 close(90); 162 163 ret = sys_close_range(open_fds[51], open_fds[92], 164 CLOSE_RANGE_UNSHARE); 165 if (ret) 166 exit(EXIT_FAILURE); 167 168 for (i = 51; i <= 92; i++) 169 if (fcntl(open_fds[i], F_GETFL) != -1) 170 exit(EXIT_FAILURE); 171 172 for (i = 93; i <= 100; i++) 173 if (fcntl(open_fds[i], F_GETFL) == -1) 174 exit(EXIT_FAILURE); 175 176 /* test that the kernel caps and still closes all fds */ 177 ret = sys_close_range(open_fds[93], open_fds[99], 178 CLOSE_RANGE_UNSHARE); 179 if (ret) 180 exit(EXIT_FAILURE); 181 182 for (i = 93; i <= 99; i++) 183 if (fcntl(open_fds[i], F_GETFL) != -1) 184 exit(EXIT_FAILURE); 185 186 if (fcntl(open_fds[100], F_GETFL) == -1) 187 exit(EXIT_FAILURE); 188 189 ret = sys_close_range(open_fds[100], open_fds[100], 190 CLOSE_RANGE_UNSHARE); 191 if (ret) 192 exit(EXIT_FAILURE); 193 194 if (fcntl(open_fds[100], F_GETFL) != -1) 195 exit(EXIT_FAILURE); 196 197 exit(EXIT_SUCCESS); 198 } 199 200 EXPECT_EQ(waitpid(pid, &status, 0), pid); 201 EXPECT_EQ(true, WIFEXITED(status)); 202 EXPECT_EQ(0, WEXITSTATUS(status)); 203 } 204 205 TEST(close_range_unshare_capped) 206 { 207 int i, ret, status; 208 pid_t pid; 209 int open_fds[101]; 210 struct __clone_args args = { 211 .flags = CLONE_FILES, 212 .exit_signal = SIGCHLD, 213 }; 214 215 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 216 int fd; 217 218 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 219 ASSERT_GE(fd, 0) { 220 if (errno == ENOENT) 221 SKIP(return, "Skipping test since /dev/null does not exist"); 222 } 223 224 open_fds[i] = fd; 225 } 226 227 pid = sys_clone3(&args, sizeof(args)); 228 ASSERT_GE(pid, 0); 229 230 if (pid == 0) { 231 ret = sys_close_range(open_fds[0], UINT_MAX, 232 CLOSE_RANGE_UNSHARE); 233 if (ret) 234 exit(EXIT_FAILURE); 235 236 for (i = 0; i <= 100; i++) 237 if (fcntl(open_fds[i], F_GETFL) != -1) 238 exit(EXIT_FAILURE); 239 240 exit(EXIT_SUCCESS); 241 } 242 243 EXPECT_EQ(waitpid(pid, &status, 0), pid); 244 EXPECT_EQ(true, WIFEXITED(status)); 245 EXPECT_EQ(0, WEXITSTATUS(status)); 246 } 247 248 TEST(close_range_cloexec) 249 { 250 int i, ret; 251 int open_fds[101]; 252 struct rlimit rlimit; 253 254 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 255 int fd; 256 257 fd = open("/dev/null", O_RDONLY); 258 ASSERT_GE(fd, 0) { 259 if (errno == ENOENT) 260 SKIP(return, "Skipping test since /dev/null does not exist"); 261 } 262 263 open_fds[i] = fd; 264 } 265 266 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); 267 if (ret < 0) { 268 if (errno == ENOSYS) 269 SKIP(return, "close_range() syscall not supported"); 270 if (errno == EINVAL) 271 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); 272 } 273 274 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ 275 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); 276 rlimit.rlim_cur = 25; 277 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)); 278 279 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */ 280 ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC); 281 ASSERT_EQ(0, ret); 282 ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC); 283 ASSERT_EQ(0, ret); 284 285 for (i = 0; i <= 50; i++) { 286 int flags = fcntl(open_fds[i], F_GETFD); 287 288 EXPECT_GT(flags, -1); 289 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 290 } 291 292 for (i = 51; i <= 74; i++) { 293 int flags = fcntl(open_fds[i], F_GETFD); 294 295 EXPECT_GT(flags, -1); 296 EXPECT_EQ(flags & FD_CLOEXEC, 0); 297 } 298 299 for (i = 75; i <= 100; i++) { 300 int flags = fcntl(open_fds[i], F_GETFD); 301 302 EXPECT_GT(flags, -1); 303 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 304 } 305 306 /* Test a common pattern. */ 307 ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC); 308 for (i = 0; i <= 100; i++) { 309 int flags = fcntl(open_fds[i], F_GETFD); 310 311 EXPECT_GT(flags, -1); 312 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 313 } 314 } 315 316 TEST(close_range_cloexec_unshare) 317 { 318 int i, ret; 319 int open_fds[101]; 320 struct rlimit rlimit; 321 322 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 323 int fd; 324 325 fd = open("/dev/null", O_RDONLY); 326 ASSERT_GE(fd, 0) { 327 if (errno == ENOENT) 328 SKIP(return, "Skipping test since /dev/null does not exist"); 329 } 330 331 open_fds[i] = fd; 332 } 333 334 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); 335 if (ret < 0) { 336 if (errno == ENOSYS) 337 SKIP(return, "close_range() syscall not supported"); 338 if (errno == EINVAL) 339 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); 340 } 341 342 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ 343 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); 344 rlimit.rlim_cur = 25; 345 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)); 346 347 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */ 348 ret = sys_close_range(open_fds[0], open_fds[50], 349 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); 350 ASSERT_EQ(0, ret); 351 ret = sys_close_range(open_fds[75], open_fds[100], 352 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); 353 ASSERT_EQ(0, ret); 354 355 for (i = 0; i <= 50; i++) { 356 int flags = fcntl(open_fds[i], F_GETFD); 357 358 EXPECT_GT(flags, -1); 359 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 360 } 361 362 for (i = 51; i <= 74; i++) { 363 int flags = fcntl(open_fds[i], F_GETFD); 364 365 EXPECT_GT(flags, -1); 366 EXPECT_EQ(flags & FD_CLOEXEC, 0); 367 } 368 369 for (i = 75; i <= 100; i++) { 370 int flags = fcntl(open_fds[i], F_GETFD); 371 372 EXPECT_GT(flags, -1); 373 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 374 } 375 376 /* Test a common pattern. */ 377 ret = sys_close_range(3, UINT_MAX, 378 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); 379 for (i = 0; i <= 100; i++) { 380 int flags = fcntl(open_fds[i], F_GETFD); 381 382 EXPECT_GT(flags, -1); 383 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 384 } 385 } 386 387 /* 388 * Regression test for [email protected] 389 */ 390 TEST(close_range_cloexec_syzbot) 391 { 392 int fd1, fd2, fd3, flags, ret, status; 393 pid_t pid; 394 struct __clone_args args = { 395 .flags = CLONE_FILES, 396 .exit_signal = SIGCHLD, 397 }; 398 399 /* Create a huge gap in the fd table. */ 400 fd1 = open("/dev/null", O_RDWR); 401 EXPECT_GT(fd1, 0); 402 403 fd2 = dup2(fd1, 1000); 404 EXPECT_GT(fd2, 0); 405 406 pid = sys_clone3(&args, sizeof(args)); 407 ASSERT_GE(pid, 0); 408 409 if (pid == 0) { 410 ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC); 411 if (ret) 412 exit(EXIT_FAILURE); 413 414 /* 415 * We now have a private file descriptor table and all 416 * our open fds should still be open but made 417 * close-on-exec. 418 */ 419 flags = fcntl(fd1, F_GETFD); 420 EXPECT_GT(flags, -1); 421 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 422 423 flags = fcntl(fd2, F_GETFD); 424 EXPECT_GT(flags, -1); 425 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 426 427 fd3 = dup2(fd1, 42); 428 EXPECT_GT(fd3, 0); 429 430 /* 431 * Duplicating the file descriptor must remove the 432 * FD_CLOEXEC flag. 433 */ 434 flags = fcntl(fd3, F_GETFD); 435 EXPECT_GT(flags, -1); 436 EXPECT_EQ(flags & FD_CLOEXEC, 0); 437 438 exit(EXIT_SUCCESS); 439 } 440 441 EXPECT_EQ(waitpid(pid, &status, 0), pid); 442 EXPECT_EQ(true, WIFEXITED(status)); 443 EXPECT_EQ(0, WEXITSTATUS(status)); 444 445 /* 446 * We had a shared file descriptor table before along with requesting 447 * close-on-exec so the original fds must not be close-on-exec. 448 */ 449 flags = fcntl(fd1, F_GETFD); 450 EXPECT_GT(flags, -1); 451 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 452 453 flags = fcntl(fd2, F_GETFD); 454 EXPECT_GT(flags, -1); 455 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 456 457 fd3 = dup2(fd1, 42); 458 EXPECT_GT(fd3, 0); 459 460 flags = fcntl(fd3, F_GETFD); 461 EXPECT_GT(flags, -1); 462 EXPECT_EQ(flags & FD_CLOEXEC, 0); 463 464 EXPECT_EQ(close(fd1), 0); 465 EXPECT_EQ(close(fd2), 0); 466 EXPECT_EQ(close(fd3), 0); 467 } 468 469 /* 470 * Regression test for [email protected] 471 */ 472 TEST(close_range_cloexec_unshare_syzbot) 473 { 474 int i, fd1, fd2, fd3, flags, ret, status; 475 pid_t pid; 476 struct __clone_args args = { 477 .flags = CLONE_FILES, 478 .exit_signal = SIGCHLD, 479 }; 480 481 /* 482 * Create a huge gap in the fd table. When we now call 483 * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper 484 * bound the kernel will only copy up to fd1 file descriptors into the 485 * new fd table. If the kernel is buggy and doesn't handle 486 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file 487 * descriptors and we will oops! 488 * 489 * On a buggy kernel this should immediately oops. But let's loop just 490 * to be sure. 491 */ 492 fd1 = open("/dev/null", O_RDWR); 493 EXPECT_GT(fd1, 0); 494 495 fd2 = dup2(fd1, 1000); 496 EXPECT_GT(fd2, 0); 497 498 for (i = 0; i < 100; i++) { 499 500 pid = sys_clone3(&args, sizeof(args)); 501 ASSERT_GE(pid, 0); 502 503 if (pid == 0) { 504 ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE | 505 CLOSE_RANGE_CLOEXEC); 506 if (ret) 507 exit(EXIT_FAILURE); 508 509 /* 510 * We now have a private file descriptor table and all 511 * our open fds should still be open but made 512 * close-on-exec. 513 */ 514 flags = fcntl(fd1, F_GETFD); 515 EXPECT_GT(flags, -1); 516 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 517 518 flags = fcntl(fd2, F_GETFD); 519 EXPECT_GT(flags, -1); 520 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 521 522 fd3 = dup2(fd1, 42); 523 EXPECT_GT(fd3, 0); 524 525 /* 526 * Duplicating the file descriptor must remove the 527 * FD_CLOEXEC flag. 528 */ 529 flags = fcntl(fd3, F_GETFD); 530 EXPECT_GT(flags, -1); 531 EXPECT_EQ(flags & FD_CLOEXEC, 0); 532 533 EXPECT_EQ(close(fd1), 0); 534 EXPECT_EQ(close(fd2), 0); 535 EXPECT_EQ(close(fd3), 0); 536 537 exit(EXIT_SUCCESS); 538 } 539 540 EXPECT_EQ(waitpid(pid, &status, 0), pid); 541 EXPECT_EQ(true, WIFEXITED(status)); 542 EXPECT_EQ(0, WEXITSTATUS(status)); 543 } 544 545 /* 546 * We created a private file descriptor table before along with 547 * requesting close-on-exec so the original fds must not be 548 * close-on-exec. 549 */ 550 flags = fcntl(fd1, F_GETFD); 551 EXPECT_GT(flags, -1); 552 EXPECT_EQ(flags & FD_CLOEXEC, 0); 553 554 flags = fcntl(fd2, F_GETFD); 555 EXPECT_GT(flags, -1); 556 EXPECT_EQ(flags & FD_CLOEXEC, 0); 557 558 fd3 = dup2(fd1, 42); 559 EXPECT_GT(fd3, 0); 560 561 flags = fcntl(fd3, F_GETFD); 562 EXPECT_GT(flags, -1); 563 EXPECT_EQ(flags & FD_CLOEXEC, 0); 564 565 EXPECT_EQ(close(fd1), 0); 566 EXPECT_EQ(close(fd2), 0); 567 EXPECT_EQ(close(fd3), 0); 568 } 569 570 TEST_HARNESS_MAIN 571