1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 #include <errno.h> 5 #include <fcntl.h> 6 #include <linux/kernel.h> 7 #include <limits.h> 8 #include <stdbool.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <syscall.h> 13 #include <unistd.h> 14 #include <sys/resource.h> 15 16 #include "../kselftest_harness.h" 17 #include "../clone3/clone3_selftests.h" 18 19 static inline int sys_close_range(unsigned int fd, unsigned int max_fd, 20 unsigned int flags) 21 { 22 return syscall(__NR_close_range, fd, max_fd, flags); 23 } 24 25 TEST(core_close_range) 26 { 27 int i, ret; 28 int open_fds[101]; 29 30 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 31 int fd; 32 33 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 34 ASSERT_GE(fd, 0) { 35 if (errno == ENOENT) 36 SKIP(return, "Skipping test since /dev/null does not exist"); 37 } 38 39 open_fds[i] = fd; 40 } 41 42 EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { 43 if (errno == ENOSYS) 44 SKIP(return, "close_range() syscall not supported"); 45 } 46 47 EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); 48 49 for (i = 0; i <= 50; i++) 50 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 51 52 for (i = 51; i <= 100; i++) 53 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 54 55 /* create a couple of gaps */ 56 close(57); 57 close(78); 58 close(81); 59 close(82); 60 close(84); 61 close(90); 62 63 EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0)); 64 65 for (i = 51; i <= 92; i++) 66 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 67 68 for (i = 93; i <= 100; i++) 69 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 70 71 /* test that the kernel caps and still closes all fds */ 72 EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0)); 73 74 for (i = 93; i <= 99; i++) 75 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 76 77 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 78 79 EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0)); 80 81 EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL)); 82 } 83 84 TEST(close_range_unshare) 85 { 86 int i, ret, status; 87 pid_t pid; 88 int open_fds[101]; 89 struct __clone_args args = { 90 .flags = CLONE_FILES, 91 .exit_signal = SIGCHLD, 92 }; 93 94 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 95 int fd; 96 97 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 98 ASSERT_GE(fd, 0) { 99 if (errno == ENOENT) 100 SKIP(return, "Skipping test since /dev/null does not exist"); 101 } 102 103 open_fds[i] = fd; 104 } 105 106 pid = sys_clone3(&args, sizeof(args)); 107 ASSERT_GE(pid, 0); 108 109 if (pid == 0) { 110 ret = sys_close_range(open_fds[0], open_fds[50], 111 CLOSE_RANGE_UNSHARE); 112 if (ret) 113 exit(EXIT_FAILURE); 114 115 for (i = 0; i <= 50; i++) 116 if (fcntl(open_fds[i], F_GETFL) != -1) 117 exit(EXIT_FAILURE); 118 119 for (i = 51; i <= 100; i++) 120 if (fcntl(open_fds[i], F_GETFL) == -1) 121 exit(EXIT_FAILURE); 122 123 /* create a couple of gaps */ 124 close(57); 125 close(78); 126 close(81); 127 close(82); 128 close(84); 129 close(90); 130 131 ret = sys_close_range(open_fds[51], open_fds[92], 132 CLOSE_RANGE_UNSHARE); 133 if (ret) 134 exit(EXIT_FAILURE); 135 136 for (i = 51; i <= 92; i++) 137 if (fcntl(open_fds[i], F_GETFL) != -1) 138 exit(EXIT_FAILURE); 139 140 for (i = 93; i <= 100; i++) 141 if (fcntl(open_fds[i], F_GETFL) == -1) 142 exit(EXIT_FAILURE); 143 144 /* test that the kernel caps and still closes all fds */ 145 ret = sys_close_range(open_fds[93], open_fds[99], 146 CLOSE_RANGE_UNSHARE); 147 if (ret) 148 exit(EXIT_FAILURE); 149 150 for (i = 93; i <= 99; i++) 151 if (fcntl(open_fds[i], F_GETFL) != -1) 152 exit(EXIT_FAILURE); 153 154 if (fcntl(open_fds[100], F_GETFL) == -1) 155 exit(EXIT_FAILURE); 156 157 ret = sys_close_range(open_fds[100], open_fds[100], 158 CLOSE_RANGE_UNSHARE); 159 if (ret) 160 exit(EXIT_FAILURE); 161 162 if (fcntl(open_fds[100], F_GETFL) != -1) 163 exit(EXIT_FAILURE); 164 165 exit(EXIT_SUCCESS); 166 } 167 168 EXPECT_EQ(waitpid(pid, &status, 0), pid); 169 EXPECT_EQ(true, WIFEXITED(status)); 170 EXPECT_EQ(0, WEXITSTATUS(status)); 171 } 172 173 TEST(close_range_unshare_capped) 174 { 175 int i, ret, status; 176 pid_t pid; 177 int open_fds[101]; 178 struct __clone_args args = { 179 .flags = CLONE_FILES, 180 .exit_signal = SIGCHLD, 181 }; 182 183 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 184 int fd; 185 186 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 187 ASSERT_GE(fd, 0) { 188 if (errno == ENOENT) 189 SKIP(return, "Skipping test since /dev/null does not exist"); 190 } 191 192 open_fds[i] = fd; 193 } 194 195 pid = sys_clone3(&args, sizeof(args)); 196 ASSERT_GE(pid, 0); 197 198 if (pid == 0) { 199 ret = sys_close_range(open_fds[0], UINT_MAX, 200 CLOSE_RANGE_UNSHARE); 201 if (ret) 202 exit(EXIT_FAILURE); 203 204 for (i = 0; i <= 100; i++) 205 if (fcntl(open_fds[i], F_GETFL) != -1) 206 exit(EXIT_FAILURE); 207 208 exit(EXIT_SUCCESS); 209 } 210 211 EXPECT_EQ(waitpid(pid, &status, 0), pid); 212 EXPECT_EQ(true, WIFEXITED(status)); 213 EXPECT_EQ(0, WEXITSTATUS(status)); 214 } 215 216 TEST(close_range_cloexec) 217 { 218 int i, ret; 219 int open_fds[101]; 220 struct rlimit rlimit; 221 222 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 223 int fd; 224 225 fd = open("/dev/null", O_RDONLY); 226 ASSERT_GE(fd, 0) { 227 if (errno == ENOENT) 228 SKIP(return, "Skipping test since /dev/null does not exist"); 229 } 230 231 open_fds[i] = fd; 232 } 233 234 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); 235 if (ret < 0) { 236 if (errno == ENOSYS) 237 SKIP(return, "close_range() syscall not supported"); 238 if (errno == EINVAL) 239 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); 240 } 241 242 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ 243 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); 244 rlimit.rlim_cur = 25; 245 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)); 246 247 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */ 248 ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC); 249 ASSERT_EQ(0, ret); 250 ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC); 251 ASSERT_EQ(0, ret); 252 253 for (i = 0; i <= 50; i++) { 254 int flags = fcntl(open_fds[i], F_GETFD); 255 256 EXPECT_GT(flags, -1); 257 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 258 } 259 260 for (i = 51; i <= 74; i++) { 261 int flags = fcntl(open_fds[i], F_GETFD); 262 263 EXPECT_GT(flags, -1); 264 EXPECT_EQ(flags & FD_CLOEXEC, 0); 265 } 266 267 for (i = 75; i <= 100; i++) { 268 int flags = fcntl(open_fds[i], F_GETFD); 269 270 EXPECT_GT(flags, -1); 271 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 272 } 273 274 /* Test a common pattern. */ 275 ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC); 276 for (i = 0; i <= 100; i++) { 277 int flags = fcntl(open_fds[i], F_GETFD); 278 279 EXPECT_GT(flags, -1); 280 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 281 } 282 } 283 284 TEST(close_range_cloexec_unshare) 285 { 286 int i, ret; 287 int open_fds[101]; 288 struct rlimit rlimit; 289 290 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 291 int fd; 292 293 fd = open("/dev/null", O_RDONLY); 294 ASSERT_GE(fd, 0) { 295 if (errno == ENOENT) 296 SKIP(return, "Skipping test since /dev/null does not exist"); 297 } 298 299 open_fds[i] = fd; 300 } 301 302 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); 303 if (ret < 0) { 304 if (errno == ENOSYS) 305 SKIP(return, "close_range() syscall not supported"); 306 if (errno == EINVAL) 307 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); 308 } 309 310 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ 311 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); 312 rlimit.rlim_cur = 25; 313 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)); 314 315 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */ 316 ret = sys_close_range(open_fds[0], open_fds[50], 317 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); 318 ASSERT_EQ(0, ret); 319 ret = sys_close_range(open_fds[75], open_fds[100], 320 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); 321 ASSERT_EQ(0, ret); 322 323 for (i = 0; i <= 50; i++) { 324 int flags = fcntl(open_fds[i], F_GETFD); 325 326 EXPECT_GT(flags, -1); 327 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 328 } 329 330 for (i = 51; i <= 74; i++) { 331 int flags = fcntl(open_fds[i], F_GETFD); 332 333 EXPECT_GT(flags, -1); 334 EXPECT_EQ(flags & FD_CLOEXEC, 0); 335 } 336 337 for (i = 75; i <= 100; i++) { 338 int flags = fcntl(open_fds[i], F_GETFD); 339 340 EXPECT_GT(flags, -1); 341 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 342 } 343 344 /* Test a common pattern. */ 345 ret = sys_close_range(3, UINT_MAX, 346 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); 347 for (i = 0; i <= 100; i++) { 348 int flags = fcntl(open_fds[i], F_GETFD); 349 350 EXPECT_GT(flags, -1); 351 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 352 } 353 } 354 355 /* 356 * Regression test for [email protected] 357 */ 358 TEST(close_range_cloexec_syzbot) 359 { 360 int fd1, fd2, fd3, flags, ret, status; 361 pid_t pid; 362 struct __clone_args args = { 363 .flags = CLONE_FILES, 364 .exit_signal = SIGCHLD, 365 }; 366 367 /* Create a huge gap in the fd table. */ 368 fd1 = open("/dev/null", O_RDWR); 369 EXPECT_GT(fd1, 0); 370 371 fd2 = dup2(fd1, 1000); 372 EXPECT_GT(fd2, 0); 373 374 pid = sys_clone3(&args, sizeof(args)); 375 ASSERT_GE(pid, 0); 376 377 if (pid == 0) { 378 ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC); 379 if (ret) 380 exit(EXIT_FAILURE); 381 382 /* 383 * We now have a private file descriptor table and all 384 * our open fds should still be open but made 385 * close-on-exec. 386 */ 387 flags = fcntl(fd1, F_GETFD); 388 EXPECT_GT(flags, -1); 389 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 390 391 flags = fcntl(fd2, F_GETFD); 392 EXPECT_GT(flags, -1); 393 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 394 395 fd3 = dup2(fd1, 42); 396 EXPECT_GT(fd3, 0); 397 398 /* 399 * Duplicating the file descriptor must remove the 400 * FD_CLOEXEC flag. 401 */ 402 flags = fcntl(fd3, F_GETFD); 403 EXPECT_GT(flags, -1); 404 EXPECT_EQ(flags & FD_CLOEXEC, 0); 405 406 exit(EXIT_SUCCESS); 407 } 408 409 EXPECT_EQ(waitpid(pid, &status, 0), pid); 410 EXPECT_EQ(true, WIFEXITED(status)); 411 EXPECT_EQ(0, WEXITSTATUS(status)); 412 413 /* 414 * We had a shared file descriptor table before along with requesting 415 * close-on-exec so the original fds must not be close-on-exec. 416 */ 417 flags = fcntl(fd1, F_GETFD); 418 EXPECT_GT(flags, -1); 419 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 420 421 flags = fcntl(fd2, F_GETFD); 422 EXPECT_GT(flags, -1); 423 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 424 425 fd3 = dup2(fd1, 42); 426 EXPECT_GT(fd3, 0); 427 428 flags = fcntl(fd3, F_GETFD); 429 EXPECT_GT(flags, -1); 430 EXPECT_EQ(flags & FD_CLOEXEC, 0); 431 432 EXPECT_EQ(close(fd1), 0); 433 EXPECT_EQ(close(fd2), 0); 434 EXPECT_EQ(close(fd3), 0); 435 } 436 437 /* 438 * Regression test for [email protected] 439 */ 440 TEST(close_range_cloexec_unshare_syzbot) 441 { 442 int i, fd1, fd2, fd3, flags, ret, status; 443 pid_t pid; 444 struct __clone_args args = { 445 .flags = CLONE_FILES, 446 .exit_signal = SIGCHLD, 447 }; 448 449 /* 450 * Create a huge gap in the fd table. When we now call 451 * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper 452 * bound the kernel will only copy up to fd1 file descriptors into the 453 * new fd table. If the kernel is buggy and doesn't handle 454 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file 455 * descriptors and we will oops! 456 * 457 * On a buggy kernel this should immediately oops. But let's loop just 458 * to be sure. 459 */ 460 fd1 = open("/dev/null", O_RDWR); 461 EXPECT_GT(fd1, 0); 462 463 fd2 = dup2(fd1, 1000); 464 EXPECT_GT(fd2, 0); 465 466 for (i = 0; i < 100; i++) { 467 468 pid = sys_clone3(&args, sizeof(args)); 469 ASSERT_GE(pid, 0); 470 471 if (pid == 0) { 472 ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE | 473 CLOSE_RANGE_CLOEXEC); 474 if (ret) 475 exit(EXIT_FAILURE); 476 477 /* 478 * We now have a private file descriptor table and all 479 * our open fds should still be open but made 480 * close-on-exec. 481 */ 482 flags = fcntl(fd1, F_GETFD); 483 EXPECT_GT(flags, -1); 484 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 485 486 flags = fcntl(fd2, F_GETFD); 487 EXPECT_GT(flags, -1); 488 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 489 490 fd3 = dup2(fd1, 42); 491 EXPECT_GT(fd3, 0); 492 493 /* 494 * Duplicating the file descriptor must remove the 495 * FD_CLOEXEC flag. 496 */ 497 flags = fcntl(fd3, F_GETFD); 498 EXPECT_GT(flags, -1); 499 EXPECT_EQ(flags & FD_CLOEXEC, 0); 500 501 EXPECT_EQ(close(fd1), 0); 502 EXPECT_EQ(close(fd2), 0); 503 EXPECT_EQ(close(fd3), 0); 504 505 exit(EXIT_SUCCESS); 506 } 507 508 EXPECT_EQ(waitpid(pid, &status, 0), pid); 509 EXPECT_EQ(true, WIFEXITED(status)); 510 EXPECT_EQ(0, WEXITSTATUS(status)); 511 } 512 513 /* 514 * We created a private file descriptor table before along with 515 * requesting close-on-exec so the original fds must not be 516 * close-on-exec. 517 */ 518 flags = fcntl(fd1, F_GETFD); 519 EXPECT_GT(flags, -1); 520 EXPECT_EQ(flags & FD_CLOEXEC, 0); 521 522 flags = fcntl(fd2, F_GETFD); 523 EXPECT_GT(flags, -1); 524 EXPECT_EQ(flags & FD_CLOEXEC, 0); 525 526 fd3 = dup2(fd1, 42); 527 EXPECT_GT(fd3, 0); 528 529 flags = fcntl(fd3, F_GETFD); 530 EXPECT_GT(flags, -1); 531 EXPECT_EQ(flags & FD_CLOEXEC, 0); 532 533 EXPECT_EQ(close(fd1), 0); 534 EXPECT_EQ(close(fd2), 0); 535 EXPECT_EQ(close(fd3), 0); 536 } 537 538 TEST_HARNESS_MAIN 539