/*
 * linux/kernel/sys.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/workqueue.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>

#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef GET_TSC_CTL
# define GET_TSC_CTL(a)		(-EINVAL)
#endif
#ifndef SET_TSC_CTL
# define SET_TSC_CTL(a)		(-EINVAL)
#endif

/*
 * this is where the system-wide overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;

#ifdef CONFIG_UID16
EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);
#endif

/*
 * the same as above, but for filesystems which can only store a 16-bit
 * UID and GID. as such, this is needed on all architectures
 */

int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
int fs_overflowgid = DEFAULT_FS_OVERFLOWGID;

EXPORT_SYMBOL(fs_overflowuid);
EXPORT_SYMBOL(fs_overflowgid);

/*
 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
 */

int C_A_D = 1;
struct pid *cad_pid;
EXPORT_SYMBOL(cad_pid);

/*
 * If set, this is used for preparing the system to power off.
 */

void (*pm_power_off_prepare)(void);

/*
 * set the priority of a task
 * - the caller must hold the RCU read lock
 */
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
	const struct cred *cred = current_cred(), *pcred = __task_cred(p);
	int no_nice;

	if (pcred->uid != cred->euid &&
	    pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) {
		error = -EPERM;
		goto out;
	}
	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
		error = -EACCES;
		goto out;
	}
	no_nice = security_task_setnice(p, niceval);
	if (no_nice) {
		error = no_nice;
		goto out;
	}
	if (error == -ESRCH)
		error = 0;
	set_user_nice(p, niceval);
out:
	return error;
}

asmlinkage long sys_setpriority(int which, int who, int niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	int error = -EINVAL;
	struct pid *pgrp;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		goto out;

	/* normalize: avoid signed division (rounding problems) */
	error = -ESRCH;
	if (niceval < -20)
		niceval = -20;
	if (niceval > 19)
		niceval = 19;

	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p)
			error = set_one_prio(p, niceval, error);
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			error = set_one_prio(p, niceval, error);
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		user = (struct user_struct *) cred->user;
		if (!who)
			who = cred->uid;
		else if ((who != cred->uid) &&
			 !(user = find_user(who)))
			goto out_unlock;	/* No processes for this user */

		do_each_thread(g, p)
			if (__task_cred(p)->uid == who)
				error = set_one_prio(p, niceval, error);
		while_each_thread(g, p);
		if (who != cred->uid)
			free_uid(user);		/* For find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
out:
	return error;
}
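
/*
 * Illustrative userspace sketch (not part of this file): because the
 * kernel clamps out-of-range nice values rather than rejecting them, a
 * caller cannot rely on an error to detect them (assumes <sys/resource.h>):
 *
 *	if (setpriority(PRIO_PROCESS, 0, 25) == 0)
 *		;	// succeeded - but the value was clamped to 19
 */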

/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 */
asmlinkage long sys_getpriority(int which, int who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	long niceval, retval = -ESRCH;
	struct pid *pgrp;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		return -EINVAL;

	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p) {
			niceval = 20 - task_nice(p);
			if (niceval > retval)
				retval = niceval;
		}
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			niceval = 20 - task_nice(p);
			if (niceval > retval)
				retval = niceval;
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		user = (struct user_struct *) cred->user;
		if (!who)
			who = cred->uid;
		else if ((who != cred->uid) &&
			 !(user = find_user(who)))
			goto out_unlock;	/* No processes for this user */

		do_each_thread(g, p)
			if (__task_cred(p)->uid == who) {
				niceval = 20 - task_nice(p);
				if (niceval > retval)
					retval = niceval;
			}
		while_each_thread(g, p);
		if (who != cred->uid)
			free_uid(user);		/* for find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);

	return retval;
}

/**
 * emergency_restart - reboot the system
 *
 * Without shutting down any hardware or taking any locks
 * reboot the system. This is called when we know we are in
 * trouble so this is our best effort to reboot. This is
 * safe to call in interrupt context.
 */
void emergency_restart(void)
{
	machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);

void kernel_restart_prepare(char *cmd)
{
	blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
	system_state = SYSTEM_RESTART;
	device_shutdown();
	sysdev_shutdown();
}

/**
 * kernel_restart - reboot the system
 * @cmd: pointer to buffer containing command to execute for restart
 *	or %NULL
 *
 * Shutdown everything and perform a clean reboot.
 * This is not safe to call in interrupt context.
 */
void kernel_restart(char *cmd)
{
	kernel_restart_prepare(cmd);
	if (!cmd)
		printk(KERN_EMERG "Restarting system.\n");
	else
		printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
	machine_restart(cmd);
}
EXPORT_SYMBOL_GPL(kernel_restart);

static void kernel_shutdown_prepare(enum system_states state)
{
	blocking_notifier_call_chain(&reboot_notifier_list,
		(state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL);
	system_state = state;
	device_shutdown();
}

/**
 * kernel_halt - halt the system
 *
 * Shutdown everything and perform a clean system halt.
 */
void kernel_halt(void)
{
	kernel_shutdown_prepare(SYSTEM_HALT);
	sysdev_shutdown();
	printk(KERN_EMERG "System halted.\n");
	machine_halt();
}

EXPORT_SYMBOL_GPL(kernel_halt);
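
/*
 * A minimal sketch of a reboot notifier as run by the chains above; the
 * callback and variable names here are made up for illustration:
 *
 *	static int my_reboot_cb(struct notifier_block *nb,
 *				unsigned long action, void *data)
 *	{
 *		// action is SYS_RESTART, SYS_HALT or SYS_POWER_OFF;
 *		// data is the restart command string, if any
 *		return NOTIFY_DONE;
 *	}
 *	static struct notifier_block my_reboot_nb = {
 *		.notifier_call = my_reboot_cb,
 *	};
 *
 * registered elsewhere with register_reboot_notifier(&my_reboot_nb).
 */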

/**
 * kernel_power_off - power_off the system
 *
 * Shutdown everything and perform a clean system power_off.
 */
void kernel_power_off(void)
{
	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
	if (pm_power_off_prepare)
		pm_power_off_prepare();
	disable_nonboot_cpus();
	sysdev_shutdown();
	printk(KERN_EMERG "Power down.\n");
	machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);

/*
 * Reboot system call: for obvious reasons only root may call it,
 * and even root needs to set up some magic numbers in the registers
 * so that some mistake won't make this reboot the whole machine.
 * You can also set the meaning of the ctrl-alt-del-key here.
 *
 * reboot doesn't sync: do that yourself before calling this.
 */
asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user *arg)
{
	char buffer[256];

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT))
		return -EPERM;

	/* For safety, we require "magic" arguments. */
	if (magic1 != LINUX_REBOOT_MAGIC1 ||
	    (magic2 != LINUX_REBOOT_MAGIC2 &&
	     magic2 != LINUX_REBOOT_MAGIC2A &&
	     magic2 != LINUX_REBOOT_MAGIC2B &&
	     magic2 != LINUX_REBOOT_MAGIC2C))
		return -EINVAL;

	/* Instead of trying to make the power_off code look like
	 * halt when pm_power_off is not set do it the easy way.
	 */
	if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
		cmd = LINUX_REBOOT_CMD_HALT;

	lock_kernel();
	switch (cmd) {
	case LINUX_REBOOT_CMD_RESTART:
		kernel_restart(NULL);
		break;

	case LINUX_REBOOT_CMD_CAD_ON:
		C_A_D = 1;
		break;

	case LINUX_REBOOT_CMD_CAD_OFF:
		C_A_D = 0;
		break;

	case LINUX_REBOOT_CMD_HALT:
		kernel_halt();
		unlock_kernel();
		do_exit(0);
		break;

	case LINUX_REBOOT_CMD_POWER_OFF:
		kernel_power_off();
		unlock_kernel();
		do_exit(0);
		break;

	case LINUX_REBOOT_CMD_RESTART2:
		if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
			unlock_kernel();
			return -EFAULT;
		}
		buffer[sizeof(buffer) - 1] = '\0';

		kernel_restart(buffer);
		break;

#ifdef CONFIG_KEXEC
	case LINUX_REBOOT_CMD_KEXEC:
	{
		int ret;
		ret = kernel_kexec();
		unlock_kernel();
		return ret;
	}
#endif

#ifdef CONFIG_HIBERNATION
	case LINUX_REBOOT_CMD_SW_SUSPEND:
	{
		int ret = hibernate();
		unlock_kernel();
		return ret;
	}
#endif

	default:
		unlock_kernel();
		return -EINVAL;
	}
	unlock_kernel();
	return 0;
}

static void deferred_cad(struct work_struct *dummy)
{
	kernel_restart(NULL);
}

/*
 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 * As it's called within an interrupt, it may NOT sync: the only choice
 * is whether to reboot at once, or just ignore the ctrl-alt-del.
 */
void ctrl_alt_del(void)
{
	static DECLARE_WORK(cad_work, deferred_cad);

	if (C_A_D)
		schedule_work(&cad_work);
	else
		kill_cad_pid(SIGINT, 1);
}
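
/*
 * Illustrative userspace call of the syscall above, via the glibc
 * wrapper that fills in the magic numbers (assumes <unistd.h> and
 * <sys/reboot.h>):
 *
 *	sync();			// sys_reboot does not sync for you
 *	reboot(RB_AUTOBOOT);	// LINUX_REBOOT_CMD_RESTART
 */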

/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa. (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit of a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD. A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are no races, the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
	if (retval)
		goto error;

	retval = -EPERM;
	if (rgid != (gid_t) -1) {
		if (old->gid == rgid ||
		    old->egid == rgid ||
		    capable(CAP_SETGID))
			new->gid = rgid;
		else
			goto error;
	}
	if (egid != (gid_t) -1) {
		if (old->gid == egid ||
		    old->egid == egid ||
		    old->sgid == egid ||
		    capable(CAP_SETGID))
			new->egid = egid;
		else
			goto error;
	}

	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && egid != old->gid))
		new->sgid = new->egid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
asmlinkage long sys_setgid(gid_t gid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
	if (retval)
		goto error;

	retval = -EPERM;
	if (capable(CAP_SETGID))
		new->gid = new->egid = new->sgid = new->fsgid = gid;
	else if (gid == old->gid || gid == old->sgid)
		new->egid = new->fsgid = gid;
	else
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * change the user struct in a credentials set to match the new UID
 */
static int set_user(struct cred *new)
{
	struct user_struct *new_user;

	new_user = alloc_uid(current_user_ns(), new->uid);
	if (!new_user)
		return -EAGAIN;

	if (atomic_read(&new_user->processes) >=
	    current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
	    new_user != INIT_USER) {
		free_uid(new_user);
		return -EAGAIN;
	}

	free_uid(new->user);
	new->user = new_user;
	return 0;
}
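
/*
 * Illustrative userspace sketch of the full privilege drop described in
 * the setregid() comment above (assumes <unistd.h>):
 *
 *	gid_t rgid = getgid();
 *	if (setregid(rgid, rgid) != 0)
 *		abort();	// still holding the setgid privileges
 */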

/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa. (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit of a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD. A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
	if (retval)
		goto error;

	retval = -EPERM;
	if (ruid != (uid_t) -1) {
		new->uid = ruid;
		if (old->uid != ruid &&
		    old->euid != ruid &&
		    !capable(CAP_SETUID))
			goto error;
	}

	if (euid != (uid_t) -1) {
		new->euid = euid;
		if (old->uid != euid &&
		    old->euid != euid &&
		    old->suid != euid &&
		    !capable(CAP_SETUID))
			goto error;
	}

	retval = -EAGAIN;
	if (new->uid != old->uid && set_user(new) < 0)
		goto error;

	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && euid != old->uid))
		new->suid = new->euid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_IDS is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too. If you don't like this, blame the bright people
 * in the POSIX committee and/or USG. Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
asmlinkage long sys_setuid(uid_t uid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
	if (retval)
		goto error;

	retval = -EPERM;
	if (capable(CAP_SETUID)) {
		new->suid = new->uid = uid;
		if (uid != old->uid && set_user(new) < 0) {
			retval = -EAGAIN;
			goto error;
		}
	} else if (uid != old->uid && uid != new->suid) {
		goto error;
	}

	new->fsuid = new->euid = uid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
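
/*
 * Illustrative sketch of the temporary drop mentioned above: a setuid
 * root program swapping its real and effective uid with setreuid(),
 * then swapping back (assumes <unistd.h>):
 *
 *	setreuid(geteuid(), getuid());	// run as the invoking user
 *	...
 *	setreuid(geteuid(), getuid());	// swap back to root
 */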

/*
 * This function implements a generic ability to update ruid, euid,
 * and suid. This allows you to implement the 4.4 compatible seteuid().
 */
asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
	if (retval)
		goto error;
	old = current_cred();

	retval = -EPERM;
	if (!capable(CAP_SETUID)) {
		if (ruid != (uid_t) -1 && ruid != old->uid &&
		    ruid != old->euid && ruid != old->suid)
			goto error;
		if (euid != (uid_t) -1 && euid != old->uid &&
		    euid != old->euid && euid != old->suid)
			goto error;
		if (suid != (uid_t) -1 && suid != old->uid &&
		    suid != old->euid && suid != old->suid)
			goto error;
	}

	retval = -EAGAIN;
	if (ruid != (uid_t) -1) {
		new->uid = ruid;
		if (ruid != old->uid && set_user(new) < 0)
			goto error;
	}
	if (euid != (uid_t) -1)
		new->euid = euid;
	if (suid != (uid_t) -1)
		new->suid = suid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid)
{
	const struct cred *cred = current_cred();
	int retval;

	if (!(retval = put_user(cred->uid, ruid)) &&
	    !(retval = put_user(cred->euid, euid)))
		retval = put_user(cred->suid, suid);

	return retval;
}

/*
 * Same as above, but for rgid, egid, sgid.
 */
asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
	if (retval)
		goto error;

	retval = -EPERM;
	if (!capable(CAP_SETGID)) {
		if (rgid != (gid_t) -1 && rgid != old->gid &&
		    rgid != old->egid && rgid != old->sgid)
			goto error;
		if (egid != (gid_t) -1 && egid != old->gid &&
		    egid != old->egid && egid != old->sgid)
			goto error;
		if (sgid != (gid_t) -1 && sgid != old->gid &&
		    sgid != old->egid && sgid != old->sgid)
			goto error;
	}

	if (rgid != (gid_t) -1)
		new->gid = rgid;
	if (egid != (gid_t) -1)
		new->egid = egid;
	if (sgid != (gid_t) -1)
		new->sgid = sgid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid)
{
	const struct cred *cred = current_cred();
	int retval;

	if (!(retval = put_user(cred->gid, rgid)) &&
	    !(retval = put_user(cred->egid, egid)))
		retval = put_user(cred->sgid, sgid);

	return retval;
}
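
/*
 * With setresuid() a permanent drop is explicit and easy to verify
 * (illustrative userspace sketch, assumes _GNU_SOURCE and <unistd.h>):
 *
 *	uid_t u = getuid();
 *	if (setresuid(u, u, u) != 0 || geteuid() != u)
 *		abort();	// the drop did not take effect
 */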

/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to). It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access..
 */
asmlinkage long sys_setfsuid(uid_t uid)
{
	const struct cred *old;
	struct cred *new;
	uid_t old_fsuid;

	new = prepare_creds();
	if (!new)
		return current_fsuid();
	old = current_cred();
	old_fsuid = old->fsuid;

	if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0)
		goto error;

	if (uid == old->uid || uid == old->euid ||
	    uid == old->suid || uid == old->fsuid ||
	    capable(CAP_SETUID)) {
		if (uid != old_fsuid) {
			new->fsuid = uid;
			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
		}
	}

error:
	abort_creds(new);
	return old_fsuid;

change_okay:
	commit_creds(new);
	return old_fsuid;
}

/*
 * Samma på svenska.. ("the same, in Swedish") - setfsgid() is the gid
 * counterpart of setfsuid().
 */
asmlinkage long sys_setfsgid(gid_t gid)
{
	const struct cred *old;
	struct cred *new;
	gid_t old_fsgid;

	new = prepare_creds();
	if (!new)
		return current_fsgid();
	old = current_cred();
	old_fsgid = old->fsgid;

	if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
		goto error;

	if (gid == old->gid || gid == old->egid ||
	    gid == old->sgid || gid == old->fsgid ||
	    capable(CAP_SETGID)) {
		if (gid != old_fsgid) {
			new->fsgid = gid;
			goto change_okay;
		}
	}

error:
	abort_creds(new);
	return old_fsgid;

change_okay:
	commit_creds(new);
	return old_fsgid;
}

void do_sys_times(struct tms *tms)
{
	struct task_cputime cputime;
	cputime_t cutime, cstime;

	thread_group_cputime(current, &cputime);
	spin_lock_irq(&current->sighand->siglock);
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	spin_unlock_irq(&current->sighand->siglock);
	tms->tms_utime = cputime_to_clock_t(cputime.utime);
	tms->tms_stime = cputime_to_clock_t(cputime.stime);
	tms->tms_cutime = cputime_to_clock_t(cutime);
	tms->tms_cstime = cputime_to_clock_t(cstime);
}

asmlinkage long sys_times(struct tms __user *tbuf)
{
	if (tbuf) {
		struct tms tmp;

		do_sys_times(&tmp);
		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
			return -EFAULT;
	}
	return (long) jiffies_64_to_clock_t(get_jiffies_64());
}
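
/*
 * Illustrative userspace use of times(); all fields are in clock ticks,
 * so divide by sysconf(_SC_CLK_TCK) (assumes <sys/times.h>, <unistd.h>):
 *
 *	struct tms t;
 *	times(&t);
 *	double user_secs = (double)t.tms_utime / sysconf(_SC_CLK_TCK);
 */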

/*
 * This needs some heavy checking ...
 * I just haven't the stomach for it. I also don't fully
 * understand sessions/pgrp etc. Let somebody who does explain it.
 *
 * OK, I think I have the protection semantics right.... this is really
 * only important on a multi-user system anyway, to make sure one user
 * can't send a signal to a process owned by another. -TYT, 12/12/91
 *
 * Ouch. Had to add the 'did_exec' flag to conform completely to POSIX.
 * LBT 04.03.94
 */
asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
{
	struct task_struct *p;
	struct task_struct *group_leader = current->group_leader;
	struct pid *pgrp;
	int err;

	if (!pid)
		pid = task_pid_vnr(group_leader);
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;

	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	write_lock_irq(&tasklist_lock);

	err = -ESRCH;
	p = find_task_by_vpid(pid);
	if (!p)
		goto out;

	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

	if (same_thread_group(p->real_parent, group_leader)) {
		err = -EPERM;
		if (task_session(p) != task_session(group_leader))
			goto out;
		err = -EACCES;
		if (p->did_exec)
			goto out;
	} else {
		err = -ESRCH;
		if (p != group_leader)
			goto out;
	}

	err = -EPERM;
	if (p->signal->leader)
		goto out;

	pgrp = task_pid(p);
	if (pgid != pid) {
		struct task_struct *g;

		pgrp = find_vpid(pgid);
		g = pid_task(pgrp, PIDTYPE_PGID);
		if (!g || task_session(g) != task_session(group_leader))
			goto out;
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

	if (task_pgrp(p) != pgrp) {
		change_pid(p, PIDTYPE_PGID, pgrp);
		set_task_pgrp(p, pid_nr(pgrp));
	}

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	write_unlock_irq(&tasklist_lock);
	return err;
}

asmlinkage long sys_getpgid(pid_t pid)
{
	struct task_struct *p;
	struct pid *grp;
	int retval;

	rcu_read_lock();
	if (!pid)
		grp = task_pgrp(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		grp = task_pgrp(p);
		if (!grp)
			goto out;

		retval = security_task_getpgid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(grp);
out:
	rcu_read_unlock();
	return retval;
}

#ifdef __ARCH_WANT_SYS_GETPGRP

asmlinkage long sys_getpgrp(void)
{
	return sys_getpgid(0);
}

#endif

asmlinkage long sys_getsid(pid_t pid)
{
	struct task_struct *p;
	struct pid *sid;
	int retval;

	rcu_read_lock();
	if (!pid)
		sid = task_session(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		sid = task_session(p);
		if (!sid)
			goto out;

		retval = security_task_getsid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(sid);
out:
	rcu_read_unlock();
	return retval;
}

asmlinkage long sys_setsid(void)
{
	struct task_struct *group_leader = current->group_leader;
	struct pid *sid = task_pid(group_leader);
	pid_t session = pid_vnr(sid);
	int err = -EPERM;

	write_lock_irq(&tasklist_lock);
	/* Fail if I am already a session leader */
	if (group_leader->signal->leader)
		goto out;

	/* Fail if a process group id already exists that equals the
	 * proposed session id.
	 */
	if (pid_task(sid, PIDTYPE_PGID))
		goto out;

	group_leader->signal->leader = 1;
	__set_special_pids(sid);

	proc_clear_tty(group_leader);

	err = session;
out:
	write_unlock_irq(&tasklist_lock);
	return err;
}
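
/*
 * Classic daemonization sketch (illustrative only, assumes <unistd.h>):
 * setsid() fails for a process group leader, since its own pid already
 * names a process group, hence the fork() first:
 *
 *	if (fork() > 0)
 *		_exit(0);	// parent exits
 *	setsid();		// child is not a group leader, so this
 *				// normally succeeds
 */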

/*
 * Supplementary group IDs
 */

/* init to 2 - one for init_task, one to ensure it is never freed */
struct group_info init_groups = { .usage = ATOMIC_INIT(2) };

struct group_info *groups_alloc(int gidsetsize)
{
	struct group_info *group_info;
	int nblocks;
	int i;

	nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
	/* Make sure we always allocate at least one indirect block pointer */
	nblocks = nblocks ? : 1;
	group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
	if (!group_info)
		return NULL;
	group_info->ngroups = gidsetsize;
	group_info->nblocks = nblocks;
	atomic_set(&group_info->usage, 1);

	if (gidsetsize <= NGROUPS_SMALL)
		group_info->blocks[0] = group_info->small_block;
	else {
		for (i = 0; i < nblocks; i++) {
			gid_t *b;
			b = (void *)__get_free_page(GFP_USER);
			if (!b)
				goto out_undo_partial_alloc;
			group_info->blocks[i] = b;
		}
	}
	return group_info;

out_undo_partial_alloc:
	while (--i >= 0) {
		free_page((unsigned long)group_info->blocks[i]);
	}
	kfree(group_info);
	return NULL;
}

EXPORT_SYMBOL(groups_alloc);

void groups_free(struct group_info *group_info)
{
	if (group_info->blocks[0] != group_info->small_block) {
		int i;
		for (i = 0; i < group_info->nblocks; i++)
			free_page((unsigned long)group_info->blocks[i]);
	}
	kfree(group_info);
}

EXPORT_SYMBOL(groups_free);

/* export the group_info to a user-space array */
static int groups_to_user(gid_t __user *grouplist,
			  const struct group_info *group_info)
{
	int i;
	unsigned int count = group_info->ngroups;

	for (i = 0; i < group_info->nblocks; i++) {
		unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
		unsigned int len = cp_count * sizeof(*grouplist);

		if (copy_to_user(grouplist, group_info->blocks[i], len))
			return -EFAULT;

		grouplist += NGROUPS_PER_BLOCK;
		count -= cp_count;
	}
	return 0;
}

/* fill a group_info from a user-space array - it must be allocated already */
static int groups_from_user(struct group_info *group_info,
			    gid_t __user *grouplist)
{
	int i;
	unsigned int count = group_info->ngroups;

	for (i = 0; i < group_info->nblocks; i++) {
		unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
		unsigned int len = cp_count * sizeof(*grouplist);

		if (copy_from_user(group_info->blocks[i], grouplist, len))
			return -EFAULT;

		grouplist += NGROUPS_PER_BLOCK;
		count -= cp_count;
	}
	return 0;
}

/* a simple Shell sort */
static void groups_sort(struct group_info *group_info)
{
	int base, max, stride;
	int gidsetsize = group_info->ngroups;

	for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
		; /* nothing */
	stride /= 3;

	while (stride) {
		max = gidsetsize - stride;
		for (base = 0; base < max; base++) {
			int left = base;
			int right = left + stride;
			gid_t tmp = GROUP_AT(group_info, right);

			while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
				GROUP_AT(group_info, right) =
					GROUP_AT(group_info, left);
				right = left;
				left -= stride;
			}
			GROUP_AT(group_info, right) = tmp;
		}
		stride /= 3;
	}
}

/* a simple bsearch */
int groups_search(const struct group_info *group_info, gid_t grp)
{
	unsigned int left, right;

	if (!group_info)
		return 0;

	left = 0;
	right = group_info->ngroups;
	while (left < right) {
		unsigned int mid = (left + right) / 2;
		int cmp = grp - GROUP_AT(group_info, mid);
		if (cmp > 0)
			left = mid + 1;
		else if (cmp < 0)
			right = mid;
		else
			return 1;
	}
	return 0;
}
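
/*
 * Worked example of the groups_sort() stride sequence above (Knuth's
 * 3h+1 gaps): for ngroups = 100 the for-loop produces 1, 4, 13, 40, 121
 * and stops, and the sort then runs with strides 40, 13, 4, 1.
 */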

/**
 * set_groups - Change a group subscription in a set of credentials
 * @new: The newly prepared set of credentials to alter
 * @group_info: The group list to install
 *
 * Validate a group subscription and, if valid, insert it into a set
 * of credentials.
 */
int set_groups(struct cred *new, struct group_info *group_info)
{
	int retval;

	retval = security_task_setgroups(group_info);
	if (retval)
		return retval;

	put_group_info(new->group_info);
	groups_sort(group_info);
	get_group_info(group_info);
	new->group_info = group_info;
	return 0;
}

EXPORT_SYMBOL(set_groups);

/**
 * set_current_groups - Change current's group subscription
 * @group_info: The group list to impose
 *
 * Validate a group subscription and, if valid, impose it upon current's task
 * security record.
 */
int set_current_groups(struct group_info *group_info)
{
	struct cred *new;
	int ret;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	ret = set_groups(new, group_info);
	if (ret < 0) {
		abort_creds(new);
		return ret;
	}

	return commit_creds(new);
}

EXPORT_SYMBOL(set_current_groups);

asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
{
	const struct cred *cred = current_cred();
	int i;

	if (gidsetsize < 0)
		return -EINVAL;

	/* no need to grab task_lock here; it cannot change */
	i = cred->group_info->ngroups;
	if (gidsetsize) {
		if (i > gidsetsize) {
			i = -EINVAL;
			goto out;
		}
		if (groups_to_user(grouplist, cred->group_info)) {
			i = -EFAULT;
			goto out;
		}
	}
out:
	return i;
}

/*
 * SMP: Our groups are copy-on-write. We can set them safely
 * without another task interfering.
 */

asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist)
{
	struct group_info *group_info;
	int retval;

	if (!capable(CAP_SETGID))
		return -EPERM;
	if ((unsigned)gidsetsize > NGROUPS_MAX)
		return -EINVAL;

	group_info = groups_alloc(gidsetsize);
	if (!group_info)
		return -ENOMEM;
	retval = groups_from_user(group_info, grouplist);
	if (retval) {
		put_group_info(group_info);
		return retval;
	}

	retval = set_current_groups(group_info);
	put_group_info(group_info);

	return retval;
}
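
/*
 * Userspace note (illustrative, assumes <grp.h>): supplementary groups
 * are untouched by setuid()/setgid(), so a privilege drop should clear
 * them first:
 *
 *	if (setgroups(0, NULL) != 0)
 *		abort();	// old supplementary groups still active
 */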

/*
 * Check whether we're fsgid/egid or in the supplemental group..
 */
int in_group_p(gid_t grp)
{
	const struct cred *cred = current_cred();
	int retval = 1;

	if (grp != cred->fsgid)
		retval = groups_search(cred->group_info, grp);
	return retval;
}

EXPORT_SYMBOL(in_group_p);

int in_egroup_p(gid_t grp)
{
	const struct cred *cred = current_cred();
	int retval = 1;

	if (grp != cred->egid)
		retval = groups_search(cred->group_info, grp);
	return retval;
}

EXPORT_SYMBOL(in_egroup_p);

DECLARE_RWSEM(uts_sem);

asmlinkage long sys_newuname(struct new_utsname __user *name)
{
	int errno = 0;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof *name))
		errno = -EFAULT;
	up_read(&uts_sem);
	return errno;
}

asmlinkage long sys_sethostname(char __user *name, int len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->nodename, tmp, len);
		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
		errno = 0;
	}
	up_write(&uts_sem);
	return errno;
}

#ifdef __ARCH_WANT_SYS_GETHOSTNAME

asmlinkage long sys_gethostname(char __user *name, int len)
{
	int i, errno;
	struct new_utsname *u;

	if (len < 0)
		return -EINVAL;
	down_read(&uts_sem);
	u = utsname();
	i = 1 + strlen(u->nodename);
	if (i > len)
		i = len;
	errno = 0;
	if (copy_to_user(name, u->nodename, i))
		errno = -EFAULT;
	up_read(&uts_sem);
	return errno;
}

#endif

/*
 * Only setdomainname; getdomainname can be implemented by calling
 * uname()
 */
asmlinkage long sys_setdomainname(char __user *name, int len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->domainname, tmp, len);
		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
		errno = 0;
	}
	up_write(&uts_sem);
	return errno;
}

asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim)
{
	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	else {
		struct rlimit value;
		task_lock(current->group_leader);
		value = current->signal->rlim[resource];
		task_unlock(current->group_leader);
		return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
	}
}
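
/*
 * Illustrative userspace use (assumes <sys/resource.h>): raising the
 * soft RLIMIT_NOFILE limit to the hard limit needs no CAP_SYS_RESOURCE,
 * since only rlim_max increases are privileged:
 *
 *	struct rlimit rl;
 *	getrlimit(RLIMIT_NOFILE, &rl);
 *	rl.rlim_cur = rl.rlim_max;
 *	setrlimit(RLIMIT_NOFILE, &rl);
 */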

#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 * Back compatibility for getrlimit. Needed for some apps.
 */
asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim)
{
	struct rlimit x;
	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	task_lock(current->group_leader);
	x = current->signal->rlim[resource];
	task_unlock(current->group_leader);
	if (x.rlim_cur > 0x7FFFFFFF)
		x.rlim_cur = 0x7FFFFFFF;
	if (x.rlim_max > 0x7FFFFFFF)
		x.rlim_max = 0x7FFFFFFF;
	return copy_to_user(rlim, &x, sizeof(x)) ? -EFAULT : 0;
}

#endif

asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
{
	struct rlimit new_rlim, *old_rlim;
	int retval;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
		return -EFAULT;
	old_rlim = current->signal->rlim + resource;
	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
	    !capable(CAP_SYS_RESOURCE))
		return -EPERM;

	if (resource == RLIMIT_NOFILE) {
		if (new_rlim.rlim_max == RLIM_INFINITY)
			new_rlim.rlim_max = sysctl_nr_open;
		if (new_rlim.rlim_cur == RLIM_INFINITY)
			new_rlim.rlim_cur = sysctl_nr_open;
		if (new_rlim.rlim_max > sysctl_nr_open)
			return -EPERM;
	}

	if (new_rlim.rlim_cur > new_rlim.rlim_max)
		return -EINVAL;

	retval = security_task_setrlimit(resource, &new_rlim);
	if (retval)
		return retval;

	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
		/*
		 * The caller is asking for an immediate RLIMIT_CPU
		 * expiry. But we use the zero value to mean "it was
		 * never set". So let's cheat and make it one second
		 * instead.
		 */
		new_rlim.rlim_cur = 1;
	}

	task_lock(current->group_leader);
	*old_rlim = new_rlim;
	task_unlock(current->group_leader);

	if (resource != RLIMIT_CPU)
		goto out;

	/*
	 * RLIMIT_CPU handling. Note that the kernel fails to return an error
	 * code if it rejected the user's attempt to set RLIMIT_CPU. This is a
	 * very long-standing error, and fixing it now risks breakage of
	 * applications, so we live with it.
	 */
	if (new_rlim.rlim_cur == RLIM_INFINITY)
		goto out;

	update_rlimit_cpu(new_rlim.rlim_cur);
out:
	return 0;
}

/*
 * It would make sense to put struct rusage in the task_struct,
 * except that would make the task_struct be *really big*. After
 * task_struct gets moved into malloc'ed memory, it would
 * make sense to do this. It will make moving the rest of the information
 * a lot simpler! (Which we're not doing right now because we're not
 * measuring them yet).
 *
 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 * races with threads incrementing their own counters. But since word
 * reads are atomic, we either get new values or old values and we don't
 * care which for the sums. We always take the siglock to protect reading
 * the c* fields from p->signal from races with exit.c updating those
 * fields when reaping, so a sample either gets all the additions of a
 * given child after it's reaped, or none so this sample is before reaping.
 *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases current multithreaded, non-current single threaded,
 * non-current multithreaded. Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
 * single threaded, as no one else can take our signal_struct away, no one
 * else can reap the children to update signal->c* counters, and no one else
 * can race with the signal-> fields. If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting. So we should place a read memory barrier when we avoid the lock.
 * On the writer side, write memory barrier is implied in __exit_signal
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields. But we don't do this yet to keep things simple.
 */

static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
{
	r->ru_nvcsw += t->nvcsw;
	r->ru_nivcsw += t->nivcsw;
	r->ru_minflt += t->min_flt;
	r->ru_majflt += t->maj_flt;
	r->ru_inblock += task_io_get_inblock(t);
	r->ru_oublock += task_io_get_oublock(t);
}

static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
	struct task_struct *t;
	unsigned long flags;
	cputime_t utime, stime;
	struct task_cputime cputime;

	memset((char *) r, 0, sizeof *r);
	utime = stime = cputime_zero;

	if (who == RUSAGE_THREAD) {
		accumulate_thread_rusage(p, r);
		goto out;
	}

	if (!lock_task_sighand(p, &flags))
		return;

	switch (who) {
	case RUSAGE_BOTH:
	case RUSAGE_CHILDREN:
		utime = p->signal->cutime;
		stime = p->signal->cstime;
		r->ru_nvcsw = p->signal->cnvcsw;
		r->ru_nivcsw = p->signal->cnivcsw;
		r->ru_minflt = p->signal->cmin_flt;
		r->ru_majflt = p->signal->cmaj_flt;
		r->ru_inblock = p->signal->cinblock;
		r->ru_oublock = p->signal->coublock;

		if (who == RUSAGE_CHILDREN)
			break;
		/* fall through: RUSAGE_BOTH adds in the caller's own usage */

	case RUSAGE_SELF:
		thread_group_cputime(p, &cputime);
		utime = cputime_add(utime, cputime.utime);
		stime = cputime_add(stime, cputime.stime);
		r->ru_nvcsw += p->signal->nvcsw;
		r->ru_nivcsw += p->signal->nivcsw;
		r->ru_minflt += p->signal->min_flt;
		r->ru_majflt += p->signal->maj_flt;
		r->ru_inblock += p->signal->inblock;
		r->ru_oublock += p->signal->oublock;
		t = p;
		do {
			accumulate_thread_rusage(t, r);
			t = next_thread(t);
		} while (t != p);
		break;

	default:
		BUG();
	}
	unlock_task_sighand(p, &flags);

out:
	cputime_to_timeval(utime, &r->ru_utime);
	cputime_to_timeval(stime, &r->ru_stime);
}

int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
{
	struct rusage r;
	k_getrusage(p, who, &r);
	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}
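
/*
 * Illustrative userspace call (assumes <stdio.h> and <sys/resource.h>):
 *
 *	struct rusage ru;
 *	if (getrusage(RUSAGE_SELF, &ru) == 0)
 *		printf("major faults: %ld\n", ru.ru_majflt);
 */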

asmlinkage long sys_getrusage(int who, struct rusage __user *ru)
{
	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
	    who != RUSAGE_THREAD)
		return -EINVAL;
	return getrusage(current, who, ru);
}

asmlinkage long sys_umask(int mask)
{
	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
	return mask;
}

asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
			  unsigned long arg4, unsigned long arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	switch (option) {
	case PR_SET_PDEATHSIG:
		if (!valid_signal(arg2)) {
			error = -EINVAL;
			break;
		}
		me->pdeath_signal = arg2;
		error = 0;
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(me->pdeath_signal, (int __user *)arg2);
		break;
	case PR_GET_DUMPABLE:
		error = get_dumpable(me->mm);
		break;
	case PR_SET_DUMPABLE:
		if (arg2 > 1) {	/* arg2 is unsigned, so < 0 cannot happen */
			error = -EINVAL;
			break;
		}
		set_dumpable(me->mm, arg2);
		error = 0;
		break;

	case PR_SET_UNALIGN:
		error = SET_UNALIGN_CTL(me, arg2);
		break;
	case PR_GET_UNALIGN:
		error = GET_UNALIGN_CTL(me, arg2);
		break;
	case PR_SET_FPEMU:
		error = SET_FPEMU_CTL(me, arg2);
		break;
	case PR_GET_FPEMU:
		error = GET_FPEMU_CTL(me, arg2);
		break;
	case PR_SET_FPEXC:
		error = SET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_FPEXC:
		error = GET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_TIMING:
		error = PR_TIMING_STATISTICAL;
		break;
	case PR_SET_TIMING:
		if (arg2 != PR_TIMING_STATISTICAL)
			error = -EINVAL;
		else
			error = 0;
		break;

	case PR_SET_NAME:
		comm[sizeof(me->comm) - 1] = 0;
		if (strncpy_from_user(comm, (char __user *)arg2,
				      sizeof(me->comm) - 1) < 0)
			return -EFAULT;
		set_task_comm(me, comm);
		return 0;
	case PR_GET_NAME:
		get_task_comm(comm, me);
		if (copy_to_user((char __user *)arg2, comm,
				 sizeof(comm)))
			return -EFAULT;
		return 0;
	case PR_GET_ENDIAN:
		error = GET_ENDIAN(me, arg2);
		break;
	case PR_SET_ENDIAN:
		error = SET_ENDIAN(me, arg2);
		break;

	case PR_GET_SECCOMP:
		error = prctl_get_seccomp();
		break;
	case PR_SET_SECCOMP:
		error = prctl_set_seccomp(arg2);
		break;
	case PR_GET_TSC:
		error = GET_TSC_CTL(arg2);
		break;
	case PR_SET_TSC:
		error = SET_TSC_CTL(arg2);
		break;
	case PR_GET_TIMERSLACK:
		error = current->timer_slack_ns;
		break;
	case PR_SET_TIMERSLACK:
		if (arg2 <= 0)
			current->timer_slack_ns =
				current->default_timer_slack_ns;
		else
			current->timer_slack_ns = arg2;
		error = 0;
		break;
	default:
		error = -EINVAL;
		break;
	}
	return error;
}

asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
			   struct getcpu_cache __user *unused)
{
	int err = 0;
	int cpu = raw_smp_processor_id();
	if (cpup)
		err |= put_user(cpu, cpup);
	if (nodep)
		err |= put_user(cpu_to_node(cpu), nodep);
	return err ? -EFAULT : 0;
}
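
/*
 * Illustrative userspace use of the name prctls above (assumes
 * <sys/prctl.h>); names are silently truncated to 15 bytes plus the
 * terminating NUL:
 *
 *	prctl(PR_SET_NAME, "worker-1", 0, 0, 0);
 *
 *	char buf[16];
 *	prctl(PR_GET_NAME, buf, 0, 0, 0);
 */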

char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";

static void argv_cleanup(char **argv, char **envp)
{
	argv_free(argv);
}

/**
 * orderly_poweroff - Trigger an orderly system poweroff
 * @force: force poweroff if command execution fails
 *
 * This may be called from any context to trigger a system shutdown.
 * If the orderly shutdown fails, it will force an immediate shutdown.
 */
int orderly_poweroff(bool force)
{
	int argc;
	char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
	static char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL
	};
	int ret = -ENOMEM;
	struct subprocess_info *info;

	if (argv == NULL) {
		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
		       __func__, poweroff_cmd);
		goto out;
	}

	info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
	if (info == NULL) {
		argv_free(argv);
		goto out;
	}

	call_usermodehelper_setcleanup(info, argv_cleanup);

	ret = call_usermodehelper_exec(info, UMH_NO_WAIT);

out:
	if (ret && force) {
		printk(KERN_WARNING "Failed to start orderly shutdown: "
		       "forcing the issue\n");

		/*
		 * I guess this should try to kick off some daemon to
		 * sync and poweroff asap. Or not even bother syncing
		 * if we're doing an emergency shutdown?
		 */
		emergency_sync();
		kernel_power_off();
	}

	return ret;
}
EXPORT_SYMBOL_GPL(orderly_poweroff);