1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_compat.h" 36 37 #include <sys/param.h> 38 #include <sys/blist.h> 39 #include <sys/fcntl.h> 40 #if defined(__i386__) 41 #include <sys/imgact_aout.h> 42 #endif 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/mman.h> 49 #include <sys/mount.h> 50 #include <sys/mutex.h> 51 #include <sys/namei.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/procctl.h> 55 #include <sys/reboot.h> 56 #include <sys/racct.h> 57 #include <sys/random.h> 58 #include <sys/resourcevar.h> 59 #include <sys/sched.h> 60 #include <sys/sdt.h> 61 #include <sys/signalvar.h> 62 #include <sys/stat.h> 63 #include <sys/syscallsubr.h> 64 #include <sys/sysctl.h> 65 #include <sys/sysproto.h> 66 #include <sys/systm.h> 67 #include <sys/time.h> 68 #include <sys/vmmeter.h> 69 #include <sys/vnode.h> 70 #include <sys/wait.h> 71 #include <sys/cpuset.h> 72 #include <sys/uio.h> 73 74 #include <security/mac/mac_framework.h> 75 76 #include <vm/vm.h> 77 #include <vm/pmap.h> 78 #include <vm/vm_kern.h> 79 #include <vm/vm_map.h> 80 #include <vm/vm_extern.h> 81 #include <vm/vm_object.h> 82 #include <vm/swap_pager.h> 83 84 #ifdef COMPAT_LINUX32 85 #include <machine/../linux32/linux.h> 86 #include <machine/../linux32/linux32_proto.h> 87 #else 88 #include <machine/../linux/linux.h> 89 #include <machine/../linux/linux_proto.h> 90 #endif 91 92 #include <compat/linux/linux_dtrace.h> 93 #include <compat/linux/linux_file.h> 94 #include <compat/linux/linux_mib.h> 95 #include <compat/linux/linux_signal.h> 96 #include <compat/linux/linux_timer.h> 97 #include <compat/linux/linux_util.h> 98 #include <compat/linux/linux_sysproto.h> 99 #include <compat/linux/linux_emul.h> 100 #include <compat/linux/linux_misc.h> 101 102 /** 103 * Special DTrace provider for the linuxulator. 
104 * 105 * In this file we define the provider for the entire linuxulator. All 106 * modules (= files of the linuxulator) use it. 107 * 108 * We define a different name depending on the emulated bitsize, see 109 * ../../<ARCH>/linux{,32}/linux.h, e.g.: 110 * native bitsize = linuxulator 111 * amd64, 32bit emulation = linuxulator32 112 */ 113 LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); 114 115 int stclohz; /* Statistics clock frequency */ 116 117 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 118 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 119 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 120 RLIMIT_MEMLOCK, RLIMIT_AS 121 }; 122 123 struct l_sysinfo { 124 l_long uptime; /* Seconds since boot */ 125 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 126 #define LINUX_SYSINFO_LOADS_SCALE 65536 127 l_ulong totalram; /* Total usable main memory size */ 128 l_ulong freeram; /* Available memory size */ 129 l_ulong sharedram; /* Amount of shared memory */ 130 l_ulong bufferram; /* Memory used by buffers */ 131 l_ulong totalswap; /* Total swap space size */ 132 l_ulong freeswap; /* swap space still available */ 133 l_ushort procs; /* Number of current processes */ 134 l_ushort pads; 135 l_ulong totalbig; 136 l_ulong freebig; 137 l_uint mem_unit; 138 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 139 }; 140 141 struct l_pselect6arg { 142 l_uintptr_t ss; 143 l_size_t ss_len; 144 }; 145 146 static int linux_utimensat_nsec_valid(l_long); 147 148 149 int 150 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 151 { 152 struct l_sysinfo sysinfo; 153 vm_object_t object; 154 int i, j; 155 struct timespec ts; 156 157 bzero(&sysinfo, sizeof(sysinfo)); 158 getnanouptime(&ts); 159 if (ts.tv_nsec != 0) 160 ts.tv_sec++; 161 sysinfo.uptime = ts.tv_sec; 162 163 /* Use the information from the mib to get our load averages */ 164 for (i = 0; i < 3; i++) 165 sysinfo.loads[i] = averunnable.ldavg[i] * 166 LINUX_SYSINFO_LOADS_SCALE 
/ averunnable.fscale; 167 168 sysinfo.totalram = physmem * PAGE_SIZE; 169 sysinfo.freeram = sysinfo.totalram - vm_wire_count() * PAGE_SIZE; 170 171 sysinfo.sharedram = 0; 172 mtx_lock(&vm_object_list_mtx); 173 TAILQ_FOREACH(object, &vm_object_list, object_list) 174 if (object->shadow_count > 1) 175 sysinfo.sharedram += object->resident_page_count; 176 mtx_unlock(&vm_object_list_mtx); 177 178 sysinfo.sharedram *= PAGE_SIZE; 179 sysinfo.bufferram = 0; 180 181 swap_pager_status(&i, &j); 182 sysinfo.totalswap = i * PAGE_SIZE; 183 sysinfo.freeswap = (i - j) * PAGE_SIZE; 184 185 sysinfo.procs = nprocs; 186 187 /* The following are only present in newer Linux kernels. */ 188 sysinfo.totalbig = 0; 189 sysinfo.freebig = 0; 190 sysinfo.mem_unit = 1; 191 192 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 193 } 194 195 #ifdef LINUX_LEGACY_SYSCALLS 196 int 197 linux_alarm(struct thread *td, struct linux_alarm_args *args) 198 { 199 struct itimerval it, old_it; 200 u_int secs; 201 int error; 202 203 #ifdef DEBUG 204 if (ldebug(alarm)) 205 printf(ARGS(alarm, "%u"), args->secs); 206 #endif 207 secs = args->secs; 208 /* 209 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 210 * to match kern_setitimer()'s limit to avoid error from it. 211 * 212 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 213 * platforms. 
214 */ 215 if (secs > INT32_MAX / 2) 216 secs = INT32_MAX / 2; 217 218 it.it_value.tv_sec = secs; 219 it.it_value.tv_usec = 0; 220 timevalclear(&it.it_interval); 221 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 222 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 223 224 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 225 old_it.it_value.tv_usec >= 500000) 226 old_it.it_value.tv_sec++; 227 td->td_retval[0] = old_it.it_value.tv_sec; 228 return (0); 229 } 230 #endif 231 232 int 233 linux_brk(struct thread *td, struct linux_brk_args *args) 234 { 235 struct vmspace *vm = td->td_proc->p_vmspace; 236 uintptr_t new, old; 237 238 #ifdef DEBUG 239 if (ldebug(brk)) 240 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); 241 #endif 242 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 243 new = (uintptr_t)args->dsend; 244 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 245 td->td_retval[0] = (register_t)new; 246 else 247 td->td_retval[0] = (register_t)old; 248 249 return (0); 250 } 251 252 #if defined(__i386__) 253 /* XXX: what about amd64/linux32? 
*/ 254 255 int 256 linux_uselib(struct thread *td, struct linux_uselib_args *args) 257 { 258 struct nameidata ni; 259 struct vnode *vp; 260 struct exec *a_out; 261 vm_map_t map; 262 vm_map_entry_t entry; 263 struct vattr attr; 264 vm_offset_t vmaddr; 265 unsigned long file_offset; 266 unsigned long bss_size; 267 char *library; 268 ssize_t aresid; 269 int error; 270 bool locked, opened, textset; 271 272 LCONVPATHEXIST(td, args->library, &library); 273 274 #ifdef DEBUG 275 if (ldebug(uselib)) 276 printf(ARGS(uselib, "%s"), library); 277 #endif 278 279 a_out = NULL; 280 vp = NULL; 281 locked = false; 282 textset = false; 283 opened = false; 284 285 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 286 UIO_SYSSPACE, library, td); 287 error = namei(&ni); 288 LFREEPATH(library); 289 if (error) 290 goto cleanup; 291 292 vp = ni.ni_vp; 293 NDFREE(&ni, NDF_ONLY_PNBUF); 294 295 /* 296 * From here on down, we have a locked vnode that must be unlocked. 297 * XXX: The code below largely duplicates exec_check_permissions(). 298 */ 299 locked = true; 300 301 /* Executable? */ 302 error = VOP_GETATTR(vp, &attr, td->td_ucred); 303 if (error) 304 goto cleanup; 305 306 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 307 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 308 /* EACCESS is what exec(2) returns. */ 309 error = ENOEXEC; 310 goto cleanup; 311 } 312 313 /* Sensible size? */ 314 if (attr.va_size == 0) { 315 error = ENOEXEC; 316 goto cleanup; 317 } 318 319 /* Can we access it? */ 320 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 321 if (error) 322 goto cleanup; 323 324 /* 325 * XXX: This should use vn_open() so that it is properly authorized, 326 * and to reduce code redundancy all over the place here. 327 * XXX: Not really, it duplicates far more of exec_check_permissions() 328 * than vn_open(). 
329 */ 330 #ifdef MAC 331 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 332 if (error) 333 goto cleanup; 334 #endif 335 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 336 if (error) 337 goto cleanup; 338 opened = true; 339 340 /* Pull in executable header into exec_map */ 341 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 342 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 343 if (error) 344 goto cleanup; 345 346 /* Is it a Linux binary ? */ 347 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 348 error = ENOEXEC; 349 goto cleanup; 350 } 351 352 /* 353 * While we are here, we should REALLY do some more checks 354 */ 355 356 /* Set file/virtual offset based on a.out variant. */ 357 switch ((int)(a_out->a_magic & 0xffff)) { 358 case 0413: /* ZMAGIC */ 359 file_offset = 1024; 360 break; 361 case 0314: /* QMAGIC */ 362 file_offset = 0; 363 break; 364 default: 365 error = ENOEXEC; 366 goto cleanup; 367 } 368 369 bss_size = round_page(a_out->a_bss); 370 371 /* Check various fields in header for validity/bounds. */ 372 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 373 error = ENOEXEC; 374 goto cleanup; 375 } 376 377 /* text + data can't exceed file size */ 378 if (a_out->a_data + a_out->a_text > attr.va_size) { 379 error = EFAULT; 380 goto cleanup; 381 } 382 383 /* 384 * text/data/bss must not exceed limits 385 * XXX - this is not complete. it should check current usage PLUS 386 * the resources needed by this library. 387 */ 388 PROC_LOCK(td->td_proc); 389 if (a_out->a_text > maxtsiz || 390 a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || 391 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 392 bss_size) != 0) { 393 PROC_UNLOCK(td->td_proc); 394 error = ENOMEM; 395 goto cleanup; 396 } 397 PROC_UNLOCK(td->td_proc); 398 399 /* 400 * Prevent more writers. 
401 */ 402 error = VOP_SET_TEXT(vp); 403 if (error != 0) 404 goto cleanup; 405 textset = true; 406 407 /* 408 * Lock no longer needed 409 */ 410 locked = false; 411 VOP_UNLOCK(vp, 0); 412 413 /* 414 * Check if file_offset page aligned. Currently we cannot handle 415 * misalinged file offsets, and so we read in the entire image 416 * (what a waste). 417 */ 418 if (file_offset & PAGE_MASK) { 419 #ifdef DEBUG 420 printf("uselib: Non page aligned binary %lu\n", file_offset); 421 #endif 422 /* Map text+data read/write/execute */ 423 424 /* a_entry is the load address and is page aligned */ 425 vmaddr = trunc_page(a_out->a_entry); 426 427 /* get anon user mapping, read+write+execute */ 428 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 429 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 430 VM_PROT_ALL, VM_PROT_ALL, 0); 431 if (error) 432 goto cleanup; 433 434 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 435 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 436 td->td_ucred, NOCRED, &aresid, td); 437 if (error != 0) 438 goto cleanup; 439 if (aresid != 0) { 440 error = ENOEXEC; 441 goto cleanup; 442 } 443 } else { 444 #ifdef DEBUG 445 printf("uselib: Page aligned binary %lu\n", file_offset); 446 #endif 447 /* 448 * for QMAGIC, a_entry is 20 bytes beyond the load address 449 * to skip the executable header 450 */ 451 vmaddr = trunc_page(a_out->a_entry); 452 453 /* 454 * Map it all into the process's space as a single 455 * copy-on-write "data" segment. 
456 */ 457 map = &td->td_proc->p_vmspace->vm_map; 458 error = vm_mmap(map, &vmaddr, 459 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 460 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 461 if (error) 462 goto cleanup; 463 vm_map_lock(map); 464 if (!vm_map_lookup_entry(map, vmaddr, &entry)) { 465 vm_map_unlock(map); 466 error = EDOOFUS; 467 goto cleanup; 468 } 469 entry->eflags |= MAP_ENTRY_VN_EXEC; 470 vm_map_unlock(map); 471 textset = false; 472 } 473 #ifdef DEBUG 474 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0], 475 ((long *)vmaddr)[1]); 476 #endif 477 if (bss_size != 0) { 478 /* Calculate BSS start address */ 479 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 480 a_out->a_data; 481 482 /* allocate some 'anon' space */ 483 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 484 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 485 VM_PROT_ALL, 0); 486 if (error) 487 goto cleanup; 488 } 489 490 cleanup: 491 if (opened) { 492 if (locked) 493 VOP_UNLOCK(vp, 0); 494 locked = false; 495 VOP_CLOSE(vp, FREAD, td->td_ucred, td); 496 } 497 if (textset) { 498 if (!locked) { 499 locked = true; 500 VOP_LOCK(vp, LK_SHARED | LK_RETRY); 501 } 502 VOP_UNSET_TEXT_CHECKED(vp); 503 } 504 if (locked) 505 VOP_UNLOCK(vp, 0); 506 507 /* Release the temporary mapping. */ 508 if (a_out) 509 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 510 511 return (error); 512 } 513 514 #endif /* __i386__ */ 515 516 #ifdef LINUX_LEGACY_SYSCALLS 517 int 518 linux_select(struct thread *td, struct linux_select_args *args) 519 { 520 l_timeval ltv; 521 struct timeval tv0, tv1, utv, *tvp; 522 int error; 523 524 #ifdef DEBUG 525 if (ldebug(select)) 526 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, 527 (void *)args->readfds, (void *)args->writefds, 528 (void *)args->exceptfds, (void *)args->timeout); 529 #endif 530 531 /* 532 * Store current time for computation of the amount of 533 * time left. 
534 */ 535 if (args->timeout) { 536 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 537 goto select_out; 538 utv.tv_sec = ltv.tv_sec; 539 utv.tv_usec = ltv.tv_usec; 540 #ifdef DEBUG 541 if (ldebug(select)) 542 printf(LMSG("incoming timeout (%jd/%ld)"), 543 (intmax_t)utv.tv_sec, utv.tv_usec); 544 #endif 545 546 if (itimerfix(&utv)) { 547 /* 548 * The timeval was invalid. Convert it to something 549 * valid that will act as it does under Linux. 550 */ 551 utv.tv_sec += utv.tv_usec / 1000000; 552 utv.tv_usec %= 1000000; 553 if (utv.tv_usec < 0) { 554 utv.tv_sec -= 1; 555 utv.tv_usec += 1000000; 556 } 557 if (utv.tv_sec < 0) 558 timevalclear(&utv); 559 } 560 microtime(&tv0); 561 tvp = &utv; 562 } else 563 tvp = NULL; 564 565 error = kern_select(td, args->nfds, args->readfds, args->writefds, 566 args->exceptfds, tvp, LINUX_NFDBITS); 567 568 #ifdef DEBUG 569 if (ldebug(select)) 570 printf(LMSG("real select returns %d"), error); 571 #endif 572 if (error) 573 goto select_out; 574 575 if (args->timeout) { 576 if (td->td_retval[0]) { 577 /* 578 * Compute how much time was left of the timeout, 579 * by subtracting the current time and the time 580 * before we started the call, and subtracting 581 * that result from the user-supplied value. 
582 */ 583 microtime(&tv1); 584 timevalsub(&tv1, &tv0); 585 timevalsub(&utv, &tv1); 586 if (utv.tv_sec < 0) 587 timevalclear(&utv); 588 } else 589 timevalclear(&utv); 590 #ifdef DEBUG 591 if (ldebug(select)) 592 printf(LMSG("outgoing timeout (%jd/%ld)"), 593 (intmax_t)utv.tv_sec, utv.tv_usec); 594 #endif 595 ltv.tv_sec = utv.tv_sec; 596 ltv.tv_usec = utv.tv_usec; 597 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 598 goto select_out; 599 } 600 601 select_out: 602 #ifdef DEBUG 603 if (ldebug(select)) 604 printf(LMSG("select_out -> %d"), error); 605 #endif 606 return (error); 607 } 608 #endif 609 610 int 611 linux_mremap(struct thread *td, struct linux_mremap_args *args) 612 { 613 uintptr_t addr; 614 size_t len; 615 int error = 0; 616 617 #ifdef DEBUG 618 if (ldebug(mremap)) 619 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), 620 (void *)(uintptr_t)args->addr, 621 (unsigned long)args->old_len, 622 (unsigned long)args->new_len, 623 (unsigned long)args->flags); 624 #endif 625 626 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 627 td->td_retval[0] = 0; 628 return (EINVAL); 629 } 630 631 /* 632 * Check for the page alignment. 633 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 634 */ 635 if (args->addr & PAGE_MASK) { 636 td->td_retval[0] = 0; 637 return (EINVAL); 638 } 639 640 args->new_len = round_page(args->new_len); 641 args->old_len = round_page(args->old_len); 642 643 if (args->new_len > args->old_len) { 644 td->td_retval[0] = 0; 645 return (ENOMEM); 646 } 647 648 if (args->new_len < args->old_len) { 649 addr = args->addr + args->new_len; 650 len = args->old_len - args->new_len; 651 error = kern_munmap(td, addr, len); 652 } 653 654 td->td_retval[0] = error ? 
0 : (uintptr_t)args->addr; 655 return (error); 656 } 657 658 #define LINUX_MS_ASYNC 0x0001 659 #define LINUX_MS_INVALIDATE 0x0002 660 #define LINUX_MS_SYNC 0x0004 661 662 int 663 linux_msync(struct thread *td, struct linux_msync_args *args) 664 { 665 666 return (kern_msync(td, args->addr, args->len, 667 args->fl & ~LINUX_MS_SYNC)); 668 } 669 670 #ifdef LINUX_LEGACY_SYSCALLS 671 int 672 linux_time(struct thread *td, struct linux_time_args *args) 673 { 674 struct timeval tv; 675 l_time_t tm; 676 int error; 677 678 #ifdef DEBUG 679 if (ldebug(time)) 680 printf(ARGS(time, "*")); 681 #endif 682 683 microtime(&tv); 684 tm = tv.tv_sec; 685 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 686 return (error); 687 td->td_retval[0] = tm; 688 return (0); 689 } 690 #endif 691 692 struct l_times_argv { 693 l_clock_t tms_utime; 694 l_clock_t tms_stime; 695 l_clock_t tms_cutime; 696 l_clock_t tms_cstime; 697 }; 698 699 700 /* 701 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 702 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 703 * auxiliary vector entry. 704 */ 705 #define CLK_TCK 100 706 707 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 708 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 709 710 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? 
\ 711 CONVNTCK(r) : CONVOTCK(r)) 712 713 int 714 linux_times(struct thread *td, struct linux_times_args *args) 715 { 716 struct timeval tv, utime, stime, cutime, cstime; 717 struct l_times_argv tms; 718 struct proc *p; 719 int error; 720 721 #ifdef DEBUG 722 if (ldebug(times)) 723 printf(ARGS(times, "*")); 724 #endif 725 726 if (args->buf != NULL) { 727 p = td->td_proc; 728 PROC_LOCK(p); 729 PROC_STATLOCK(p); 730 calcru(p, &utime, &stime); 731 PROC_STATUNLOCK(p); 732 calccru(p, &cutime, &cstime); 733 PROC_UNLOCK(p); 734 735 tms.tms_utime = CONVTCK(utime); 736 tms.tms_stime = CONVTCK(stime); 737 738 tms.tms_cutime = CONVTCK(cutime); 739 tms.tms_cstime = CONVTCK(cstime); 740 741 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 742 return (error); 743 } 744 745 microuptime(&tv); 746 td->td_retval[0] = (int)CONVTCK(tv); 747 return (0); 748 } 749 750 int 751 linux_newuname(struct thread *td, struct linux_newuname_args *args) 752 { 753 struct l_new_utsname utsname; 754 char osname[LINUX_MAX_UTSNAME]; 755 char osrelease[LINUX_MAX_UTSNAME]; 756 char *p; 757 758 #ifdef DEBUG 759 if (ldebug(newuname)) 760 printf(ARGS(newuname, "*")); 761 #endif 762 763 linux_get_osname(td, osname); 764 linux_get_osrelease(td, osrelease); 765 766 bzero(&utsname, sizeof(utsname)); 767 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 768 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 769 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 770 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 771 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 772 for (p = utsname.version; *p != '\0'; ++p) 773 if (*p == '\n') { 774 *p = '\0'; 775 break; 776 } 777 strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); 778 779 return (copyout(&utsname, args->buf, sizeof(utsname))); 780 } 781 782 struct l_utimbuf { 783 l_time_t l_actime; 784 l_time_t l_modtime; 785 }; 786 787 #ifdef LINUX_LEGACY_SYSCALLS 788 int 789 linux_utime(struct thread *td, 
struct linux_utime_args *args) 790 { 791 struct timeval tv[2], *tvp; 792 struct l_utimbuf lut; 793 char *fname; 794 int error; 795 796 LCONVPATHEXIST(td, args->fname, &fname); 797 798 #ifdef DEBUG 799 if (ldebug(utime)) 800 printf(ARGS(utime, "%s, *"), fname); 801 #endif 802 803 if (args->times) { 804 if ((error = copyin(args->times, &lut, sizeof lut))) { 805 LFREEPATH(fname); 806 return (error); 807 } 808 tv[0].tv_sec = lut.l_actime; 809 tv[0].tv_usec = 0; 810 tv[1].tv_sec = lut.l_modtime; 811 tv[1].tv_usec = 0; 812 tvp = tv; 813 } else 814 tvp = NULL; 815 816 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, 817 UIO_SYSSPACE); 818 LFREEPATH(fname); 819 return (error); 820 } 821 #endif 822 823 #ifdef LINUX_LEGACY_SYSCALLS 824 int 825 linux_utimes(struct thread *td, struct linux_utimes_args *args) 826 { 827 l_timeval ltv[2]; 828 struct timeval tv[2], *tvp = NULL; 829 char *fname; 830 int error; 831 832 LCONVPATHEXIST(td, args->fname, &fname); 833 834 #ifdef DEBUG 835 if (ldebug(utimes)) 836 printf(ARGS(utimes, "%s, *"), fname); 837 #endif 838 839 if (args->tptr != NULL) { 840 if ((error = copyin(args->tptr, ltv, sizeof ltv))) { 841 LFREEPATH(fname); 842 return (error); 843 } 844 tv[0].tv_sec = ltv[0].tv_sec; 845 tv[0].tv_usec = ltv[0].tv_usec; 846 tv[1].tv_sec = ltv[1].tv_sec; 847 tv[1].tv_usec = ltv[1].tv_usec; 848 tvp = tv; 849 } 850 851 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, 852 tvp, UIO_SYSSPACE); 853 LFREEPATH(fname); 854 return (error); 855 } 856 #endif 857 858 static int 859 linux_utimensat_nsec_valid(l_long nsec) 860 { 861 862 if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) 863 return (0); 864 if (nsec >= 0 && nsec <= 999999999) 865 return (0); 866 return (1); 867 } 868 869 int 870 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 871 { 872 struct l_timespec l_times[2]; 873 struct timespec times[2], *timesp = NULL; 874 char *path = NULL; 875 int error, dfd, flags = 0; 876 877 dfd = (args->dfd == 
LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 878 879 #ifdef DEBUG 880 if (ldebug(utimensat)) 881 printf(ARGS(utimensat, "%d, *"), dfd); 882 #endif 883 884 if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW) 885 return (EINVAL); 886 887 if (args->times != NULL) { 888 error = copyin(args->times, l_times, sizeof(l_times)); 889 if (error != 0) 890 return (error); 891 892 if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || 893 linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) 894 return (EINVAL); 895 896 times[0].tv_sec = l_times[0].tv_sec; 897 switch (l_times[0].tv_nsec) 898 { 899 case LINUX_UTIME_OMIT: 900 times[0].tv_nsec = UTIME_OMIT; 901 break; 902 case LINUX_UTIME_NOW: 903 times[0].tv_nsec = UTIME_NOW; 904 break; 905 default: 906 times[0].tv_nsec = l_times[0].tv_nsec; 907 } 908 909 times[1].tv_sec = l_times[1].tv_sec; 910 switch (l_times[1].tv_nsec) 911 { 912 case LINUX_UTIME_OMIT: 913 times[1].tv_nsec = UTIME_OMIT; 914 break; 915 case LINUX_UTIME_NOW: 916 times[1].tv_nsec = UTIME_NOW; 917 break; 918 default: 919 times[1].tv_nsec = l_times[1].tv_nsec; 920 break; 921 } 922 timesp = times; 923 924 /* This breaks POSIX, but is what the Linux kernel does 925 * _on purpose_ (documented in the man page for utimensat(2)), 926 * so we must follow that behaviour. 
*/ 927 if (times[0].tv_nsec == UTIME_OMIT && 928 times[1].tv_nsec == UTIME_OMIT) 929 return (0); 930 } 931 932 if (args->pathname != NULL) 933 LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); 934 else if (args->flags != 0) 935 return (EINVAL); 936 937 if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) 938 flags |= AT_SYMLINK_NOFOLLOW; 939 940 if (path == NULL) 941 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); 942 else { 943 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, 944 UIO_SYSSPACE, flags); 945 LFREEPATH(path); 946 } 947 948 return (error); 949 } 950 951 #ifdef LINUX_LEGACY_SYSCALLS 952 int 953 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 954 { 955 l_timeval ltv[2]; 956 struct timeval tv[2], *tvp = NULL; 957 char *fname; 958 int error, dfd; 959 960 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 961 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); 962 963 #ifdef DEBUG 964 if (ldebug(futimesat)) 965 printf(ARGS(futimesat, "%s, *"), fname); 966 #endif 967 968 if (args->utimes != NULL) { 969 if ((error = copyin(args->utimes, ltv, sizeof ltv))) { 970 LFREEPATH(fname); 971 return (error); 972 } 973 tv[0].tv_sec = ltv[0].tv_sec; 974 tv[0].tv_usec = ltv[0].tv_usec; 975 tv[1].tv_sec = ltv[1].tv_sec; 976 tv[1].tv_usec = ltv[1].tv_usec; 977 tvp = tv; 978 } 979 980 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 981 LFREEPATH(fname); 982 return (error); 983 } 984 #endif 985 986 int 987 linux_common_wait(struct thread *td, int pid, int *status, 988 int options, struct rusage *ru) 989 { 990 int error, tmpstat; 991 992 error = kern_wait(td, pid, &tmpstat, options, ru); 993 if (error) 994 return (error); 995 996 if (status) { 997 tmpstat &= 0xffff; 998 if (WIFSIGNALED(tmpstat)) 999 tmpstat = (tmpstat & 0xffffff80) | 1000 bsd_to_linux_signal(WTERMSIG(tmpstat)); 1001 else if (WIFSTOPPED(tmpstat)) 1002 tmpstat = (tmpstat & 0xffff00ff) | 1003 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 1004 else 
if (WIFCONTINUED(tmpstat)) 1005 tmpstat = 0xffff; 1006 error = copyout(&tmpstat, status, sizeof(int)); 1007 } 1008 1009 return (error); 1010 } 1011 1012 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1013 int 1014 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 1015 { 1016 struct linux_wait4_args wait4_args; 1017 1018 #ifdef DEBUG 1019 if (ldebug(waitpid)) 1020 printf(ARGS(waitpid, "%d, %p, %d"), 1021 args->pid, (void *)args->status, args->options); 1022 #endif 1023 1024 wait4_args.pid = args->pid; 1025 wait4_args.status = args->status; 1026 wait4_args.options = args->options; 1027 wait4_args.rusage = NULL; 1028 1029 return (linux_wait4(td, &wait4_args)); 1030 } 1031 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1032 1033 int 1034 linux_wait4(struct thread *td, struct linux_wait4_args *args) 1035 { 1036 int error, options; 1037 struct rusage ru, *rup; 1038 1039 #ifdef DEBUG 1040 if (ldebug(wait4)) 1041 printf(ARGS(wait4, "%d, %p, %d, %p"), 1042 args->pid, (void *)args->status, args->options, 1043 (void *)args->rusage); 1044 #endif 1045 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 1046 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 1047 return (EINVAL); 1048 1049 options = WEXITED; 1050 linux_to_bsd_waitopts(args->options, &options); 1051 1052 if (args->rusage != NULL) 1053 rup = &ru; 1054 else 1055 rup = NULL; 1056 error = linux_common_wait(td, args->pid, args->status, options, rup); 1057 if (error != 0) 1058 return (error); 1059 if (args->rusage != NULL) 1060 error = linux_copyout_rusage(&ru, args->rusage); 1061 return (error); 1062 } 1063 1064 int 1065 linux_waitid(struct thread *td, struct linux_waitid_args *args) 1066 { 1067 int status, options, sig; 1068 struct __wrusage wru; 1069 siginfo_t siginfo; 1070 l_siginfo_t lsi; 1071 idtype_t idtype; 1072 struct proc *p; 1073 int error; 1074 1075 options = 0; 1076 linux_to_bsd_waitopts(args->options, &options); 1077 1078 if (options & ~(WNOHANG | 
WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) 1079 return (EINVAL); 1080 if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) 1081 return (EINVAL); 1082 1083 switch (args->idtype) { 1084 case LINUX_P_ALL: 1085 idtype = P_ALL; 1086 break; 1087 case LINUX_P_PID: 1088 if (args->id <= 0) 1089 return (EINVAL); 1090 idtype = P_PID; 1091 break; 1092 case LINUX_P_PGID: 1093 if (args->id <= 0) 1094 return (EINVAL); 1095 idtype = P_PGID; 1096 break; 1097 default: 1098 return (EINVAL); 1099 } 1100 1101 error = kern_wait6(td, idtype, args->id, &status, options, 1102 &wru, &siginfo); 1103 if (error != 0) 1104 return (error); 1105 if (args->rusage != NULL) { 1106 error = linux_copyout_rusage(&wru.wru_children, 1107 args->rusage); 1108 if (error != 0) 1109 return (error); 1110 } 1111 if (args->info != NULL) { 1112 p = td->td_proc; 1113 bzero(&lsi, sizeof(lsi)); 1114 if (td->td_retval[0] != 0) { 1115 sig = bsd_to_linux_signal(siginfo.si_signo); 1116 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 1117 } 1118 error = copyout(&lsi, args->info, sizeof(lsi)); 1119 } 1120 td->td_retval[0] = 0; 1121 1122 return (error); 1123 } 1124 1125 #ifdef LINUX_LEGACY_SYSCALLS 1126 int 1127 linux_mknod(struct thread *td, struct linux_mknod_args *args) 1128 { 1129 char *path; 1130 int error; 1131 1132 LCONVPATHCREAT(td, args->path, &path); 1133 1134 #ifdef DEBUG 1135 if (ldebug(mknod)) 1136 printf(ARGS(mknod, "%s, %d, %ju"), path, args->mode, 1137 (uintmax_t)args->dev); 1138 #endif 1139 1140 switch (args->mode & S_IFMT) { 1141 case S_IFIFO: 1142 case S_IFSOCK: 1143 error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE, 1144 args->mode); 1145 break; 1146 1147 case S_IFCHR: 1148 case S_IFBLK: 1149 error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE, 1150 args->mode, args->dev); 1151 break; 1152 1153 case S_IFDIR: 1154 error = EPERM; 1155 break; 1156 1157 case 0: 1158 args->mode |= S_IFREG; 1159 /* FALLTHROUGH */ 1160 case S_IFREG: 1161 error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, 1162 O_WRONLY | 
O_CREAT | O_TRUNC, args->mode); 1163 if (error == 0) 1164 kern_close(td, td->td_retval[0]); 1165 break; 1166 1167 default: 1168 error = EINVAL; 1169 break; 1170 } 1171 LFREEPATH(path); 1172 return (error); 1173 } 1174 #endif 1175 1176 int 1177 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 1178 { 1179 char *path; 1180 int error, dfd; 1181 1182 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 1183 LCONVPATHCREAT_AT(td, args->filename, &path, dfd); 1184 1185 #ifdef DEBUG 1186 if (ldebug(mknodat)) 1187 printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev); 1188 #endif 1189 1190 switch (args->mode & S_IFMT) { 1191 case S_IFIFO: 1192 case S_IFSOCK: 1193 error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); 1194 break; 1195 1196 case S_IFCHR: 1197 case S_IFBLK: 1198 error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, 1199 args->dev); 1200 break; 1201 1202 case S_IFDIR: 1203 error = EPERM; 1204 break; 1205 1206 case 0: 1207 args->mode |= S_IFREG; 1208 /* FALLTHROUGH */ 1209 case S_IFREG: 1210 error = kern_openat(td, dfd, path, UIO_SYSSPACE, 1211 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1212 if (error == 0) 1213 kern_close(td, td->td_retval[0]); 1214 break; 1215 1216 default: 1217 error = EINVAL; 1218 break; 1219 } 1220 LFREEPATH(path); 1221 return (error); 1222 } 1223 1224 /* 1225 * UGH! This is just about the dumbest idea I've ever heard!! 
/*
 * Copy one itimerval-shaped structure into another, field by field.
 * The first argument (bip) is the destination and the second (lip)
 * the source; both are pointers to structures that have it_interval
 * and it_value members, each carrying tv_sec and tv_usec.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and remains correct when used as the body of
 * an unbraced if/else.  Callers terminate it with a semicolon as usual.
 */
#define	B2L_ITIMERVAL(bip, lip) do {					\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;		\
} while (0)
1297 } 1298 1299 int 1300 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1301 { 1302 int error; 1303 struct l_itimerval ls; 1304 struct itimerval aitv; 1305 1306 #ifdef DEBUG 1307 if (ldebug(getitimer)) 1308 printf(ARGS(getitimer, "%p"), (void *)uap->itv); 1309 #endif 1310 error = kern_getitimer(td, uap->which, &aitv); 1311 if (error != 0) 1312 return (error); 1313 B2L_ITIMERVAL(&ls, &aitv); 1314 return (copyout(&ls, uap->itv, sizeof(ls))); 1315 } 1316 1317 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1318 int 1319 linux_nice(struct thread *td, struct linux_nice_args *args) 1320 { 1321 struct setpriority_args bsd_args; 1322 1323 bsd_args.which = PRIO_PROCESS; 1324 bsd_args.who = 0; /* current process */ 1325 bsd_args.prio = args->inc; 1326 return (sys_setpriority(td, &bsd_args)); 1327 } 1328 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1329 1330 int 1331 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1332 { 1333 struct ucred *newcred, *oldcred; 1334 l_gid_t *linux_gidset; 1335 gid_t *bsd_gidset; 1336 int ngrp, error; 1337 struct proc *p; 1338 1339 ngrp = args->gidsetsize; 1340 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1341 return (EINVAL); 1342 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1343 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1344 if (error) 1345 goto out; 1346 newcred = crget(); 1347 crextend(newcred, ngrp + 1); 1348 p = td->td_proc; 1349 PROC_LOCK(p); 1350 oldcred = p->p_ucred; 1351 crcopy(newcred, oldcred); 1352 1353 /* 1354 * cr_groups[0] holds egid. Setting the whole set from 1355 * the supplied set will cause egid to be changed too. 1356 * Keep cr_groups[0] unchanged to prevent that. 
1357 */ 1358 1359 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { 1360 PROC_UNLOCK(p); 1361 crfree(newcred); 1362 goto out; 1363 } 1364 1365 if (ngrp > 0) { 1366 newcred->cr_ngroups = ngrp + 1; 1367 1368 bsd_gidset = newcred->cr_groups; 1369 ngrp--; 1370 while (ngrp >= 0) { 1371 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1372 ngrp--; 1373 } 1374 } else 1375 newcred->cr_ngroups = 1; 1376 1377 setsugid(p); 1378 proc_set_cred(p, newcred); 1379 PROC_UNLOCK(p); 1380 crfree(oldcred); 1381 error = 0; 1382 out: 1383 free(linux_gidset, M_LINUX); 1384 return (error); 1385 } 1386 1387 int 1388 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1389 { 1390 struct ucred *cred; 1391 l_gid_t *linux_gidset; 1392 gid_t *bsd_gidset; 1393 int bsd_gidsetsz, ngrp, error; 1394 1395 cred = td->td_ucred; 1396 bsd_gidset = cred->cr_groups; 1397 bsd_gidsetsz = cred->cr_ngroups - 1; 1398 1399 /* 1400 * cr_groups[0] holds egid. Returning the whole set 1401 * here will cause a duplicate. Exclude cr_groups[0] 1402 * to prevent that. 
1403 */ 1404 1405 if ((ngrp = args->gidsetsize) == 0) { 1406 td->td_retval[0] = bsd_gidsetsz; 1407 return (0); 1408 } 1409 1410 if (ngrp < bsd_gidsetsz) 1411 return (EINVAL); 1412 1413 ngrp = 0; 1414 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1415 M_LINUX, M_WAITOK); 1416 while (ngrp < bsd_gidsetsz) { 1417 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1418 ngrp++; 1419 } 1420 1421 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1422 free(linux_gidset, M_LINUX); 1423 if (error) 1424 return (error); 1425 1426 td->td_retval[0] = ngrp; 1427 return (0); 1428 } 1429 1430 int 1431 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1432 { 1433 struct rlimit bsd_rlim; 1434 struct l_rlimit rlim; 1435 u_int which; 1436 int error; 1437 1438 #ifdef DEBUG 1439 if (ldebug(setrlimit)) 1440 printf(ARGS(setrlimit, "%d, %p"), 1441 args->resource, (void *)args->rlim); 1442 #endif 1443 1444 if (args->resource >= LINUX_RLIM_NLIMITS) 1445 return (EINVAL); 1446 1447 which = linux_to_bsd_resource[args->resource]; 1448 if (which == -1) 1449 return (EINVAL); 1450 1451 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1452 if (error) 1453 return (error); 1454 1455 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1456 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1457 return (kern_setrlimit(td, which, &bsd_rlim)); 1458 } 1459 1460 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1461 int 1462 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1463 { 1464 struct l_rlimit rlim; 1465 struct rlimit bsd_rlim; 1466 u_int which; 1467 1468 #ifdef DEBUG 1469 if (ldebug(old_getrlimit)) 1470 printf(ARGS(old_getrlimit, "%d, %p"), 1471 args->resource, (void *)args->rlim); 1472 #endif 1473 1474 if (args->resource >= LINUX_RLIM_NLIMITS) 1475 return (EINVAL); 1476 1477 which = linux_to_bsd_resource[args->resource]; 1478 if (which == -1) 1479 return (EINVAL); 1480 1481 lim_rlimit(td, which, &bsd_rlim); 1482 1483 
#ifdef COMPAT_LINUX32 1484 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1485 if (rlim.rlim_cur == UINT_MAX) 1486 rlim.rlim_cur = INT_MAX; 1487 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1488 if (rlim.rlim_max == UINT_MAX) 1489 rlim.rlim_max = INT_MAX; 1490 #else 1491 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1492 if (rlim.rlim_cur == ULONG_MAX) 1493 rlim.rlim_cur = LONG_MAX; 1494 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1495 if (rlim.rlim_max == ULONG_MAX) 1496 rlim.rlim_max = LONG_MAX; 1497 #endif 1498 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1499 } 1500 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1501 1502 int 1503 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1504 { 1505 struct l_rlimit rlim; 1506 struct rlimit bsd_rlim; 1507 u_int which; 1508 1509 #ifdef DEBUG 1510 if (ldebug(getrlimit)) 1511 printf(ARGS(getrlimit, "%d, %p"), 1512 args->resource, (void *)args->rlim); 1513 #endif 1514 1515 if (args->resource >= LINUX_RLIM_NLIMITS) 1516 return (EINVAL); 1517 1518 which = linux_to_bsd_resource[args->resource]; 1519 if (which == -1) 1520 return (EINVAL); 1521 1522 lim_rlimit(td, which, &bsd_rlim); 1523 1524 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1525 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1526 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1527 } 1528 1529 int 1530 linux_sched_setscheduler(struct thread *td, 1531 struct linux_sched_setscheduler_args *args) 1532 { 1533 struct sched_param sched_param; 1534 struct thread *tdt; 1535 int error, policy; 1536 1537 #ifdef DEBUG 1538 if (ldebug(sched_setscheduler)) 1539 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1540 args->pid, args->policy, (const void *)args->param); 1541 #endif 1542 1543 switch (args->policy) { 1544 case LINUX_SCHED_OTHER: 1545 policy = SCHED_OTHER; 1546 break; 1547 case LINUX_SCHED_FIFO: 1548 policy = SCHED_FIFO; 1549 break; 1550 case LINUX_SCHED_RR: 1551 policy = SCHED_RR; 1552 break; 1553 default: 1554 return 
(EINVAL); 1555 } 1556 1557 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1558 if (error) 1559 return (error); 1560 1561 tdt = linux_tdfind(td, args->pid, -1); 1562 if (tdt == NULL) 1563 return (ESRCH); 1564 1565 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1566 PROC_UNLOCK(tdt->td_proc); 1567 return (error); 1568 } 1569 1570 int 1571 linux_sched_getscheduler(struct thread *td, 1572 struct linux_sched_getscheduler_args *args) 1573 { 1574 struct thread *tdt; 1575 int error, policy; 1576 1577 #ifdef DEBUG 1578 if (ldebug(sched_getscheduler)) 1579 printf(ARGS(sched_getscheduler, "%d"), args->pid); 1580 #endif 1581 1582 tdt = linux_tdfind(td, args->pid, -1); 1583 if (tdt == NULL) 1584 return (ESRCH); 1585 1586 error = kern_sched_getscheduler(td, tdt, &policy); 1587 PROC_UNLOCK(tdt->td_proc); 1588 1589 switch (policy) { 1590 case SCHED_OTHER: 1591 td->td_retval[0] = LINUX_SCHED_OTHER; 1592 break; 1593 case SCHED_FIFO: 1594 td->td_retval[0] = LINUX_SCHED_FIFO; 1595 break; 1596 case SCHED_RR: 1597 td->td_retval[0] = LINUX_SCHED_RR; 1598 break; 1599 } 1600 return (error); 1601 } 1602 1603 int 1604 linux_sched_get_priority_max(struct thread *td, 1605 struct linux_sched_get_priority_max_args *args) 1606 { 1607 struct sched_get_priority_max_args bsd; 1608 1609 #ifdef DEBUG 1610 if (ldebug(sched_get_priority_max)) 1611 printf(ARGS(sched_get_priority_max, "%d"), args->policy); 1612 #endif 1613 1614 switch (args->policy) { 1615 case LINUX_SCHED_OTHER: 1616 bsd.policy = SCHED_OTHER; 1617 break; 1618 case LINUX_SCHED_FIFO: 1619 bsd.policy = SCHED_FIFO; 1620 break; 1621 case LINUX_SCHED_RR: 1622 bsd.policy = SCHED_RR; 1623 break; 1624 default: 1625 return (EINVAL); 1626 } 1627 return (sys_sched_get_priority_max(td, &bsd)); 1628 } 1629 1630 int 1631 linux_sched_get_priority_min(struct thread *td, 1632 struct linux_sched_get_priority_min_args *args) 1633 { 1634 struct sched_get_priority_min_args bsd; 1635 1636 #ifdef DEBUG 1637 if 
(ldebug(sched_get_priority_min)) 1638 printf(ARGS(sched_get_priority_min, "%d"), args->policy); 1639 #endif 1640 1641 switch (args->policy) { 1642 case LINUX_SCHED_OTHER: 1643 bsd.policy = SCHED_OTHER; 1644 break; 1645 case LINUX_SCHED_FIFO: 1646 bsd.policy = SCHED_FIFO; 1647 break; 1648 case LINUX_SCHED_RR: 1649 bsd.policy = SCHED_RR; 1650 break; 1651 default: 1652 return (EINVAL); 1653 } 1654 return (sys_sched_get_priority_min(td, &bsd)); 1655 } 1656 1657 #define REBOOT_CAD_ON 0x89abcdef 1658 #define REBOOT_CAD_OFF 0 1659 #define REBOOT_HALT 0xcdef0123 1660 #define REBOOT_RESTART 0x01234567 1661 #define REBOOT_RESTART2 0xA1B2C3D4 1662 #define REBOOT_POWEROFF 0x4321FEDC 1663 #define REBOOT_MAGIC1 0xfee1dead 1664 #define REBOOT_MAGIC2 0x28121969 1665 #define REBOOT_MAGIC2A 0x05121996 1666 #define REBOOT_MAGIC2B 0x16041998 1667 1668 int 1669 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1670 { 1671 struct reboot_args bsd_args; 1672 1673 #ifdef DEBUG 1674 if (ldebug(reboot)) 1675 printf(ARGS(reboot, "0x%x"), args->cmd); 1676 #endif 1677 1678 if (args->magic1 != REBOOT_MAGIC1) 1679 return (EINVAL); 1680 1681 switch (args->magic2) { 1682 case REBOOT_MAGIC2: 1683 case REBOOT_MAGIC2A: 1684 case REBOOT_MAGIC2B: 1685 break; 1686 default: 1687 return (EINVAL); 1688 } 1689 1690 switch (args->cmd) { 1691 case REBOOT_CAD_ON: 1692 case REBOOT_CAD_OFF: 1693 return (priv_check(td, PRIV_REBOOT)); 1694 case REBOOT_HALT: 1695 bsd_args.opt = RB_HALT; 1696 break; 1697 case REBOOT_RESTART: 1698 case REBOOT_RESTART2: 1699 bsd_args.opt = 0; 1700 break; 1701 case REBOOT_POWEROFF: 1702 bsd_args.opt = RB_POWEROFF; 1703 break; 1704 default: 1705 return (EINVAL); 1706 } 1707 return (sys_reboot(td, &bsd_args)); 1708 } 1709 1710 1711 int 1712 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1713 { 1714 1715 #ifdef DEBUG 1716 if (ldebug(getpid)) 1717 printf(ARGS(getpid, "")); 1718 #endif 1719 td->td_retval[0] = td->td_proc->p_pid; 1720 1721 return (0); 1722 } 
1723 1724 int 1725 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1726 { 1727 struct linux_emuldata *em; 1728 1729 #ifdef DEBUG 1730 if (ldebug(gettid)) 1731 printf(ARGS(gettid, "")); 1732 #endif 1733 1734 em = em_find(td); 1735 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1736 1737 td->td_retval[0] = em->em_tid; 1738 1739 return (0); 1740 } 1741 1742 1743 int 1744 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1745 { 1746 1747 #ifdef DEBUG 1748 if (ldebug(getppid)) 1749 printf(ARGS(getppid, "")); 1750 #endif 1751 1752 td->td_retval[0] = kern_getppid(td); 1753 return (0); 1754 } 1755 1756 int 1757 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1758 { 1759 1760 #ifdef DEBUG 1761 if (ldebug(getgid)) 1762 printf(ARGS(getgid, "")); 1763 #endif 1764 1765 td->td_retval[0] = td->td_ucred->cr_rgid; 1766 return (0); 1767 } 1768 1769 int 1770 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1771 { 1772 1773 #ifdef DEBUG 1774 if (ldebug(getuid)) 1775 printf(ARGS(getuid, "")); 1776 #endif 1777 1778 td->td_retval[0] = td->td_ucred->cr_ruid; 1779 return (0); 1780 } 1781 1782 1783 int 1784 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1785 { 1786 struct getsid_args bsd; 1787 1788 #ifdef DEBUG 1789 if (ldebug(getsid)) 1790 printf(ARGS(getsid, "%i"), args->pid); 1791 #endif 1792 1793 bsd.pid = args->pid; 1794 return (sys_getsid(td, &bsd)); 1795 } 1796 1797 int 1798 linux_nosys(struct thread *td, struct nosys_args *ignore) 1799 { 1800 1801 return (ENOSYS); 1802 } 1803 1804 int 1805 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1806 { 1807 struct getpriority_args bsd_args; 1808 int error; 1809 1810 #ifdef DEBUG 1811 if (ldebug(getpriority)) 1812 printf(ARGS(getpriority, "%i, %i"), args->which, args->who); 1813 #endif 1814 1815 bsd_args.which = args->which; 1816 bsd_args.who = args->who; 1817 error = sys_getpriority(td, &bsd_args); 1818 td->td_retval[0] = 20 - 
td->td_retval[0]; 1819 return (error); 1820 } 1821 1822 int 1823 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1824 { 1825 int name[2]; 1826 1827 #ifdef DEBUG 1828 if (ldebug(sethostname)) 1829 printf(ARGS(sethostname, "*, %i"), args->len); 1830 #endif 1831 1832 name[0] = CTL_KERN; 1833 name[1] = KERN_HOSTNAME; 1834 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1835 args->len, 0, 0)); 1836 } 1837 1838 int 1839 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1840 { 1841 int name[2]; 1842 1843 #ifdef DEBUG 1844 if (ldebug(setdomainname)) 1845 printf(ARGS(setdomainname, "*, %i"), args->len); 1846 #endif 1847 1848 name[0] = CTL_KERN; 1849 name[1] = KERN_NISDOMAINNAME; 1850 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1851 args->len, 0, 0)); 1852 } 1853 1854 int 1855 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1856 { 1857 1858 #ifdef DEBUG 1859 if (ldebug(exit_group)) 1860 printf(ARGS(exit_group, "%i"), args->error_code); 1861 #endif 1862 1863 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1864 args->error_code); 1865 1866 /* 1867 * XXX: we should send a signal to the parent if 1868 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1869 * as it doesnt occur often. 
1870 */ 1871 exit1(td, args->error_code, 0); 1872 /* NOTREACHED */ 1873 } 1874 1875 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1876 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1877 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1878 1879 struct l_user_cap_header { 1880 l_int version; 1881 l_int pid; 1882 }; 1883 1884 struct l_user_cap_data { 1885 l_int effective; 1886 l_int permitted; 1887 l_int inheritable; 1888 }; 1889 1890 int 1891 linux_capget(struct thread *td, struct linux_capget_args *uap) 1892 { 1893 struct l_user_cap_header luch; 1894 struct l_user_cap_data lucd[2]; 1895 int error, u32s; 1896 1897 if (uap->hdrp == NULL) 1898 return (EFAULT); 1899 1900 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1901 if (error != 0) 1902 return (error); 1903 1904 switch (luch.version) { 1905 case _LINUX_CAPABILITY_VERSION_1: 1906 u32s = 1; 1907 break; 1908 case _LINUX_CAPABILITY_VERSION_2: 1909 case _LINUX_CAPABILITY_VERSION_3: 1910 u32s = 2; 1911 break; 1912 default: 1913 #ifdef DEBUG 1914 if (ldebug(capget)) 1915 printf(LMSG("invalid capget capability version 0x%x"), 1916 luch.version); 1917 #endif 1918 luch.version = _LINUX_CAPABILITY_VERSION_1; 1919 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1920 if (error) 1921 return (error); 1922 return (EINVAL); 1923 } 1924 1925 if (luch.pid) 1926 return (EPERM); 1927 1928 if (uap->datap) { 1929 /* 1930 * The current implementation doesn't support setting 1931 * a capability (it's essentially a stub) so indicate 1932 * that no capabilities are currently set or available 1933 * to request. 
1934 */ 1935 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1936 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1937 } 1938 1939 return (error); 1940 } 1941 1942 int 1943 linux_capset(struct thread *td, struct linux_capset_args *uap) 1944 { 1945 struct l_user_cap_header luch; 1946 struct l_user_cap_data lucd[2]; 1947 int error, i, u32s; 1948 1949 if (uap->hdrp == NULL || uap->datap == NULL) 1950 return (EFAULT); 1951 1952 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1953 if (error != 0) 1954 return (error); 1955 1956 switch (luch.version) { 1957 case _LINUX_CAPABILITY_VERSION_1: 1958 u32s = 1; 1959 break; 1960 case _LINUX_CAPABILITY_VERSION_2: 1961 case _LINUX_CAPABILITY_VERSION_3: 1962 u32s = 2; 1963 break; 1964 default: 1965 #ifdef DEBUG 1966 if (ldebug(capset)) 1967 printf(LMSG("invalid capset capability version 0x%x"), 1968 luch.version); 1969 #endif 1970 luch.version = _LINUX_CAPABILITY_VERSION_1; 1971 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1972 if (error) 1973 return (error); 1974 return (EINVAL); 1975 } 1976 1977 if (luch.pid) 1978 return (EPERM); 1979 1980 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1981 if (error != 0) 1982 return (error); 1983 1984 /* We currently don't support setting any capabilities. 
*/ 1985 for (i = 0; i < u32s; i++) { 1986 if (lucd[i].effective || lucd[i].permitted || 1987 lucd[i].inheritable) { 1988 linux_msg(td, 1989 "capset[%d] effective=0x%x, permitted=0x%x, " 1990 "inheritable=0x%x is not implemented", i, 1991 (int)lucd[i].effective, (int)lucd[i].permitted, 1992 (int)lucd[i].inheritable); 1993 return (EPERM); 1994 } 1995 } 1996 1997 return (0); 1998 } 1999 2000 int 2001 linux_prctl(struct thread *td, struct linux_prctl_args *args) 2002 { 2003 int error = 0, max_size; 2004 struct proc *p = td->td_proc; 2005 char comm[LINUX_MAX_COMM_LEN]; 2006 int pdeath_signal; 2007 2008 #ifdef DEBUG 2009 if (ldebug(prctl)) 2010 printf(ARGS(prctl, "%d, %ju, %ju, %ju, %ju"), args->option, 2011 (uintmax_t)args->arg2, (uintmax_t)args->arg3, 2012 (uintmax_t)args->arg4, (uintmax_t)args->arg5); 2013 #endif 2014 2015 switch (args->option) { 2016 case LINUX_PR_SET_PDEATHSIG: 2017 if (!LINUX_SIG_VALID(args->arg2)) 2018 return (EINVAL); 2019 pdeath_signal = linux_to_bsd_signal(args->arg2); 2020 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 2021 &pdeath_signal)); 2022 case LINUX_PR_GET_PDEATHSIG: 2023 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 2024 &pdeath_signal); 2025 if (error != 0) 2026 return (error); 2027 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 2028 return (copyout(&pdeath_signal, 2029 (void *)(register_t)args->arg2, 2030 sizeof(pdeath_signal))); 2031 break; 2032 case LINUX_PR_GET_KEEPCAPS: 2033 /* 2034 * Indicate that we always clear the effective and 2035 * permitted capability sets when the user id becomes 2036 * non-zero (actually the capability sets are simply 2037 * always zero in the current implementation). 2038 */ 2039 td->td_retval[0] = 0; 2040 break; 2041 case LINUX_PR_SET_KEEPCAPS: 2042 /* 2043 * Ignore requests to keep the effective and permitted 2044 * capability sets when the user id becomes non-zero. 
2045 */ 2046 break; 2047 case LINUX_PR_SET_NAME: 2048 /* 2049 * To be on the safe side we need to make sure to not 2050 * overflow the size a Linux program expects. We already 2051 * do this here in the copyin, so that we don't need to 2052 * check on copyout. 2053 */ 2054 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 2055 error = copyinstr((void *)(register_t)args->arg2, comm, 2056 max_size, NULL); 2057 2058 /* Linux silently truncates the name if it is too long. */ 2059 if (error == ENAMETOOLONG) { 2060 /* 2061 * XXX: copyinstr() isn't documented to populate the 2062 * array completely, so do a copyin() to be on the 2063 * safe side. This should be changed in case 2064 * copyinstr() is changed to guarantee this. 2065 */ 2066 error = copyin((void *)(register_t)args->arg2, comm, 2067 max_size - 1); 2068 comm[max_size - 1] = '\0'; 2069 } 2070 if (error) 2071 return (error); 2072 2073 PROC_LOCK(p); 2074 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 2075 PROC_UNLOCK(p); 2076 break; 2077 case LINUX_PR_GET_NAME: 2078 PROC_LOCK(p); 2079 strlcpy(comm, p->p_comm, sizeof(comm)); 2080 PROC_UNLOCK(p); 2081 error = copyout(comm, (void *)(register_t)args->arg2, 2082 strlen(comm) + 1); 2083 break; 2084 default: 2085 error = EINVAL; 2086 break; 2087 } 2088 2089 return (error); 2090 } 2091 2092 int 2093 linux_sched_setparam(struct thread *td, 2094 struct linux_sched_setparam_args *uap) 2095 { 2096 struct sched_param sched_param; 2097 struct thread *tdt; 2098 int error; 2099 2100 #ifdef DEBUG 2101 if (ldebug(sched_setparam)) 2102 printf(ARGS(sched_setparam, "%d, *"), uap->pid); 2103 #endif 2104 2105 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 2106 if (error) 2107 return (error); 2108 2109 tdt = linux_tdfind(td, uap->pid, -1); 2110 if (tdt == NULL) 2111 return (ESRCH); 2112 2113 error = kern_sched_setparam(td, tdt, &sched_param); 2114 PROC_UNLOCK(tdt->td_proc); 2115 return (error); 2116 } 2117 2118 int 2119 linux_sched_getparam(struct thread *td, 2120 struct 
linux_sched_getparam_args *uap) 2121 { 2122 struct sched_param sched_param; 2123 struct thread *tdt; 2124 int error; 2125 2126 #ifdef DEBUG 2127 if (ldebug(sched_getparam)) 2128 printf(ARGS(sched_getparam, "%d, *"), uap->pid); 2129 #endif 2130 2131 tdt = linux_tdfind(td, uap->pid, -1); 2132 if (tdt == NULL) 2133 return (ESRCH); 2134 2135 error = kern_sched_getparam(td, tdt, &sched_param); 2136 PROC_UNLOCK(tdt->td_proc); 2137 if (error == 0) 2138 error = copyout(&sched_param, uap->param, 2139 sizeof(sched_param)); 2140 return (error); 2141 } 2142 2143 /* 2144 * Get affinity of a process. 2145 */ 2146 int 2147 linux_sched_getaffinity(struct thread *td, 2148 struct linux_sched_getaffinity_args *args) 2149 { 2150 int error; 2151 struct thread *tdt; 2152 2153 #ifdef DEBUG 2154 if (ldebug(sched_getaffinity)) 2155 printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, 2156 args->len); 2157 #endif 2158 if (args->len < sizeof(cpuset_t)) 2159 return (EINVAL); 2160 2161 tdt = linux_tdfind(td, args->pid, -1); 2162 if (tdt == NULL) 2163 return (ESRCH); 2164 2165 PROC_UNLOCK(tdt->td_proc); 2166 2167 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2168 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr); 2169 if (error == 0) 2170 td->td_retval[0] = sizeof(cpuset_t); 2171 2172 return (error); 2173 } 2174 2175 /* 2176 * Set affinity of a process. 
2177 */ 2178 int 2179 linux_sched_setaffinity(struct thread *td, 2180 struct linux_sched_setaffinity_args *args) 2181 { 2182 struct thread *tdt; 2183 2184 #ifdef DEBUG 2185 if (ldebug(sched_setaffinity)) 2186 printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, 2187 args->len); 2188 #endif 2189 if (args->len < sizeof(cpuset_t)) 2190 return (EINVAL); 2191 2192 tdt = linux_tdfind(td, args->pid, -1); 2193 if (tdt == NULL) 2194 return (ESRCH); 2195 2196 PROC_UNLOCK(tdt->td_proc); 2197 2198 return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2199 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr)); 2200 } 2201 2202 struct linux_rlimit64 { 2203 uint64_t rlim_cur; 2204 uint64_t rlim_max; 2205 }; 2206 2207 int 2208 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2209 { 2210 struct rlimit rlim, nrlim; 2211 struct linux_rlimit64 lrlim; 2212 struct proc *p; 2213 u_int which; 2214 int flags; 2215 int error; 2216 2217 #ifdef DEBUG 2218 if (ldebug(prlimit64)) 2219 printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid, 2220 args->resource, (void *)args->new, (void *)args->old); 2221 #endif 2222 2223 if (args->resource >= LINUX_RLIM_NLIMITS) 2224 return (EINVAL); 2225 2226 which = linux_to_bsd_resource[args->resource]; 2227 if (which == -1) 2228 return (EINVAL); 2229 2230 if (args->new != NULL) { 2231 /* 2232 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2233 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2234 * as INFINITY so we do not need a conversion even. 
2235 */ 2236 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2237 if (error != 0) 2238 return (error); 2239 } 2240 2241 flags = PGET_HOLD | PGET_NOTWEXIT; 2242 if (args->new != NULL) 2243 flags |= PGET_CANDEBUG; 2244 else 2245 flags |= PGET_CANSEE; 2246 error = pget(args->pid, flags, &p); 2247 if (error != 0) 2248 return (error); 2249 2250 if (args->old != NULL) { 2251 PROC_LOCK(p); 2252 lim_rlimit_proc(p, which, &rlim); 2253 PROC_UNLOCK(p); 2254 if (rlim.rlim_cur == RLIM_INFINITY) 2255 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2256 else 2257 lrlim.rlim_cur = rlim.rlim_cur; 2258 if (rlim.rlim_max == RLIM_INFINITY) 2259 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2260 else 2261 lrlim.rlim_max = rlim.rlim_max; 2262 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2263 if (error != 0) 2264 goto out; 2265 } 2266 2267 if (args->new != NULL) 2268 error = kern_proc_setrlimit(td, p, which, &nrlim); 2269 2270 out: 2271 PRELE(p); 2272 return (error); 2273 } 2274 2275 int 2276 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2277 { 2278 struct timeval utv, tv0, tv1, *tvp; 2279 struct l_pselect6arg lpse6; 2280 struct l_timespec lts; 2281 struct timespec uts; 2282 l_sigset_t l_ss; 2283 sigset_t *ssp; 2284 sigset_t ss; 2285 int error; 2286 2287 ssp = NULL; 2288 if (args->sig != NULL) { 2289 error = copyin(args->sig, &lpse6, sizeof(lpse6)); 2290 if (error != 0) 2291 return (error); 2292 if (lpse6.ss_len != sizeof(l_ss)) 2293 return (EINVAL); 2294 if (lpse6.ss != 0) { 2295 error = copyin(PTRIN(lpse6.ss), &l_ss, 2296 sizeof(l_ss)); 2297 if (error != 0) 2298 return (error); 2299 linux_to_bsd_sigset(&l_ss, &ss); 2300 ssp = &ss; 2301 } 2302 } 2303 2304 /* 2305 * Currently glibc changes nanosecond number to microsecond. 2306 * This mean losing precision but for now it is hardly seen. 
2307 */ 2308 if (args->tsp != NULL) { 2309 error = copyin(args->tsp, <s, sizeof(lts)); 2310 if (error != 0) 2311 return (error); 2312 error = linux_to_native_timespec(&uts, <s); 2313 if (error != 0) 2314 return (error); 2315 2316 TIMESPEC_TO_TIMEVAL(&utv, &uts); 2317 if (itimerfix(&utv)) 2318 return (EINVAL); 2319 2320 microtime(&tv0); 2321 tvp = &utv; 2322 } else 2323 tvp = NULL; 2324 2325 error = kern_pselect(td, args->nfds, args->readfds, args->writefds, 2326 args->exceptfds, tvp, ssp, LINUX_NFDBITS); 2327 2328 if (error == 0 && args->tsp != NULL) { 2329 if (td->td_retval[0] != 0) { 2330 /* 2331 * Compute how much time was left of the timeout, 2332 * by subtracting the current time and the time 2333 * before we started the call, and subtracting 2334 * that result from the user-supplied value. 2335 */ 2336 2337 microtime(&tv1); 2338 timevalsub(&tv1, &tv0); 2339 timevalsub(&utv, &tv1); 2340 if (utv.tv_sec < 0) 2341 timevalclear(&utv); 2342 } else 2343 timevalclear(&utv); 2344 2345 TIMEVAL_TO_TIMESPEC(&utv, &uts); 2346 2347 error = native_to_linux_timespec(<s, &uts); 2348 if (error == 0) 2349 error = copyout(<s, args->tsp, sizeof(lts)); 2350 } 2351 2352 return (error); 2353 } 2354 2355 int 2356 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2357 { 2358 struct timespec ts0, ts1; 2359 struct l_timespec lts; 2360 struct timespec uts, *tsp; 2361 l_sigset_t l_ss; 2362 sigset_t *ssp; 2363 sigset_t ss; 2364 int error; 2365 2366 if (args->sset != NULL) { 2367 if (args->ssize != sizeof(l_ss)) 2368 return (EINVAL); 2369 error = copyin(args->sset, &l_ss, sizeof(l_ss)); 2370 if (error) 2371 return (error); 2372 linux_to_bsd_sigset(&l_ss, &ss); 2373 ssp = &ss; 2374 } else 2375 ssp = NULL; 2376 if (args->tsp != NULL) { 2377 error = copyin(args->tsp, <s, sizeof(lts)); 2378 if (error) 2379 return (error); 2380 error = linux_to_native_timespec(&uts, <s); 2381 if (error != 0) 2382 return (error); 2383 2384 nanotime(&ts0); 2385 tsp = &uts; 2386 } else 2387 tsp = NULL; 
2388 2389 error = kern_poll(td, args->fds, args->nfds, tsp, ssp); 2390 2391 if (error == 0 && args->tsp != NULL) { 2392 if (td->td_retval[0]) { 2393 nanotime(&ts1); 2394 timespecsub(&ts1, &ts0, &ts1); 2395 timespecsub(&uts, &ts1, &uts); 2396 if (uts.tv_sec < 0) 2397 timespecclear(&uts); 2398 } else 2399 timespecclear(&uts); 2400 2401 error = native_to_linux_timespec(<s, &uts); 2402 if (error == 0) 2403 error = copyout(<s, args->tsp, sizeof(lts)); 2404 } 2405 2406 return (error); 2407 } 2408 2409 #if defined(DEBUG) || defined(KTR) 2410 /* XXX: can be removed when every ldebug(...) and KTR stuff are removed. */ 2411 2412 #ifdef COMPAT_LINUX32 2413 #define L_MAXSYSCALL LINUX32_SYS_MAXSYSCALL 2414 #else 2415 #define L_MAXSYSCALL LINUX_SYS_MAXSYSCALL 2416 #endif 2417 2418 u_char linux_debug_map[howmany(L_MAXSYSCALL, sizeof(u_char))]; 2419 2420 static int 2421 linux_debug(int syscall, int toggle, int global) 2422 { 2423 2424 if (global) { 2425 char c = toggle ? 0 : 0xff; 2426 2427 memset(linux_debug_map, c, sizeof(linux_debug_map)); 2428 return (0); 2429 } 2430 if (syscall < 0 || syscall >= L_MAXSYSCALL) 2431 return (EINVAL); 2432 if (toggle) 2433 clrbit(linux_debug_map, syscall); 2434 else 2435 setbit(linux_debug_map, syscall); 2436 return (0); 2437 } 2438 #undef L_MAXSYSCALL 2439 2440 /* 2441 * Usage: sysctl linux.debug=<syscall_nr>.<0/1> 2442 * 2443 * E.g.: sysctl linux.debug=21.0 2444 * 2445 * As a special case, syscall "all" will apply to all syscalls globally. 
2446 */ 2447 #define LINUX_MAX_DEBUGSTR 16 2448 int 2449 linux_sysctl_debug(SYSCTL_HANDLER_ARGS) 2450 { 2451 char value[LINUX_MAX_DEBUGSTR], *p; 2452 int error, sysc, toggle; 2453 int global = 0; 2454 2455 value[0] = '\0'; 2456 error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req); 2457 if (error || req->newptr == NULL) 2458 return (error); 2459 for (p = value; *p != '\0' && *p != '.'; p++); 2460 if (*p == '\0') 2461 return (EINVAL); 2462 *p++ = '\0'; 2463 sysc = strtol(value, NULL, 0); 2464 toggle = strtol(p, NULL, 0); 2465 if (strcmp(value, "all") == 0) 2466 global = 1; 2467 error = linux_debug(sysc, toggle, global); 2468 return (error); 2469 } 2470 2471 #endif /* DEBUG || KTR */ 2472 2473 int 2474 linux_sched_rr_get_interval(struct thread *td, 2475 struct linux_sched_rr_get_interval_args *uap) 2476 { 2477 struct timespec ts; 2478 struct l_timespec lts; 2479 struct thread *tdt; 2480 int error; 2481 2482 /* 2483 * According to man in case the invalid pid specified 2484 * EINVAL should be returned. 2485 */ 2486 if (uap->pid < 0) 2487 return (EINVAL); 2488 2489 tdt = linux_tdfind(td, uap->pid, -1); 2490 if (tdt == NULL) 2491 return (ESRCH); 2492 2493 error = kern_sched_rr_get_interval_td(td, tdt, &ts); 2494 PROC_UNLOCK(tdt->td_proc); 2495 if (error != 0) 2496 return (error); 2497 error = native_to_linux_timespec(<s, &ts); 2498 if (error != 0) 2499 return (error); 2500 return (copyout(<s, uap->interval, sizeof(lts))); 2501 } 2502 2503 /* 2504 * In case when the Linux thread is the initial thread in 2505 * the thread group thread id is equal to the process id. 2506 * Glibc depends on this magic (assert in pthread_getattr_np.c). 
2507 */ 2508 struct thread * 2509 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2510 { 2511 struct linux_emuldata *em; 2512 struct thread *tdt; 2513 struct proc *p; 2514 2515 tdt = NULL; 2516 if (tid == 0 || tid == td->td_tid) { 2517 tdt = td; 2518 PROC_LOCK(tdt->td_proc); 2519 } else if (tid > PID_MAX) 2520 tdt = tdfind(tid, pid); 2521 else { 2522 /* 2523 * Initial thread where the tid equal to the pid. 2524 */ 2525 p = pfind(tid); 2526 if (p != NULL) { 2527 if (SV_PROC_ABI(p) != SV_ABI_LINUX) { 2528 /* 2529 * p is not a Linuxulator process. 2530 */ 2531 PROC_UNLOCK(p); 2532 return (NULL); 2533 } 2534 FOREACH_THREAD_IN_PROC(p, tdt) { 2535 em = em_find(tdt); 2536 if (tid == em->em_tid) 2537 return (tdt); 2538 } 2539 PROC_UNLOCK(p); 2540 } 2541 return (NULL); 2542 } 2543 2544 return (tdt); 2545 } 2546 2547 void 2548 linux_to_bsd_waitopts(int options, int *bsdopts) 2549 { 2550 2551 if (options & LINUX_WNOHANG) 2552 *bsdopts |= WNOHANG; 2553 if (options & LINUX_WUNTRACED) 2554 *bsdopts |= WUNTRACED; 2555 if (options & LINUX_WEXITED) 2556 *bsdopts |= WEXITED; 2557 if (options & LINUX_WCONTINUED) 2558 *bsdopts |= WCONTINUED; 2559 if (options & LINUX_WNOWAIT) 2560 *bsdopts |= WNOWAIT; 2561 2562 if (options & __WCLONE) 2563 *bsdopts |= WLINUXCLONE; 2564 } 2565 2566 int 2567 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2568 { 2569 struct uio uio; 2570 struct iovec iov; 2571 int error; 2572 2573 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2574 return (EINVAL); 2575 if (args->count > INT_MAX) 2576 args->count = INT_MAX; 2577 2578 iov.iov_base = args->buf; 2579 iov.iov_len = args->count; 2580 2581 uio.uio_iov = &iov; 2582 uio.uio_iovcnt = 1; 2583 uio.uio_resid = iov.iov_len; 2584 uio.uio_segflg = UIO_USERSPACE; 2585 uio.uio_rw = UIO_READ; 2586 uio.uio_td = td; 2587 2588 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2589 if (error == 0) 2590 td->td_retval[0] = args->count - uio.uio_resid; 2591 return 
(error); 2592 } 2593 2594 int 2595 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2596 { 2597 2598 /* Needs to be page-aligned */ 2599 if (args->start & PAGE_MASK) 2600 return (EINVAL); 2601 return (kern_mincore(td, args->start, args->len, args->vec)); 2602 } 2603