1 /* 2 * Copyright (c) 2010 Kip Macy. All rights reserved. 3 * Copyright (C) 2017 THL A29 Limited, a Tencent company. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright notice, this 10 * list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * Derived in part from libplebnet's pn_glue.c. 27 */ 28 29 #include <sys/cdefs.h> 30 #include <sys/param.h> 31 #include <sys/types.h> 32 #include <sys/kernel.h> 33 #include <sys/kthread.h> 34 #include <sys/event.h> 35 #include <sys/jail.h> 36 #include <sys/limits.h> 37 #include <sys/malloc.h> 38 #include <sys/refcount.h> 39 #include <sys/resourcevar.h> 40 #include <sys/sysctl.h> 41 #include <sys/sysent.h> 42 #include <sys/systm.h> 43 #include <sys/proc.h> 44 #include <sys/priv.h> 45 #include <sys/time.h> 46 #include <sys/ucred.h> 47 #include <sys/uio.h> 48 #include <sys/param.h> 49 #include <sys/bus.h> 50 #include <sys/buf.h> 51 #include <sys/file.h> 52 #include <sys/vmem.h> 53 #include <sys/mbuf.h> 54 #include <sys/smp.h> 55 #include <sys/sched.h> 56 #include <sys/vmmeter.h> 57 #include <sys/unpcb.h> 58 #include <sys/eventfd.h> 59 #include <sys/linker.h> 60 #include <sys/sleepqueue.h> 61 62 #include <vm/vm.h> 63 #include <vm/vm_param.h> 64 #include <vm/pmap.h> 65 #include <vm/vm_object.h> 66 #include <vm/vm_map.h> 67 #include <vm/vm_extern.h> 68 #include <vm/vm_domainset.h> 69 #include <vm/vm_page.h> 70 #include <vm/vm_pagequeue.h> 71 72 #include <netinet/in_systm.h> 73 74 #include <ck_epoch.h> 75 #include <ck_stack.h> 76 77 #include "ff_host_interface.h" 78 79 int kstack_pages = KSTACK_PAGES; 80 SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0, 81 "Kernel stack size in pages"); 82 83 int __read_mostly vm_ndomains = 1; 84 SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD, 85 &vm_ndomains, 0, "Number of physical memory domains available."); 86 87 #ifndef MAXMEMDOM 88 #define MAXMEMDOM 1 89 #endif 90 91 struct domainset __read_mostly domainset_fixed[MAXMEMDOM]; 92 struct domainset __read_mostly domainset_prefer[MAXMEMDOM]; 93 struct domainset __read_mostly domainset_roundrobin; 94 95 struct vm_domain vm_dom[MAXMEMDOM]; 96 97 domainset_t __exclusive_cache_line vm_min_domains; 98 99 int bootverbose; 100 101 SYSCTL_ROOT_NODE(0, sysctl, CTLFLAG_RW, 0, "Sysctl internal magic"); 102 103 SYSCTL_ROOT_NODE(CTL_VFS, vfs, CTLFLAG_RW, 0, "File system"); 104 105 SYSCTL_ROOT_NODE(CTL_KERN, kern, CTLFLAG_RW, 0, "High kernel, proc, limits &c"); 106 107 SYSCTL_ROOT_NODE(CTL_NET, net, CTLFLAG_RW, 0, "Network, (see socket.h)"); 108 109 SYSCTL_ROOT_NODE(CTL_MACHDEP, machdep, CTLFLAG_RW, 0, "machine dependent"); 110 111 SYSCTL_ROOT_NODE(CTL_VM, vm, CTLFLAG_RW, 0, "Virtual memory"); 112 113 SYSCTL_ROOT_NODE(CTL_DEBUG, debug, CTLFLAG_RW, 0, "Debugging"); 114 115 SYSCTL_ROOT_NODE(OID_AUTO, security, CTLFLAG_RW, 0, "Security"); 116 117 SYSCTL_NODE(_kern, OID_AUTO, features, CTLFLAG_RD, 0, "Kernel Features"); 118 119 SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table"); 120 121 MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory"); 122 MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers"); 123 static MALLOC_DEFINE(M_CRED, "cred", "credentials"); 124 static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures"); 125 126 MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options"); 127 128 static void configure_final(void *dummy); 129 130 SYSINIT(configure3, SI_SUB_CONFIGURE, SI_ORDER_ANY, configure_final, NULL); 131 132 volatile int ticks; 133 int cpu_disable_deep_sleep; 134 135 static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS); 136 137 /* This is used in modules that need to work in both SMP and UP. */ 138 cpuset_t all_cpus; 139 140 int mp_ncpus = 1; 141 /* export this for libkvm consumers. */ 142 int mp_maxcpus = MAXCPU; 143 144 volatile int smp_started; 145 u_int mp_maxid; 146 147 static SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL, 148 "Kernel SMP"); 149 150 SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0, 151 "Max CPU ID."); 152 153 SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus, 154 0, "Max number of CPUs that the system was compiled for."); 155 156 SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD | CTLTYPE_INT, NULL, 0, 157 sysctl_kern_smp_active, "I", "Indicates system is running in SMP mode"); 158 159 int smp_disabled = 0; /* has smp been disabled? */ 160 SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD, 161 &smp_disabled, 0, "SMP has been disabled from the loader"); 162 163 int smp_cpus = 1; /* how many cpu's running */ 164 SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_cpus, 0, 165 "Number of CPUs online"); 166 167 int smp_topology = 0; /* Which topology we're using. */ 168 SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RDTUN, &smp_topology, 0, 169 "Topology override setting; 0 is default provided by hardware."); 170 171 u_int vn_lock_pair_pause_max = 1; // ff_global_cfg.freebsd.hz / 100; 172 SYSCTL_UINT(_debug, OID_AUTO, vn_lock_pair_pause_max, CTLFLAG_RW, 173 &vn_lock_pair_pause_max, 0, 174 "Max ticks for vn_lock_pair deadlock avoidance sleep"); 175 176 long first_page = 0; 177 178 struct vmmeter vm_cnt; 179 vm_map_t kernel_map = 0; 180 vm_map_t kmem_map = 0; 181 182 vmem_t *kernel_arena = NULL; 183 vmem_t *kmem_arena = NULL; 184 185 struct vm_object kernel_object_store; 186 struct vm_object kmem_object_store; 187 188 struct filterops fs_filtops; 189 struct filterops sig_filtops; 190 191 int cold = 1; 192 193 int unmapped_buf_allowed = 1; 194 195 int cpu_deepest_sleep = 0; /* Deepest Cx state available. */ 196 int cpu_disable_c2_sleep = 0; /* Timer dies in C2. */ 197 int cpu_disable_c3_sleep = 0; /* Timer dies in C3. */ 198 199 u_char __read_frequently kdb_active = 0; 200 201 static void timevalfix(struct timeval *); 202 203 /* Extra care is taken with this sysctl because the data type is volatile */ 204 static int 205 sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS) 206 { 207 int error, active; 208 209 active = smp_started; 210 error = SYSCTL_OUT(req, &active, sizeof(active)); 211 return (error); 212 } 213 214 void 215 procinit() 216 { 217 sx_init(&allproc_lock, "allproc"); 218 LIST_INIT(&allproc); 219 } 220 221 222 /* 223 * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. 224 */ 225 struct prison * 226 prison_find_child(struct prison *mypr, int prid) 227 { 228 return (NULL); 229 } 230 231 void 232 prison_free(struct prison *pr) 233 { 234 235 } 236 237 void 238 prison_hold_locked(struct prison *pr) 239 { 240 241 } 242 243 int 244 prison_if(struct ucred *cred, const struct sockaddr *sa) 245 { 246 return (0); 247 } 248 249 int 250 prison_check_af(struct ucred *cred, int af) 251 { 252 return (0); 253 } 254 255 int 256 prison_check_ip4(const struct ucred *cred, const struct in_addr *ia) 257 { 258 return (0); 259 } 260 261 int 262 prison_equal_ip4(struct prison *pr1, struct prison *pr2) 263 { 264 return (1); 265 } 266 267 #ifdef INET6 268 int 269 prison_check_ip6(const struct ucred *cred, const struct in6_addr *ia) 270 { 271 return (0); 272 } 273 274 int 275 prison_equal_ip6(struct prison *pr1, struct prison *pr2) 276 { 277 return (1); 278 } 279 #endif 280 281 /* 282 * See if a prison has the specific flag set. 283 */ 284 int 285 prison_flag(struct ucred *cred, unsigned flag) 286 { 287 /* This is an atomic read, so no locking is necessary. */ 288 return (flag & PR_HOST); 289 } 290 291 int 292 prison_get_ip4(struct ucred *cred, struct in_addr *ia) 293 { 294 return (0); 295 } 296 297 int 298 prison_local_ip4(struct ucred *cred, struct in_addr *ia) 299 { 300 return (0); 301 } 302 303 int 304 prison_remote_ip4(struct ucred *cred, struct in_addr *ia) 305 { 306 return (0); 307 } 308 309 #ifdef INET6 310 int 311 prison_get_ip6(struct ucred *cred, struct in6_addr *ia) 312 { 313 return (0); 314 } 315 316 int 317 prison_local_ip6(struct ucred *cred, struct in6_addr *ia, int other) 318 { 319 return (0); 320 } 321 322 int 323 prison_remote_ip6(struct ucred *cred, struct in6_addr *ia) 324 { 325 return (0); 326 } 327 #endif 328 329 int 330 prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia) 331 { 332 /* not jailed */ 333 return (1); 334 } 335 336 #ifdef INET6 337 int 338 prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia) 339 { 340 /* not jailed */ 341 return (1); 342 } 343 #endif 344 345 #if 0 346 int 347 jailed(struct ucred *cred) 348 { 349 return (0); 350 } 351 #endif 352 353 /* 354 * Return 1 if the passed credential is in a jail and that jail does not 355 * have its own virtual network stack, otherwise 0. 356 */ 357 int 358 jailed_without_vnet(struct ucred *cred) 359 { 360 return (0); 361 } 362 363 int 364 priv_check(struct thread *td, int priv) 365 { 366 return (0); 367 } 368 369 int 370 priv_check_cred(struct ucred *cred, int priv) 371 { 372 return (0); 373 } 374 375 376 int 377 vslock(void *addr, size_t len) 378 { 379 return (0); 380 } 381 382 void 383 vsunlock(void *addr, size_t len) 384 { 385 386 } 387 388 389 /* 390 * Check that a proposed value to load into the .it_value or 391 * .it_interval part of an interval timer is acceptable, and 392 * fix it to have at least minimal value (i.e. if it is less 393 * than the resolution of the clock, round it up.) 394 */ 395 int 396 itimerfix(struct timeval *tv) 397 { 398 399 if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000) 400 return (EINVAL); 401 if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick) 402 tv->tv_usec = tick; 403 return (0); 404 } 405 406 /* 407 * Decrement an interval timer by a specified number 408 * of microseconds, which must be less than a second, 409 * i.e. < 1000000. If the timer expires, then reload 410 * it. In this case, carry over (usec - old value) to 411 * reduce the value reloaded into the timer so that 412 * the timer does not drift. This routine assumes 413 * that it is called in a context where the timers 414 * on which it is operating cannot change in value. 415 */ 416 int 417 itimerdecr(struct itimerval *itp, int usec) 418 { 419 if (itp->it_value.tv_usec < usec) { 420 if (itp->it_value.tv_sec == 0) { 421 /* expired, and already in next interval */ 422 usec -= itp->it_value.tv_usec; 423 goto expire; 424 } 425 itp->it_value.tv_usec += 1000000; 426 itp->it_value.tv_sec--; 427 } 428 itp->it_value.tv_usec -= usec; 429 usec = 0; 430 if (timevalisset(&itp->it_value)) 431 return (1); 432 /* expired, exactly at end of interval */ 433 expire: 434 if (timevalisset(&itp->it_interval)) { 435 itp->it_value = itp->it_interval; 436 itp->it_value.tv_usec -= usec; 437 if (itp->it_value.tv_usec < 0) { 438 itp->it_value.tv_usec += 1000000; 439 itp->it_value.tv_sec--; 440 } 441 } else 442 itp->it_value.tv_usec = 0; /* sec is already 0 */ 443 return (0); 444 } 445 446 /* 447 * Add and subtract routines for timevals. 448 * N.B.: subtract routine doesn't deal with 449 * results which are before the beginning, 450 * it just gets very confused in this case. 451 * Caveat emptor. 452 */ 453 void 454 timevaladd(struct timeval *t1, const struct timeval *t2) 455 { 456 t1->tv_sec += t2->tv_sec; 457 t1->tv_usec += t2->tv_usec; 458 timevalfix(t1); 459 } 460 461 void 462 timevalsub(struct timeval *t1, const struct timeval *t2) 463 { 464 t1->tv_sec -= t2->tv_sec; 465 t1->tv_usec -= t2->tv_usec; 466 timevalfix(t1); 467 } 468 469 static void 470 timevalfix(struct timeval *t1) 471 { 472 if (t1->tv_usec < 0) { 473 t1->tv_sec--; 474 t1->tv_usec += 1000000; 475 } 476 if (t1->tv_usec >= 1000000) { 477 t1->tv_sec++; 478 t1->tv_usec -= 1000000; 479 } 480 } 481 482 /* 483 * ratecheck(): simple time-based rate-limit checking. 484 */ 485 int 486 ratecheck(struct timeval *lasttime, const struct timeval *mininterval) 487 { 488 struct timeval tv, delta; 489 int rv = 0; 490 491 getmicrouptime(&tv); /* NB: 10ms precision */ 492 delta = tv; 493 timevalsub(&delta, lasttime); 494 495 /* 496 * check for 0,0 is so that the message will be seen at least once, 497 * even if interval is huge. 498 */ 499 if (timevalcmp(&delta, mininterval, >=) || 500 (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) { 501 *lasttime = tv; 502 rv = 1; 503 } 504 505 return (rv); 506 } 507 508 /* 509 * ppsratecheck(): packets (or events) per second limitation. 510 * 511 * Return 0 if the limit is to be enforced (e.g. the caller 512 * should drop a packet because of the rate limitation). 513 * 514 * maxpps of 0 always causes zero to be returned. maxpps of -1 515 * always causes 1 to be returned; this effectively defeats rate 516 * limiting. 517 * 518 * Note that we maintain the struct timeval for compatibility 519 * with other bsd systems. We reuse the storage and just monitor 520 * clock ticks for minimal overhead. 521 */ 522 int 523 ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps) 524 { 525 int now; 526 527 /* 528 * Reset the last time and counter if this is the first call 529 * or more than a second has passed since the last update of 530 * lasttime. 531 */ 532 now = ticks; 533 if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) { 534 lasttime->tv_sec = now; 535 *curpps = 1; 536 return (maxpps != 0); 537 } else { 538 (*curpps)++; /* NB: ignore potential overflow */ 539 return (maxpps < 0 || *curpps < maxpps); 540 } 541 } 542 543 /* 544 * Compute number of ticks in the specified amount of time. 545 */ 546 int 547 tvtohz(tv) 548 struct timeval *tv; 549 { 550 register unsigned long ticks; 551 register long sec, usec; 552 553 /* 554 * If the number of usecs in the whole seconds part of the time 555 * difference fits in a long, then the total number of usecs will 556 * fit in an unsigned long. Compute the total and convert it to 557 * ticks, rounding up and adding 1 to allow for the current tick 558 * to expire. Rounding also depends on unsigned long arithmetic 559 * to avoid overflow. 560 * 561 * Otherwise, if the number of ticks in the whole seconds part of 562 * the time difference fits in a long, then convert the parts to 563 * ticks separately and add, using similar rounding methods and 564 * overflow avoidance. This method would work in the previous 565 * case but it is slightly slower and assumes that hz is integral. 566 * 567 * Otherwise, round the time difference down to the maximum 568 * representable value. 569 * 570 * If ints have 32 bits, then the maximum value for any timeout in 571 * 10ms ticks is 248 days. 572 */ 573 sec = tv->tv_sec; 574 usec = tv->tv_usec; 575 if (usec < 0) { 576 sec--; 577 usec += 1000000; 578 } 579 if (sec < 0) { 580 #ifdef DIAGNOSTIC 581 if (usec > 0) { 582 sec++; 583 usec -= 1000000; 584 } 585 printf("tvotohz: negative time difference %ld sec %ld usec\n", 586 sec, usec); 587 #endif 588 ticks = 1; 589 } else if (sec <= LONG_MAX / 1000000) 590 ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) 591 / tick + 1; 592 else if (sec <= LONG_MAX / hz) 593 ticks = sec * hz 594 + ((unsigned long)usec + (tick - 1)) / tick + 1; 595 else 596 ticks = LONG_MAX; 597 if (ticks > INT_MAX) 598 ticks = INT_MAX; 599 return ((int)ticks); 600 } 601 602 int 603 copyin(const void *uaddr, void *kaddr, size_t len) 604 { 605 memcpy(kaddr, uaddr, len); 606 return (0); 607 } 608 609 int 610 copyout(const void *kaddr, void *uaddr, size_t len) 611 { 612 memcpy(uaddr, kaddr, len); 613 return (0); 614 } 615 616 #if 0 617 int 618 copystr(const void *kfaddr, void *kdaddr, size_t len, size_t *done) 619 { 620 size_t bytes; 621 622 bytes = strlcpy(kdaddr, kfaddr, len); 623 if (done != NULL) 624 *done = bytes; 625 626 return (0); 627 } 628 #endif 629 630 int 631 copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done) 632 { 633 size_t bytes; 634 635 bytes = strlcpy(kaddr, uaddr, len); 636 if (done != NULL) 637 *done = bytes; 638 639 return (0); 640 } 641 642 int 643 copyiniov(const struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error) 644 { 645 u_int iovlen; 646 647 *iov = NULL; 648 if (iovcnt > UIO_MAXIOV) 649 return (error); 650 iovlen = iovcnt * sizeof (struct iovec); 651 *iov = malloc(iovlen, M_IOV, M_WAITOK); 652 error = copyin(iovp, *iov, iovlen); 653 if (error) { 654 free(*iov, M_IOV); 655 *iov = NULL; 656 } 657 return (error); 658 } 659 660 int 661 subyte(volatile void *base, int byte) 662 { 663 *(volatile char *)base = (uint8_t)byte; 664 return (0); 665 } 666 667 static inline int 668 chglimit(struct uidinfo *uip, long *limit, int diff, rlim_t max, const char *name) 669 { 670 /* Don't allow them to exceed max, but allow subtraction. */ 671 if (diff > 0 && max != 0) { 672 if (atomic_fetchadd_long(limit, (long)diff) + diff > max) { 673 atomic_subtract_long(limit, (long)diff); 674 return (0); 675 } 676 } else { 677 atomic_add_long(limit, (long)diff); 678 if (*limit < 0) 679 printf("negative %s for uid = %d\n", name, uip->ui_uid); 680 } 681 return (1); 682 } 683 684 /* 685 * Change the count associated with number of processes 686 * a given user is using. When 'max' is 0, don't enforce a limit 687 */ 688 int 689 chgproccnt(struct uidinfo *uip, int diff, rlim_t max) 690 { 691 return (chglimit(uip, &uip->ui_proccnt, diff, max, "proccnt")); 692 } 693 694 /* 695 * Change the total socket buffer size a user has used. 696 */ 697 int 698 chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max) 699 { 700 int diff, rv; 701 702 diff = to - *hiwat; 703 if (diff > 0 && max == 0) { 704 rv = 0; 705 } else { 706 rv = chglimit(uip, &uip->ui_sbsize, diff, max, "sbsize"); 707 if (rv != 0) 708 *hiwat = to; 709 } 710 return (rv); 711 } 712 713 /* 714 * Change the count associated with number of pseudo-terminals 715 * a given user is using. When 'max' is 0, don't enforce a limit 716 */ 717 int 718 chgptscnt(struct uidinfo *uip, int diff, rlim_t max) 719 { 720 return (chglimit(uip, &uip->ui_ptscnt, diff, max, "ptscnt")); 721 } 722 723 int 724 chgkqcnt(struct uidinfo *uip, int diff, rlim_t max) 725 { 726 return (chglimit(uip, &uip->ui_kqcnt, diff, max, "kqcnt")); 727 } 728 729 int 730 chgumtxcnt(struct uidinfo *uip, int diff, rlim_t max) 731 { 732 return (chglimit(uip, &uip->ui_umtxcnt, diff, max, "umtxcnt")); 733 } 734 735 /* 736 * Allocate a new resource limits structure and initialize its 737 * reference count and mutex pointer. 738 */ 739 struct plimit * 740 lim_alloc() 741 { 742 struct plimit *limp; 743 744 limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK); 745 refcount_init(&limp->pl_refcnt, 1); 746 return (limp); 747 } 748 749 struct plimit * 750 lim_hold(struct plimit *limp) 751 { 752 refcount_acquire(&limp->pl_refcnt); 753 return (limp); 754 } 755 756 /* 757 * Return the current (soft) limit for a particular system resource. 758 * The which parameter which specifies the index into the rlimit array 759 */ 760 rlim_t 761 lim_cur(struct thread *td, int which) 762 { 763 struct rlimit rl; 764 765 lim_rlimit(td, which, &rl); 766 return (rl.rlim_cur); 767 } 768 769 rlim_t 770 lim_cur_proc(struct proc *p, int which) 771 { 772 struct rlimit rl; 773 774 lim_rlimit_proc(p, which, &rl); 775 return (rl.rlim_cur); 776 } 777 778 /* 779 * Return a copy of the entire rlimit structure for the system limit 780 * specified by 'which' in the rlimit structure pointed to by 'rlp'. 781 */ 782 void 783 lim_rlimit(struct thread *td, int which, struct rlimit *rlp) 784 { 785 struct proc *p = td->td_proc; 786 787 MPASS(td == curthread); 788 KASSERT(which >= 0 && which < RLIM_NLIMITS, 789 ("request for invalid resource limit")); 790 *rlp = p->p_limit->pl_rlimit[which]; 791 if (p->p_sysent->sv_fixlimit != NULL) 792 p->p_sysent->sv_fixlimit(rlp, which); 793 } 794 795 void 796 lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp) 797 { 798 PROC_LOCK_ASSERT(p, MA_OWNED); 799 KASSERT(which >= 0 && which < RLIM_NLIMITS, 800 ("request for invalid resource limit")); 801 *rlp = p->p_limit->pl_rlimit[which]; 802 if (p->p_sysent->sv_fixlimit != NULL) 803 p->p_sysent->sv_fixlimit(rlp, which); 804 } 805 806 int 807 useracc(void *addr, int len, int rw) 808 { 809 return (1); 810 } 811 812 struct pgrp * 813 pgfind(pid_t pgid) 814 { 815 return (NULL); 816 } 817 818 #if 0 819 struct proc * 820 zpfind(pid_t pid) 821 { 822 return (NULL); 823 } 824 #endif 825 826 int 827 p_cansee(struct thread *td, struct proc *p) 828 { 829 return (0); 830 } 831 832 struct proc * 833 pfind(pid_t pid) 834 { 835 return (NULL); 836 } 837 838 int 839 pget(pid_t pid, int flags, struct proc **pp) 840 { 841 return (ESRCH); 842 } 843 844 struct uidinfo uid0; 845 846 struct uidinfo * 847 uifind(uid_t uid) 848 { 849 return (&uid0); 850 } 851 852 /* 853 * Allocate a zeroed cred structure. 854 */ 855 struct ucred * 856 crget(void) 857 { 858 register struct ucred *cr; 859 860 cr = malloc(sizeof(*cr), M_CRED, M_WAITOK | M_ZERO); 861 refcount_init(&cr->cr_ref, 1); 862 863 return (cr); 864 } 865 866 /* 867 * Claim another reference to a ucred structure. 868 */ 869 struct ucred * 870 crhold(struct ucred *cr) 871 { 872 refcount_acquire(&cr->cr_ref); 873 return (cr); 874 } 875 876 /* 877 * Free a cred structure. Throws away space when ref count gets to 0. 878 */ 879 void 880 crfree(struct ucred *cr) 881 { 882 KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref)); 883 KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred")); 884 if (refcount_release(&cr->cr_ref)) { 885 886 free(cr, M_CRED); 887 } 888 } 889 890 /* 891 * Fill in a struct xucred based on a struct ucred. 892 */ 893 894 void 895 cru2x(struct ucred *cr, struct xucred *xcr) 896 { 897 #if 0 898 int ngroups; 899 900 bzero(xcr, sizeof(*xcr)); 901 xcr->cr_version = XUCRED_VERSION; 902 xcr->cr_uid = cr->cr_uid; 903 904 ngroups = MIN(cr->cr_ngroups, XU_NGROUPS); 905 xcr->cr_ngroups = ngroups; 906 bcopy(cr->cr_groups, xcr->cr_groups, 907 ngroups * sizeof(*cr->cr_groups)); 908 #endif 909 } 910 911 912 int 913 cr_cansee(struct ucred *u1, struct ucred *u2) 914 { 915 return (0); 916 } 917 918 int 919 cr_canseesocket(struct ucred *cred, struct socket *so) 920 { 921 return (0); 922 } 923 924 int 925 cr_canseeinpcb(struct ucred *cred, struct inpcb *inp) 926 { 927 return (0); 928 } 929 930 int 931 securelevel_gt(struct ucred *cr, int level) 932 { 933 return (0); 934 } 935 936 int 937 securelevel_ge(struct ucred *cr, int level) 938 { 939 return (0); 940 } 941 942 /** 943 * @brief Send a 'notification' to userland, using standard ways 944 */ 945 void 946 devctl_notify(const char *system, const char *subsystem, const char *type, 947 const char *data) 948 { 949 950 } 951 952 void 953 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) 954 { 955 956 } 957 958 static void 959 configure_final(void *dummy) 960 { 961 cold = 0; 962 } 963 964 /* 965 * Send a SIGIO or SIGURG signal to a process or process group using stored 966 * credentials rather than those of the current process. 967 */ 968 void 969 pgsigio(sigiop, sig, checkctty) 970 struct sigio **sigiop; 971 int sig, checkctty; 972 { 973 panic("SIGIO not supported yet\n"); 974 #ifdef notyet 975 ksiginfo_t ksi; 976 struct sigio *sigio; 977 978 ksiginfo_init(&ksi); 979 ksi.ksi_signo = sig; 980 ksi.ksi_code = SI_KERNEL; 981 982 SIGIO_LOCK(); 983 sigio = *sigiop; 984 if (sigio == NULL) { 985 SIGIO_UNLOCK(); 986 return; 987 } 988 if (sigio->sio_pgid > 0) { 989 PROC_LOCK(sigio->sio_proc); 990 if (CANSIGIO(sigio->sio_ucred, sigio->sio_proc->p_ucred)) 991 psignal(sigio->sio_proc, sig); 992 PROC_UNLOCK(sigio->sio_proc); 993 } else if (sigio->sio_pgid < 0) { 994 struct proc *p; 995 996 PGRP_LOCK(sigio->sio_pgrp); 997 LIST_FOREACH(p, &sigio->sio_pgrp->pg_members, p_pglist) { 998 PROC_LOCK(p); 999 if (CANSIGIO(sigio->sio_ucred, p->p_ucred) && 1000 (checkctty == 0 || (p->p_flag & P_CONTROLT))) 1001 psignal(p, sig); 1002 PROC_UNLOCK(p); 1003 } 1004 PGRP_UNLOCK(sigio->sio_pgrp); 1005 } 1006 SIGIO_UNLOCK(); 1007 #endif 1008 } 1009 1010 void 1011 kproc_exit(int ecode) 1012 { 1013 panic("kproc_exit unsupported"); 1014 } 1015 1016 vm_offset_t 1017 kmem_malloc(vm_size_t bytes, int flags) 1018 { 1019 void *alloc = ff_mmap(NULL, bytes, ff_PROT_READ|ff_PROT_WRITE, ff_MAP_ANON|ff_MAP_PRIVATE, -1, 0); 1020 if ((flags & M_ZERO) && alloc != NULL) 1021 bzero(alloc, bytes); 1022 return ((vm_offset_t)alloc); 1023 } 1024 1025 void 1026 kmem_free(vm_offset_t addr, vm_size_t size) 1027 { 1028 ff_munmap((void *)addr, size); 1029 } 1030 1031 vm_offset_t 1032 kmem_alloc_contig(vm_size_t size, int flags, vm_paddr_t low, 1033 vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) 1034 { 1035 return (kmem_malloc(size, flags)); 1036 } 1037 1038 void 1039 malloc_init(void *data) 1040 { 1041 /* Nothing to do here */ 1042 } 1043 1044 1045 void 1046 malloc_uninit(void *data) 1047 { 1048 /* Nothing to do here */ 1049 } 1050 1051 void * 1052 malloc(unsigned long size, struct malloc_type *type, int flags) 1053 { 1054 void *alloc; 1055 1056 do { 1057 alloc = ff_malloc(size); 1058 if (alloc || !(flags & M_WAITOK)) 1059 break; 1060 1061 pause("malloc", hz/100); 1062 } while (alloc == NULL); 1063 1064 if ((flags & M_ZERO) && alloc != NULL) 1065 bzero(alloc, size); 1066 return (alloc); 1067 } 1068 1069 void 1070 free(void *addr, struct malloc_type *type) 1071 { 1072 ff_free(addr); 1073 } 1074 1075 void * 1076 realloc(void *addr, unsigned long size, struct malloc_type *type, 1077 int flags) 1078 { 1079 return (ff_realloc(addr, size)); 1080 } 1081 1082 void * 1083 reallocf(void *addr, unsigned long size, struct malloc_type *type, 1084 int flags) 1085 { 1086 void *mem; 1087 1088 if ((mem = ff_realloc(addr, size)) == NULL) 1089 ff_free(addr); 1090 1091 return (mem); 1092 } 1093 1094 void 1095 DELAY(int delay) 1096 { 1097 struct timespec rqt; 1098 1099 if (delay < 1000) 1100 return; 1101 1102 rqt.tv_nsec = 1000*((unsigned long)delay); 1103 rqt.tv_sec = 0; 1104 /* 1105 * FIXME: We shouldn't sleep in dpdk apps. 1106 */ 1107 //nanosleep(&rqt, NULL); 1108 } 1109 1110 void 1111 bwillwrite(void) 1112 { 1113 1114 } 1115 1116 off_t 1117 foffset_lock(struct file *fp, int flags) 1118 { 1119 struct mtx *mtxp; 1120 off_t res; 1121 1122 KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed")); 1123 1124 #if OFF_MAX <= LONG_MAX 1125 /* 1126 * Caller only wants the current f_offset value. Assume that 1127 * the long and shorter integer types reads are atomic. 1128 */ 1129 if ((flags & FOF_NOLOCK) != 0) 1130 return (fp->f_offset); 1131 #endif 1132 1133 /* 1134 * According to McKusick the vn lock was protecting f_offset here. 1135 * It is now protected by the FOFFSET_LOCKED flag. 1136 */ 1137 mtxp = mtx_pool_find(mtxpool_sleep, fp); 1138 mtx_lock(mtxp); 1139 /* 1140 if ((flags & FOF_NOLOCK) == 0) { 1141 while (fp->f_vnread_flags & FOFFSET_LOCKED) { 1142 fp->f_vnread_flags |= FOFFSET_LOCK_WAITING; 1143 msleep(&fp->f_vnread_flags, mtxp, PUSER -1, 1144 "vofflock", 0); 1145 } 1146 fp->f_vnread_flags |= FOFFSET_LOCKED; 1147 } 1148 */ 1149 res = fp->f_offset; 1150 mtx_unlock(mtxp); 1151 return (res); 1152 } 1153 1154 #if 0 1155 void 1156 sf_ext_free(void *arg1, void *arg2) 1157 { 1158 panic("sf_ext_free not implemented.\n"); 1159 } 1160 1161 void 1162 sf_ext_free_nocache(void *arg1, void *arg2) 1163 { 1164 panic("sf_ext_free_nocache not implemented.\n"); 1165 } 1166 #endif 1167 1168 void 1169 sched_bind(struct thread *td, int cpu) 1170 { 1171 1172 } 1173 1174 void 1175 sched_unbind(struct thread* td) 1176 { 1177 1178 } 1179 1180 void 1181 getcredhostid(struct ucred *cred, unsigned long *hostid) 1182 { 1183 *hostid = 0; 1184 } 1185 1186 /* 1187 * Check if gid is a member of the group set. 1188 */ 1189 int 1190 groupmember(gid_t gid, struct ucred *cred) 1191 { 1192 int l; 1193 int h; 1194 int m; 1195 1196 if (cred->cr_groups[0] == gid) 1197 return(1); 1198 1199 /* 1200 * If gid was not our primary group, perform a binary search 1201 * of the supplemental groups. This is possible because we 1202 * sort the groups in crsetgroups(). 1203 */ 1204 l = 1; 1205 h = cred->cr_ngroups; 1206 while (l < h) { 1207 m = l + ((h - l) / 2); 1208 if (cred->cr_groups[m] < gid) 1209 l = m + 1; 1210 else 1211 h = m; 1212 } 1213 if ((l < cred->cr_ngroups) && (cred->cr_groups[l] == gid)) 1214 return (1); 1215 1216 return (0); 1217 } 1218 1219 int 1220 vm_wait_doms(const domainset_t *wdoms, int mflags) 1221 { 1222 return 0; 1223 } 1224 1225 void 1226 vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di, 1227 struct domainset_ref *dr, int *domain, int *flags) 1228 { 1229 *domain = 0; 1230 } 1231 1232 int 1233 vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) 1234 { 1235 //return (EJUSTRETURN); 1236 return 0; 1237 } 1238 1239 vm_offset_t 1240 kmem_malloc_domainset(struct domainset *ds, vm_size_t size, int flags) 1241 { 1242 return (kmem_malloc(size, flags)); 1243 } 1244 1245 void * 1246 mallocarray(size_t nmemb, size_t size, struct malloc_type *type, int flags) 1247 { 1248 return (malloc(size * nmemb, type, flags)); 1249 } 1250 1251 void 1252 getcredhostuuid(struct ucred *cred, char *buf, size_t size) 1253 { 1254 mtx_lock(&cred->cr_prison->pr_mtx); 1255 strlcpy(buf, cred->cr_prison->pr_hostuuid, size); 1256 mtx_unlock(&cred->cr_prison->pr_mtx); 1257 } 1258 1259 void 1260 getjailname(struct ucred *cred, char *name, size_t len) 1261 { 1262 mtx_lock(&cred->cr_prison->pr_mtx); 1263 strlcpy(name, cred->cr_prison->pr_name, len); 1264 mtx_unlock(&cred->cr_prison->pr_mtx); 1265 } 1266 1267 void * 1268 malloc_domainset(size_t size, struct malloc_type *mtp, struct domainset *ds, 1269 int flags) 1270 { 1271 return (malloc(size, mtp, flags)); 1272 } 1273 1274 void * 1275 malloc_exec(size_t size, struct malloc_type *mtp, int flags) 1276 { 1277 1278 return (malloc(size, mtp, flags)); 1279 } 1280 1281 int 1282 bus_get_domain(device_t dev, int *domain) 1283 { 1284 return (-1); 1285 } 1286 1287 void 1288 cru2xt(struct thread *td, struct xucred *xcr) 1289 { 1290 cru2x(td->td_ucred, xcr); 1291 xcr->cr_pid = td->td_proc->p_pid; 1292 } 1293 1294 /* 1295 * Set socket peer credentials at connection time. 1296 * 1297 * The client's PCB credentials are copied from its process structure. The 1298 * server's PCB credentials are copied from the socket on which it called 1299 * listen(2). uipc_listen cached that process's credentials at the time. 1300 */ 1301 void 1302 unp_copy_peercred(struct thread *td, struct unpcb *client_unp, 1303 struct unpcb *server_unp, struct unpcb *listen_unp) 1304 { 1305 cru2xt(td, &client_unp->unp_peercred); 1306 client_unp->unp_flags |= UNP_HAVEPC; 1307 1308 memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred, 1309 sizeof(server_unp->unp_peercred)); 1310 server_unp->unp_flags |= UNP_HAVEPC; 1311 client_unp->unp_flags |= (listen_unp->unp_flags & UNP_WANTCRED_MASK); 1312 } 1313 1314 int 1315 eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval, 1316 int flags) 1317 { 1318 return (0); 1319 } 1320 1321 void 1322 sched_prio(struct thread *td, u_char prio) 1323 { 1324 1325 } 1326 1327 /* 1328 * The machine independent parts of context switching. 1329 * 1330 * The thread lock is required on entry and is no longer held on return. 1331 */ 1332 void 1333 mi_switch(int flags) 1334 { 1335 1336 } 1337 1338 int 1339 sched_is_bound(struct thread *td) 1340 { 1341 return (1); 1342 } 1343 1344 /* 1345 * This function must not be called with-in read section. 1346 */ 1347 void 1348 ck_epoch_synchronize_wait(struct ck_epoch *global, 1349 ck_epoch_wait_cb_t *cb, void *ct) 1350 { 1351 1352 } 1353 1354 bool 1355 ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred) 1356 { 1357 return (true); 1358 } 1359 1360 void 1361 _ck_epoch_addref(struct ck_epoch_record *record, 1362 struct ck_epoch_section *section) 1363 { 1364 1365 } 1366 1367 bool 1368 _ck_epoch_delref(struct ck_epoch_record *record, 1369 struct ck_epoch_section *section) 1370 { 1371 return true; 1372 } 1373 1374 void 1375 ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record, 1376 void *ct) 1377 { 1378 1379 } 1380 1381 void 1382 ck_epoch_init(struct ck_epoch *global) 1383 { 1384 1385 } 1386 1387 #if 0 1388 void 1389 wakeup_any(const void *ident) 1390 { 1391 1392 } 1393 #endif 1394 1395 /* 1396 * kmem_bootstrap_free: 1397 * 1398 * Free pages backing preloaded data (e.g., kernel modules) to the 1399 * system. Currently only supported on platforms that create a 1400 * vm_phys segment for preloaded data. 1401 */ 1402 void 1403 kmem_bootstrap_free(vm_offset_t start, vm_size_t size) 1404 { 1405 1406 } 1407 1408 #if 0 1409 int 1410 elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) 1411 { 1412 return (0); 1413 } 1414 #endif 1415 1416 int 1417 pmap_change_prot(vm_offset_t va, vm_size_t size, vm_prot_t prot) 1418 { 1419 return 0; 1420 } 1421 1422 void * 1423 memset_early(void *buf, int c, size_t len) 1424 { 1425 return (memset(buf, c, len)); 1426 } 1427 1428 int 1429 elf_reloc_late(linker_file_t lf, Elf_Addr relocbase, const void *data, 1430 int type, elf_lookup_fn lookup) 1431 { 1432 return (0); 1433 } 1434 1435 bool 1436 elf_is_ifunc_reloc(Elf_Size r_info) 1437 { 1438 return (true); 1439 } 1440 1441 void 1442 sleepq_chains_remove_matching(bool (*matches)(struct thread *)) 1443 { 1444 1445 } 1446 1447 u_int 1448 vm_free_count(void) 1449 { 1450 return vm_dom[0].vmd_free_count; 1451 } 1452 1453 struct proc * 1454 pfind_any(pid_t pid) 1455 { 1456 return (curproc); 1457 } 1458 1459