/*
 * Copyright (c) 2010 Kip Macy. All rights reserved.
 * Copyright (C) 2017 THL A29 Limited, a Tencent company.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Derived in part from libplebnet's pn_glue.c.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/event.h>
#include <sys/jail.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/time.h>
#include <sys/ucred.h>
#include <sys/uio.h>
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/vmem.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/sched.h>
#include <sys/vmmeter.h>
#include <sys/unpcb.h>
#include <sys/eventfd.h>
#include <sys/linker.h>
#include <sys/sleepqueue.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_domainset.h>
#include <vm/vm_page.h>
#include <vm/vm_pagequeue.h>

#include <netinet/in_systm.h>

#include <ck_epoch.h>
#include <ck_stack.h>

#include "ff_host_interface.h"

int kstack_pages = KSTACK_PAGES;
SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0,
    "Kernel stack size in pages");

int __read_mostly vm_ndomains = 1;
SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

#ifndef MAXMEMDOM
#define MAXMEMDOM 1
#endif

struct domainset __read_mostly domainset_fixed[MAXMEMDOM];
struct domainset __read_mostly domainset_prefer[MAXMEMDOM];
struct domainset __read_mostly domainset_roundrobin;

struct vm_domain vm_dom[MAXMEMDOM];

domainset_t __exclusive_cache_line vm_min_domains;

int bootverbose;

SYSCTL_ROOT_NODE(0, sysctl, CTLFLAG_RW, 0, "Sysctl internal magic");

SYSCTL_ROOT_NODE(CTL_VFS, vfs, CTLFLAG_RW, 0, "File system");
0, "File system"); 104 105 SYSCTL_ROOT_NODE(CTL_KERN, kern, CTLFLAG_RW, 0, "High kernel, proc, limits &c"); 106 107 SYSCTL_ROOT_NODE(CTL_NET, net, CTLFLAG_RW, 0, "Network, (see socket.h)"); 108 109 SYSCTL_ROOT_NODE(CTL_MACHDEP, machdep, CTLFLAG_RW, 0, "machine dependent"); 110 111 SYSCTL_ROOT_NODE(CTL_VM, vm, CTLFLAG_RW, 0, "Virtual memory"); 112 113 SYSCTL_ROOT_NODE(CTL_DEBUG, debug, CTLFLAG_RW, 0, "Debugging"); 114 115 SYSCTL_ROOT_NODE(OID_AUTO, security, CTLFLAG_RW, 0, "Security"); 116 117 SYSCTL_NODE(_kern, OID_AUTO, features, CTLFLAG_RD, 0, "Kernel Features"); 118 119 SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table"); 120 121 MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory"); 122 MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers"); 123 static MALLOC_DEFINE(M_CRED, "cred", "credentials"); 124 static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures"); 125 126 MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options"); 127 128 static void configure_final(void *dummy); 129 130 SYSINIT(configure3, SI_SUB_CONFIGURE, SI_ORDER_ANY, configure_final, NULL); 131 132 volatile int ticks; 133 int cpu_disable_deep_sleep; 134 135 static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS); 136 137 /* This is used in modules that need to work in both SMP and UP. */ 138 cpuset_t all_cpus; 139 140 int mp_ncpus = 1; 141 /* export this for libkvm consumers. */ 142 int mp_maxcpus = MAXCPU; 143 144 volatile int smp_started; 145 u_int mp_maxid; 146 147 static SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL, 148 "Kernel SMP"); 149 150 SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0, 151 "Max CPU ID."); 152 153 SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus, 154 0, "Max number of CPUs that the system was compiled for."); 155 156 SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD | CTLTYPE_INT, NULL, 0, 157 sysctl_kern_smp_active, "I", "Indicates system is running in SMP mode"); 158 159 int smp_disabled = 0; /* has smp been disabled? */ 160 SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD, 161 &smp_disabled, 0, "SMP has been disabled from the loader"); 162 163 int smp_cpus = 1; /* how many cpu's running */ 164 SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_cpus, 0, 165 "Number of CPUs online"); 166 167 int smp_topology = 0; /* Which topology we're using. */ 168 SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RDTUN, &smp_topology, 0, 169 "Topology override setting; 0 is default provided by hardware."); 170 171 u_int vn_lock_pair_pause_max = 1; // ff_global_cfg.freebsd.hz / 100; 172 SYSCTL_UINT(_debug, OID_AUTO, vn_lock_pair_pause_max, CTLFLAG_RW, 173 &vn_lock_pair_pause_max, 0, 174 "Max ticks for vn_lock_pair deadlock avoidance sleep"); 175 176 long first_page = 0; 177 178 struct vmmeter vm_cnt; 179 vm_map_t kernel_map = 0; 180 vm_map_t kmem_map = 0; 181 182 vmem_t *kernel_arena = NULL; 183 vmem_t *kmem_arena = NULL; 184 185 struct vm_object kernel_object_store; 186 struct vm_object kmem_object_store; 187 188 struct filterops fs_filtops; 189 struct filterops sig_filtops; 190 191 int cold = 1; 192 193 int unmapped_buf_allowed = 1; 194 195 int cpu_deepest_sleep = 0; /* Deepest Cx state available. */ 196 int cpu_disable_c2_sleep = 0; /* Timer dies in C2. */ 197 int cpu_disable_c3_sleep = 0; /* Timer dies in C3. 
u_char __read_frequently kdb_active = 0;

static void timevalfix(struct timeval *);

/* Extra care is taken with this sysctl because the data type is volatile */
static int
sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS)
{
    int error, active;

    active = smp_started;
    error = SYSCTL_OUT(req, &active, sizeof(active));
    return (error);
}

void
procinit(void)
{
    sx_init(&allproc_lock, "allproc");
    LIST_INIT(&allproc);
}

/*
 * Find a prison that is a descendant of mypr.  Returns a locked prison or NULL.
 */
struct prison *
prison_find_child(struct prison *mypr, int prid)
{
    return (NULL);
}

void
prison_free(struct prison *pr)
{

}

void
prison_hold_locked(struct prison *pr)
{

}

int
prison_if(struct ucred *cred, const struct sockaddr *sa)
{
    return (0);
}

int
prison_check_af(struct ucred *cred, int af)
{
    return (0);
}

int
prison_check_ip4(const struct ucred *cred, const struct in_addr *ia)
{
    return (0);
}

int
prison_equal_ip4(struct prison *pr1, struct prison *pr2)
{
    return (1);
}

#ifdef INET6
int
prison_check_ip6(const struct ucred *cred, const struct in6_addr *ia)
{
    return (0);
}

int
prison_equal_ip6(struct prison *pr1, struct prison *pr2)
{
    return (1);
}
#endif

/*
 * See if a prison has the specific flag set.
 */
int
prison_flag(struct ucred *cred, unsigned flag)
{
    /* This is an atomic read, so no locking is necessary. */
    return (flag & PR_HOST);
}

int
prison_get_ip4(struct ucred *cred, struct in_addr *ia)
{
    return (0);
}

int
prison_local_ip4(struct ucred *cred, struct in_addr *ia)
{
    return (0);
}

int
prison_remote_ip4(struct ucred *cred, struct in_addr *ia)
{
    return (0);
}

#ifdef INET6
int
prison_get_ip6(struct ucred *cred, struct in6_addr *ia)
{
    return (0);
}

int
prison_local_ip6(struct ucred *cred, struct in6_addr *ia, int other)
{
    return (0);
}

int
prison_remote_ip6(struct ucred *cred, struct in6_addr *ia)
{
    return (0);
}
#endif

int
prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia)
{
    /* not jailed */
    return (1);
}

#ifdef INET6
int
prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia)
{
    /* not jailed */
    return (1);
}
#endif

#if 0
int
jailed(struct ucred *cred)
{
    return (0);
}
#endif

/*
 * Return 1 if the passed credential is in a jail and that jail does not
 * have its own virtual network stack, otherwise 0.
 */
int
jailed_without_vnet(struct ucred *cred)
{
    return (0);
}

int
priv_check(struct thread *td, int priv)
{
    return (0);
}

int
priv_check_cred(struct ucred *cred, int priv)
{
    return (0);
}

int
vslock(void *addr, size_t len)
{
    return (0);
}

void
vsunlock(void *addr, size_t len)
{

}

/*
 * Check that a proposed value to load into the .it_value or
 * .it_interval part of an interval timer is acceptable, and
 * fix it to have at least minimal value (i.e. if it is less
 * than the resolution of the clock, round it up.)
 */
int
itimerfix(struct timeval *tv)
{

    if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000)
        return (EINVAL);
    if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick)
        tv->tv_usec = tick;
    return (0);
}

/*
 * Decrement an interval timer by a specified number
 * of microseconds, which must be less than a second,
 * i.e. < 1000000.  If the timer expires, then reload
 * it.  In this case, carry over (usec - old value) to
 * reduce the value reloaded into the timer so that
 * the timer does not drift.  This routine assumes
 * that it is called in a context where the timers
 * on which it is operating cannot change in value.
 */
int
itimerdecr(struct itimerval *itp, int usec)
{
    if (itp->it_value.tv_usec < usec) {
        if (itp->it_value.tv_sec == 0) {
            /* expired, and already in next interval */
            usec -= itp->it_value.tv_usec;
            goto expire;
        }
        itp->it_value.tv_usec += 1000000;
        itp->it_value.tv_sec--;
    }
    itp->it_value.tv_usec -= usec;
    usec = 0;
    if (timevalisset(&itp->it_value))
        return (1);
    /* expired, exactly at end of interval */
expire:
    if (timevalisset(&itp->it_interval)) {
        itp->it_value = itp->it_interval;
        itp->it_value.tv_usec -= usec;
        if (itp->it_value.tv_usec < 0) {
            itp->it_value.tv_usec += 1000000;
            itp->it_value.tv_sec--;
        }
    } else
        itp->it_value.tv_usec = 0;  /* sec is already 0 */
    return (0);
}
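/*
 * Illustrative sketch only (kept under #if 0, not compiled in): how the two
 * interval-timer helpers above are typically used together.  The numbers
 * assume hz = 100 (tick = 10000 us); both values and the function name are
 * assumptions made for this example, not part of the glue layer.
 */
#if 0
static void
itimer_example(void)
{
    struct itimerval it;

    /* 500 us is below the clock resolution, so itimerfix() rounds it up. */
    it.it_value.tv_sec = 0;
    it.it_value.tv_usec = 500;
    it.it_interval.tv_sec = 0;
    it.it_interval.tv_usec = 20000;
    (void)itimerfix(&it.it_value);      /* it_value.tv_usec becomes 10000 */
    (void)itimerfix(&it.it_interval);

    /* Simulate one clock tick: the timer expires and is reloaded from it_interval. */
    if (itimerdecr(&it, tick) == 0)
        printf("timer expired, reloaded to %ld us\n", it.it_value.tv_usec);
}
#endif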
/*
 * Add and subtract routines for timevals.
 * N.B.: subtract routine doesn't deal with
 * results which are before the beginning,
 * it just gets very confused in this case.
 * Caveat emptor.
 */
void
timevaladd(struct timeval *t1, const struct timeval *t2)
{
    t1->tv_sec += t2->tv_sec;
    t1->tv_usec += t2->tv_usec;
    timevalfix(t1);
}

void
timevalsub(struct timeval *t1, const struct timeval *t2)
{
    t1->tv_sec -= t2->tv_sec;
    t1->tv_usec -= t2->tv_usec;
    timevalfix(t1);
}

static void
timevalfix(struct timeval *t1)
{
    if (t1->tv_usec < 0) {
        t1->tv_sec--;
        t1->tv_usec += 1000000;
    }
    if (t1->tv_usec >= 1000000) {
        t1->tv_sec++;
        t1->tv_usec -= 1000000;
    }
}

/*
 * ratecheck(): simple time-based rate-limit checking.
 */
int
ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
{
    struct timeval tv, delta;
    int rv = 0;

    getmicrouptime(&tv);    /* NB: 10ms precision */
    delta = tv;
    timevalsub(&delta, lasttime);

    /*
     * check for 0,0 is so that the message will be seen at least once,
     * even if interval is huge.
     */
    if (timevalcmp(&delta, mininterval, >=) ||
        (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
        *lasttime = tv;
        rv = 1;
    }

    return (rv);
}

/*
 * ppsratecheck(): packets (or events) per second limitation.
 *
 * Return 0 if the limit is to be enforced (e.g. the caller
 * should drop a packet because of the rate limitation).
 *
 * maxpps of 0 always causes zero to be returned.  maxpps of -1
 * always causes 1 to be returned; this effectively defeats rate
 * limiting.
 *
 * Note that we maintain the struct timeval for compatibility
 * with other bsd systems.  We reuse the storage and just monitor
 * clock ticks for minimal overhead.
 */
int
ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
{
    int now;

    /*
     * Reset the last time and counter if this is the first call
     * or more than a second has passed since the last update of
     * lasttime.
     */
    now = ticks;
    if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) {
        lasttime->tv_sec = now;
        *curpps = 1;
        return (maxpps != 0);
    } else {
        (*curpps)++;    /* NB: ignore potential overflow */
        return (maxpps < 0 || *curpps < maxpps);
    }
}
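/*
 * Minimal usage sketch for the two rate limiters above (kept under #if 0,
 * not compiled in).  The static state, the one-second interval and the
 * 10-events-per-second budget are assumptions chosen for the example.
 */
#if 0
static void
ratelimit_example(void)
{
    static struct timeval last_msg;             /* for ratecheck() */
    static const struct timeval one_sec = { 1, 0 };
    static struct timeval pps_last;             /* for ppsratecheck() */
    static int pps_count;

    /* Print at most one message per second. */
    if (ratecheck(&last_msg, &one_sec))
        printf("ratecheck: message allowed\n");

    /* Allow at most 10 events per second; 0 means "enforce the limit now". */
    if (ppsratecheck(&pps_last, &pps_count, 10) == 0)
        return;     /* drop the event */
}
#endif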
/*
 * Compute number of ticks in the specified amount of time.
 */
int
tvtohz(struct timeval *tv)
{
    unsigned long ticks;
    long sec, usec;

    /*
     * If the number of usecs in the whole seconds part of the time
     * difference fits in a long, then the total number of usecs will
     * fit in an unsigned long.  Compute the total and convert it to
     * ticks, rounding up and adding 1 to allow for the current tick
     * to expire.  Rounding also depends on unsigned long arithmetic
     * to avoid overflow.
     *
     * Otherwise, if the number of ticks in the whole seconds part of
     * the time difference fits in a long, then convert the parts to
     * ticks separately and add, using similar rounding methods and
     * overflow avoidance.  This method would work in the previous
     * case but it is slightly slower and assumes that hz is integral.
     *
     * Otherwise, round the time difference down to the maximum
     * representable value.
     *
     * If ints have 32 bits, then the maximum value for any timeout in
     * 10ms ticks is 248 days.
     */
    sec = tv->tv_sec;
    usec = tv->tv_usec;
    if (usec < 0) {
        sec--;
        usec += 1000000;
    }
    if (sec < 0) {
#ifdef DIAGNOSTIC
        if (usec > 0) {
            sec++;
            usec -= 1000000;
        }
        printf("tvtohz: negative time difference %ld sec %ld usec\n",
            sec, usec);
#endif
        ticks = 1;
    } else if (sec <= LONG_MAX / 1000000)
        ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
            / tick + 1;
    else if (sec <= LONG_MAX / hz)
        ticks = sec * hz
            + ((unsigned long)usec + (tick - 1)) / tick + 1;
    else
        ticks = LONG_MAX;
    if (ticks > INT_MAX)
        ticks = INT_MAX;
    return ((int)ticks);
}
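/*
 * Worked example for tvtohz(), kept under #if 0 (not compiled in).  The
 * arithmetic assumes hz = 100, i.e. tick = 10000 us; both values and the
 * function name are assumptions made purely for illustration.
 */
#if 0
static void
tvtohz_example(void)
{
    struct timeval tv = { 0, 25000 };   /* 25 ms */

    /*
     * (0 * 1000000 + 25000 + (10000 - 1)) / 10000 + 1 = 3 + 1 = 4:
     * three full ticks rounded up, plus one for the partially elapsed
     * current tick.
     */
    printf("%d ticks\n", tvtohz(&tv));  /* prints "4 ticks" */
}
#endif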
int
copyin(const void *uaddr, void *kaddr, size_t len)
{
    memcpy(kaddr, uaddr, len);
    return (0);
}

int
copyout(const void *kaddr, void *uaddr, size_t len)
{
    memcpy(uaddr, kaddr, len);
    return (0);
}

#if 0
int
copystr(const void *kfaddr, void *kdaddr, size_t len, size_t *done)
{
    size_t bytes;

    bytes = strlcpy(kdaddr, kfaddr, len);
    if (done != NULL)
        *done = bytes;

    return (0);
}
#endif

int
copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
{
    size_t bytes;

    bytes = strlcpy(kaddr, uaddr, len);
    if (done != NULL)
        *done = bytes;

    return (0);
}

int
copyiniov(const struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error)
{
    u_int iovlen;

    *iov = NULL;
    if (iovcnt > UIO_MAXIOV)
        return (error);
    iovlen = iovcnt * sizeof (struct iovec);
    *iov = malloc(iovlen, M_IOV, M_WAITOK);
    error = copyin(iovp, *iov, iovlen);
    if (error) {
        free(*iov, M_IOV);
        *iov = NULL;
    }
    return (error);
}

int
subyte(volatile void *base, int byte)
{
    *(volatile char *)base = (uint8_t)byte;
    return (0);
}

static inline int
chglimit(struct uidinfo *uip, long *limit, int diff, rlim_t max, const char *name)
{
    /* Don't allow them to exceed max, but allow subtraction. */
    if (diff > 0 && max != 0) {
        if (atomic_fetchadd_long(limit, (long)diff) + diff > max) {
            atomic_subtract_long(limit, (long)diff);
            return (0);
        }
    } else {
        atomic_add_long(limit, (long)diff);
        if (*limit < 0)
            printf("negative %s for uid = %d\n", name, uip->ui_uid);
    }
    return (1);
}

/*
 * Change the count associated with number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit
 */
int
chgproccnt(struct uidinfo *uip, int diff, rlim_t max)
{
    return (chglimit(uip, &uip->ui_proccnt, diff, max, "proccnt"));
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max)
{
    int diff, rv;

    diff = to - *hiwat;
    if (diff > 0 && max == 0) {
        rv = 0;
    } else {
        rv = chglimit(uip, &uip->ui_sbsize, diff, max, "sbsize");
        if (rv != 0)
            *hiwat = to;
    }
    return (rv);
}

/*
 * Change the count associated with number of pseudo-terminals
 * a given user is using.  When 'max' is 0, don't enforce a limit
 */
int
chgptscnt(struct uidinfo *uip, int diff, rlim_t max)
{
    return (chglimit(uip, &uip->ui_ptscnt, diff, max, "ptscnt"));
}

int
chgkqcnt(struct uidinfo *uip, int diff, rlim_t max)
{
    return (chglimit(uip, &uip->ui_kqcnt, diff, max, "kqcnt"));
}

int
chgumtxcnt(struct uidinfo *uip, int diff, rlim_t max)
{
    return (chglimit(uip, &uip->ui_umtxcnt, diff, max, "umtxcnt"));
}
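/*
 * Minimal usage sketch (kept under #if 0, not compiled in) for the per-uid
 * accounting helpers above, roughly as the socket layer would use them.  The
 * function name and the 1 MB cap are arbitrary values chosen for the example.
 */
#if 0
static int
sbsize_example(struct uidinfo *uip, u_int *hiwat)
{
    /* Try to grow this user's socket-buffer accounting to 64 KB. */
    if (!chgsbsize(uip, hiwat, 64 * 1024, 1024 * 1024))
        return (ENOBUFS);   /* would exceed the 1 MB cap */
    /* ... use the buffer ...; on teardown give the space back. */
    (void)chgsbsize(uip, hiwat, 0, 1024 * 1024);
    return (0);
}
#endif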
/*
 * Allocate a new resource limits structure and initialize its
 * reference count and mutex pointer.
 */
struct plimit *
lim_alloc(void)
{
    struct plimit *limp;

    limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
    refcount_init(&limp->pl_refcnt, 1);
    return (limp);
}

struct plimit *
lim_hold(struct plimit *limp)
{
    refcount_acquire(&limp->pl_refcnt);
    return (limp);
}

#if 0
/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct thread *td, int which)
{
    struct rlimit rl;

    lim_rlimit(td, which, &rl);
    return (rl.rlim_cur);
}
#endif

rlim_t
lim_cur_proc(struct proc *p, int which)
{
    struct rlimit rl;

    lim_rlimit_proc(p, which, &rl);
    return (rl.rlim_cur);
}

/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct thread *td, int which, struct rlimit *rlp)
{
    struct proc *p = td->td_proc;

    MPASS(td == curthread);
    KASSERT(which >= 0 && which < RLIM_NLIMITS,
        ("request for invalid resource limit"));
    *rlp = p->p_limit->pl_rlimit[which];
    if (p->p_sysent->sv_fixlimit != NULL)
        p->p_sysent->sv_fixlimit(rlp, which);
}

void
lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp)
{
    PROC_LOCK_ASSERT(p, MA_OWNED);
    KASSERT(which >= 0 && which < RLIM_NLIMITS,
        ("request for invalid resource limit"));
    *rlp = p->p_limit->pl_rlimit[which];
    if (p->p_sysent->sv_fixlimit != NULL)
        p->p_sysent->sv_fixlimit(rlp, which);
}

int
useracc(void *addr, int len, int rw)
{
    return (1);
}

struct pgrp *
pgfind(pid_t pgid)
{
    return (NULL);
}

#if 0
struct proc *
zpfind(pid_t pid)
{
    return (NULL);
}
#endif

int
p_cansee(struct thread *td, struct proc *p)
{
    return (0);
}

struct proc *
pfind(pid_t pid)
{
    return (NULL);
}

int
pget(pid_t pid, int flags, struct proc **pp)
{
    return (ESRCH);
}

struct uidinfo uid0;

struct uidinfo *
uifind(uid_t uid)
{
    return (&uid0);
}

/*
 * Allocate a zeroed cred structure.
 */
struct ucred *
crget(void)
{
    struct ucred *cr;

    cr = malloc(sizeof(*cr), M_CRED, M_WAITOK | M_ZERO);
    refcount_init(&cr->cr_ref, 1);

    return (cr);
}

/*
 * Claim another reference to a ucred structure.
 */
struct ucred *
crhold(struct ucred *cr)
{
    refcount_acquire(&cr->cr_ref);
    return (cr);
}

/*
 * Free a cred structure.  Throws away space when ref count gets to 0.
 */
void
crfree(struct ucred *cr)
{
    KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref));
    KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred"));
    if (refcount_release(&cr->cr_ref)) {
        free(cr, M_CRED);
    }
}
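/*
 * Reference-counting sketch (kept under #if 0, not compiled in) for the ucred
 * helpers above, mirroring the usual FreeBSD pattern.  The "consumer" struct
 * and function name exist only for this example.
 */
#if 0
struct consumer {
    struct ucred *c_cred;
};

static void
cred_example(struct consumer *c)
{
    struct ucred *cr;

    cr = crget();               /* new cred, refcount == 1 */
    c->c_cred = crhold(cr);     /* consumer takes its own reference */
    crfree(cr);                 /* drop the allocation reference */
    /* ... later, when the consumer is torn down ... */
    crfree(c->c_cred);          /* last reference; the memory is freed */
    c->c_cred = NULL;
}
#endif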
/*
 * Fill in a struct xucred based on a struct ucred.
 */
void
cru2x(struct ucred *cr, struct xucred *xcr)
{
#if 0
    int ngroups;

    bzero(xcr, sizeof(*xcr));
    xcr->cr_version = XUCRED_VERSION;
    xcr->cr_uid = cr->cr_uid;

    ngroups = MIN(cr->cr_ngroups, XU_NGROUPS);
    xcr->cr_ngroups = ngroups;
    bcopy(cr->cr_groups, xcr->cr_groups,
        ngroups * sizeof(*cr->cr_groups));
#endif
}

int
cr_cansee(struct ucred *u1, struct ucred *u2)
{
    return (0);
}

int
cr_canseesocket(struct ucred *cred, struct socket *so)
{
    return (0);
}

int
cr_canseeinpcb(struct ucred *cred, struct inpcb *inp)
{
    return (0);
}

int
securelevel_gt(struct ucred *cr, int level)
{
    return (0);
}

int
securelevel_ge(struct ucred *cr, int level)
{
    return (0);
}

/**
 * @brief Send a 'notification' to userland, using standard ways
 */
void
devctl_notify(const char *system, const char *subsystem, const char *type,
    const char *data)
{

}

void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

}

static void
configure_final(void *dummy)
{
    cold = 0;
}

/*
 * Send a SIGIO or SIGURG signal to a process or process group using stored
 * credentials rather than those of the current process.
 */
void
pgsigio(struct sigio **sigiop, int sig, int checkctty)
{
    panic("SIGIO not supported yet\n");
#ifdef notyet
    ksiginfo_t ksi;
    struct sigio *sigio;

    ksiginfo_init(&ksi);
    ksi.ksi_signo = sig;
    ksi.ksi_code = SI_KERNEL;

    SIGIO_LOCK();
    sigio = *sigiop;
    if (sigio == NULL) {
        SIGIO_UNLOCK();
        return;
    }
    if (sigio->sio_pgid > 0) {
        PROC_LOCK(sigio->sio_proc);
        if (CANSIGIO(sigio->sio_ucred, sigio->sio_proc->p_ucred))
            psignal(sigio->sio_proc, sig);
        PROC_UNLOCK(sigio->sio_proc);
    } else if (sigio->sio_pgid < 0) {
        struct proc *p;

        PGRP_LOCK(sigio->sio_pgrp);
        LIST_FOREACH(p, &sigio->sio_pgrp->pg_members, p_pglist) {
            PROC_LOCK(p);
            if (CANSIGIO(sigio->sio_ucred, p->p_ucred) &&
                (checkctty == 0 || (p->p_flag & P_CONTROLT)))
                psignal(p, sig);
            PROC_UNLOCK(p);
        }
        PGRP_UNLOCK(sigio->sio_pgrp);
    }
    SIGIO_UNLOCK();
#endif
}

void
kproc_exit(int ecode)
{
    panic("kproc_exit unsupported");
}

vm_offset_t
kmem_malloc(vm_size_t bytes, int flags)
{
    void *alloc = ff_mmap(NULL, bytes, ff_PROT_READ|ff_PROT_WRITE,
        ff_MAP_ANON|ff_MAP_PRIVATE, -1, 0);
    if ((flags & M_ZERO) && alloc != NULL)
        bzero(alloc, bytes);
    return ((vm_offset_t)alloc);
}

void
kmem_free(vm_offset_t addr, vm_size_t size)
{
    ff_munmap((void *)addr, size);
}

vm_offset_t
kmem_alloc_contig(vm_size_t size, int flags, vm_paddr_t low,
    vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr)
{
    return (kmem_malloc(size, flags));
}

void
malloc_init(void *data)
{
    /* Nothing to do here */
}

void
malloc_uninit(void *data)
{
    /* Nothing to do here */
}

void *
malloc(unsigned long size, struct malloc_type *type, int flags)
{
    void *alloc;

    do {
        alloc = ff_malloc(size);
        if (alloc || !(flags & M_WAITOK))
            break;

        pause("malloc", hz/100);
    } while (alloc == NULL);

    if ((flags & M_ZERO) && alloc != NULL)
        bzero(alloc, size);
    return (alloc);
}
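/*
 * Minimal sketch (kept under #if 0, not compiled in) of how callers use this
 * ff_malloc-backed shim: M_NOWAIT allocations must be checked for NULL,
 * M_WAITOK allocations retry internally, and M_ZERO requests pre-zeroed
 * memory.  M_TEMP is reused here only because it is already defined above;
 * the function name is made up for the example.
 */
#if 0
static void
malloc_example(void)
{
    char *buf;

    buf = malloc(128, M_TEMP, M_NOWAIT | M_ZERO);
    if (buf == NULL)
        return;     /* allocation can fail with M_NOWAIT */
    free(buf, M_TEMP);
}
#endif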
void
free(void *addr, struct malloc_type *type)
{
    ff_free(addr);
}

void *
realloc(void *addr, unsigned long size, struct malloc_type *type,
    int flags)
{
    return (ff_realloc(addr, size));
}

void *
reallocf(void *addr, unsigned long size, struct malloc_type *type,
    int flags)
{
    void *mem;

    if ((mem = ff_realloc(addr, size)) == NULL)
        ff_free(addr);

    return (mem);
}

void
DELAY(int delay)
{
    struct timespec rqt;

    if (delay < 1000)
        return;

    rqt.tv_nsec = 1000*((unsigned long)delay);
    rqt.tv_sec = 0;
    /*
     * FIXME: We shouldn't sleep in dpdk apps.
     */
    //nanosleep(&rqt, NULL);
}

void
bwillwrite(void)
{

}

off_t
foffset_lock(struct file *fp, int flags)
{
    struct mtx *mtxp;
    off_t res;

    KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed"));

#if OFF_MAX <= LONG_MAX
    /*
     * Caller only wants the current f_offset value.  Assume that
     * the long and shorter integer types reads are atomic.
     */
    if ((flags & FOF_NOLOCK) != 0)
        return (fp->f_offset);
#endif

    /*
     * According to McKusick the vn lock was protecting f_offset here.
     * It is now protected by the FOFFSET_LOCKED flag.
     */
    mtxp = mtx_pool_find(mtxpool_sleep, fp);
    mtx_lock(mtxp);
    /*
    if ((flags & FOF_NOLOCK) == 0) {
        while (fp->f_vnread_flags & FOFFSET_LOCKED) {
            fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
            msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
                "vofflock", 0);
        }
        fp->f_vnread_flags |= FOFFSET_LOCKED;
    }
    */
    res = fp->f_offset;
    mtx_unlock(mtxp);
    return (res);
}

#if 0
void
sf_ext_free(void *arg1, void *arg2)
{
    panic("sf_ext_free not implemented.\n");
}

void
sf_ext_free_nocache(void *arg1, void *arg2)
{
    panic("sf_ext_free_nocache not implemented.\n");
}
#endif

void
sched_bind(struct thread *td, int cpu)
{

}

void
sched_unbind(struct thread *td)
{

}

void
getcredhostid(struct ucred *cred, unsigned long *hostid)
{
    *hostid = 0;
}

/*
 * Check if gid is a member of the group set.
 */
int
groupmember(gid_t gid, struct ucred *cred)
{
    int l;
    int h;
    int m;

    if (cred->cr_groups[0] == gid)
        return (1);

    /*
     * If gid was not our primary group, perform a binary search
     * of the supplemental groups.  This is possible because we
     * sort the groups in crsetgroups().
     */
    l = 1;
    h = cred->cr_ngroups;
    while (l < h) {
        m = l + ((h - l) / 2);
        if (cred->cr_groups[m] < gid)
            l = m + 1;
        else
            h = m;
    }
    if ((l < cred->cr_ngroups) && (cred->cr_groups[l] == gid))
        return (1);

    return (0);
}
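/*
 * Worked example (kept under #if 0, not compiled in) of the lower-bound
 * binary search used by groupmember() above.  The group vector is an
 * arbitrary sorted example; the search starts at index 1 because index 0
 * holds the effective gid.
 */
#if 0
static void
groupmember_example(void)
{
    /* cr_groups[] = { egid, then supplemental gids sorted ascending } */
    gid_t groups[] = { 20, 5, 12, 31, 44 };

    /*
     * Searching for gid 31 over indices [1, 5): l=1,h=5 -> m=3
     * (groups[3] == 31, not less, so h=3); l=1,h=3 -> m=2 (12 < 31, so l=3);
     * the loop ends with l == 3 and groups[3] == 31, so groupmember()
     * would return 1.
     */
    (void)groups;
}
#endif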
int
vm_wait_doms(const domainset_t *wdoms, int mflags)
{
    return (0);
}

void
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
    struct domainset_ref *dr, int *domain, int *flags)
{
    *domain = 0;
}

int
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{
    //return (EJUSTRETURN);
    return (0);
}

vm_offset_t
kmem_malloc_domainset(struct domainset *ds, vm_size_t size, int flags)
{
    return (kmem_malloc(size, flags));
}

void *
mallocarray(size_t nmemb, size_t size, struct malloc_type *type, int flags)
{
    return (malloc(size * nmemb, type, flags));
}

void
getcredhostuuid(struct ucred *cred, char *buf, size_t size)
{
    mtx_lock(&cred->cr_prison->pr_mtx);
    strlcpy(buf, cred->cr_prison->pr_hostuuid, size);
    mtx_unlock(&cred->cr_prison->pr_mtx);
}

void
getjailname(struct ucred *cred, char *name, size_t len)
{
    mtx_lock(&cred->cr_prison->pr_mtx);
    strlcpy(name, cred->cr_prison->pr_name, len);
    mtx_unlock(&cred->cr_prison->pr_mtx);
}

void *
malloc_domainset(size_t size, struct malloc_type *mtp, struct domainset *ds,
    int flags)
{
    return (malloc(size, mtp, flags));
}

void *
malloc_exec(size_t size, struct malloc_type *mtp, int flags)
{

    return (malloc(size, mtp, flags));
}

int
bus_get_domain(device_t dev, int *domain)
{
    return (-1);
}

void
cru2xt(struct thread *td, struct xucred *xcr)
{
    cru2x(td->td_ucred, xcr);
    xcr->cr_pid = td->td_proc->p_pid;
}

/*
 * Set socket peer credentials at connection time.
 *
 * The client's PCB credentials are copied from its process structure.  The
 * server's PCB credentials are copied from the socket on which it called
 * listen(2).  uipc_listen cached that process's credentials at the time.
 */
void
unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
    struct unpcb *server_unp, struct unpcb *listen_unp)
{
    cru2xt(td, &client_unp->unp_peercred);
    client_unp->unp_flags |= UNP_HAVEPC;

    memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred,
        sizeof(server_unp->unp_peercred));
    server_unp->unp_flags |= UNP_HAVEPC;
    client_unp->unp_flags |= (listen_unp->unp_flags & UNP_WANTCRED_MASK);
}

int
eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
    int flags)
{
    return (0);
}

void
sched_prio(struct thread *td, u_char prio)
{

}

/*
 * The machine independent parts of context switching.
 *
 * The thread lock is required on entry and is no longer held on return.
 */
void
mi_switch(int flags)
{

}

int
sched_is_bound(struct thread *td)
{
    return (1);
}
/*
 * This function must not be called within a read section.
 */
void
ck_epoch_synchronize_wait(struct ck_epoch *global,
    ck_epoch_wait_cb_t *cb, void *ct)
{

}

bool
ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred)
{
    return (true);
}

void
_ck_epoch_addref(struct ck_epoch_record *record,
    struct ck_epoch_section *section)
{

}

bool
_ck_epoch_delref(struct ck_epoch_record *record,
    struct ck_epoch_section *section)
{
    return (true);
}

void
ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record,
    void *ct)
{

}

void
ck_epoch_init(struct ck_epoch *global)
{

}

#if 0
void
wakeup_any(const void *ident)
{

}
#endif

/*
 * kmem_bootstrap_free:
 *
 *	Free pages backing preloaded data (e.g., kernel modules) to the
 *	system.  Currently only supported on platforms that create a
 *	vm_phys segment for preloaded data.
 */
void
kmem_bootstrap_free(vm_offset_t start, vm_size_t size)
{

}

#if 0
int
elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused)
{
    return (0);
}
#endif

int
pmap_change_prot(vm_offset_t va, vm_size_t size, vm_prot_t prot)
{
    return (0);
}

void *
memset_early(void *buf, int c, size_t len)
{
    return (memset(buf, c, len));
}

int
elf_reloc_late(linker_file_t lf, Elf_Addr relocbase, const void *data,
    int type, elf_lookup_fn lookup)
{
    return (0);
}

bool
elf_is_ifunc_reloc(Elf_Size r_info)
{
    return (true);
}

void
sleepq_chains_remove_matching(bool (*matches)(struct thread *))
{

}

u_int
vm_free_count(void)
{
    return (vm_dom[0].vmd_free_count);
}

struct proc *
pfind_any(pid_t pid)
{
    return (curproc);
}