/*
 * Copyright (c) 2010 Kip Macy. All rights reserved.
 * Copyright (C) 2017 THL A29 Limited, a Tencent company.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Derived in part from libplebnet's pn_glue.c.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/event.h>
#include <sys/jail.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/time.h>
#include <sys/ucred.h>
#include <sys/uio.h>
#include <sys/bus.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/vmem.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/sched.h>
#include <sys/vmmeter.h>
#include <sys/unpcb.h>
#include <sys/eventfd.h>
#include <sys/linker.h>
#include <sys/sleepqueue.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_domainset.h>
#include <vm/vm_page.h>
#include <vm/vm_pagequeue.h>

#include <netinet/in_systm.h>

#include <ck_epoch.h>
#include <ck_stack.h>

#include "ff_host_interface.h"

int kstack_pages = KSTACK_PAGES;
SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0,
    "Kernel stack size in pages");

int __read_mostly vm_ndomains = 1;
SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

#ifndef MAXMEMDOM
#define MAXMEMDOM 1
#endif

struct domainset __read_mostly domainset_fixed[MAXMEMDOM];
struct domainset __read_mostly domainset_prefer[MAXMEMDOM];
struct domainset __read_mostly domainset_roundrobin;

struct vm_domain vm_dom[MAXMEMDOM];

domainset_t __exclusive_cache_line vm_min_domains;

int bootverbose;

SYSCTL_ROOT_NODE(0, sysctl, CTLFLAG_RW, 0, "Sysctl internal magic");

SYSCTL_ROOT_NODE(CTL_VFS, vfs, CTLFLAG_RW, 0, "File system");

SYSCTL_ROOT_NODE(CTL_KERN, kern, CTLFLAG_RW, 0, "High kernel, proc, limits &c");

SYSCTL_ROOT_NODE(CTL_NET, net, CTLFLAG_RW, 0, "Network, (see socket.h)");

SYSCTL_ROOT_NODE(CTL_MACHDEP, machdep, CTLFLAG_RW, 0, "machine dependent");

SYSCTL_ROOT_NODE(CTL_VM, vm, CTLFLAG_RW, 0, "Virtual memory");

SYSCTL_ROOT_NODE(CTL_DEBUG, debug, CTLFLAG_RW, 0, "Debugging");

SYSCTL_ROOT_NODE(OID_AUTO, security, CTLFLAG_RW, 0, "Security");

SYSCTL_NODE(_kern, OID_AUTO, features, CTLFLAG_RD, 0, "Kernel Features");

SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table");

MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");
static MALLOC_DEFINE(M_CRED, "cred", "credentials");
static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");

MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");

static void configure_final(void *dummy);

SYSINIT(configure3, SI_SUB_CONFIGURE, SI_ORDER_ANY, configure_final, NULL);

volatile int ticks;
int cpu_disable_deep_sleep;

static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS);

/* This is used in modules that need to work in both SMP and UP. */
cpuset_t all_cpus;

int mp_ncpus = 1;
/* export this for libkvm consumers. */
int mp_maxcpus = MAXCPU;

volatile int smp_started;
u_int mp_maxid;

static SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL,
    "Kernel SMP");

SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0,
    "Max CPU ID.");

SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus,
    0, "Max number of CPUs that the system was compiled for.");

SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD | CTLTYPE_INT, NULL, 0,
    sysctl_kern_smp_active, "I", "Indicates system is running in SMP mode");

int smp_disabled = 0;    /* has smp been disabled? */
SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
    &smp_disabled, 0, "SMP has been disabled from the loader");

int smp_cpus = 1;    /* how many cpu's running */
SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_cpus, 0,
    "Number of CPUs online");

int smp_topology = 0;    /* Which topology we're using. */
SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RDTUN, &smp_topology, 0,
    "Topology override setting; 0 is default provided by hardware.");

u_int vn_lock_pair_pause_max = 1;    /* ff_global_cfg.freebsd.hz / 100 */
SYSCTL_UINT(_debug, OID_AUTO, vn_lock_pair_pause_max, CTLFLAG_RW,
    &vn_lock_pair_pause_max, 0,
    "Max ticks for vn_lock_pair deadlock avoidance sleep");

long first_page = 0;

struct vmmeter vm_cnt;
vm_map_t kernel_map = 0;
vm_map_t kmem_map = 0;

vmem_t *kernel_arena = NULL;
vmem_t *kmem_arena = NULL;

struct vm_object kernel_object_store;
struct vm_object kmem_object_store;

struct filterops fs_filtops;
struct filterops sig_filtops;

int cold = 1;

int unmapped_buf_allowed = 1;

int cpu_deepest_sleep = 0;    /* Deepest Cx state available. */
int cpu_disable_c2_sleep = 0; /* Timer dies in C2. */
int cpu_disable_c3_sleep = 0; /* Timer dies in C3. */

u_char __read_frequently kdb_active = 0;

static void timevalfix(struct timeval *);

/* Extra care is taken with this sysctl because the data type is volatile */
static int
sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS)
{
    int error, active;

    active = smp_started;
    error = SYSCTL_OUT(req, &active, sizeof(active));
    return (error);
}

void
procinit(void)
{
    sx_init(&allproc_lock, "allproc");
    LIST_INIT(&allproc);
}


/*
 * Find a prison that is a descendant of mypr.  Returns a locked prison or NULL.
 */
struct prison *
prison_find_child(struct prison *mypr, int prid)
{
    return (NULL);
}

void
prison_free(struct prison *pr)
{

}

void
prison_hold_locked(struct prison *pr)
{

}

int
prison_if(struct ucred *cred, const struct sockaddr *sa)
{
    return (0);
}

int
prison_check_af(struct ucred *cred, int af)
{
    return (0);
}

int
prison_check_ip4(const struct ucred *cred, const struct in_addr *ia)
{
    return (0);
}

int
prison_equal_ip4(struct prison *pr1, struct prison *pr2)
{
    return (1);
}

#ifdef INET6
int
prison_check_ip6(const struct ucred *cred, const struct in6_addr *ia)
{
    return (0);
}

int
prison_equal_ip6(struct prison *pr1, struct prison *pr2)
{
    return (1);
}
#endif

/*
 * See if a prison has the specific flag set.
 */
int
prison_flag(struct ucred *cred, unsigned flag)
{
    /* This is an atomic read, so no locking is necessary. */
    return (flag & PR_HOST);
}

int
prison_get_ip4(struct ucred *cred, struct in_addr *ia)
{
    return (0);
}

int
prison_local_ip4(struct ucred *cred, struct in_addr *ia)
{
    return (0);
}

int
prison_remote_ip4(struct ucred *cred, struct in_addr *ia)
{
    return (0);
}

#ifdef INET6
int
prison_get_ip6(struct ucred *cred, struct in6_addr *ia)
{
    return (0);
}

int
prison_local_ip6(struct ucred *cred, struct in6_addr *ia, int other)
{
    return (0);
}

int
prison_remote_ip6(struct ucred *cred, struct in6_addr *ia)
{
    return (0);
}
#endif

int
prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia)
{
    /* not jailed */
    return (1);
}

#ifdef INET6
int
prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia)
{
    /* not jailed */
    return (1);
}
#endif

#if 0
int
jailed(struct ucred *cred)
{
    return (0);
}
#endif

/*
 * Return 1 if the passed credential is in a jail and that jail does not
 * have its own virtual network stack, otherwise 0.
 */
int
jailed_without_vnet(struct ucred *cred)
{
    return (0);
}

int
priv_check(struct thread *td, int priv)
{
    return (0);
}

int
priv_check_cred(struct ucred *cred, int priv)
{
    return (0);
}


int
vslock(void *addr, size_t len)
{
    return (0);
}

void
vsunlock(void *addr, size_t len)
{

}


/*
 * Check that a proposed value to load into the .it_value or
 * .it_interval part of an interval timer is acceptable, and
 * fix it to have at least minimal value (i.e. if it is less
 * than the resolution of the clock, round it up.)
 */
int
itimerfix(struct timeval *tv)
{

    if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000)
        return (EINVAL);
    if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick)
        tv->tv_usec = tick;
    return (0);
}
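
/*
 * Illustrative sketch (not part of the glue): how a hypothetical caller
 * might validate a user-supplied itimerval with itimerfix() before
 * arming a timer.  The function and its "aitv" argument are assumptions
 * of this example.
 */
#if 0
static int
example_validate_itimer(struct itimerval *aitv)
{
    /* Reject out-of-range fields; sub-tick values are rounded up. */
    if (itimerfix(&aitv->it_value) != 0 ||
        itimerfix(&aitv->it_interval) != 0)
        return (EINVAL);
    return (0);
}
#endif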

/*
 * Decrement an interval timer by a specified number
 * of microseconds, which must be less than a second,
 * i.e. < 1000000.  If the timer expires, then reload
 * it.  In this case, carry over (usec - old value) to
 * reduce the value reloaded into the timer so that
 * the timer does not drift.  This routine assumes
 * that it is called in a context where the timers
 * on which it is operating cannot change in value.
 */
int
itimerdecr(struct itimerval *itp, int usec)
{
    if (itp->it_value.tv_usec < usec) {
        if (itp->it_value.tv_sec == 0) {
            /* expired, and already in next interval */
            usec -= itp->it_value.tv_usec;
            goto expire;
        }
        itp->it_value.tv_usec += 1000000;
        itp->it_value.tv_sec--;
    }
    itp->it_value.tv_usec -= usec;
    usec = 0;
    if (timevalisset(&itp->it_value))
        return (1);
    /* expired, exactly at end of interval */
expire:
    if (timevalisset(&itp->it_interval)) {
        itp->it_value = itp->it_interval;
        itp->it_value.tv_usec -= usec;
        if (itp->it_value.tv_usec < 0) {
            itp->it_value.tv_usec += 1000000;
            itp->it_value.tv_sec--;
        }
    } else
        itp->it_value.tv_usec = 0;        /* sec is already 0 */
    return (0);
}
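
/*
 * Illustrative sketch: how a hypothetical per-tick handler could drive
 * itimerdecr().  "it" is an assumed per-context interval timer; "tick"
 * is the global microseconds-per-tick.  A zero return means the timer
 * expired (and was reloaded from it_interval, if one is set).
 */
#if 0
static void
example_timer_tick(struct itimerval *it)
{
    if (timevalisset(&it->it_value) && itimerdecr(it, tick) == 0)
        printf("interval timer expired\n");
}
#endif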

/*
 * Add and subtract routines for timevals.
 * N.B.: subtract routine doesn't deal with
 * results which are before the beginning,
 * it just gets very confused in this case.
 * Caveat emptor.
 */
void
timevaladd(struct timeval *t1, const struct timeval *t2)
{
    t1->tv_sec += t2->tv_sec;
    t1->tv_usec += t2->tv_usec;
    timevalfix(t1);
}

void
timevalsub(struct timeval *t1, const struct timeval *t2)
{
    t1->tv_sec -= t2->tv_sec;
    t1->tv_usec -= t2->tv_usec;
    timevalfix(t1);
}

static void
timevalfix(struct timeval *t1)
{
    if (t1->tv_usec < 0) {
        t1->tv_sec--;
        t1->tv_usec += 1000000;
    }
    if (t1->tv_usec >= 1000000) {
        t1->tv_sec++;
        t1->tv_usec -= 1000000;
    }
}
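
/*
 * Illustrative sketch: computing an elapsed interval with the timeval
 * helpers above.  Per the caveat on timevalsub(), "stop" must not be
 * earlier than "start".  This example function is an assumption, not
 * part of the glue.
 */
#if 0
static void
example_elapsed(const struct timeval *start, const struct timeval *stop,
    struct timeval *elapsed)
{
    *elapsed = *stop;
    timevalsub(elapsed, start);    /* result normalized by timevalfix() */
}
#endif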

/*
 * ratecheck(): simple time-based rate-limit checking.
 */
int
ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
{
    struct timeval tv, delta;
    int rv = 0;

    getmicrouptime(&tv);        /* NB: 10ms precision */
    delta = tv;
    timevalsub(&delta, lasttime);

    /*
     * The check for 0,0 ensures the message will be seen at least once,
     * even if the interval is huge.
     */
    if (timevalcmp(&delta, mininterval, >=) ||
        (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
        *lasttime = tv;
        rv = 1;
    }

    return (rv);
}
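
/*
 * Illustrative sketch: rate-limiting a diagnostic to once every ten
 * seconds with ratecheck().  The statics and the ten-second interval
 * are assumptions of this example, not state kept by the glue.
 */
#if 0
static void
example_rate_limited_warning(void)
{
    static struct timeval lasttime;
    static const struct timeval mininterval = { 10, 0 };

    if (ratecheck(&lasttime, &mininterval))
        printf("resource exhausted (message rate-limited)\n");
}
#endif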

/*
 * ppsratecheck(): packets (or events) per second limitation.
 *
 * Return 0 if the limit is to be enforced (e.g. the caller
 * should drop a packet because of the rate limitation).
 *
 * maxpps of 0 always causes zero to be returned.  maxpps of -1
 * always causes 1 to be returned; this effectively defeats rate
 * limiting.
 *
 * Note that we maintain the struct timeval for compatibility
 * with other bsd systems.  We reuse the storage and just monitor
 * clock ticks for minimal overhead.
 */
int
ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
{
    int now;

    /*
     * Reset the last time and counter if this is the first call
     * or more than a second has passed since the last update of
     * lasttime.
     */
    now = ticks;
    if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) {
        lasttime->tv_sec = now;
        *curpps = 1;
        return (maxpps != 0);
    } else {
        (*curpps)++;        /* NB: ignore potential overflow */
        return (maxpps < 0 || *curpps < maxpps);
    }
}
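
/*
 * Illustrative sketch: dropping events beyond an assumed budget of 100
 * per second with ppsratecheck().  A zero return means the caller
 * should enforce the limit (e.g. drop the packet).  The statics are
 * assumptions of this example.
 */
#if 0
static int
example_should_accept_packet(void)
{
    static struct timeval lasttime;
    static int curpps;

    return (ppsratecheck(&lasttime, &curpps, 100));
}
#endif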

/*
 * Compute number of ticks in the specified amount of time.
 */
int
tvtohz(struct timeval *tv)
{
    unsigned long ticks;
    long sec, usec;

    /*
     * If the number of usecs in the whole seconds part of the time
     * difference fits in a long, then the total number of usecs will
     * fit in an unsigned long.  Compute the total and convert it to
     * ticks, rounding up and adding 1 to allow for the current tick
     * to expire.  Rounding also depends on unsigned long arithmetic
     * to avoid overflow.
     *
     * Otherwise, if the number of ticks in the whole seconds part of
     * the time difference fits in a long, then convert the parts to
     * ticks separately and add, using similar rounding methods and
     * overflow avoidance.  This method would work in the previous
     * case but it is slightly slower and assumes that hz is integral.
     *
     * Otherwise, round the time difference down to the maximum
     * representable value.
     *
     * If ints have 32 bits, then the maximum value for any timeout in
     * 10ms ticks is 248 days.
     */
    sec = tv->tv_sec;
    usec = tv->tv_usec;
    if (usec < 0) {
        sec--;
        usec += 1000000;
    }
    if (sec < 0) {
#ifdef DIAGNOSTIC
        if (usec > 0) {
            sec++;
            usec -= 1000000;
        }
        printf("tvtohz: negative time difference %ld sec %ld usec\n",
               sec, usec);
#endif
        ticks = 1;
    } else if (sec <= LONG_MAX / 1000000)
        ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
            / tick + 1;
    else if (sec <= LONG_MAX / hz)
        ticks = sec * hz
            + ((unsigned long)usec + (tick - 1)) / tick + 1;
    else
        ticks = LONG_MAX;
    if (ticks > INT_MAX)
        ticks = INT_MAX;
    return ((int)ticks);
}
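
/*
 * Illustrative sketch: converting a relative timeout into ticks for a
 * sleep primitive.  The one-second timeout is an assumption of this
 * example.
 */
#if 0
static int
example_timeout_ticks(void)
{
    struct timeval tv;

    tv.tv_sec = 1;
    tv.tv_usec = 0;
    return (tvtohz(&tv));    /* roughly hz ticks, plus rounding */
}
#endif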

int
copyin(const void *uaddr, void *kaddr, size_t len)
{
    memcpy(kaddr, uaddr, len);
    return (0);
}

int
copyout(const void *kaddr, void *uaddr, size_t len)
{
    memcpy(uaddr, kaddr, len);
    return (0);
}

#if 0
int
copystr(const void *kfaddr, void *kdaddr, size_t len, size_t *done)
{
    size_t bytes;

    bytes = strlcpy(kdaddr, kfaddr, len);
    if (done != NULL)
        *done = bytes;

    return (0);
}
#endif

int
copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
{
    size_t bytes;

    bytes = strlcpy(kaddr, uaddr, len);
    if (done != NULL)
        *done = bytes;

    return (0);
}

int
copyiniov(const struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error)
{
    u_int iovlen;

    *iov = NULL;
    if (iovcnt > UIO_MAXIOV)
        return (error);
    iovlen = iovcnt * sizeof (struct iovec);
    *iov = malloc(iovlen, M_IOV, M_WAITOK);
    error = copyin(iovp, *iov, iovlen);
    if (error) {
        free(*iov, M_IOV);
        *iov = NULL;
    }
    return (error);
}

int
subyte(volatile void *base, int byte)
{
    *(volatile char *)base = (uint8_t)byte;
    return (0);
}

static inline int
chglimit(struct uidinfo *uip, long *limit, int diff, rlim_t max, const char *name)
{
    /* Don't allow them to exceed max, but allow subtraction. */
    if (diff > 0 && max != 0) {
        if (atomic_fetchadd_long(limit, (long)diff) + diff > max) {
            atomic_subtract_long(limit, (long)diff);
            return (0);
        }
    } else {
        atomic_add_long(limit, (long)diff);
        if (*limit < 0)
            printf("negative %s for uid = %d\n", name, uip->ui_uid);
    }
    return (1);
}

/*
 * Change the count associated with number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit
 */
int
chgproccnt(struct uidinfo *uip, int diff, rlim_t max)
{
    return (chglimit(uip, &uip->ui_proccnt, diff, max, "proccnt"));
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max)
{
    int diff, rv;

    diff = to - *hiwat;
    if (diff > 0 && max == 0) {
        rv = 0;
    } else {
        rv = chglimit(uip, &uip->ui_sbsize, diff, max, "sbsize");
        if (rv != 0)
            *hiwat = to;
    }
    return (rv);
}

/*
 * Change the count associated with number of pseudo-terminals
 * a given user is using.  When 'max' is 0, don't enforce a limit
 */
int
chgptscnt(struct uidinfo *uip, int diff, rlim_t max)
{
    return (chglimit(uip, &uip->ui_ptscnt, diff, max, "ptscnt"));
}

int
chgkqcnt(struct uidinfo *uip, int diff, rlim_t max)
{
    return (chglimit(uip, &uip->ui_kqcnt, diff, max, "kqcnt"));
}

int
chgumtxcnt(struct uidinfo *uip, int diff, rlim_t max)
{
    return (chglimit(uip, &uip->ui_umtxcnt, diff, max, "umtxcnt"));
}
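
/*
 * Illustrative sketch: how a hypothetical socket-buffer grower might
 * use chgsbsize() above.  The function and its arguments are
 * assumptions of this example; a zero return means the per-uid limit
 * was hit.
 */
#if 0
static int
example_grow_sockbuf(struct uidinfo *uip, u_int *hiwat, u_int newsize,
    rlim_t sbsize_limit)
{
    if (!chgsbsize(uip, hiwat, newsize, sbsize_limit))
        return (ENOBUFS);
    return (0);
}
#endif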

/*
 * Allocate a new resource limits structure and initialize its
 * reference count and mutex pointer.
 */
struct plimit *
lim_alloc(void)
{
    struct plimit *limp;

    limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
    refcount_init(&limp->pl_refcnt, 1);
    return (limp);
}

struct plimit *
lim_hold(struct plimit *limp)
{
    refcount_acquire(&limp->pl_refcnt);
    return (limp);
}

#if 0
/*
 * Return the current (soft) limit for a particular system resource.
 * The 'which' parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct thread *td, int which)
{
    struct rlimit rl;

    lim_rlimit(td, which, &rl);
    return (rl.rlim_cur);
}
#endif

rlim_t
lim_cur_proc(struct proc *p, int which)
{
    struct rlimit rl;

    lim_rlimit_proc(p, which, &rl);
    return (rl.rlim_cur);
}

/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct thread *td, int which, struct rlimit *rlp)
{
    struct proc *p = td->td_proc;

    MPASS(td == curthread);
    KASSERT(which >= 0 && which < RLIM_NLIMITS,
        ("request for invalid resource limit"));
    *rlp = p->p_limit->pl_rlimit[which];
    if (p->p_sysent->sv_fixlimit != NULL)
        p->p_sysent->sv_fixlimit(rlp, which);
}

void
lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp)
{
    PROC_LOCK_ASSERT(p, MA_OWNED);
    KASSERT(which >= 0 && which < RLIM_NLIMITS,
        ("request for invalid resource limit"));
    *rlp = p->p_limit->pl_rlimit[which];
    if (p->p_sysent->sv_fixlimit != NULL)
        p->p_sysent->sv_fixlimit(rlp, which);
}

int
useracc(void *addr, int len, int rw)
{
    return (1);
}

struct pgrp *
pgfind(pid_t pgid)
{
    return (NULL);
}

#if 0
struct proc *
zpfind(pid_t pid)
{
    return (NULL);
}
#endif

int
p_cansee(struct thread *td, struct proc *p)
{
    return (0);
}

struct proc *
pfind(pid_t pid)
{
    return (NULL);
}

int
pget(pid_t pid, int flags, struct proc **pp)
{
    return (ESRCH);
}

struct uidinfo uid0;

struct uidinfo *
uifind(uid_t uid)
{
    return (&uid0);
}

/*
 * Allocate a zeroed cred structure.
 */
struct ucred *
crget(void)
{
    struct ucred *cr;

    cr = malloc(sizeof(*cr), M_CRED, M_WAITOK | M_ZERO);
    refcount_init(&cr->cr_ref, 1);

    return (cr);
}

/*
 * Claim another reference to a ucred structure.
 */
struct ucred *
crhold(struct ucred *cr)
{
    refcount_acquire(&cr->cr_ref);
    return (cr);
}

/*
 * Free a cred structure.  Throws away space when ref count gets to 0.
 */
void
crfree(struct ucred *cr)
{
    KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref));
    KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred"));
    if (refcount_release(&cr->cr_ref))
        free(cr, M_CRED);
}
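
/*
 * Illustrative sketch of the ucred lifecycle implemented above:
 * allocate with one reference, share with crhold(), release each
 * reference with crfree().  Purely an example; nothing in the glue
 * calls this.
 */
#if 0
static void
example_cred_lifecycle(void)
{
    struct ucred *cr, *cr2;

    cr = crget();        /* refcount == 1 */
    cr2 = crhold(cr);    /* refcount == 2, cr2 == cr */
    crfree(cr2);         /* refcount == 1 */
    crfree(cr);          /* refcount == 0, memory freed */
}
#endif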

/*
 * Fill in a struct xucred based on a struct ucred.
 */

void
cru2x(struct ucred *cr, struct xucred *xcr)
{
#if 0
    int ngroups;

    bzero(xcr, sizeof(*xcr));
    xcr->cr_version = XUCRED_VERSION;
    xcr->cr_uid = cr->cr_uid;

    ngroups = MIN(cr->cr_ngroups, XU_NGROUPS);
    xcr->cr_ngroups = ngroups;
    bcopy(cr->cr_groups, xcr->cr_groups,
        ngroups * sizeof(*cr->cr_groups));
#endif
}


int
cr_cansee(struct ucred *u1, struct ucred *u2)
{
    return (0);
}

int
cr_canseesocket(struct ucred *cred, struct socket *so)
{
    return (0);
}

int
cr_canseeinpcb(struct ucred *cred, struct inpcb *inp)
{
    return (0);
}

int
securelevel_gt(struct ucred *cr, int level)
{
    return (0);
}

int
securelevel_ge(struct ucred *cr, int level)
{
    return (0);
}

/**
 * @brief Send a 'notification' to userland, using standard ways
 */
void
devctl_notify(const char *system, const char *subsystem, const char *type,
    const char *data)
{

}

void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

}

static void
configure_final(void *dummy)
{
    cold = 0;
}

/*
 * Send a SIGIO or SIGURG signal to a process or process group using stored
 * credentials rather than those of the current process.
 */
void
pgsigio(struct sigio **sigiop, int sig, int checkctty)
{
    panic("SIGIO not supported yet\n");
#ifdef notyet
    ksiginfo_t ksi;
    struct sigio *sigio;

    ksiginfo_init(&ksi);
    ksi.ksi_signo = sig;
    ksi.ksi_code = SI_KERNEL;

    SIGIO_LOCK();
    sigio = *sigiop;
    if (sigio == NULL) {
        SIGIO_UNLOCK();
        return;
    }
    if (sigio->sio_pgid > 0) {
        PROC_LOCK(sigio->sio_proc);
        if (CANSIGIO(sigio->sio_ucred, sigio->sio_proc->p_ucred))
            psignal(sigio->sio_proc, sig);
        PROC_UNLOCK(sigio->sio_proc);
    } else if (sigio->sio_pgid < 0) {
        struct proc *p;

        PGRP_LOCK(sigio->sio_pgrp);
        LIST_FOREACH(p, &sigio->sio_pgrp->pg_members, p_pglist) {
            PROC_LOCK(p);
            if (CANSIGIO(sigio->sio_ucred, p->p_ucred) &&
                (checkctty == 0 || (p->p_flag & P_CONTROLT)))
                psignal(p, sig);
            PROC_UNLOCK(p);
        }
        PGRP_UNLOCK(sigio->sio_pgrp);
    }
    SIGIO_UNLOCK();
#endif
}

void
kproc_exit(int ecode)
{
    panic("kproc_exit unsupported");
}

vm_offset_t
kmem_malloc(vm_size_t bytes, int flags)
{
    void *alloc = ff_mmap(NULL, bytes, ff_PROT_READ | ff_PROT_WRITE,
        ff_MAP_ANON | ff_MAP_PRIVATE, -1, 0);

    if ((flags & M_ZERO) && alloc != NULL)
        bzero(alloc, bytes);
    return ((vm_offset_t)alloc);
}

void
kmem_free(vm_offset_t addr, vm_size_t size)
{
    ff_munmap((void *)addr, size);
}
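
/*
 * Illustrative sketch: the kmem_* glue above is a thin wrapper over
 * ff_mmap()/ff_munmap(), so a page-sized scratch buffer can be taken
 * and returned like this.  The function and its PAGE_SIZE use are
 * assumptions of this example.
 */
#if 0
static void
example_kmem_roundtrip(void)
{
    vm_offset_t va;

    va = kmem_malloc(PAGE_SIZE, M_ZERO);
    if (va != 0) {
        /* ... use ((void *)va) ... */
        kmem_free(va, PAGE_SIZE);
    }
}
#endif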

vm_offset_t
kmem_alloc_contig(vm_size_t size, int flags, vm_paddr_t low,
    vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr)
{
    return (kmem_malloc(size, flags));
}

void
malloc_init(void *data)
{
    /* Nothing to do here */
}


void
malloc_uninit(void *data)
{
    /* Nothing to do here */
}

void *
malloc(unsigned long size, struct malloc_type *type, int flags)
{
    void *alloc;

    do {
        alloc = ff_malloc(size);
        if (alloc || !(flags & M_WAITOK))
            break;

        pause("malloc", hz/100);
    } while (alloc == NULL);

    if ((flags & M_ZERO) && alloc != NULL)
        bzero(alloc, size);
    return (alloc);
}

void
free(void *addr, struct malloc_type *type)
{
    ff_free(addr);
}

void *
realloc(void *addr, unsigned long size, struct malloc_type *type,
    int flags)
{
    return (ff_realloc(addr, size));
}

void *
reallocf(void *addr, unsigned long size, struct malloc_type *type,
     int flags)
{
    void *mem;

    if ((mem = ff_realloc(addr, size)) == NULL)
        ff_free(addr);

    return (mem);
}
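
/*
 * Illustrative sketch of the allocator semantics above: M_NOWAIT may
 * return NULL, M_WAITOK retries (with pauses) until memory appears,
 * and reallocf() frees the old block on failure.  M_TEMP is reused
 * from this file; everything else is example-local.
 */
#if 0
static int
example_alloc_pattern(size_t nbytes)
{
    void *p;

    p = malloc(nbytes, M_TEMP, M_NOWAIT | M_ZERO);
    if (p == NULL)
        return (ENOMEM);

    p = reallocf(p, nbytes * 2, M_TEMP, M_NOWAIT);
    if (p == NULL)
        return (ENOMEM);    /* old block already freed */

    free(p, M_TEMP);
    return (0);
}
#endif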

void
DELAY(int delay)
{
    struct timespec rqt;

    if (delay < 1000)
        return;

    rqt.tv_sec = delay / 1000000;
    rqt.tv_nsec = 1000 * ((unsigned long)delay % 1000000);
    /*
     * FIXME: We shouldn't sleep in DPDK apps.
     */
    /* nanosleep(&rqt, NULL); */
}

void
bwillwrite(void)
{

}

off_t
foffset_lock(struct file *fp, int flags)
{
    struct mtx *mtxp;
    off_t res;

    KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed"));

#if OFF_MAX <= LONG_MAX
    /*
     * Caller only wants the current f_offset value.  Assume that
     * reads of long and shorter integer types are atomic.
     */
    if ((flags & FOF_NOLOCK) != 0)
        return (fp->f_offset);
#endif

    /*
     * According to McKusick the vn lock was protecting f_offset here.
     * It is now protected by the FOFFSET_LOCKED flag.
     */
    mtxp = mtx_pool_find(mtxpool_sleep, fp);
    mtx_lock(mtxp);
    /*
    if ((flags & FOF_NOLOCK) == 0) {
        while (fp->f_vnread_flags & FOFFSET_LOCKED) {
            fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
            msleep(&fp->f_vnread_flags, mtxp, PUSER - 1,
                "vofflock", 0);
        }
        fp->f_vnread_flags |= FOFFSET_LOCKED;
    }
    */
    res = fp->f_offset;
    mtx_unlock(mtxp);
    return (res);
}

#if 0
void
sf_ext_free(void *arg1, void *arg2)
{
    panic("sf_ext_free not implemented.\n");
}

void
sf_ext_free_nocache(void *arg1, void *arg2)
{
    panic("sf_ext_free_nocache not implemented.\n");
}
#endif

void
sched_bind(struct thread *td, int cpu)
{

}

void
sched_unbind(struct thread *td)
{

}

void
getcredhostid(struct ucred *cred, unsigned long *hostid)
{
    *hostid = 0;
}

/*
 * Check if gid is a member of the group set.
 */
int
groupmember(gid_t gid, struct ucred *cred)
{
    int l;
    int h;
    int m;

    if (cred->cr_groups[0] == gid)
        return (1);

    /*
     * If gid was not our primary group, perform a binary search
     * of the supplemental groups.  This is possible because we
     * sort the groups in crsetgroups().
     */
    l = 1;
    h = cred->cr_ngroups;
    while (l < h) {
        m = l + ((h - l) / 2);
        if (cred->cr_groups[m] < gid)
            l = m + 1;
        else
            h = m;
    }
    if ((l < cred->cr_ngroups) && (cred->cr_groups[l] == gid))
        return (1);

    return (0);
}
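
/*
 * Illustrative sketch: groupmember() relies on crsetgroups() having
 * sorted cr_groups[1..] so the binary search above is valid.  A
 * hypothetical permission check might use it like this; the function
 * and its file_gid argument are assumptions of this example.
 */
#if 0
static int
example_may_access(struct ucred *cred, gid_t file_gid)
{
    /* Primary group is cr_groups[0]; supplemental groups are sorted. */
    return (groupmember(file_gid, cred));
}
#endif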

int
vm_wait_doms(const domainset_t *wdoms, int mflags)
{
    return (0);
}

void
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
    struct domainset_ref *dr, int *domain, int *flags)
{
    *domain = 0;
}

int
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{
    /* return (EJUSTRETURN); */
    return (0);
}

vm_offset_t
kmem_malloc_domainset(struct domainset *ds, vm_size_t size, int flags)
{
    return (kmem_malloc(size, flags));
}

void *
mallocarray(size_t nmemb, size_t size, struct malloc_type *type, int flags)
{
    /*
     * NB: unlike the stock kernel mallocarray(), this glue does not
     * check nmemb * size for multiplication overflow.
     */
    return (malloc(size * nmemb, type, flags));
}

void
getcredhostuuid(struct ucred *cred, char *buf, size_t size)
{
    mtx_lock(&cred->cr_prison->pr_mtx);
    strlcpy(buf, cred->cr_prison->pr_hostuuid, size);
    mtx_unlock(&cred->cr_prison->pr_mtx);
}

void
getjailname(struct ucred *cred, char *name, size_t len)
{
    mtx_lock(&cred->cr_prison->pr_mtx);
    strlcpy(name, cred->cr_prison->pr_name, len);
    mtx_unlock(&cred->cr_prison->pr_mtx);
}

void *
malloc_domainset(size_t size, struct malloc_type *mtp, struct domainset *ds,
    int flags)
{
    return (malloc(size, mtp, flags));
}

void *
malloc_exec(size_t size, struct malloc_type *mtp, int flags)
{

    return (malloc(size, mtp, flags));
}

int
bus_get_domain(device_t dev, int *domain)
{
    return (-1);
}

void
cru2xt(struct thread *td, struct xucred *xcr)
{
    cru2x(td->td_ucred, xcr);
    xcr->cr_pid = td->td_proc->p_pid;
}

/*
 * Set socket peer credentials at connection time.
 *
 * The client's PCB credentials are copied from its process structure.  The
 * server's PCB credentials are copied from the socket on which it called
 * listen(2).  uipc_listen cached that process's credentials at the time.
 */
void
unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
    struct unpcb *server_unp, struct unpcb *listen_unp)
{
    cru2xt(td, &client_unp->unp_peercred);
    client_unp->unp_flags |= UNP_HAVEPC;

    memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred,
        sizeof(server_unp->unp_peercred));
    server_unp->unp_flags |= UNP_HAVEPC;
    client_unp->unp_flags |= (listen_unp->unp_flags & UNP_WANTCRED_MASK);
}

int
eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
    int flags)
{
    return (0);
}

void
sched_prio(struct thread *td, u_char prio)
{

}

/*
 * The machine independent parts of context switching.
 *
 * The thread lock is required on entry and is no longer held on return.
 */
void
mi_switch(int flags)
{

}

int
sched_is_bound(struct thread *td)
{
    return (1);
}

/*
 * This function must not be called from within a read-side section.
 */
void
ck_epoch_synchronize_wait(struct ck_epoch *global,
    ck_epoch_wait_cb_t *cb, void *ct)
{

}

bool
ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred)
{
    return (true);
}

void
_ck_epoch_addref(struct ck_epoch_record *record,
    struct ck_epoch_section *section)
{

}

bool
_ck_epoch_delref(struct ck_epoch_record *record,
    struct ck_epoch_section *section)
{
    return (true);
}

void
ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record,
    void *ct)
{

}

void
ck_epoch_init(struct ck_epoch *global)
{

}

#if 0
void
wakeup_any(const void *ident)
{

}
#endif

/*
 * kmem_bootstrap_free:
 *
 * Free pages backing preloaded data (e.g., kernel modules) to the
 * system.  Currently only supported on platforms that create a
 * vm_phys segment for preloaded data.
 */
void
kmem_bootstrap_free(vm_offset_t start, vm_size_t size)
{

}

#if 0
int
elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused)
{
    return (0);
}
#endif

int
pmap_change_prot(vm_offset_t va, vm_size_t size, vm_prot_t prot)
{
    return (0);
}

void *
memset_early(void *buf, int c, size_t len)
{
    return (memset(buf, c, len));
}

int
elf_reloc_late(linker_file_t lf, Elf_Addr relocbase, const void *data,
    int type, elf_lookup_fn lookup)
{
    return (0);
}

bool
elf_is_ifunc_reloc(Elf_Size r_info)
{
    return (true);
}

void
sleepq_chains_remove_matching(bool (*matches)(struct thread *))
{

}

u_int
vm_free_count(void)
{
    return (vm_dom[0].vmd_free_count);
}

struct proc *
pfind_any(pid_t pid)
{
    return (curproc);
}
1461