1 /*- 2 * Copyright (c) 1995 Terrence R. Lambert 3 * All rights reserved. 4 * 5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * @(#)init_main.c 8.9 (Berkeley) 1/21/94 42 */ 43 44 #include <sys/cdefs.h> 45 __FBSDID("$FreeBSD$"); 46 47 #include "opt_ddb.h" 48 49 #include <sys/param.h> 50 #include <sys/kernel.h> 51 #include <sys/exec.h> 52 #include <sys/file.h> 53 #include <sys/filedesc.h> 54 #include <sys/jail.h> 55 #include <sys/ktr.h> 56 #include <sys/lock.h> 57 #include <sys/mount.h> 58 #include <sys/mutex.h> 59 #include <sys/syscallsubr.h> 60 #include <sys/sysctl.h> 61 #include <sys/proc.h> 62 #include <sys/resourcevar.h> 63 #include <sys/systm.h> 64 #include <sys/signalvar.h> 65 #include <sys/sysent.h> 66 #include <sys/reboot.h> 67 #include <sys/sched.h> 68 #include <sys/sx.h> 69 #include <sys/sysproto.h> 70 #include <sys/vmmeter.h> 71 #include <sys/unistd.h> 72 #include <sys/malloc.h> 73 #include <sys/conf.h> 74 #include <sys/cpuset.h> 75 #include <sys/eventhandler.h> 76 77 #include <machine/cpu.h> 78 79 #include <security/audit/audit.h> 80 #include <security/mac/mac_framework.h> 81 82 #include <vm/vm.h> 83 #include <vm/vm_param.h> 84 #include <vm/pmap.h> 85 #include <vm/vm_map.h> 86 #include <sys/copyright.h> 87 88 #include <ddb/ddb.h> 89 #include <ddb/db_sym.h> 90 91 void mi_startup(void); /* Should be elsewhere */ 92 93 /* Components of the first process -- never freed. 
*/ 94 struct proc proc0; 95 struct prison prison0; 96 struct thread0_storage thread0_st __aligned(16); 97 struct vmspace vmspace0; 98 struct proc *initproc; 99 #if 0 100 int boothowto = 0; /* initialized so that it can be patched */ 101 SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, ""); 102 int bootverbose; 103 SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, ""); 104 #endif 105 106 //#define VERBOSE_SYSINIT 107 108 109 /* 110 * This ensures that there is at least one entry so that the sysinit_set 111 * symbol is not undefined. A subsystem ID of SI_SUB_DUMMY is never 112 * executed. 113 */ 114 SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL); 115 116 /* 117 * The sysinit table itself. Items are checked off as they are run. 118 * If we want to register new sysinit types, add them to newsysinit. 119 */ 120 SET_DECLARE(sysinit_set, struct sysinit); 121 struct sysinit **sysinit, **sysinit_end; 122 struct sysinit **newsysinit, **newsysinit_end; 123 124 /* 125 * Merge a new sysinit set into the current set, reallocating it if 126 * necessary. This can only be called after malloc is running. 
127 */ 128 void 129 sysinit_add(struct sysinit **set, struct sysinit **set_end) 130 { 131 struct sysinit **newset; 132 struct sysinit **sipp; 133 struct sysinit **xipp; 134 int count; 135 136 count = set_end - set; 137 if (newsysinit) 138 count += newsysinit_end - newsysinit; 139 else 140 count += sysinit_end - sysinit; 141 newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT); 142 if (newset == NULL) 143 panic("cannot malloc for sysinit"); 144 xipp = newset; 145 if (newsysinit) 146 for (sipp = newsysinit; sipp < newsysinit_end; sipp++) 147 *xipp++ = *sipp; 148 else 149 for (sipp = sysinit; sipp < sysinit_end; sipp++) 150 *xipp++ = *sipp; 151 for (sipp = set; sipp < set_end; sipp++) 152 *xipp++ = *sipp; 153 if (newsysinit) 154 free(newsysinit, M_TEMP); 155 newsysinit = newset; 156 newsysinit_end = newset + count; 157 } 158 159 160 /* 161 * System startup; initialize the world, create process 0, mount root 162 * filesystem, and fork to create init and pagedaemon. Most of the 163 * hard work is done in the lower-level initialization routines including 164 * startup(), which does memory initialization and autoconfiguration. 165 * 166 * This allows simple addition of new kernel subsystems that require 167 * boot time initialization. It also allows substitution of subsystem 168 * (for instance, a scheduler, kernel profiler, or VM system) by object 169 * module. Finally, it allows for optional "kernel threads". 
170 */ 171 void 172 mi_startup(void) 173 { 174 175 register struct sysinit **sipp; /* system initialization*/ 176 register struct sysinit **xipp; /* interior loop of sort*/ 177 register struct sysinit *save; /* bubble*/ 178 struct sysinit **temp; 179 int size; 180 181 #ifdef VERBOSE_SYSINIT 182 int last; 183 int verbose; 184 #endif 185 186 if (sysinit == NULL) { 187 sysinit = SET_BEGIN(sysinit_set); 188 sysinit_end = SET_LIMIT(sysinit_set); 189 size = (uintptr_t)sysinit_end - (uintptr_t)sysinit; 190 temp = malloc(size, M_DEVBUF, M_WAITOK); 191 memcpy(temp, sysinit, size); 192 sysinit = temp; 193 sysinit_end = (struct sysinit **)(((uint8_t *)sysinit) + size); 194 } 195 196 restart: 197 /* 198 * Perform a bubble sort of the system initialization objects by 199 * their subsystem (primary key) and order (secondary key). 200 */ 201 for (sipp = sysinit; sipp < sysinit_end; sipp++) { 202 for (xipp = sipp + 1; xipp < sysinit_end; xipp++) { 203 if ((*sipp)->subsystem < (*xipp)->subsystem || 204 ((*sipp)->subsystem == (*xipp)->subsystem && 205 (*sipp)->order <= (*xipp)->order)) 206 continue; /* skip*/ 207 save = *sipp; 208 *sipp = *xipp; 209 *xipp = save; 210 } 211 } 212 213 #ifdef VERBOSE_SYSINIT 214 last = SI_SUB_COPYRIGHT; 215 verbose = 0; 216 #ifndef DDB 217 printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n"); 218 #endif 219 #endif 220 221 /* 222 * Traverse the (now) ordered list of system initialization tasks. 223 * Perform each task, and continue on to the next task. 224 * 225 * The last item on the list is expected to be the scheduler, 226 * which will not return. 
227 */ 228 for (sipp = sysinit; sipp < sysinit_end; sipp++) { 229 230 if ((*sipp)->subsystem == SI_SUB_DUMMY) 231 continue; /* skip dummy task(s)*/ 232 233 if ((*sipp)->subsystem == SI_SUB_DONE) 234 continue; 235 236 #ifdef VERBOSE_SYSINIT 237 if ((*sipp)->subsystem > last) { 238 verbose = 1; 239 last = (*sipp)->subsystem; 240 printf("subsystem %x\n", last); 241 } 242 if (verbose) { 243 #ifdef DDB 244 const char *name; 245 c_db_sym_t sym; 246 db_expr_t offset; 247 248 sym = db_search_symbol((vm_offset_t)(*sipp)->func, 249 DB_STGY_PROC, &offset); 250 db_symbol_values(sym, &name, NULL); 251 if (name != NULL) 252 printf(" %s(%p)... ", name, (*sipp)->udata); 253 else 254 #endif 255 printf(" %p(%p)... ", (*sipp)->func, 256 (*sipp)->udata); 257 } 258 #endif 259 260 /* Call function */ 261 (*((*sipp)->func))((*sipp)->udata); 262 263 #ifdef VERBOSE_SYSINIT 264 if (verbose) 265 printf("done.\n"); 266 #endif 267 268 /* Check off the one we're just done */ 269 (*sipp)->subsystem = SI_SUB_DONE; 270 271 /* Check if we've installed more sysinit items via KLD */ 272 if (newsysinit != NULL) { 273 if (sysinit != SET_BEGIN(sysinit_set)) 274 free(sysinit, M_TEMP); 275 sysinit = newsysinit; 276 sysinit_end = newsysinit_end; 277 newsysinit = NULL; 278 newsysinit_end = NULL; 279 goto restart; 280 } 281 } 282 283 } 284 285 static int 286 null_fetch_syscall_args(struct thread *td __unused) 287 { 288 289 panic("null_fetch_syscall_args"); 290 } 291 292 static void 293 null_set_syscall_retval(struct thread *td __unused, int error __unused) 294 { 295 296 panic("null_set_syscall_retval"); 297 } 298 299 struct sysentvec null_sysvec = { 300 .sv_size = 0, 301 .sv_table = NULL, 302 .sv_transtrap = NULL, 303 .sv_fixup = NULL, 304 .sv_sendsig = NULL, 305 .sv_sigcode = NULL, 306 .sv_szsigcode = NULL, 307 .sv_name = "null", 308 .sv_coredump = NULL, 309 .sv_imgact_try = NULL, 310 .sv_minsigstksz = 0, 311 .sv_minuser = VM_MIN_ADDRESS, 312 .sv_maxuser = VM_MAXUSER_ADDRESS, 313 .sv_usrstack = USRSTACK, 
314 .sv_psstrings = PS_STRINGS, 315 .sv_stackprot = VM_PROT_ALL, 316 .sv_copyout_strings = NULL, 317 .sv_setregs = NULL, 318 .sv_fixlimit = NULL, 319 .sv_maxssiz = NULL, 320 .sv_flags = 0, 321 .sv_set_syscall_retval = null_set_syscall_retval, 322 .sv_fetch_syscall_args = null_fetch_syscall_args, 323 .sv_syscallnames = NULL, 324 .sv_schedtail = NULL, 325 .sv_thread_detach = NULL, 326 .sv_trap = NULL, 327 328 }; 329 330 /* 331 *************************************************************************** 332 **** 333 **** The two following SYSINIT's are proc0 specific glue code. I am not 334 **** convinced that they can not be safely combined, but their order of 335 **** operation has been maintained as the same as the original init_main.c 336 **** for right now. 337 **** 338 **** These probably belong in init_proc.c or kern_proc.c, since they 339 **** deal with proc0 (the fork template process). 340 **** 341 *************************************************************************** 342 */ 343 /* ARGSUSED*/ 344 static void 345 proc0_init(void *dummy __unused) 346 { 347 struct proc *p; 348 struct thread *td; 349 350 vm_paddr_t pageablemem; 351 int i; 352 353 GIANT_REQUIRED; 354 355 p = &proc0; 356 td = &thread0; 357 init_param1(); 358 init_param2(physmem); 359 360 /* 361 * Initialize magic number and osrel. 362 */ 363 p->p_magic = P_MAGIC; 364 365 #if 0 366 p->p_osrel = osreldate; 367 368 369 /* 370 * Initialize thread and process structures. 371 */ 372 procinit(); /* set up proc zone */ 373 threadinit(); /* set up UMA zones */ 374 375 /* 376 * Initialise scheduler resources. 377 * Add scheduler specific parts to proc, thread as needed. 378 */ 379 schedinit(); /* scheduler gets its house in order */ 380 /* 381 * Initialize sleep queue hash table 382 */ 383 sleepinit(); 384 385 /* 386 * additional VM structures 387 */ 388 vm_init2(); 389 390 /* 391 * Create process 0 (the swapper). 
392 */ 393 LIST_INSERT_HEAD(&allproc, p, p_list); 394 LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); 395 mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); 396 p->p_pgrp = &pgrp0; 397 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); 398 LIST_INIT(&pgrp0.pg_members); 399 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); 400 401 pgrp0.pg_session = &session0; 402 403 mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF); 404 refcount_init(&session0.s_count, 1); 405 session0.s_leader = p; 406 #endif 407 p->p_sysent = &null_sysvec; 408 p->p_flag = P_SYSTEM | P_INMEM; 409 p->p_state = PRS_NORMAL; 410 p->p_klist = knlist_alloc(&p->p_mtx); 411 STAILQ_INIT(&p->p_ktr); 412 p->p_nice = NZERO; 413 td->td_tid = PID_MAX + 1; 414 #if 0 415 LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash); 416 #endif 417 td->td_state = TDS_RUNNING; 418 td->td_pri_class = PRI_TIMESHARE; 419 td->td_user_pri = PUSER; 420 td->td_base_user_pri = PUSER; 421 td->td_priority = PVM; 422 td->td_base_pri = PUSER; 423 td->td_oncpu = 0; 424 td->td_flags = TDF_INMEM|TDP_KTHREAD; 425 td->td_proc = p; 426 #if 0 427 td->td_cpuset = cpuset_thread0(); 428 prison0.pr_cpuset = cpuset_ref(td->td_cpuset); 429 #endif 430 p->p_peers = 0; 431 p->p_leader = p; 432 433 434 strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); 435 strncpy(td->td_name, "swapper", sizeof (td->td_name)); 436 437 callout_init(&p->p_itcallout, CALLOUT_MPSAFE); 438 callout_init_mtx(&p->p_limco, &p->p_mtx, 0); 439 callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); 440 441 /* Create credentials. */ 442 p->p_ucred = crget(); 443 p->p_ucred->cr_ngroups = 1; /* group 0 */ 444 p->p_ucred->cr_uidinfo = uifind(0); 445 p->p_ucred->cr_ruidinfo = uifind(0); 446 p->p_ucred->cr_prison = &prison0; 447 448 #ifdef AUDIT 449 audit_cred_kproc0(p->p_ucred); 450 #endif 451 #ifdef MAC 452 mac_cred_create_swapper(p->p_ucred); 453 #endif 454 455 td->td_ucred = crhold(p->p_ucred); 456 #if 0 457 458 /* Create sigacts. 
*/ 459 p->p_sigacts = sigacts_alloc(); 460 461 /* Initialize signal state for process 0. */ 462 siginit(&proc0); 463 #endif 464 465 /* Create the file descriptor table. */ 466 p->p_fd = fdinit(NULL, false, NULL); 467 p->p_fdtol = NULL; 468 469 470 /* Create the limits structures. */ 471 p->p_limit = lim_alloc(); 472 for (i = 0; i < RLIM_NLIMITS; i++) 473 p->p_limit->pl_rlimit[i].rlim_cur = 474 p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY; 475 p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = 476 p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; 477 p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = 478 p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; 479 p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; 480 p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; 481 p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; 482 p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; 483 /* Cast to avoid overflow on i386/PAE. */ 484 pageablemem = ptoa((vm_paddr_t)vm_free_count()); 485 p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = 486 p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; 487 p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; 488 p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; 489 p->p_cpulimit = RLIM_INFINITY; 490 491 #if 0 492 p->p_stats = pstats_alloc(); 493 494 /* Allocate a prototype map so we have something to fork. */ 495 pmap_pinit0(vmspace_pmap(&vmspace0)); 496 p->p_vmspace = &vmspace0; 497 vmspace0.vm_refcnt = 1; 498 499 /* 500 * proc0 is not expected to enter usermode, so there is no special 501 * handling for sv_minuser here, like is done for exec_new_vmspace(). 502 */ 503 vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0), 504 p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser); 505 #endif 506 507 /* 508 * Call the init and ctor for the new thread and proc. We wait 509 * to do this until all other structures are fairly sane. 
510 */ 511 EVENTHANDLER_INVOKE(process_init, p); 512 EVENTHANDLER_INVOKE(thread_init, td); 513 EVENTHANDLER_INVOKE(process_ctor, p); 514 EVENTHANDLER_INVOKE(thread_ctor, td); 515 516 #if 0 517 /* 518 * Charge root for one process. 519 */ 520 (void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0); 521 #endif 522 } 523 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL); 524 525 /* ARGSUSED*/ 526 static void 527 proc0_post(void *dummy __unused) 528 { 529 #if 0 530 struct timespec ts; 531 /* 532 * Give the ``random'' number generator a thump. 533 */ 534 nanotime(&ts); 535 srandom(ts.tv_sec ^ ts.tv_nsec); 536 #endif 537 } 538 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL); 539