1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 Søren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 #include "opt_compat.h" 36 37 #ifndef COMPAT_FREEBSD32 38 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" 
#endif

/* Select the 32-bit flavour of the ELF definitions pulled in below. */
#define	__ELF_WORD_SIZE	32

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/vnode.h>
#include <sys/eventhandler.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>

#include <amd64/linux32/linux.h>
#include <amd64/linux32/linux32_proto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_futex.h>
#include <compat/linux/linux_ioctl.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_misc.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_vdso.h>

MODULE_VERSION(linux, 1);

/*
 * Emit one auxiliary-vector entry: two consecutive suword32() stores,
 * first the entry id, then its value.  'pos' is post-incremented by each
 * store, so it is evaluated twice and must be a plain pointer lvalue;
 * after the macro it points just past the pair.  The suword32() results
 * are not checked.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32(pos++, id);	\
		suword32(pos++, val);	\
	} while (0)

/*
 * The two characters "#!" as they appear when the first two bytes of an
 * image header are read as a short (see exec_linux_imgact_try()); the
 * constant therefore depends on the host byte order.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define	SHELLMAGIC	0x2123 /* #! */
#else
#define	SHELLMAGIC	0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
107 */ 108 #define LINUX_SYS_linux_rt_sendsig 0 109 #define LINUX_SYS_linux_sendsig 0 110 111 const char *linux_kplatform; 112 static int linux_szsigcode; 113 static vm_object_t linux_shared_page_obj; 114 static char *linux_shared_page_mapping; 115 extern char _binary_linux32_locore_o_start; 116 extern char _binary_linux32_locore_o_end; 117 118 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 119 120 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 121 122 static int elf_linux_fixup(register_t **stack_base, 123 struct image_params *iparams); 124 static register_t *linux_copyout_strings(struct image_params *imgp); 125 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 126 static void exec_linux_setregs(struct thread *td, 127 struct image_params *imgp, u_long stack); 128 static void linux32_fixlimit(struct rlimit *rl, int which); 129 static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); 130 static void linux_vdso_install(void *param); 131 static void linux_vdso_deinstall(void *param); 132 133 /* 134 * Linux syscalls return negative errno's, we do positive and map them 135 * Reference: 136 * FreeBSD: src/sys/sys/errno.h 137 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 138 * linux-2.6.17.8/include/asm-generic/errno.h 139 */ 140 static int bsd_to_linux_errno[ELAST + 1] = { 141 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 142 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 143 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 144 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 145 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 146 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 147 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 148 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 149 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 150 -72, -67, -71 151 }; 152 153 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 154 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 155 LINUX_SIGTRAP, 
LINUX_SIGABRT, 0, LINUX_SIGFPE, 156 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 157 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 158 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 159 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 160 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 161 0, LINUX_SIGUSR1, LINUX_SIGUSR2 162 }; 163 164 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 165 SIGHUP, SIGINT, SIGQUIT, SIGILL, 166 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 167 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 168 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 169 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 170 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 171 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 172 SIGIO, SIGURG, SIGSYS 173 }; 174 175 #define LINUX_T_UNKNOWN 255 176 static int _bsd_to_linux_trapcode[] = { 177 LINUX_T_UNKNOWN, /* 0 */ 178 6, /* 1 T_PRIVINFLT */ 179 LINUX_T_UNKNOWN, /* 2 */ 180 3, /* 3 T_BPTFLT */ 181 LINUX_T_UNKNOWN, /* 4 */ 182 LINUX_T_UNKNOWN, /* 5 */ 183 16, /* 6 T_ARITHTRAP */ 184 254, /* 7 T_ASTFLT */ 185 LINUX_T_UNKNOWN, /* 8 */ 186 13, /* 9 T_PROTFLT */ 187 1, /* 10 T_TRCTRAP */ 188 LINUX_T_UNKNOWN, /* 11 */ 189 14, /* 12 T_PAGEFLT */ 190 LINUX_T_UNKNOWN, /* 13 */ 191 17, /* 14 T_ALIGNFLT */ 192 LINUX_T_UNKNOWN, /* 15 */ 193 LINUX_T_UNKNOWN, /* 16 */ 194 LINUX_T_UNKNOWN, /* 17 */ 195 0, /* 18 T_DIVIDE */ 196 2, /* 19 T_NMI */ 197 4, /* 20 T_OFLOW */ 198 5, /* 21 T_BOUND */ 199 7, /* 22 T_DNA */ 200 8, /* 23 T_DOUBLEFLT */ 201 9, /* 24 T_FPOPFLT */ 202 10, /* 25 T_TSSFLT */ 203 11, /* 26 T_SEGNPFLT */ 204 12, /* 27 T_STKFLT */ 205 18, /* 28 T_MCHK */ 206 19, /* 29 T_XMMFLT */ 207 15 /* 30 T_RESERVED */ 208 }; 209 #define bsd_to_linux_trapcode(code) \ 210 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? 
\ 211 _bsd_to_linux_trapcode[(code)]: \ 212 LINUX_T_UNKNOWN) 213 214 struct linux32_ps_strings { 215 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 216 u_int ps_nargvstr; /* the number of argument strings */ 217 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 218 u_int ps_nenvstr; /* the number of environment strings */ 219 }; 220 221 LINUX_VDSO_SYM_INTPTR(linux32_sigcode); 222 LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode); 223 LINUX_VDSO_SYM_INTPTR(linux32_vsyscall); 224 LINUX_VDSO_SYM_CHAR(linux_platform); 225 226 /* 227 * If FreeBSD & Linux have a difference of opinion about what a trap 228 * means, deal with it here. 229 * 230 * MPSAFE 231 */ 232 static int 233 translate_traps(int signal, int trap_code) 234 { 235 if (signal != SIGBUS) 236 return signal; 237 switch (trap_code) { 238 case T_PROTFLT: 239 case T_TSSFLT: 240 case T_DOUBLEFLT: 241 case T_PAGEFLT: 242 return SIGSEGV; 243 default: 244 return signal; 245 } 246 } 247 248 static int 249 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 250 { 251 Elf32_Auxargs *args; 252 Elf32_Addr *base; 253 Elf32_Addr *pos; 254 struct linux32_ps_strings *arginfo; 255 256 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 257 258 KASSERT(curthread->td_proc == imgp->proc, 259 ("unsafe elf_linux_fixup(), should be curproc")); 260 base = (Elf32_Addr *)*stack_base; 261 args = (Elf32_Auxargs *)imgp->auxargs; 262 pos = base + (imgp->args->argc + imgp->args->envc + 2); 263 264 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR, 265 imgp->proc->p_sysent->sv_shared_page_base); 266 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall); 267 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); 268 269 /* 270 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 271 * as it has appeared in the 2.4.0-rc7 first time. 
272 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 273 * glibc falls back to the hard-coded CLK_TCK value when aux entry 274 * is not present. 275 * Also see linux_times() implementation. 276 */ 277 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 278 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); 279 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 280 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 281 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 282 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 283 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 284 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 285 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 286 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); 287 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 288 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 289 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 290 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 291 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); 292 if (args->execfd != -1) 293 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 294 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 295 296 free(imgp->auxargs, M_TEMP); 297 imgp->auxargs = NULL; 298 299 base--; 300 suword32(base, (uint32_t)imgp->args->argc); 301 *stack_base = (register_t *)base; 302 return (0); 303 } 304 305 static void 306 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 307 { 308 struct thread *td = curthread; 309 struct proc *p = td->td_proc; 310 struct sigacts *psp; 311 struct trapframe *regs; 312 struct l_rt_sigframe *fp, frame; 313 int oonstack; 314 int sig; 315 int code; 316 317 sig = ksi->ksi_signo; 318 code = ksi->ksi_code; 319 PROC_LOCK_ASSERT(p, MA_OWNED); 320 psp = p->p_sigacts; 321 mtx_assert(&psp->ps_mtx, MA_OWNED); 322 regs = td->td_frame; 323 oonstack = sigonstack(regs->tf_rsp); 324 325 #ifdef DEBUG 326 if (ldebug(rt_sendsig)) 327 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 328 catcher, sig, 
(void*)mask, code); 329 #endif 330 /* 331 * Allocate space for the signal handler context. 332 */ 333 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 334 SIGISMEMBER(psp->ps_sigonstack, sig)) { 335 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 336 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 337 } else 338 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 339 mtx_unlock(&psp->ps_mtx); 340 341 /* 342 * Build the argument list for the signal handler. 343 */ 344 if (p->p_sysent->sv_sigtbl) 345 if (sig <= p->p_sysent->sv_sigsize) 346 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 347 348 bzero(&frame, sizeof(frame)); 349 350 frame.sf_handler = PTROUT(catcher); 351 frame.sf_sig = sig; 352 frame.sf_siginfo = PTROUT(&fp->sf_si); 353 frame.sf_ucontext = PTROUT(&fp->sf_sc); 354 355 /* Fill in POSIX parts */ 356 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 357 358 /* 359 * Build the signal context to be used by sigreturn 360 * and libgcc unwind. 361 */ 362 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 363 frame.sf_sc.uc_link = 0; /* XXX ??? */ 364 365 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 366 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 367 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 368 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 369 PROC_UNLOCK(p); 370 371 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 372 373 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 374 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 375 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 376 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 377 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 378 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; 379 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 380 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 381 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 382 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 383 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 384 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; 385 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 386 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 387 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 388 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 389 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 390 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 391 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 392 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 393 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 394 395 #ifdef DEBUG 396 if (ldebug(rt_sendsig)) 397 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 398 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 399 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 400 #endif 401 402 if (copyout(&frame, fp, sizeof(frame)) != 0) { 403 /* 404 * Process has trashed its stack; give it an illegal 405 * instruction to halt it in its tracks. 406 */ 407 #ifdef DEBUG 408 if (ldebug(rt_sendsig)) 409 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 410 fp, oonstack); 411 #endif 412 PROC_LOCK(p); 413 sigexit(td, SIGILL); 414 } 415 416 /* 417 * Build context to run handler in. 
418 */ 419 regs->tf_rsp = PTROUT(fp); 420 regs->tf_rip = linux32_rt_sigcode; 421 regs->tf_rflags &= ~(PSL_T | PSL_D); 422 regs->tf_cs = _ucode32sel; 423 regs->tf_ss = _udatasel; 424 regs->tf_ds = _udatasel; 425 regs->tf_es = _udatasel; 426 regs->tf_fs = _ufssel; 427 regs->tf_gs = _ugssel; 428 regs->tf_flags = TF_HASSEGS; 429 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 430 PROC_LOCK(p); 431 mtx_lock(&psp->ps_mtx); 432 } 433 434 435 /* 436 * Send an interrupt to process. 437 * 438 * Stack is set up to allow sigcode stored 439 * in u. to call routine, followed by kcall 440 * to sigreturn routine below. After sigreturn 441 * resets the signal mask, the stack, and the 442 * frame pointer, it returns to the user 443 * specified pc, psl. 444 */ 445 static void 446 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 447 { 448 struct thread *td = curthread; 449 struct proc *p = td->td_proc; 450 struct sigacts *psp; 451 struct trapframe *regs; 452 struct l_sigframe *fp, frame; 453 l_sigset_t lmask; 454 int oonstack, i; 455 int sig, code; 456 457 sig = ksi->ksi_signo; 458 code = ksi->ksi_code; 459 PROC_LOCK_ASSERT(p, MA_OWNED); 460 psp = p->p_sigacts; 461 mtx_assert(&psp->ps_mtx, MA_OWNED); 462 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 463 /* Signal handler installed with SA_SIGINFO. */ 464 linux_rt_sendsig(catcher, ksi, mask); 465 return; 466 } 467 468 regs = td->td_frame; 469 oonstack = sigonstack(regs->tf_rsp); 470 471 #ifdef DEBUG 472 if (ldebug(sendsig)) 473 printf(ARGS(sendsig, "%p, %d, %p, %u"), 474 catcher, sig, (void*)mask, code); 475 #endif 476 477 /* 478 * Allocate space for the signal handler context. 
479 */ 480 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 481 SIGISMEMBER(psp->ps_sigonstack, sig)) { 482 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 483 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 484 } else 485 fp = (struct l_sigframe *)regs->tf_rsp - 1; 486 mtx_unlock(&psp->ps_mtx); 487 PROC_UNLOCK(p); 488 489 /* 490 * Build the argument list for the signal handler. 491 */ 492 if (p->p_sysent->sv_sigtbl) 493 if (sig <= p->p_sysent->sv_sigsize) 494 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 495 496 bzero(&frame, sizeof(frame)); 497 498 frame.sf_handler = PTROUT(catcher); 499 frame.sf_sig = sig; 500 501 bsd_to_linux_sigset(mask, &lmask); 502 503 /* 504 * Build the signal context to be used by sigreturn. 505 */ 506 frame.sf_sc.sc_mask = lmask.__bits[0]; 507 frame.sf_sc.sc_gs = regs->tf_gs; 508 frame.sf_sc.sc_fs = regs->tf_fs; 509 frame.sf_sc.sc_es = regs->tf_es; 510 frame.sf_sc.sc_ds = regs->tf_ds; 511 frame.sf_sc.sc_edi = regs->tf_rdi; 512 frame.sf_sc.sc_esi = regs->tf_rsi; 513 frame.sf_sc.sc_ebp = regs->tf_rbp; 514 frame.sf_sc.sc_ebx = regs->tf_rbx; 515 frame.sf_sc.sc_esp = regs->tf_rsp; 516 frame.sf_sc.sc_edx = regs->tf_rdx; 517 frame.sf_sc.sc_ecx = regs->tf_rcx; 518 frame.sf_sc.sc_eax = regs->tf_rax; 519 frame.sf_sc.sc_eip = regs->tf_rip; 520 frame.sf_sc.sc_cs = regs->tf_cs; 521 frame.sf_sc.sc_eflags = regs->tf_rflags; 522 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 523 frame.sf_sc.sc_ss = regs->tf_ss; 524 frame.sf_sc.sc_err = regs->tf_err; 525 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 526 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 527 528 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 529 frame.sf_extramask[i] = lmask.__bits[i+1]; 530 531 if (copyout(&frame, fp, sizeof(frame)) != 0) { 532 /* 533 * Process has trashed its stack; give it an illegal 534 * instruction to halt it in its tracks. 535 */ 536 PROC_LOCK(p); 537 sigexit(td, SIGILL); 538 } 539 540 /* 541 * Build context to run handler in. 
542 */ 543 regs->tf_rsp = PTROUT(fp); 544 regs->tf_rip = linux32_sigcode; 545 regs->tf_rflags &= ~(PSL_T | PSL_D); 546 regs->tf_cs = _ucode32sel; 547 regs->tf_ss = _udatasel; 548 regs->tf_ds = _udatasel; 549 regs->tf_es = _udatasel; 550 regs->tf_fs = _ufssel; 551 regs->tf_gs = _ugssel; 552 regs->tf_flags = TF_HASSEGS; 553 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 554 PROC_LOCK(p); 555 mtx_lock(&psp->ps_mtx); 556 } 557 558 /* 559 * System call to cleanup state after a signal 560 * has been taken. Reset signal mask and 561 * stack state from context left by sendsig (above). 562 * Return to previous pc and psl as specified by 563 * context left by sendsig. Check carefully to 564 * make sure that the user has not modified the 565 * psl to gain improper privileges or to cause 566 * a machine fault. 567 */ 568 int 569 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 570 { 571 struct l_sigframe frame; 572 struct trapframe *regs; 573 sigset_t bmask; 574 l_sigset_t lmask; 575 int eflags, i; 576 ksiginfo_t ksi; 577 578 regs = td->td_frame; 579 580 #ifdef DEBUG 581 if (ldebug(sigreturn)) 582 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 583 #endif 584 /* 585 * The trampoline code hands us the sigframe. 586 * It is unsafe to keep track of it ourselves, in the event that a 587 * program jumps out of a signal handler. 588 */ 589 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 590 return (EFAULT); 591 592 /* 593 * Check for security violations. 594 */ 595 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 596 eflags = frame.sf_sc.sc_eflags; 597 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 598 return(EINVAL); 599 600 /* 601 * Don't allow users to load a valid privileged %cs. Let the 602 * hardware check for invalid selectors, excess privilege in 603 * other selectors, invalid %eip's and invalid %esp's. 
604 */ 605 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 606 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 607 ksiginfo_init_trap(&ksi); 608 ksi.ksi_signo = SIGBUS; 609 ksi.ksi_code = BUS_OBJERR; 610 ksi.ksi_trapno = T_PROTFLT; 611 ksi.ksi_addr = (void *)regs->tf_rip; 612 trapsignal(td, &ksi); 613 return(EINVAL); 614 } 615 616 lmask.__bits[0] = frame.sf_sc.sc_mask; 617 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 618 lmask.__bits[i+1] = frame.sf_extramask[i]; 619 linux_to_bsd_sigset(&lmask, &bmask); 620 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 621 622 /* 623 * Restore signal context. 624 */ 625 regs->tf_rdi = frame.sf_sc.sc_edi; 626 regs->tf_rsi = frame.sf_sc.sc_esi; 627 regs->tf_rbp = frame.sf_sc.sc_ebp; 628 regs->tf_rbx = frame.sf_sc.sc_ebx; 629 regs->tf_rdx = frame.sf_sc.sc_edx; 630 regs->tf_rcx = frame.sf_sc.sc_ecx; 631 regs->tf_rax = frame.sf_sc.sc_eax; 632 regs->tf_rip = frame.sf_sc.sc_eip; 633 regs->tf_cs = frame.sf_sc.sc_cs; 634 regs->tf_ds = frame.sf_sc.sc_ds; 635 regs->tf_es = frame.sf_sc.sc_es; 636 regs->tf_fs = frame.sf_sc.sc_fs; 637 regs->tf_gs = frame.sf_sc.sc_gs; 638 regs->tf_rflags = eflags; 639 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 640 regs->tf_ss = frame.sf_sc.sc_ss; 641 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 642 643 return (EJUSTRETURN); 644 } 645 646 /* 647 * System call to cleanup state after a signal 648 * has been taken. Reset signal mask and 649 * stack state from context left by rt_sendsig (above). 650 * Return to previous pc and psl as specified by 651 * context left by sendsig. Check carefully to 652 * make sure that the user has not modified the 653 * psl to gain improper privileges or to cause 654 * a machine fault. 
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct l_ucontext uc;
	struct l_sigcontext *context;
	sigset_t bmask;
	l_stack_t *lss;
	stack_t ss;
	struct trapframe *regs;
	int eflags;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	context = &uc.uc_mcontext;

	/*
	 * Check for security violations.
	 * Only the bits in PSL_USERCHANGE may differ from the live flags.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		/* Raise SIGBUS at the current %rip and fail the syscall. */
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_rip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	/* Reinstate the signal mask saved in the ucontext. */
	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);

	/*
	 * Restore signal context
	 */
	regs->tf_gs	= context->sc_gs;
	regs->tf_fs	= context->sc_fs;
	regs->tf_es	= context->sc_es;
	regs->tf_ds	= context->sc_ds;
	regs->tf_rdi    = context->sc_edi;
	regs->tf_rsi    = context->sc_esi;
	regs->tf_rbp    = context->sc_ebp;
	regs->tf_rbx    = context->sc_ebx;
	regs->tf_rdx    = context->sc_edx;
	regs->tf_rcx    = context->sc_ecx;
	regs->tf_rax    = context->sc_eax;
	regs->tf_rip    = context->sc_eip;
	regs->tf_cs     = context->sc_cs;
	regs->tf_rflags = eflags;
	regs->tf_rsp    = context->sc_esp_at_signal;
	regs->tf_ss     = context->sc_ss;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);

	/*
	 * call sigaltstack & ignore results..
	 */
	lss = &uc.uc_stack;
	ss.ss_sp = PTRIN(lss->ss_sp);
	ss.ss_size = lss->ss_size;
	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
#endif
	(void)kern_sigaltstack(td, &ss, NULL);

	/* The trapframe was rewritten above; no return value is stored. */
	return (EJUSTRETURN);
}

/*
 * Decode a system call request from the thread's trapframe.
 *
 * The syscall number is taken from %rax and the six arguments from
 * %rbx, %rcx, %rdx, %rsi, %rdi and %rbp, in that order.  A number at or
 * beyond sv_size is dispatched to entry 0 of the syscall table.  The
 * return-value slots are preset to 0 and the current %rdx.  Always
 * returns 0.
 */
static int
linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
{
	struct proc *p;
	struct trapframe *frame;

	p = td->td_proc;
	frame = td->td_frame;

	sa->args[0] = frame->tf_rbx;
	sa->args[1] = frame->tf_rcx;
	sa->args[2] = frame->tf_rdx;
	sa->args[3] = frame->tf_rsi;
	sa->args[4] = frame->tf_rdi;
	sa->args[5] = frame->tf_rbp;	/* Unconfirmed */
	sa->code = frame->tf_rax;

	/* Out-of-range syscall numbers fall back to table slot 0. */
	if (sa->code >= p->p_sysent->sv_size)
		sa->callp = &p->p_sysent->sv_table[0];
	else
		sa->callp = &p->p_sysent->sv_table[sa->code];
	sa->narg = sa->callp->sy_narg;

	td->td_retval[0] = 0;
	td->td_retval[1] = frame->tf_rdx;

	return (0);
}

/*
 * If a linux binary is exec'ing something, try this image activator
 * first.  We override standard shell script execution in order to
 * be able to modify the interpreter path.  We only do this if a linux
 * binary is doing the exec, so we do not create an EXEC module for it.
 *
 * Returns -1 when the image does not start with "#!", otherwise the
 * result of exec_shell_imgact().
 */
static int	exec_linux_imgact_try(struct image_params *iparams);

static int
exec_linux_imgact_try(struct image_params *imgp)
{
	const char *head = (const char *)imgp->image_header;
	char *rpath;
	int error = -1;

	/*
	 * The interpreter for shell scripts run from a linux binary needs
	 * to be located in /compat/linux if possible in order to recursively
	 * maintain linux path emulation.
	 */
	if (((const short *)head)[0] == SHELLMAGIC) {
		/*
		 * Run our normal shell image activator.  If it succeeds attempt
		 * to use the alternate path for the interpreter.  If an
		 * alternate path is found, use our stringspace to store it.
		 */
		if ((error = exec_shell_imgact(imgp)) == 0) {
			/*
			 * The return value is ignored; only rpath is
			 * inspected.  NOTE(review): this relies on
			 * linux_emul_convpath() always storing to rpath,
			 * which is otherwise uninitialized -- confirm.
			 */
			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
			    AT_FDCWD);
			if (rpath != NULL)
				imgp->args->fname_buf =
				    imgp->interpreter_name = rpath;
		}
	}
	return (error);
}

/*
 * Clear registers on exec
 * XXX copied from ia32_signal.c.
 *
 * Resets the FS/GS bases, zeroes the trapframe and loads the 32-bit user
 * segment selectors, the image entry point in %rip, the new stack pointer
 * in %rsp and imgp->ps_strings in %rbx.
 */
static void
exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
{
	struct trapframe *regs = td->td_frame;
	struct pcb *pcb = td->td_pcb;

	/*
	 * Drop any per-process LDT.  dt_lock is released explicitly only
	 * when there is no LDT; NOTE(review): presumably user_ldt_free()
	 * releases dt_lock itself -- confirm.
	 */
	mtx_lock(&dt_lock);
	if (td->td_proc->p_md.md_ldt != NULL)
		user_ldt_free(td);
	else
		mtx_unlock(&dt_lock);

	critical_enter();
	wrmsr(MSR_FSBASE, 0);
	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
	pcb->pcb_fsbase = 0;
	pcb->pcb_gsbase = 0;
	critical_exit();
	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;

	bzero((char *)regs, sizeof(struct trapframe));
	regs->tf_rip = imgp->entry_addr;
	regs->tf_rsp = stack;
	/*
	 * NOTE(review): the trapframe was zeroed just above, so
	 * (regs->tf_rflags & PSL_T) is always 0 here and the result is
	 * simply PSL_USER; a pending trace flag is not carried across exec.
	 */
	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
	regs->tf_gs = _ugssel;
	regs->tf_fs = _ufssel;
	regs->tf_es = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_ss = _udatasel;
	regs->tf_flags = TF_HASSEGS;
	regs->tf_cs = _ucode32sel;
	regs->tf_rbx = imgp->ps_strings;

	fpstate_drop(td);

	/* Do full restore on return so that we can change to a different %cs */
	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
	td->td_retval[1] = 0;
}

/*
 * XXX copied from ia32_sysvec.c.
863 */ 864 static register_t * 865 linux_copyout_strings(struct image_params *imgp) 866 { 867 int argc, envc; 868 u_int32_t *vectp; 869 char *stringp, *destp; 870 u_int32_t *stack_base; 871 struct linux32_ps_strings *arginfo; 872 873 /* 874 * Calculate string base and vector table pointers. 875 */ 876 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 877 destp = (caddr_t)arginfo - SPARE_USRSPACE - 878 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 879 880 /* 881 * If we have a valid auxargs ptr, prepare some room 882 * on the stack. 883 */ 884 if (imgp->auxargs) { 885 /* 886 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 887 * lower compatibility. 888 */ 889 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 890 (LINUX_AT_COUNT * 2); 891 /* 892 * The '+ 2' is for the null pointers at the end of each of 893 * the arg and env vector sets,and imgp->auxarg_size is room 894 * for argument of Runtime loader. 895 */ 896 vectp = (u_int32_t *) (destp - (imgp->args->argc + 897 imgp->args->envc + 2 + imgp->auxarg_size) * 898 sizeof(u_int32_t)); 899 900 } else 901 /* 902 * The '+ 2' is for the null pointers at the end of each of 903 * the arg and env vector sets 904 */ 905 vectp = (u_int32_t *)(destp - (imgp->args->argc + 906 imgp->args->envc + 2) * sizeof(u_int32_t)); 907 908 /* 909 * vectp also becomes our initial stack base 910 */ 911 stack_base = vectp; 912 913 stringp = imgp->args->begin_argv; 914 argc = imgp->args->argc; 915 envc = imgp->args->envc; 916 /* 917 * Copy out strings - arguments and environment. 918 */ 919 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 920 921 /* 922 * Fill in "ps_strings" struct for ps, w, etc. 923 */ 924 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); 925 suword32(&arginfo->ps_nargvstr, argc); 926 927 /* 928 * Fill in argument portion of vector table. 
929 */ 930 for (; argc > 0; --argc) { 931 suword32(vectp++, (uint32_t)(intptr_t)destp); 932 while (*stringp++ != 0) 933 destp++; 934 destp++; 935 } 936 937 /* a null vector table pointer separates the argp's from the envp's */ 938 suword32(vectp++, 0); 939 940 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); 941 suword32(&arginfo->ps_nenvstr, envc); 942 943 /* 944 * Fill in environment portion of vector table. 945 */ 946 for (; envc > 0; --envc) { 947 suword32(vectp++, (uint32_t)(intptr_t)destp); 948 while (*stringp++ != 0) 949 destp++; 950 destp++; 951 } 952 953 /* end of vector table is a null pointer */ 954 suword32(vectp, 0); 955 956 return ((register_t *)stack_base); 957 } 958 959 static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 960 "32-bit Linux emulation"); 961 962 static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 963 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 964 &linux32_maxdsiz, 0, ""); 965 static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 966 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 967 &linux32_maxssiz, 0, ""); 968 static u_long linux32_maxvmem = LINUX32_MAXVMEM; 969 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 970 &linux32_maxvmem, 0, ""); 971 972 #if defined(DEBUG) 973 SYSCTL_PROC(_compat_linux32, OID_AUTO, debug, 974 CTLTYPE_STRING | CTLFLAG_RW, 975 0, 0, linux_sysctl_debug, "A", 976 "Linux debugging control"); 977 #endif 978 979 static void 980 linux32_fixlimit(struct rlimit *rl, int which) 981 { 982 983 switch (which) { 984 case RLIMIT_DATA: 985 if (linux32_maxdsiz != 0) { 986 if (rl->rlim_cur > linux32_maxdsiz) 987 rl->rlim_cur = linux32_maxdsiz; 988 if (rl->rlim_max > linux32_maxdsiz) 989 rl->rlim_max = linux32_maxdsiz; 990 } 991 break; 992 case RLIMIT_STACK: 993 if (linux32_maxssiz != 0) { 994 if (rl->rlim_cur > linux32_maxssiz) 995 rl->rlim_cur = linux32_maxssiz; 996 if (rl->rlim_max > linux32_maxssiz) 997 rl->rlim_max = linux32_maxssiz; 998 } 999 break; 1000 case 
RLIMIT_VMEM: 1001 if (linux32_maxvmem != 0) { 1002 if (rl->rlim_cur > linux32_maxvmem) 1003 rl->rlim_cur = linux32_maxvmem; 1004 if (rl->rlim_max > linux32_maxvmem) 1005 rl->rlim_max = linux32_maxvmem; 1006 } 1007 break; 1008 } 1009 } 1010 1011 struct sysentvec elf_linux_sysvec = { 1012 .sv_size = LINUX_SYS_MAXSYSCALL, 1013 .sv_table = linux_sysent, 1014 .sv_mask = 0, 1015 .sv_sigsize = LINUX_SIGTBLSZ, 1016 .sv_sigtbl = bsd_to_linux_signal, 1017 .sv_errsize = ELAST + 1, 1018 .sv_errtbl = bsd_to_linux_errno, 1019 .sv_transtrap = translate_traps, 1020 .sv_fixup = elf_linux_fixup, 1021 .sv_sendsig = linux_sendsig, 1022 .sv_sigcode = &_binary_linux32_locore_o_start, 1023 .sv_szsigcode = &linux_szsigcode, 1024 .sv_prepsyscall = NULL, 1025 .sv_name = "Linux ELF32", 1026 .sv_coredump = elf32_coredump, 1027 .sv_imgact_try = exec_linux_imgact_try, 1028 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1029 .sv_pagesize = PAGE_SIZE, 1030 .sv_minuser = VM_MIN_ADDRESS, 1031 .sv_maxuser = LINUX32_MAXUSER, 1032 .sv_usrstack = LINUX32_USRSTACK, 1033 .sv_psstrings = LINUX32_PS_STRINGS, 1034 .sv_stackprot = VM_PROT_ALL, 1035 .sv_copyout_strings = linux_copyout_strings, 1036 .sv_setregs = exec_linux_setregs, 1037 .sv_fixlimit = linux32_fixlimit, 1038 .sv_maxssiz = &linux32_maxssiz, 1039 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, 1040 .sv_set_syscall_retval = cpu_set_syscall_retval, 1041 .sv_fetch_syscall_args = linux32_fetch_syscall_args, 1042 .sv_syscallnames = NULL, 1043 .sv_shared_page_base = LINUX32_SHAREDPAGE, 1044 .sv_shared_page_len = PAGE_SIZE, 1045 .sv_schedtail = linux_schedtail, 1046 .sv_thread_detach = linux_thread_detach, 1047 }; 1048 1049 static void 1050 linux_vdso_install(void *param) 1051 { 1052 1053 linux_szsigcode = (&_binary_linux32_locore_o_end - 1054 &_binary_linux32_locore_o_start); 1055 1056 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) 1057 panic("Linux invalid vdso size\n"); 1058 1059 __elfN(linux_vdso_fixup)(&elf_linux_sysvec); 1060 1061 
linux_shared_page_obj = __elfN(linux_shared_page_init) 1062 (&linux_shared_page_mapping); 1063 1064 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE); 1065 1066 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, 1067 linux_szsigcode); 1068 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; 1069 1070 linux_kplatform = linux_shared_page_mapping + 1071 (linux_platform - (caddr_t)LINUX32_SHAREDPAGE); 1072 } 1073 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, 1074 (sysinit_cfunc_t)linux_vdso_install, NULL); 1075 1076 static void 1077 linux_vdso_deinstall(void *param) 1078 { 1079 1080 __elfN(linux_shared_page_fini)(linux_shared_page_obj); 1081 }; 1082 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, 1083 (sysinit_cfunc_t)linux_vdso_deinstall, NULL); 1084 1085 static char GNU_ABI_VENDOR[] = "GNU"; 1086 static int GNULINUX_ABI_DESC = 0; 1087 1088 static boolean_t 1089 linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) 1090 { 1091 const Elf32_Word *desc; 1092 uintptr_t p; 1093 1094 p = (uintptr_t)(note + 1); 1095 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 1096 1097 desc = (const Elf32_Word *)p; 1098 if (desc[0] != GNULINUX_ABI_DESC) 1099 return (FALSE); 1100 1101 /* 1102 * For linux we encode osrel as follows (see linux_mib.c): 1103 * VVVMMMIII (version, major, minor), see linux_mib.c. 
1104 */ 1105 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 1106 1107 return (TRUE); 1108 } 1109 1110 static Elf_Brandnote linux32_brandnote = { 1111 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 1112 .hdr.n_descsz = 16, /* XXX at least 16 */ 1113 .hdr.n_type = 1, 1114 .vendor = GNU_ABI_VENDOR, 1115 .flags = BN_TRANSLATE_OSREL, 1116 .trans_osrel = linux32_trans_osrel 1117 }; 1118 1119 static Elf32_Brandinfo linux_brand = { 1120 .brand = ELFOSABI_LINUX, 1121 .machine = EM_386, 1122 .compat_3_brand = "Linux", 1123 .emul_path = "/compat/linux", 1124 .interp_path = "/lib/ld-linux.so.1", 1125 .sysvec = &elf_linux_sysvec, 1126 .interp_newpath = NULL, 1127 .brand_note = &linux32_brandnote, 1128 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1129 }; 1130 1131 static Elf32_Brandinfo linux_glibc2brand = { 1132 .brand = ELFOSABI_LINUX, 1133 .machine = EM_386, 1134 .compat_3_brand = "Linux", 1135 .emul_path = "/compat/linux", 1136 .interp_path = "/lib/ld-linux.so.2", 1137 .sysvec = &elf_linux_sysvec, 1138 .interp_newpath = NULL, 1139 .brand_note = &linux32_brandnote, 1140 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1141 }; 1142 1143 Elf32_Brandinfo *linux_brandlist[] = { 1144 &linux_brand, 1145 &linux_glibc2brand, 1146 NULL 1147 }; 1148 1149 static int 1150 linux_elf_modevent(module_t mod, int type, void *data) 1151 { 1152 Elf32_Brandinfo **brandinfo; 1153 int error; 1154 struct linux_ioctl_handler **lihp; 1155 1156 error = 0; 1157 1158 switch(type) { 1159 case MOD_LOAD: 1160 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1161 ++brandinfo) 1162 if (elf32_insert_brand_entry(*brandinfo) < 0) 1163 error = EINVAL; 1164 if (error == 0) { 1165 SET_FOREACH(lihp, linux_ioctl_handler_set) 1166 linux_ioctl_register_handler(*lihp); 1167 LIST_INIT(&futex_list); 1168 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1169 stclohz = (stathz ? 
stathz : hz); 1170 if (bootverbose) 1171 printf("Linux ELF exec handler installed\n"); 1172 } else 1173 printf("cannot insert Linux ELF brand handler\n"); 1174 break; 1175 case MOD_UNLOAD: 1176 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1177 ++brandinfo) 1178 if (elf32_brand_inuse(*brandinfo)) 1179 error = EBUSY; 1180 if (error == 0) { 1181 for (brandinfo = &linux_brandlist[0]; 1182 *brandinfo != NULL; ++brandinfo) 1183 if (elf32_remove_brand_entry(*brandinfo) < 0) 1184 error = EINVAL; 1185 } 1186 if (error == 0) { 1187 SET_FOREACH(lihp, linux_ioctl_handler_set) 1188 linux_ioctl_unregister_handler(*lihp); 1189 mtx_destroy(&futex_mtx); 1190 if (bootverbose) 1191 printf("Linux ELF exec handler removed\n"); 1192 } else 1193 printf("Could not deinstall ELF interpreter entry\n"); 1194 break; 1195 default: 1196 return (EOPNOTSUPP); 1197 } 1198 return (error); 1199 } 1200 1201 static moduledata_t linux_elf_mod = { 1202 "linuxelf", 1203 linux_elf_modevent, 1204 0 1205 }; 1206 1207 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1208 MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1); 1209