xref: /freebsd-14.2/sys/i386/linux/linux_sysvec.c (revision 052d3c12)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1994-1996 Søren Schmidt
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysctl.h>
48 #include <sys/sysent.h>
49 #include <sys/sysproto.h>
50 #include <sys/vnode.h>
51 #include <sys/eventhandler.h>
52 
53 #include <vm/vm.h>
54 #include <vm/pmap.h>
55 #include <vm/vm_extern.h>
56 #include <vm/vm_map.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_param.h>
60 
61 #include <machine/cpu.h>
62 #include <machine/cputypes.h>
63 #include <machine/md_var.h>
64 #include <machine/pcb.h>
65 
66 #include <i386/linux/linux.h>
67 #include <i386/linux/linux_proto.h>
68 #include <compat/linux/linux_emul.h>
69 #include <compat/linux/linux_futex.h>
70 #include <compat/linux/linux_ioctl.h>
71 #include <compat/linux/linux_mib.h>
72 #include <compat/linux/linux_misc.h>
73 #include <compat/linux/linux_signal.h>
74 #include <compat/linux/linux_util.h>
75 #include <compat/linux/linux_vdso.h>
76 
/* Declare version 1 of the "linux" module. */
MODULE_VERSION(linux, 1);

/*
 * The two bytes "#!" as they read when the start of an image header is
 * loaded as a 16-bit short; exec_linux_imgact_try() compares the header
 * against this value to recognise shell scripts.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif

/*
 * Debug builds only: a read/write string sysctl named "debug" under the
 * _compat_linux node, serviced by linux_sysctl_debug().
 */
#if defined(DEBUG)
SYSCTL_PROC(_compat_linux, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
    linux_sysctl_debug, "A", "Linux debugging control");
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility even though they
 * are not syscalls themselves.  Map them to syscall 0.  This is slightly less
 * bogus than using ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/*
 * User address of struct ps_strings for Linux ELF images: the structure
 * sits immediately below LINUX_USRSTACK.
 */
#define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))

/*
 * Size in bytes of the linux_locore.o image delimited by the two
 * _binary_linux_locore_o_* symbols; computed in linux_vdso_install().
 */
static int linux_szsigcode;
/* Shared page object and its kernel mapping, set up in linux_vdso_install(). */
static vm_object_t linux_shared_page_obj;
static char *linux_shared_page_mapping;
/* First byte and one-past-the-end of the embedded linux_locore.o image. */
extern char _binary_linux_locore_o_start;
extern char _binary_linux_locore_o_end;

/* Linux system call table, shared by both sysentvecs below. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/* Forward declarations of file-local functions. */
static int	linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td,
		    struct image_params *imgp, u_long stack);
static register_t *linux_copyout_strings(struct image_params *imgp);
static bool	linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void	linux_vdso_install(void *param);
static void	linux_vdso_deinstall(void *param);

/*
 * Platform string and its size in bytes.  linux_copyout_strings() copies
 * linux_szplatform bytes of linux_kplatform to just below ps_strings and
 * elf_linux_fixup() publishes that user address as LINUX_AT_PLATFORM.
 */
static int linux_szplatform;
const char *linux_kplatform;

/*
 * Event handler registration tags.
 * NOTE(review): presumably filled in when the module registers its
 * process exit/exec and thread destructor hooks -- confirm against the
 * module event handler.
 */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_exec_tag;
static eventhandler_tag linux_thread_dtor_tag;
128 
129 /*
130  * Linux syscalls return negative errno's, we do positive and map them
131  * Reference:
132  *   FreeBSD: src/sys/sys/errno.h
133  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
134  *            linux-2.6.17.8/include/asm-generic/errno.h
135  */
136 static int bsd_to_linux_errno[ELAST + 1] = {
137 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
138 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
139 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
140 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
141 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
142 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
143 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
144 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
145 	  -6,  -6, -43, -42, -75,-125, -84, -61, -16, -74,
146 	 -72, -67, -71
147 };
148 
#define LINUX_T_UNKNOWN  255
/*
 * Native trap number -> Linux trap number.  Every slot is spelled out with
 * its index so the mapping can be read without counting lines; slots with
 * no Linux counterpart hold LINUX_T_UNKNOWN.
 */
static int _bsd_to_linux_trapcode[] = {
	[0]  = LINUX_T_UNKNOWN,
	[1]  = 6,			/* T_PRIVINFLT */
	[2]  = LINUX_T_UNKNOWN,
	[3]  = 3,			/* T_BPTFLT */
	[4]  = LINUX_T_UNKNOWN,
	[5]  = LINUX_T_UNKNOWN,
	[6]  = 16,			/* T_ARITHTRAP */
	[7]  = 254,			/* T_ASTFLT */
	[8]  = LINUX_T_UNKNOWN,
	[9]  = 13,			/* T_PROTFLT */
	[10] = 1,			/* T_TRCTRAP */
	[11] = LINUX_T_UNKNOWN,
	[12] = 14,			/* T_PAGEFLT */
	[13] = LINUX_T_UNKNOWN,
	[14] = 17,			/* T_ALIGNFLT */
	[15] = LINUX_T_UNKNOWN,
	[16] = LINUX_T_UNKNOWN,
	[17] = LINUX_T_UNKNOWN,
	[18] = 0,			/* T_DIVIDE */
	[19] = 2,			/* T_NMI */
	[20] = 4,			/* T_OFLOW */
	[21] = 5,			/* T_BOUND */
	[22] = 7,			/* T_DNA */
	[23] = 8,			/* T_DOUBLEFLT */
	[24] = 9,			/* T_FPOPFLT */
	[25] = 10,			/* T_TSSFLT */
	[26] = 11,			/* T_SEGNPFLT */
	[27] = 12,			/* T_STKFLT */
	[28] = 18,			/* T_MCHK */
	[29] = 19,			/* T_XMMFLT */
	[30] = 15,			/* T_RESERVED */
};

/*
 * Bounds-checked lookup in the table above; indices past the end of the
 * table yield LINUX_T_UNKNOWN.  The argument is evaluated twice.
 */
#define bsd_to_linux_trapcode(code)					\
	((code) < nitems(_bsd_to_linux_trapcode) ?			\
	    _bsd_to_linux_trapcode[(code)] :				\
	    LINUX_T_UNKNOWN)
187 
/*
 * Integer-pointer symbols used in this file as user-space addresses:
 * linux_sigcode and linux_rt_sigcode become the %eip of a delivered signal
 * handler, linux_vsyscall is advertised through LINUX_AT_SYSINFO.
 * NOTE(review): presumably resolved to shared-page addresses by the vDSO
 * fixup/relocation done in linux_vdso_install() -- confirm in linux_vdso.h.
 */
LINUX_VDSO_SYM_INTPTR(linux_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_vsyscall);
191 
192 /*
193  * If FreeBSD & Linux have a difference of opinion about what a trap
194  * means, deal with it here.
195  *
196  * MPSAFE
197  */
198 static int
199 translate_traps(int signal, int trap_code)
200 {
201 	if (signal != SIGBUS)
202 		return (signal);
203 	switch (trap_code) {
204 	case T_PROTFLT:
205 	case T_TSSFLT:
206 	case T_DOUBLEFLT:
207 	case T_PAGEFLT:
208 		return (SIGSEGV);
209 	default:
210 		return (signal);
211 	}
212 }
213 
214 static int
215 linux_fixup(register_t **stack_base, struct image_params *imgp)
216 {
217 	register_t *argv, *envp;
218 
219 	argv = *stack_base;
220 	envp = *stack_base + (imgp->args->argc + 1);
221 	(*stack_base)--;
222 	suword(*stack_base, (intptr_t)(void *)envp);
223 	(*stack_base)--;
224 	suword(*stack_base, (intptr_t)(void *)argv);
225 	(*stack_base)--;
226 	suword(*stack_base, imgp->args->argc);
227 	return (0);
228 }
229 
230 static int
231 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
232 {
233 	struct proc *p;
234 	Elf32_Auxargs *args;
235 	Elf32_Addr *uplatform;
236 	struct ps_strings *arginfo;
237 	register_t *pos;
238 	int issetugid;
239 
240 	KASSERT(curthread->td_proc == imgp->proc,
241 	    ("unsafe elf_linux_fixup(), should be curproc"));
242 
243 	p = imgp->proc;
244 	issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0;
245 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
246 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
247 	args = (Elf32_Auxargs *)imgp->auxargs;
248 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
249 
250 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
251 	    imgp->proc->p_sysent->sv_shared_page_base);
252 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall);
253 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
254 
255 	/*
256 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
257 	 * as it has appeared in the 2.4.0-rc7 first time.
258 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
259 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
260 	 * is not present.
261 	 * Also see linux_times() implementation.
262 	 */
263 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
264 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
265 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
266 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
267 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
268 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
269 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
270 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
271 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
272 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid);
273 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
274 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
275 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
276 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
277 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
278 	AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary);
279 	if (imgp->execpathp != 0)
280 		AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp);
281 	if (args->execfd != -1)
282 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
283 	AUXARGS_ENTRY(pos, AT_NULL, 0);
284 
285 	free(imgp->auxargs, M_TEMP);
286 	imgp->auxargs = NULL;
287 
288 	(*stack_base)--;
289 	suword(*stack_base, (register_t)imgp->args->argc);
290 	return (0);
291 }
292 
/*
 * Copied from kern/kern_exec.c
 *
 * Copy the argument and environment strings, the platform string, the
 * executable path and a random canary out to the top of the new process's
 * stack, build the argv/envp pointer vectors below them, and record the
 * vectors in struct ps_strings.
 *
 * User stack layout produced, from high to low addresses:
 *
 *	arginfo (sv_psstrings)
 *	platform string			linux_szplatform bytes
 *	executable path			only when execpath and auxargs are set
 *	canary				LINUX_AT_RANDOM_LEN random bytes
 *	SPARE_USRSPACE gap
 *	argument/environment strings	ARG_MAX - stringspace bytes
 *	room for the aux vector		only when auxargs is set
 *	envp[] + NULL, argv[] + NULL	argv[0] at the lowest address
 *
 * Returns the user address of argv[0], which is also the initial stack
 * base handed to the fixup routine.  The results of copyout() and
 * suword() are not checked.
 */
static register_t *
linux_copyout_strings(struct image_params *imgp)
{
	int argc, envc;
	char **vectp;
	char *stringp, *destp;
	register_t *stack_base;
	struct ps_strings *arginfo;
	char canary[LINUX_AT_RANDOM_LEN];
	size_t execpath_len;
	struct proc *p;

	/* Calculate string base and vector table pointers. */
	p = imgp->proc;
	/* The path is only exported when an aux vector will carry it. */
	if (imgp->execpath != NULL && imgp->auxargs != NULL)
		execpath_len = strlen(imgp->execpath) + 1;
	else
		execpath_len = 0;
	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
	/*
	 * destp: where the argument/environment strings start, below all
	 * of the fixed items; each size is rounded up to pointer size.
	 */
	destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
	    roundup(sizeof(canary), sizeof(char *)) -
	    roundup(execpath_len, sizeof(char *)) -
	    roundup(ARG_MAX - imgp->args->stringspace, sizeof(char *));

	/* Install LINUX_PLATFORM. */
	copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform),
	    linux_szplatform);

	/* The executable path goes directly below the platform string. */
	if (execpath_len != 0) {
		imgp->execpathp = (uintptr_t)arginfo -
		linux_szplatform - execpath_len;
		copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len);
	}

	/* Prepare the canary for SSP. */
	arc4rand(canary, sizeof(canary), 0);
	/* It sits below the (rounded-up) executable path slot. */
	imgp->canary = (uintptr_t)arginfo - linux_szplatform -
	    roundup(execpath_len, sizeof(char *)) -
	    roundup(sizeof(canary), sizeof(char *));
	copyout(canary, (void *)imgp->canary, sizeof(canary));

	/* If we have a valid auxargs ptr, prepare some room on the stack. */
	if (imgp->auxargs) {
		/*
		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
		 * lower compatibility.
		 */
		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
		    (LINUX_AT_COUNT * 2);
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets,and imgp->auxarg_size is room
		 * for argument of Runtime loader.
		 */
		vectp = (char **)(destp - (imgp->args->argc +
		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
	} else {
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets
		 */
		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
		    sizeof(char *));
	}

	/* vectp also becomes our initial stack base. */
	stack_base = (register_t *)vectp;

	stringp = imgp->args->begin_argv;
	argc = imgp->args->argc;
	envc = imgp->args->envc;

	/* Copy out strings - arguments and environment. */
	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);

	/* Fill in "ps_strings" struct for ps, w, etc. */
	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
	suword(&arginfo->ps_nargvstr, argc);

	/*
	 * Fill in argument portion of vector table.  stringp walks the
	 * kernel copy of the strings while destp tracks the matching user
	 * address; both advance past each string and its terminating NUL.
	 */
	for (; argc > 0; --argc) {
		suword(vectp++, (long)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* A null vector table pointer separates the argp's from the envp's. */
	suword(vectp++, 0);

	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
	suword(&arginfo->ps_nenvstr, envc);

	/* Fill in environment portion of vector table. */
	for (; envc > 0; --envc) {
		suword(vectp++, (long)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* The end of the vector table is a null pointer. */
	suword(vectp, 0);

	return (stack_base);
}
402 
403 static void
404 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
405 {
406 	struct thread *td = curthread;
407 	struct proc *p = td->td_proc;
408 	struct sigacts *psp;
409 	struct trapframe *regs;
410 	struct l_rt_sigframe *fp, frame;
411 	int sig, code;
412 	int oonstack;
413 
414 	sig = ksi->ksi_signo;
415 	code = ksi->ksi_code;
416 	PROC_LOCK_ASSERT(p, MA_OWNED);
417 	psp = p->p_sigacts;
418 	mtx_assert(&psp->ps_mtx, MA_OWNED);
419 	regs = td->td_frame;
420 	oonstack = sigonstack(regs->tf_esp);
421 
422 #ifdef DEBUG
423 	if (ldebug(rt_sendsig))
424 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
425 		    catcher, sig, (void*)mask, code);
426 #endif
427 	/* Allocate space for the signal handler context. */
428 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
429 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
430 		fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
431 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
432 	} else
433 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
434 	mtx_unlock(&psp->ps_mtx);
435 
436 	/* Build the argument list for the signal handler. */
437 	sig = bsd_to_linux_signal(sig);
438 
439 	bzero(&frame, sizeof(frame));
440 
441 	frame.sf_handler = catcher;
442 	frame.sf_sig = sig;
443 	frame.sf_siginfo = &fp->sf_si;
444 	frame.sf_ucontext = &fp->sf_sc;
445 
446 	/* Fill in POSIX parts. */
447 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
448 
449 	/* Build the signal context to be used by sigreturn. */
450 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
451 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
452 
453 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
454 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
455 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
456 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
457 	PROC_UNLOCK(p);
458 
459 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
460 
461 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__mask;
462 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
463 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
464 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
465 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
466 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
467 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
468 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
469 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
470 	frame.sf_sc.uc_mcontext.sc_esp    = regs->tf_esp;
471 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
472 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
473 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
474 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
475 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
476 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
477 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
478 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
479 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
480 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
481 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
482 
483 #ifdef DEBUG
484 	if (ldebug(rt_sendsig))
485 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
486 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
487 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
488 #endif
489 
490 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
491 		/*
492 		 * Process has trashed its stack; give it an illegal
493 		 * instruction to halt it in its tracks.
494 		 */
495 #ifdef DEBUG
496 		if (ldebug(rt_sendsig))
497 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
498 			    fp, oonstack);
499 #endif
500 		PROC_LOCK(p);
501 		sigexit(td, SIGILL);
502 	}
503 
504 	/* Build context to run handler in. */
505 	regs->tf_esp = (int)fp;
506 	regs->tf_eip = linux_rt_sigcode;
507 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
508 	regs->tf_cs = _ucodesel;
509 	regs->tf_ds = _udatasel;
510 	regs->tf_es = _udatasel;
511 	regs->tf_fs = _udatasel;
512 	regs->tf_ss = _udatasel;
513 	PROC_LOCK(p);
514 	mtx_lock(&psp->ps_mtx);
515 }
516 
517 
518 /*
519  * Send an interrupt to process.
520  *
521  * Stack is set up to allow sigcode stored
522  * in u. to call routine, followed by kcall
523  * to sigreturn routine below.  After sigreturn
524  * resets the signal mask, the stack, and the
525  * frame pointer, it returns to the user
526  * specified pc, psl.
527  */
528 static void
529 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
530 {
531 	struct thread *td = curthread;
532 	struct proc *p = td->td_proc;
533 	struct sigacts *psp;
534 	struct trapframe *regs;
535 	struct l_sigframe *fp, frame;
536 	l_sigset_t lmask;
537 	int sig, code;
538 	int oonstack;
539 
540 	PROC_LOCK_ASSERT(p, MA_OWNED);
541 	psp = p->p_sigacts;
542 	sig = ksi->ksi_signo;
543 	code = ksi->ksi_code;
544 	mtx_assert(&psp->ps_mtx, MA_OWNED);
545 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
546 		/* Signal handler installed with SA_SIGINFO. */
547 		linux_rt_sendsig(catcher, ksi, mask);
548 		return;
549 	}
550 	regs = td->td_frame;
551 	oonstack = sigonstack(regs->tf_esp);
552 
553 #ifdef DEBUG
554 	if (ldebug(sendsig))
555 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
556 		    catcher, sig, (void*)mask, code);
557 #endif
558 
559 	/* Allocate space for the signal handler context. */
560 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
561 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
562 		fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
563 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
564 	} else
565 		fp = (struct l_sigframe *)regs->tf_esp - 1;
566 	mtx_unlock(&psp->ps_mtx);
567 	PROC_UNLOCK(p);
568 
569 	/* Build the argument list for the signal handler. */
570 	sig = bsd_to_linux_signal(sig);
571 
572 	bzero(&frame, sizeof(frame));
573 
574 	frame.sf_handler = catcher;
575 	frame.sf_sig = sig;
576 
577 	bsd_to_linux_sigset(mask, &lmask);
578 
579 	/* Build the signal context to be used by sigreturn. */
580 	frame.sf_sc.sc_mask   = lmask.__mask;
581 	frame.sf_sc.sc_gs     = rgs();
582 	frame.sf_sc.sc_fs     = regs->tf_fs;
583 	frame.sf_sc.sc_es     = regs->tf_es;
584 	frame.sf_sc.sc_ds     = regs->tf_ds;
585 	frame.sf_sc.sc_edi    = regs->tf_edi;
586 	frame.sf_sc.sc_esi    = regs->tf_esi;
587 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
588 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
589 	frame.sf_sc.sc_esp    = regs->tf_esp;
590 	frame.sf_sc.sc_edx    = regs->tf_edx;
591 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
592 	frame.sf_sc.sc_eax    = regs->tf_eax;
593 	frame.sf_sc.sc_eip    = regs->tf_eip;
594 	frame.sf_sc.sc_cs     = regs->tf_cs;
595 	frame.sf_sc.sc_eflags = regs->tf_eflags;
596 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
597 	frame.sf_sc.sc_ss     = regs->tf_ss;
598 	frame.sf_sc.sc_err    = regs->tf_err;
599 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
600 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
601 
602 	frame.sf_extramask[0] = lmask.__mask;
603 
604 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
605 		/*
606 		 * Process has trashed its stack; give it an illegal
607 		 * instruction to halt it in its tracks.
608 		 */
609 		PROC_LOCK(p);
610 		sigexit(td, SIGILL);
611 	}
612 
613 	/* Build context to run handler in. */
614 	regs->tf_esp = (int)fp;
615 	regs->tf_eip = linux_sigcode;
616 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
617 	regs->tf_cs = _ucodesel;
618 	regs->tf_ds = _udatasel;
619 	regs->tf_es = _udatasel;
620 	regs->tf_fs = _udatasel;
621 	regs->tf_ss = _udatasel;
622 	PROC_LOCK(p);
623 	mtx_lock(&psp->ps_mtx);
624 }
625 
626 /*
627  * System call to cleanup state after a signal
628  * has been taken.  Reset signal mask and
629  * stack state from context left by sendsig (above).
630  * Return to previous pc and psl as specified by
631  * context left by sendsig. Check carefully to
632  * make sure that the user has not modified the
633  * psl to gain improper privileges or to cause
634  * a machine fault.
635  */
636 int
637 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
638 {
639 	struct l_sigframe frame;
640 	struct trapframe *regs;
641 	l_sigset_t lmask;
642 	sigset_t bmask;
643 	int eflags;
644 	ksiginfo_t ksi;
645 
646 	regs = td->td_frame;
647 
648 #ifdef DEBUG
649 	if (ldebug(sigreturn))
650 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
651 #endif
652 	/*
653 	 * The trampoline code hands us the sigframe.
654 	 * It is unsafe to keep track of it ourselves, in the event that a
655 	 * program jumps out of a signal handler.
656 	 */
657 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
658 		return (EFAULT);
659 
660 	/* Check for security violations. */
661 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
662 	eflags = frame.sf_sc.sc_eflags;
663 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
664 		return (EINVAL);
665 
666 	/*
667 	 * Don't allow users to load a valid privileged %cs.  Let the
668 	 * hardware check for invalid selectors, excess privilege in
669 	 * other selectors, invalid %eip's and invalid %esp's.
670 	 */
671 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
672 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
673 		ksiginfo_init_trap(&ksi);
674 		ksi.ksi_signo = SIGBUS;
675 		ksi.ksi_code = BUS_OBJERR;
676 		ksi.ksi_trapno = T_PROTFLT;
677 		ksi.ksi_addr = (void *)regs->tf_eip;
678 		trapsignal(td, &ksi);
679 		return (EINVAL);
680 	}
681 
682 	lmask.__mask = frame.sf_sc.sc_mask;
683 	linux_to_bsd_sigset(&lmask, &bmask);
684 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
685 
686 	/* Restore signal context. */
687 	/* %gs was restored by the trampoline. */
688 	regs->tf_fs     = frame.sf_sc.sc_fs;
689 	regs->tf_es     = frame.sf_sc.sc_es;
690 	regs->tf_ds     = frame.sf_sc.sc_ds;
691 	regs->tf_edi    = frame.sf_sc.sc_edi;
692 	regs->tf_esi    = frame.sf_sc.sc_esi;
693 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
694 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
695 	regs->tf_edx    = frame.sf_sc.sc_edx;
696 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
697 	regs->tf_eax    = frame.sf_sc.sc_eax;
698 	regs->tf_eip    = frame.sf_sc.sc_eip;
699 	regs->tf_cs     = frame.sf_sc.sc_cs;
700 	regs->tf_eflags = eflags;
701 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
702 	regs->tf_ss     = frame.sf_sc.sc_ss;
703 
704 	return (EJUSTRETURN);
705 }
706 
707 /*
708  * System call to cleanup state after a signal
709  * has been taken.  Reset signal mask and
710  * stack state from context left by rt_sendsig (above).
711  * Return to previous pc and psl as specified by
712  * context left by sendsig. Check carefully to
713  * make sure that the user has not modified the
714  * psl to gain improper privileges or to cause
715  * a machine fault.
716  */
717 int
718 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
719 {
720 	struct l_ucontext uc;
721 	struct l_sigcontext *context;
722 	sigset_t bmask;
723 	l_stack_t *lss;
724 	stack_t ss;
725 	struct trapframe *regs;
726 	int eflags;
727 	ksiginfo_t ksi;
728 
729 	regs = td->td_frame;
730 
731 #ifdef DEBUG
732 	if (ldebug(rt_sigreturn))
733 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
734 #endif
735 	/*
736 	 * The trampoline code hands us the ucontext.
737 	 * It is unsafe to keep track of it ourselves, in the event that a
738 	 * program jumps out of a signal handler.
739 	 */
740 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
741 		return (EFAULT);
742 
743 	context = &uc.uc_mcontext;
744 
745 	/* Check for security violations. */
746 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
747 	eflags = context->sc_eflags;
748 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
749 		return (EINVAL);
750 
751 	/*
752 	 * Don't allow users to load a valid privileged %cs.  Let the
753 	 * hardware check for invalid selectors, excess privilege in
754 	 * other selectors, invalid %eip's and invalid %esp's.
755 	 */
756 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
757 	if (!CS_SECURE(context->sc_cs)) {
758 		ksiginfo_init_trap(&ksi);
759 		ksi.ksi_signo = SIGBUS;
760 		ksi.ksi_code = BUS_OBJERR;
761 		ksi.ksi_trapno = T_PROTFLT;
762 		ksi.ksi_addr = (void *)regs->tf_eip;
763 		trapsignal(td, &ksi);
764 		return (EINVAL);
765 	}
766 
767 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
768 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
769 
770 	/* Restore signal context. */
771 	/* %gs was restored by the trampoline. */
772 	regs->tf_fs     = context->sc_fs;
773 	regs->tf_es     = context->sc_es;
774 	regs->tf_ds     = context->sc_ds;
775 	regs->tf_edi    = context->sc_edi;
776 	regs->tf_esi    = context->sc_esi;
777 	regs->tf_ebp    = context->sc_ebp;
778 	regs->tf_ebx    = context->sc_ebx;
779 	regs->tf_edx    = context->sc_edx;
780 	regs->tf_ecx    = context->sc_ecx;
781 	regs->tf_eax    = context->sc_eax;
782 	regs->tf_eip    = context->sc_eip;
783 	regs->tf_cs     = context->sc_cs;
784 	regs->tf_eflags = eflags;
785 	regs->tf_esp    = context->sc_esp_at_signal;
786 	regs->tf_ss     = context->sc_ss;
787 
788 	/* Call sigaltstack & ignore results. */
789 	lss = &uc.uc_stack;
790 	ss.ss_sp = lss->ss_sp;
791 	ss.ss_size = lss->ss_size;
792 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
793 
794 #ifdef DEBUG
795 	if (ldebug(rt_sigreturn))
796 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
797 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
798 #endif
799 	(void)kern_sigaltstack(td, &ss, NULL);
800 
801 	return (EJUSTRETURN);
802 }
803 
804 static int
805 linux_fetch_syscall_args(struct thread *td)
806 {
807 	struct proc *p;
808 	struct trapframe *frame;
809 	struct syscall_args *sa;
810 
811 	p = td->td_proc;
812 	frame = td->td_frame;
813 	sa = &td->td_sa;
814 
815 	sa->code = frame->tf_eax;
816 	sa->args[0] = frame->tf_ebx;
817 	sa->args[1] = frame->tf_ecx;
818 	sa->args[2] = frame->tf_edx;
819 	sa->args[3] = frame->tf_esi;
820 	sa->args[4] = frame->tf_edi;
821 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
822 
823 	if (sa->code >= p->p_sysent->sv_size)
824 		/* nosys */
825 		sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1];
826 	else
827 		sa->callp = &p->p_sysent->sv_table[sa->code];
828 	sa->narg = sa->callp->sy_narg;
829 
830 	td->td_retval[0] = 0;
831 	td->td_retval[1] = frame->tf_edx;
832 
833 	return (0);
834 }
835 
836 /*
837  * If a Linux binary is exec'ing something, try this image activator
838  * first.  We override standard shell script execution in order to
839  * be able to modify the interpreter path.  We only do this if a Linux
840  * binary is doing the exec, so we do not create an EXEC module for it.
841  */
842 static int	exec_linux_imgact_try(struct image_params *iparams);
843 
844 static int
845 exec_linux_imgact_try(struct image_params *imgp)
846 {
847     const char *head = (const char *)imgp->image_header;
848     char *rpath;
849     int error = -1;
850 
851     /*
852      * The interpreter for shell scripts run from a Linux binary needs
853      * to be located in /compat/linux if possible in order to recursively
854      * maintain Linux path emulation.
855      */
856     if (((const short *)head)[0] == SHELLMAGIC) {
857 	    /*
858 	     * Run our normal shell image activator.  If it succeeds attempt
859 	     * to use the alternate path for the interpreter.  If an alternate
860 	     * path is found, use our stringspace to store it.
861 	     */
862 	    if ((error = exec_shell_imgact(imgp)) == 0) {
863 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
864 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
865 		    if (rpath != NULL)
866 			    imgp->args->fname_buf =
867 				imgp->interpreter_name = rpath;
868 	    }
869     }
870     return (error);
871 }
872 
873 /*
874  * exec_setregs may initialize some registers differently than Linux
875  * does, thus potentially confusing Linux binaries. If necessary, we
876  * override the exec_setregs default(s) here.
877  */
878 static void
879 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
880 {
881 	struct pcb *pcb = td->td_pcb;
882 
883 	exec_setregs(td, imgp, stack);
884 
885 	/* Linux sets %gs to 0, we default to _udatasel. */
886 	pcb->pcb_gs = 0;
887 	load_gs(0);
888 
889 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
890 }
891 
892 static void
893 linux_get_machine(const char **dst)
894 {
895 
896 	switch (cpu_class) {
897 	case CPUCLASS_686:
898 		*dst = "i686";
899 		break;
900 	case CPUCLASS_586:
901 		*dst = "i586";
902 		break;
903 	case CPUCLASS_486:
904 		*dst = "i486";
905 		break;
906 	default:
907 		*dst = "i386";
908 	}
909 }
910 
911 struct sysentvec linux_sysvec = {
912 	.sv_size	= LINUX_SYS_MAXSYSCALL,
913 	.sv_table	= linux_sysent,
914 	.sv_mask	= 0,
915 	.sv_errsize	= ELAST + 1,
916 	.sv_errtbl	= bsd_to_linux_errno,
917 	.sv_transtrap	= translate_traps,
918 	.sv_fixup	= linux_fixup,
919 	.sv_sendsig	= linux_sendsig,
920 	.sv_sigcode	= &_binary_linux_locore_o_start,
921 	.sv_szsigcode	= &linux_szsigcode,
922 	.sv_name	= "Linux a.out",
923 	.sv_coredump	= NULL,
924 	.sv_imgact_try	= exec_linux_imgact_try,
925 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
926 	.sv_pagesize	= PAGE_SIZE,
927 	.sv_minuser	= VM_MIN_ADDRESS,
928 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
929 	.sv_usrstack	= LINUX_USRSTACK,
930 	.sv_psstrings	= PS_STRINGS,
931 	.sv_stackprot	= VM_PROT_ALL,
932 	.sv_copyout_strings = exec_copyout_strings,
933 	.sv_setregs	= exec_linux_setregs,
934 	.sv_fixlimit	= NULL,
935 	.sv_maxssiz	= NULL,
936 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
937 	.sv_set_syscall_retval = cpu_set_syscall_retval,
938 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
939 	.sv_syscallnames = NULL,
940 	.sv_shared_page_base = LINUX_SHAREDPAGE,
941 	.sv_shared_page_len = PAGE_SIZE,
942 	.sv_schedtail	= linux_schedtail,
943 	.sv_thread_detach = linux_thread_detach,
944 	.sv_trap	= NULL,
945 };
946 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
947 
948 struct sysentvec elf_linux_sysvec = {
949 	.sv_size	= LINUX_SYS_MAXSYSCALL,
950 	.sv_table	= linux_sysent,
951 	.sv_mask	= 0,
952 	.sv_errsize	= ELAST + 1,
953 	.sv_errtbl	= bsd_to_linux_errno,
954 	.sv_transtrap	= translate_traps,
955 	.sv_fixup	= elf_linux_fixup,
956 	.sv_sendsig	= linux_sendsig,
957 	.sv_sigcode	= &_binary_linux_locore_o_start,
958 	.sv_szsigcode	= &linux_szsigcode,
959 	.sv_name	= "Linux ELF",
960 	.sv_coredump	= elf32_coredump,
961 	.sv_imgact_try	= exec_linux_imgact_try,
962 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
963 	.sv_pagesize	= PAGE_SIZE,
964 	.sv_minuser	= VM_MIN_ADDRESS,
965 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
966 	.sv_usrstack	= LINUX_USRSTACK,
967 	.sv_psstrings	= LINUX_PS_STRINGS,
968 	.sv_stackprot	= VM_PROT_ALL,
969 	.sv_copyout_strings = linux_copyout_strings,
970 	.sv_setregs	= exec_linux_setregs,
971 	.sv_fixlimit	= NULL,
972 	.sv_maxssiz	= NULL,
973 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
974 	.sv_set_syscall_retval = cpu_set_syscall_retval,
975 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
976 	.sv_syscallnames = NULL,
977 	.sv_shared_page_base = LINUX_SHAREDPAGE,
978 	.sv_shared_page_len = PAGE_SIZE,
979 	.sv_schedtail	= linux_schedtail,
980 	.sv_thread_detach = linux_thread_detach,
981 	.sv_trap	= NULL,
982 };
983 
984 static void
985 linux_vdso_install(void *param)
986 {
987 
988 	linux_szsigcode = (&_binary_linux_locore_o_end -
989 	    &_binary_linux_locore_o_start);
990 
991 	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
992 		panic("Linux invalid vdso size\n");
993 
994 	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
995 
996 	linux_shared_page_obj = __elfN(linux_shared_page_init)
997 	    (&linux_shared_page_mapping);
998 
999 	__elfN(linux_vdso_reloc)(&elf_linux_sysvec);
1000 
1001 	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1002 	    linux_szsigcode);
1003 	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1004 }
1005 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1006     (sysinit_cfunc_t)linux_vdso_install, NULL);
1007 
1008 static void
1009 linux_vdso_deinstall(void *param)
1010 {
1011 
1012 	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
1013 };
1014 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1015     (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1016 
/*
 * Vendor string of the brand note; its size, including the terminating
 * NUL, is used as the note's n_namesz.
 */
static char GNU_ABI_VENDOR[] = "GNU";
/* Value the first descriptor word must have for linux_trans_osrel(). */
static int GNULINUX_ABI_DESC = 0;
1019 
1020 static bool
1021 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1022 {
1023 	const Elf32_Word *desc;
1024 	uintptr_t p;
1025 
1026 	p = (uintptr_t)(note + 1);
1027 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1028 
1029 	desc = (const Elf32_Word *)p;
1030 	if (desc[0] != GNULINUX_ABI_DESC)
1031 		return (false);
1032 
1033 	/*
1034 	 * For Linux we encode osrel as follows (see linux_mib.c):
1035 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1036 	 */
1037 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1038 
1039 	return (true);
1040 }
1041 
1042 static Elf_Brandnote linux_brandnote = {
1043 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1044 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1045 	.hdr.n_type	= 1,
1046 	.vendor		= GNU_ABI_VENDOR,
1047 	.flags		= BN_TRANSLATE_OSREL,
1048 	.trans_osrel	= linux_trans_osrel
1049 };
1050 
1051 static Elf32_Brandinfo linux_brand = {
1052 	.brand		= ELFOSABI_LINUX,
1053 	.machine	= EM_386,
1054 	.compat_3_brand	= "Linux",
1055 	.emul_path	= "/compat/linux",
1056 	.interp_path	= "/lib/ld-linux.so.1",
1057 	.sysvec		= &elf_linux_sysvec,
1058 	.interp_newpath	= NULL,
1059 	.brand_note	= &linux_brandnote,
1060 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1061 };
1062 
1063 static Elf32_Brandinfo linux_glibc2brand = {
1064 	.brand		= ELFOSABI_LINUX,
1065 	.machine	= EM_386,
1066 	.compat_3_brand	= "Linux",
1067 	.emul_path	= "/compat/linux",
1068 	.interp_path	= "/lib/ld-linux.so.2",
1069 	.sysvec		= &elf_linux_sysvec,
1070 	.interp_newpath	= NULL,
1071 	.brand_note	= &linux_brandnote,
1072 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1073 };
1074 
1075 static Elf32_Brandinfo linux_muslbrand = {
1076 	.brand		= ELFOSABI_LINUX,
1077 	.machine	= EM_386,
1078 	.compat_3_brand	= "Linux",
1079 	.emul_path	= "/compat/linux",
1080 	.interp_path	= "/lib/ld-musl-i386.so.1",
1081 	.sysvec		= &elf_linux_sysvec,
1082 	.interp_newpath	= NULL,
1083 	.brand_note	= &linux_brandnote,
1084 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1085 };
1086 
1087 Elf32_Brandinfo *linux_brandlist[] = {
1088 	&linux_brand,
1089 	&linux_glibc2brand,
1090 	&linux_muslbrand,
1091 	NULL
1092 };
1093 
1094 static int
1095 linux_elf_modevent(module_t mod, int type, void *data)
1096 {
1097 	Elf32_Brandinfo **brandinfo;
1098 	int error;
1099 	struct linux_ioctl_handler **lihp;
1100 
1101 	error = 0;
1102 
1103 	switch(type) {
1104 	case MOD_LOAD:
1105 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1106 		     ++brandinfo)
1107 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1108 				error = EINVAL;
1109 		if (error == 0) {
1110 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1111 				linux_ioctl_register_handler(*lihp);
1112 			LIST_INIT(&futex_list);
1113 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1114 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1115 			      NULL, 1000);
1116 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1117 			      NULL, 1000);
1118 			linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1119 			    linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1120 			linux_get_machine(&linux_kplatform);
1121 			linux_szplatform = roundup(strlen(linux_kplatform) + 1,
1122 			    sizeof(char *));
1123 			linux_osd_jail_register();
1124 			stclohz = (stathz ? stathz : hz);
1125 			if (bootverbose)
1126 				printf("Linux ELF exec handler installed\n");
1127 		} else
1128 			printf("cannot insert Linux ELF brand handler\n");
1129 		break;
1130 	case MOD_UNLOAD:
1131 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1132 		     ++brandinfo)
1133 			if (elf32_brand_inuse(*brandinfo))
1134 				error = EBUSY;
1135 		if (error == 0) {
1136 			for (brandinfo = &linux_brandlist[0];
1137 			     *brandinfo != NULL; ++brandinfo)
1138 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1139 					error = EINVAL;
1140 		}
1141 		if (error == 0) {
1142 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1143 				linux_ioctl_unregister_handler(*lihp);
1144 			mtx_destroy(&futex_mtx);
1145 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1146 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1147 			EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1148 			linux_osd_jail_deregister();
1149 			if (bootverbose)
1150 				printf("Linux ELF exec handler removed\n");
1151 		} else
1152 			printf("Could not deinstall ELF interpreter entry\n");
1153 		break;
1154 	default:
1155 		return (EOPNOTSUPP);
1156 	}
1157 	return (error);
1158 }
1159 
1160 static moduledata_t linux_elf_mod = {
1161 	"linuxelf",
1162 	linux_elf_modevent,
1163 	0
1164 };
1165 
1166 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1167 FEATURE(linux, "Linux 32bit support");
1168