1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
6  * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_FREEBSD32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
71 
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
76 
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_emul.h>
80 #include <compat/linux/linux_futex.h>
81 #include <compat/linux/linux_ioctl.h>
82 #include <compat/linux/linux_mib.h>
83 #include <compat/linux/linux_misc.h>
84 #include <compat/linux/linux_signal.h>
85 #include <compat/linux/linux_util.h>
86 #include <compat/linux/linux_vdso.h>
87 
/* Register this module under the name "linux" with version 1. */
MODULE_VERSION(linux, 1);

/*
 * Emit one 32-bit auxiliary-vector pair: store id, then val, through pos
 * with suword32(), post-incrementing pos after each word.  pos is
 * therefore modified twice and must be a plain pointer lvalue.  The
 * suword32() results are discarded, so a failed store goes unnoticed.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32(pos++, id);	\
		suword32(pos++, val);	\
	} while (0)

/*
 * The two characters "#!" as they appear when the first two bytes of an
 * image header are read as one 16-bit integer in host byte order (see
 * exec_linux_imgact_try()).
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif
101 
/*
 * Allow the sendsig functions to use the ldebug() facility even though
 * they are not syscalls themselves, by mapping them to syscall 0.  This is
 * slightly less bogus than using ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/* Not referenced in this part of the file; presumably set up elsewhere. */
const char *linux_kplatform;
/* Size of the signal trampoline code; published through sv_szsigcode. */
static int linux_szsigcode;
/* Shared-page bookkeeping; not used in this part of the file. */
static vm_object_t linux_shared_page_obj;
static char *linux_shared_page_mapping;
/*
 * Bounds of the embedded linux32_locore object.  The start address is what
 * the sysentvec publishes as sv_sigcode.
 */
extern char _binary_linux32_locore_o_start;
extern char _binary_linux32_locore_o_end;

/* System call table for this ABI; installed as sv_table. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/* Forward declarations for the file-local sysentvec callbacks and helpers. */
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td,
				   struct image_params *imgp, u_long stack);
static void	linux32_fixlimit(struct rlimit *rl, int which);
static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void	linux_vdso_install(void *param);
static void	linux_vdso_deinstall(void *param);
132 
/*
 * Linux syscalls return negative errno values; FreeBSD uses positive ones.
 * This table is indexed by the FreeBSD errno and yields the negated Linux
 * errno.  It is installed as sv_errtbl with sv_errsize = ELAST + 1.
 *
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 *
 * NOTE(review): there are 93 explicit initializers (indices 0..92).  If
 * ELAST is larger than 92, the remaining slots are implicitly zero, i.e.
 * those FreeBSD errors would translate to "no error" - confirm against
 * the current value of ELAST.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	/*   0 -   9 */
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
	/*  10 -  19 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	/*  20 -  29 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	/*  30 -  39 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	/*  40 -  49 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	/*  50 -  59 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	/*  60 -  69 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	/*  70 -  79 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
	/*  80 -  89 */
	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
	/*  90 -  92 */
	 -72, -67, -71
};
152 
/*
 * FreeBSD -> Linux signal number translation, installed as sv_sigtbl.
 * The sendsig routines index it with _SIG_IDX(sig) for sig <= sv_sigsize.
 * The two 0 entries (indices 6 and 28) are FreeBSD signals for which no
 * Linux number is supplied here.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	/*  0 -  3 */
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	/*  4 -  7 */
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	/*  8 - 11 */
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	/* 12 - 15 */
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	/* 16 - 19 */
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	/* 20 - 23 */
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	/* 24 - 27 */
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	/* 28 - 30 */
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
163 
/*
 * Linux -> FreeBSD signal number translation; the reverse direction of
 * bsd_to_linux_signal.  Presumably indexed the same way (signal number
 * minus one) - the consumers are outside this part of the file.
 * Note that SIGBUS and SIGURG each appear twice, so the mapping is not
 * one-to-one.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	/*  0 -  3 */
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	/*  4 -  7 */
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	/*  8 - 11 */
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	/* 12 - 15 */
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	/* 16 - 19 */
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	/* 20 - 23 */
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	/* 24 - 27 */
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	/* 28 - 30 */
	SIGIO, SIGURG, SIGSYS
};
174 
/* Value reported as the Linux trap number when no mapping exists. */
#define LINUX_T_UNKNOWN  255
/*
 * FreeBSD trap code -> Linux trap number, indexed by the FreeBSD code.
 * The comment on each row gives the index and, where one is mapped, the
 * FreeBSD T_* name.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup in the table above; anything past the end yields
 * LINUX_T_UNKNOWN.  code is evaluated twice.  Because the bound is a
 * sizeof expression the comparison is unsigned, so a negative int argument
 * also fails the check and maps to LINUX_T_UNKNOWN.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)
213 
/*
 * 32-bit layout of the argument/environment descriptor that
 * linux_copyout_strings() stores at LINUX32_PS_STRINGS, one field at a
 * time with suword32().
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
	u_int ps_nargvstr;	/* the number of argument strings */
	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
	u_int ps_nenvstr;	/* the number of environment strings */
};
220 
/*
 * Symbols declared through the Linux VDSO helper macros.  Within this file
 * linux32_sigcode and linux32_rt_sigcode are loaded into %rip when a signal
 * handler is entered, linux32_vsyscall is exported as LINUX_AT_SYSINFO and
 * linux_platform as LINUX_AT_PLATFORM.
 */
LINUX_VDSO_SYM_INTPTR(linux32_sigcode);
LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux32_vsyscall);
LINUX_VDSO_SYM_CHAR(linux_platform);
225 
226 /*
227  * If FreeBSD & Linux have a difference of opinion about what a trap
228  * means, deal with it here.
229  *
230  * MPSAFE
231  */
232 static int
233 translate_traps(int signal, int trap_code)
234 {
235 	if (signal != SIGBUS)
236 		return signal;
237 	switch (trap_code) {
238 	case T_PROTFLT:
239 	case T_TSSFLT:
240 	case T_DOUBLEFLT:
241 	case T_PAGEFLT:
242 		return SIGSEGV;
243 	default:
244 		return signal;
245 	}
246 }
247 
248 static int
249 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
250 {
251 	Elf32_Auxargs *args;
252 	Elf32_Addr *base;
253 	Elf32_Addr *pos;
254 	struct linux32_ps_strings *arginfo;
255 
256 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
257 
258 	KASSERT(curthread->td_proc == imgp->proc,
259 	    ("unsafe elf_linux_fixup(), should be curproc"));
260 	base = (Elf32_Addr *)*stack_base;
261 	args = (Elf32_Auxargs *)imgp->auxargs;
262 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
263 
264 	AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR,
265 	    imgp->proc->p_sysent->sv_shared_page_base);
266 	AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall);
267 	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
268 
269 	/*
270 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
271 	 * as it has appeared in the 2.4.0-rc7 first time.
272 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
273 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
274 	 * is not present.
275 	 * Also see linux_times() implementation.
276 	 */
277 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
278 		AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
279 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
280 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
281 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
282 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
283 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
284 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
285 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
286 	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
287 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
288 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
289 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
290 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
291 	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform));
292 	if (args->execfd != -1)
293 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
294 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
295 
296 	free(imgp->auxargs, M_TEMP);
297 	imgp->auxargs = NULL;
298 
299 	base--;
300 	suword32(base, (uint32_t)imgp->args->argc);
301 	*stack_base = (register_t *)base;
302 	return (0);
303 }
304 
305 static void
306 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
307 {
308 	struct thread *td = curthread;
309 	struct proc *p = td->td_proc;
310 	struct sigacts *psp;
311 	struct trapframe *regs;
312 	struct l_rt_sigframe *fp, frame;
313 	int oonstack;
314 	int sig;
315 	int code;
316 
317 	sig = ksi->ksi_signo;
318 	code = ksi->ksi_code;
319 	PROC_LOCK_ASSERT(p, MA_OWNED);
320 	psp = p->p_sigacts;
321 	mtx_assert(&psp->ps_mtx, MA_OWNED);
322 	regs = td->td_frame;
323 	oonstack = sigonstack(regs->tf_rsp);
324 
325 #ifdef DEBUG
326 	if (ldebug(rt_sendsig))
327 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
328 		    catcher, sig, (void*)mask, code);
329 #endif
330 	/*
331 	 * Allocate space for the signal handler context.
332 	 */
333 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
334 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
335 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
336 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
337 	} else
338 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
339 	mtx_unlock(&psp->ps_mtx);
340 
341 	/*
342 	 * Build the argument list for the signal handler.
343 	 */
344 	if (p->p_sysent->sv_sigtbl)
345 		if (sig <= p->p_sysent->sv_sigsize)
346 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
347 
348 	bzero(&frame, sizeof(frame));
349 
350 	frame.sf_handler = PTROUT(catcher);
351 	frame.sf_sig = sig;
352 	frame.sf_siginfo = PTROUT(&fp->sf_si);
353 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
354 
355 	/* Fill in POSIX parts */
356 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
357 
358 	/*
359 	 * Build the signal context to be used by sigreturn
360 	 * and libgcc unwind.
361 	 */
362 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
363 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
364 
365 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
366 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
367 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
368 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
369 	PROC_UNLOCK(p);
370 
371 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
372 
373 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
374 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
375 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
376 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
377 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
378 	frame.sf_sc.uc_mcontext.sc_esp    = regs->tf_rsp;
379 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
380 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
381 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
382 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
383 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
384 	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
385 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
386 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
387 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
388 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
389 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
390 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
391 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
392 	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
393 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
394 
395 #ifdef DEBUG
396 	if (ldebug(rt_sendsig))
397 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
398 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
399 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
400 #endif
401 
402 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
403 		/*
404 		 * Process has trashed its stack; give it an illegal
405 		 * instruction to halt it in its tracks.
406 		 */
407 #ifdef DEBUG
408 		if (ldebug(rt_sendsig))
409 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
410 			    fp, oonstack);
411 #endif
412 		PROC_LOCK(p);
413 		sigexit(td, SIGILL);
414 	}
415 
416 	/*
417 	 * Build context to run handler in.
418 	 */
419 	regs->tf_rsp = PTROUT(fp);
420 	regs->tf_rip = linux32_rt_sigcode;
421 	regs->tf_rflags &= ~(PSL_T | PSL_D);
422 	regs->tf_cs = _ucode32sel;
423 	regs->tf_ss = _udatasel;
424 	regs->tf_ds = _udatasel;
425 	regs->tf_es = _udatasel;
426 	regs->tf_fs = _ufssel;
427 	regs->tf_gs = _ugssel;
428 	regs->tf_flags = TF_HASSEGS;
429 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
430 	PROC_LOCK(p);
431 	mtx_lock(&psp->ps_mtx);
432 }
433 
434 
435 /*
436  * Send an interrupt to process.
437  *
438  * Stack is set up to allow sigcode stored
439  * in u. to call routine, followed by kcall
440  * to sigreturn routine below.  After sigreturn
441  * resets the signal mask, the stack, and the
442  * frame pointer, it returns to the user
443  * specified pc, psl.
444  */
445 static void
446 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
447 {
448 	struct thread *td = curthread;
449 	struct proc *p = td->td_proc;
450 	struct sigacts *psp;
451 	struct trapframe *regs;
452 	struct l_sigframe *fp, frame;
453 	l_sigset_t lmask;
454 	int oonstack, i;
455 	int sig, code;
456 
457 	sig = ksi->ksi_signo;
458 	code = ksi->ksi_code;
459 	PROC_LOCK_ASSERT(p, MA_OWNED);
460 	psp = p->p_sigacts;
461 	mtx_assert(&psp->ps_mtx, MA_OWNED);
462 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
463 		/* Signal handler installed with SA_SIGINFO. */
464 		linux_rt_sendsig(catcher, ksi, mask);
465 		return;
466 	}
467 
468 	regs = td->td_frame;
469 	oonstack = sigonstack(regs->tf_rsp);
470 
471 #ifdef DEBUG
472 	if (ldebug(sendsig))
473 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
474 		    catcher, sig, (void*)mask, code);
475 #endif
476 
477 	/*
478 	 * Allocate space for the signal handler context.
479 	 */
480 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
481 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
482 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
483 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
484 	} else
485 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
486 	mtx_unlock(&psp->ps_mtx);
487 	PROC_UNLOCK(p);
488 
489 	/*
490 	 * Build the argument list for the signal handler.
491 	 */
492 	if (p->p_sysent->sv_sigtbl)
493 		if (sig <= p->p_sysent->sv_sigsize)
494 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
495 
496 	bzero(&frame, sizeof(frame));
497 
498 	frame.sf_handler = PTROUT(catcher);
499 	frame.sf_sig = sig;
500 
501 	bsd_to_linux_sigset(mask, &lmask);
502 
503 	/*
504 	 * Build the signal context to be used by sigreturn.
505 	 */
506 	frame.sf_sc.sc_mask   = lmask.__bits[0];
507 	frame.sf_sc.sc_gs     = regs->tf_gs;
508 	frame.sf_sc.sc_fs     = regs->tf_fs;
509 	frame.sf_sc.sc_es     = regs->tf_es;
510 	frame.sf_sc.sc_ds     = regs->tf_ds;
511 	frame.sf_sc.sc_edi    = regs->tf_rdi;
512 	frame.sf_sc.sc_esi    = regs->tf_rsi;
513 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
514 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
515 	frame.sf_sc.sc_esp    = regs->tf_rsp;
516 	frame.sf_sc.sc_edx    = regs->tf_rdx;
517 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
518 	frame.sf_sc.sc_eax    = regs->tf_rax;
519 	frame.sf_sc.sc_eip    = regs->tf_rip;
520 	frame.sf_sc.sc_cs     = regs->tf_cs;
521 	frame.sf_sc.sc_eflags = regs->tf_rflags;
522 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
523 	frame.sf_sc.sc_ss     = regs->tf_ss;
524 	frame.sf_sc.sc_err    = regs->tf_err;
525 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
526 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
527 
528 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
529 		frame.sf_extramask[i] = lmask.__bits[i+1];
530 
531 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
532 		/*
533 		 * Process has trashed its stack; give it an illegal
534 		 * instruction to halt it in its tracks.
535 		 */
536 		PROC_LOCK(p);
537 		sigexit(td, SIGILL);
538 	}
539 
540 	/*
541 	 * Build context to run handler in.
542 	 */
543 	regs->tf_rsp = PTROUT(fp);
544 	regs->tf_rip = linux32_sigcode;
545 	regs->tf_rflags &= ~(PSL_T | PSL_D);
546 	regs->tf_cs = _ucode32sel;
547 	regs->tf_ss = _udatasel;
548 	regs->tf_ds = _udatasel;
549 	regs->tf_es = _udatasel;
550 	regs->tf_fs = _ufssel;
551 	regs->tf_gs = _ugssel;
552 	regs->tf_flags = TF_HASSEGS;
553 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
554 	PROC_LOCK(p);
555 	mtx_lock(&psp->ps_mtx);
556 }
557 
558 /*
559  * System call to cleanup state after a signal
560  * has been taken.  Reset signal mask and
561  * stack state from context left by sendsig (above).
562  * Return to previous pc and psl as specified by
563  * context left by sendsig. Check carefully to
564  * make sure that the user has not modified the
565  * psl to gain improper privileges or to cause
566  * a machine fault.
567  */
568 int
569 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
570 {
571 	struct l_sigframe frame;
572 	struct trapframe *regs;
573 	sigset_t bmask;
574 	l_sigset_t lmask;
575 	int eflags, i;
576 	ksiginfo_t ksi;
577 
578 	regs = td->td_frame;
579 
580 #ifdef DEBUG
581 	if (ldebug(sigreturn))
582 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
583 #endif
584 	/*
585 	 * The trampoline code hands us the sigframe.
586 	 * It is unsafe to keep track of it ourselves, in the event that a
587 	 * program jumps out of a signal handler.
588 	 */
589 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
590 		return (EFAULT);
591 
592 	/*
593 	 * Check for security violations.
594 	 */
595 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
596 	eflags = frame.sf_sc.sc_eflags;
597 	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
598 		return(EINVAL);
599 
600 	/*
601 	 * Don't allow users to load a valid privileged %cs.  Let the
602 	 * hardware check for invalid selectors, excess privilege in
603 	 * other selectors, invalid %eip's and invalid %esp's.
604 	 */
605 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
606 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
607 		ksiginfo_init_trap(&ksi);
608 		ksi.ksi_signo = SIGBUS;
609 		ksi.ksi_code = BUS_OBJERR;
610 		ksi.ksi_trapno = T_PROTFLT;
611 		ksi.ksi_addr = (void *)regs->tf_rip;
612 		trapsignal(td, &ksi);
613 		return(EINVAL);
614 	}
615 
616 	lmask.__bits[0] = frame.sf_sc.sc_mask;
617 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
618 		lmask.__bits[i+1] = frame.sf_extramask[i];
619 	linux_to_bsd_sigset(&lmask, &bmask);
620 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
621 
622 	/*
623 	 * Restore signal context.
624 	 */
625 	regs->tf_rdi    = frame.sf_sc.sc_edi;
626 	regs->tf_rsi    = frame.sf_sc.sc_esi;
627 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
628 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
629 	regs->tf_rdx    = frame.sf_sc.sc_edx;
630 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
631 	regs->tf_rax    = frame.sf_sc.sc_eax;
632 	regs->tf_rip    = frame.sf_sc.sc_eip;
633 	regs->tf_cs     = frame.sf_sc.sc_cs;
634 	regs->tf_ds     = frame.sf_sc.sc_ds;
635 	regs->tf_es     = frame.sf_sc.sc_es;
636 	regs->tf_fs     = frame.sf_sc.sc_fs;
637 	regs->tf_gs     = frame.sf_sc.sc_gs;
638 	regs->tf_rflags = eflags;
639 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
640 	regs->tf_ss     = frame.sf_sc.sc_ss;
641 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
642 
643 	return (EJUSTRETURN);
644 }
645 
646 /*
647  * System call to cleanup state after a signal
648  * has been taken.  Reset signal mask and
649  * stack state from context left by rt_sendsig (above).
650  * Return to previous pc and psl as specified by
651  * context left by sendsig. Check carefully to
652  * make sure that the user has not modified the
653  * psl to gain improper privileges or to cause
654  * a machine fault.
655  */
656 int
657 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
658 {
659 	struct l_ucontext uc;
660 	struct l_sigcontext *context;
661 	sigset_t bmask;
662 	l_stack_t *lss;
663 	stack_t ss;
664 	struct trapframe *regs;
665 	int eflags;
666 	ksiginfo_t ksi;
667 
668 	regs = td->td_frame;
669 
670 #ifdef DEBUG
671 	if (ldebug(rt_sigreturn))
672 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
673 #endif
674 	/*
675 	 * The trampoline code hands us the ucontext.
676 	 * It is unsafe to keep track of it ourselves, in the event that a
677 	 * program jumps out of a signal handler.
678 	 */
679 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
680 		return (EFAULT);
681 
682 	context = &uc.uc_mcontext;
683 
684 	/*
685 	 * Check for security violations.
686 	 */
687 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
688 	eflags = context->sc_eflags;
689 	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
690 		return(EINVAL);
691 
692 	/*
693 	 * Don't allow users to load a valid privileged %cs.  Let the
694 	 * hardware check for invalid selectors, excess privilege in
695 	 * other selectors, invalid %eip's and invalid %esp's.
696 	 */
697 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
698 	if (!CS_SECURE(context->sc_cs)) {
699 		ksiginfo_init_trap(&ksi);
700 		ksi.ksi_signo = SIGBUS;
701 		ksi.ksi_code = BUS_OBJERR;
702 		ksi.ksi_trapno = T_PROTFLT;
703 		ksi.ksi_addr = (void *)regs->tf_rip;
704 		trapsignal(td, &ksi);
705 		return(EINVAL);
706 	}
707 
708 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
709 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
710 
711 	/*
712 	 * Restore signal context
713 	 */
714 	regs->tf_gs	= context->sc_gs;
715 	regs->tf_fs	= context->sc_fs;
716 	regs->tf_es	= context->sc_es;
717 	regs->tf_ds	= context->sc_ds;
718 	regs->tf_rdi    = context->sc_edi;
719 	regs->tf_rsi    = context->sc_esi;
720 	regs->tf_rbp    = context->sc_ebp;
721 	regs->tf_rbx    = context->sc_ebx;
722 	regs->tf_rdx    = context->sc_edx;
723 	regs->tf_rcx    = context->sc_ecx;
724 	regs->tf_rax    = context->sc_eax;
725 	regs->tf_rip    = context->sc_eip;
726 	regs->tf_cs     = context->sc_cs;
727 	regs->tf_rflags = eflags;
728 	regs->tf_rsp    = context->sc_esp_at_signal;
729 	regs->tf_ss     = context->sc_ss;
730 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
731 
732 	/*
733 	 * call sigaltstack & ignore results..
734 	 */
735 	lss = &uc.uc_stack;
736 	ss.ss_sp = PTRIN(lss->ss_sp);
737 	ss.ss_size = lss->ss_size;
738 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
739 
740 #ifdef DEBUG
741 	if (ldebug(rt_sigreturn))
742 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
743 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
744 #endif
745 	(void)kern_sigaltstack(td, &ss, NULL);
746 
747 	return (EJUSTRETURN);
748 }
749 
750 static int
751 linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
752 {
753 	struct proc *p;
754 	struct trapframe *frame;
755 
756 	p = td->td_proc;
757 	frame = td->td_frame;
758 
759 	sa->args[0] = frame->tf_rbx;
760 	sa->args[1] = frame->tf_rcx;
761 	sa->args[2] = frame->tf_rdx;
762 	sa->args[3] = frame->tf_rsi;
763 	sa->args[4] = frame->tf_rdi;
764 	sa->args[5] = frame->tf_rbp;	/* Unconfirmed */
765 	sa->code = frame->tf_rax;
766 
767 	if (sa->code >= p->p_sysent->sv_size)
768 		sa->callp = &p->p_sysent->sv_table[0];
769 	else
770 		sa->callp = &p->p_sysent->sv_table[sa->code];
771 	sa->narg = sa->callp->sy_narg;
772 
773 	td->td_retval[0] = 0;
774 	td->td_retval[1] = frame->tf_rdx;
775 
776 	return (0);
777 }
778 
779 /*
780  * If a linux binary is exec'ing something, try this image activator
781  * first.  We override standard shell script execution in order to
782  * be able to modify the interpreter path.  We only do this if a linux
783  * binary is doing the exec, so we do not create an EXEC module for it.
784  */
785 static int	exec_linux_imgact_try(struct image_params *iparams);
786 
787 static int
788 exec_linux_imgact_try(struct image_params *imgp)
789 {
790 	const char *head = (const char *)imgp->image_header;
791 	char *rpath;
792 	int error = -1;
793 
794 	/*
795 	* The interpreter for shell scripts run from a linux binary needs
796 	* to be located in /compat/linux if possible in order to recursively
797 	* maintain linux path emulation.
798 	*/
799 	if (((const short *)head)[0] == SHELLMAGIC) {
800 		/*
801 		* Run our normal shell image activator.  If it succeeds attempt
802 		* to use the alternate path for the interpreter.  If an
803 		* alternate * path is found, use our stringspace to store it.
804 		*/
805 		if ((error = exec_shell_imgact(imgp)) == 0) {
806 			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
807 			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
808 			    AT_FDCWD);
809 			if (rpath != NULL)
810 				imgp->args->fname_buf =
811 				    imgp->interpreter_name = rpath;
812 		}
813 	}
814 	return (error);
815 }
816 
817 /*
818  * Clear registers on exec
819  * XXX copied from ia32_signal.c.
820  */
821 static void
822 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
823 {
824 	struct trapframe *regs = td->td_frame;
825 	struct pcb *pcb = td->td_pcb;
826 
827 	mtx_lock(&dt_lock);
828 	if (td->td_proc->p_md.md_ldt != NULL)
829 		user_ldt_free(td);
830 	else
831 		mtx_unlock(&dt_lock);
832 
833 	critical_enter();
834 	wrmsr(MSR_FSBASE, 0);
835 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
836 	pcb->pcb_fsbase = 0;
837 	pcb->pcb_gsbase = 0;
838 	critical_exit();
839 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
840 
841 	bzero((char *)regs, sizeof(struct trapframe));
842 	regs->tf_rip = imgp->entry_addr;
843 	regs->tf_rsp = stack;
844 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
845 	regs->tf_gs = _ugssel;
846 	regs->tf_fs = _ufssel;
847 	regs->tf_es = _udatasel;
848 	regs->tf_ds = _udatasel;
849 	regs->tf_ss = _udatasel;
850 	regs->tf_flags = TF_HASSEGS;
851 	regs->tf_cs = _ucode32sel;
852 	regs->tf_rbx = imgp->ps_strings;
853 
854 	fpstate_drop(td);
855 
856 	/* Do full restore on return so that we can change to a different %cs */
857 	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
858 	td->td_retval[1] = 0;
859 }
860 
861 /*
862  * XXX copied from ia32_sysvec.c.
863  */
864 static register_t *
865 linux_copyout_strings(struct image_params *imgp)
866 {
867 	int argc, envc;
868 	u_int32_t *vectp;
869 	char *stringp, *destp;
870 	u_int32_t *stack_base;
871 	struct linux32_ps_strings *arginfo;
872 
873 	/*
874 	 * Calculate string base and vector table pointers.
875 	 */
876 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
877 	destp =	(caddr_t)arginfo - SPARE_USRSPACE -
878 	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
879 
880 	/*
881 	 * If we have a valid auxargs ptr, prepare some room
882 	 * on the stack.
883 	 */
884 	if (imgp->auxargs) {
885 		/*
886 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
887 		 * lower compatibility.
888 		 */
889 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
890 		    (LINUX_AT_COUNT * 2);
891 		/*
892 		 * The '+ 2' is for the null pointers at the end of each of
893 		 * the arg and env vector sets,and imgp->auxarg_size is room
894 		 * for argument of Runtime loader.
895 		 */
896 		vectp = (u_int32_t *) (destp - (imgp->args->argc +
897 		    imgp->args->envc + 2 + imgp->auxarg_size) *
898 		    sizeof(u_int32_t));
899 
900 	} else
901 		/*
902 		 * The '+ 2' is for the null pointers at the end of each of
903 		 * the arg and env vector sets
904 		 */
905 		vectp = (u_int32_t *)(destp - (imgp->args->argc +
906 		    imgp->args->envc + 2) * sizeof(u_int32_t));
907 
908 	/*
909 	 * vectp also becomes our initial stack base
910 	 */
911 	stack_base = vectp;
912 
913 	stringp = imgp->args->begin_argv;
914 	argc = imgp->args->argc;
915 	envc = imgp->args->envc;
916 	/*
917 	 * Copy out strings - arguments and environment.
918 	 */
919 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
920 
921 	/*
922 	 * Fill in "ps_strings" struct for ps, w, etc.
923 	 */
924 	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
925 	suword32(&arginfo->ps_nargvstr, argc);
926 
927 	/*
928 	 * Fill in argument portion of vector table.
929 	 */
930 	for (; argc > 0; --argc) {
931 		suword32(vectp++, (uint32_t)(intptr_t)destp);
932 		while (*stringp++ != 0)
933 			destp++;
934 		destp++;
935 	}
936 
937 	/* a null vector table pointer separates the argp's from the envp's */
938 	suword32(vectp++, 0);
939 
940 	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
941 	suword32(&arginfo->ps_nenvstr, envc);
942 
943 	/*
944 	 * Fill in environment portion of vector table.
945 	 */
946 	for (; envc > 0; --envc) {
947 		suword32(vectp++, (uint32_t)(intptr_t)destp);
948 		while (*stringp++ != 0)
949 			destp++;
950 		destp++;
951 	}
952 
953 	/* end of vector table is a null pointer */
954 	suword32(vectp, 0);
955 
956 	return ((register_t *)stack_base);
957 }
958 
/*
 * Sysctl subtree for the 32-bit Linux emulation (declared under the
 * _compat parent with the name "linux32").
 */
static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
    "32-bit Linux emulation");

/*
 * Read/write tunables consulted by linux32_fixlimit() to cap the data
 * size, stack size and total virtual memory resource limits.  A value
 * of 0 makes linux32_fixlimit() leave the corresponding limit alone.
 */
static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
    &linux32_maxdsiz, 0, "");
static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
    &linux32_maxssiz, 0, "");
static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
    &linux32_maxvmem, 0, "");
971 
#if defined(DEBUG)
/*
 * String-typed, read/write "debug" sysctl under the linux32 subtree.
 * It is only compiled in when DEBUG is defined; all accesses are
 * handled by linux_sysctl_debug().
 */
SYSCTL_PROC(_compat_linux32, OID_AUTO, debug,
            CTLTYPE_STRING | CTLFLAG_RW,
            0, 0, linux_sysctl_debug, "A",
            "Linux debugging control");
#endif
978 
979 static void
980 linux32_fixlimit(struct rlimit *rl, int which)
981 {
982 
983 	switch (which) {
984 	case RLIMIT_DATA:
985 		if (linux32_maxdsiz != 0) {
986 			if (rl->rlim_cur > linux32_maxdsiz)
987 				rl->rlim_cur = linux32_maxdsiz;
988 			if (rl->rlim_max > linux32_maxdsiz)
989 				rl->rlim_max = linux32_maxdsiz;
990 		}
991 		break;
992 	case RLIMIT_STACK:
993 		if (linux32_maxssiz != 0) {
994 			if (rl->rlim_cur > linux32_maxssiz)
995 				rl->rlim_cur = linux32_maxssiz;
996 			if (rl->rlim_max > linux32_maxssiz)
997 				rl->rlim_max = linux32_maxssiz;
998 		}
999 		break;
1000 	case RLIMIT_VMEM:
1001 		if (linux32_maxvmem != 0) {
1002 			if (rl->rlim_cur > linux32_maxvmem)
1003 				rl->rlim_cur = linux32_maxvmem;
1004 			if (rl->rlim_max > linux32_maxvmem)
1005 				rl->rlim_max = linux32_maxvmem;
1006 		}
1007 		break;
1008 	}
1009 }
1010 
/*
 * System entry vector for the "Linux ELF32" ABI.
 *
 * Both brand entries in this file (linux_brand and linux_glibc2brand)
 * point at this vector.  It is not fully static: linux_vdso_install()
 * computes linux_szsigcode (referenced through sv_szsigcode) and fills
 * in sv_shared_page_obj at SYSINIT time.
 */
struct sysentvec elf_linux_sysvec = {
	.sv_size	= LINUX_SYS_MAXSYSCALL,
	.sv_table	= linux_sysent,
	.sv_mask	= 0,
	.sv_sigsize	= LINUX_SIGTBLSZ,
	.sv_sigtbl	= bsd_to_linux_signal,
	.sv_errsize	= ELAST + 1,
	.sv_errtbl	= bsd_to_linux_errno,
	.sv_transtrap	= translate_traps,
	.sv_fixup	= elf_linux_fixup,
	.sv_sendsig	= linux_sendsig,
	.sv_sigcode	= &_binary_linux32_locore_o_start,
	.sv_szsigcode	= &linux_szsigcode,
	.sv_prepsyscall	= NULL,
	.sv_name	= "Linux ELF32",
	.sv_coredump	= elf32_coredump,
	.sv_imgact_try	= exec_linux_imgact_try,
	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
	.sv_pagesize	= PAGE_SIZE,
	.sv_minuser	= VM_MIN_ADDRESS,
	.sv_maxuser	= LINUX32_MAXUSER,
	.sv_usrstack	= LINUX32_USRSTACK,
	.sv_psstrings	= LINUX32_PS_STRINGS,
	.sv_stackprot	= VM_PROT_ALL,
	.sv_copyout_strings = linux_copyout_strings,
	.sv_setregs	= exec_linux_setregs,
	/* Resource limits are capped by the linux32 sysctl tunables. */
	.sv_fixlimit	= linux32_fixlimit,
	.sv_maxssiz	= &linux32_maxssiz,
	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
	.sv_set_syscall_retval = cpu_set_syscall_retval,
	.sv_fetch_syscall_args = linux32_fetch_syscall_args,
	.sv_syscallnames = NULL,
	/* The shared page is a single page at LINUX32_SHAREDPAGE. */
	.sv_shared_page_base = LINUX32_SHAREDPAGE,
	.sv_shared_page_len = PAGE_SIZE,
	.sv_schedtail	= linux_schedtail,
	.sv_thread_detach = linux_thread_detach,
};
1048 
/*
 * Prepare the shared page used by elf_linux_sysvec.
 *
 * Registered as a SYSINIT below; 'param' is unused.  The steps depend
 * on each other and must stay in this order.
 */
static void
linux_vdso_install(void *param)
{

	/* Size of the embedded image: distance between its bounding symbols. */
	linux_szsigcode = (&_binary_linux32_locore_o_end -
	    &_binary_linux32_locore_o_start);

	/* The image has to fit into the shared page described by the sysvec. */
	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
		panic("Linux invalid vdso size\n");

	/* NOTE(review): presumably fixes up the image in place - confirm. */
	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);

	/* Create the shared page object; its kernel mapping is returned too. */
	linux_shared_page_obj = __elfN(linux_shared_page_init)
	    (&linux_shared_page_mapping);

	/*
	 * NOTE(review): presumably relocates the image for its user address
	 * LINUX32_SHAREDPAGE - confirm against linux_vdso_reloc.
	 */
	__elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE);

	/* Copy the image into the shared page and publish the object. */
	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
	    linux_szsigcode);
	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;

	/*
	 * linux_platform is offset from LINUX32_SHAREDPAGE; apply the same
	 * offset to the kernel mapping to get the kernel-side pointer.
	 */
	linux_kplatform = linux_shared_page_mapping +
	    (linux_platform - (caddr_t)LINUX32_SHAREDPAGE);
}
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
    (sysinit_cfunc_t)linux_vdso_install, NULL);
1075 
1076 static void
1077 linux_vdso_deinstall(void *param)
1078 {
1079 
1080 	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
1081 };
1082 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1083     (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1084 
/*
 * Values used to recognize the ABI note: GNU_ABI_VENDOR is the vendor
 * name used by linux32_brandnote, and GNULINUX_ABI_DESC is the value
 * linux32_trans_osrel() requires in the first descriptor word.
 */
static char GNU_ABI_VENDOR[] = "GNU";
static int GNULINUX_ABI_DESC = 0;
1087 
1088 static boolean_t
1089 linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1090 {
1091 	const Elf32_Word *desc;
1092 	uintptr_t p;
1093 
1094 	p = (uintptr_t)(note + 1);
1095 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1096 
1097 	desc = (const Elf32_Word *)p;
1098 	if (desc[0] != GNULINUX_ABI_DESC)
1099 		return (FALSE);
1100 
1101 	/*
1102 	 * For linux we encode osrel as follows (see linux_mib.c):
1103 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1104 	 */
1105 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1106 
1107 	return (TRUE);
1108 }
1109 
/*
 * Description of the ELF note shared by both Linux brand entries:
 * the name size is that of GNU_ABI_VENDOR (terminating NUL included),
 * the descriptor size is 16 and the note type is 1.  The
 * BN_TRANSLATE_OSREL flag is set together with the
 * linux32_trans_osrel() callback.
 */
static Elf_Brandnote linux32_brandnote = {
	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
	.hdr.n_descsz	= 16,	/* XXX at least 16 */
	.hdr.n_type	= 1,
	.vendor		= GNU_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= linux32_trans_osrel
};
1118 
/*
 * Brand entry for EM_386 Linux binaries whose interpreter is
 * /lib/ld-linux.so.1.  It is identical to linux_glibc2brand except
 * for interp_path.
 */
static Elf32_Brandinfo linux_brand = {
	.brand		= ELFOSABI_LINUX,
	.machine	= EM_386,
	.compat_3_brand	= "Linux",
	.emul_path	= "/compat/linux",
	.interp_path	= "/lib/ld-linux.so.1",
	.sysvec		= &elf_linux_sysvec,
	.interp_newpath	= NULL,
	.brand_note	= &linux32_brandnote,
	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
1130 
/*
 * Brand entry for EM_386 Linux binaries whose interpreter is
 * /lib/ld-linux.so.2.  It is identical to linux_brand except for
 * interp_path.
 */
static Elf32_Brandinfo linux_glibc2brand = {
	.brand		= ELFOSABI_LINUX,
	.machine	= EM_386,
	.compat_3_brand	= "Linux",
	.emul_path	= "/compat/linux",
	.interp_path	= "/lib/ld-linux.so.2",
	.sysvec		= &elf_linux_sysvec,
	.interp_newpath	= NULL,
	.brand_note	= &linux32_brandnote,
	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
1142 
/*
 * NULL-terminated list of the brand entries that linux_elf_modevent()
 * inserts on MOD_LOAD and removes on MOD_UNLOAD.
 */
Elf32_Brandinfo *linux_brandlist[] = {
	&linux_brand,
	&linux_glibc2brand,
	NULL
};
1148 
1149 static int
1150 linux_elf_modevent(module_t mod, int type, void *data)
1151 {
1152 	Elf32_Brandinfo **brandinfo;
1153 	int error;
1154 	struct linux_ioctl_handler **lihp;
1155 
1156 	error = 0;
1157 
1158 	switch(type) {
1159 	case MOD_LOAD:
1160 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1161 		     ++brandinfo)
1162 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1163 				error = EINVAL;
1164 		if (error == 0) {
1165 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1166 				linux_ioctl_register_handler(*lihp);
1167 			LIST_INIT(&futex_list);
1168 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1169 			stclohz = (stathz ? stathz : hz);
1170 			if (bootverbose)
1171 				printf("Linux ELF exec handler installed\n");
1172 		} else
1173 			printf("cannot insert Linux ELF brand handler\n");
1174 		break;
1175 	case MOD_UNLOAD:
1176 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1177 		     ++brandinfo)
1178 			if (elf32_brand_inuse(*brandinfo))
1179 				error = EBUSY;
1180 		if (error == 0) {
1181 			for (brandinfo = &linux_brandlist[0];
1182 			     *brandinfo != NULL; ++brandinfo)
1183 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1184 					error = EINVAL;
1185 		}
1186 		if (error == 0) {
1187 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1188 				linux_ioctl_unregister_handler(*lihp);
1189 			mtx_destroy(&futex_mtx);
1190 			if (bootverbose)
1191 				printf("Linux ELF exec handler removed\n");
1192 		} else
1193 			printf("Could not deinstall ELF interpreter entry\n");
1194 		break;
1195 	default:
1196 		return (EOPNOTSUPP);
1197 	}
1198 	return (error);
1199 }
1200 
/*
 * Module description: the module is named "linuxelf" and its events
 * are delivered to linux_elf_modevent(); the last field is left 0.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/*
 * Declare the module at SI_SUB_EXEC / SI_ORDER_ANY and record its
 * dependency on linux_common (all three version arguments are 1).
 */
DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1);
1209