1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
25  */
26 
27 #include <assert.h>
28 #include <fcntl.h>
29 #include <poll.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <zlib.h>
34 #include <libgen.h>
35 #include <sys/assfail.h>
36 #include <sys/spa.h>
37 #include <sys/stat.h>
38 #include <sys/processor.h>
39 #include <sys/zfs_context.h>
40 #include <sys/rrwlock.h>
41 #include <sys/zmod.h>
42 #include <sys/utsname.h>
43 #include <sys/systeminfo.h>
44 
45 /*
46  * Emulation of kernel services in userland.
47  */
48 
#ifndef __FreeBSD__
/* NOTE(review): not referenced in this file; presumably read by the assertion machinery -- confirm. */
int aok;
#endif
/* Physical memory size in pages; filled in by kernel_init() from sysconf(). */
uint64_t physmem;
/* Dummy non-NULL root vnode; vn_openat() only compares startvp against it. */
vnode_t *rootdir = (vnode_t *)0xabcd1234;
/* Host id as a decimal string; written by kernel_init(), parsed by zone_get_hostid(). */
char hw_serial[HW_HOSTID_LEN];
#ifdef illumos
/* Emulated cpu_lock; initialized in kernel_init(). */
kmutex_t cpu_lock;
#endif

/* If set, all blocks read will be copied to the specified directory. */
char *vn_dumpdir = NULL;

/* Fixed, fake system identification for the userland environment. */
struct utsname utsname = {
	"userland", "libzpool", "1", "1", "na"
};

/* this only exists to have its address taken */
struct proc p0;
68 
69 /*
70  * =========================================================================
71  * threads
72  * =========================================================================
73  */
74 /*ARGSUSED*/
75 kthread_t *
zk_thread_create(void (* func)(),void * arg)76 zk_thread_create(void (*func)(), void *arg)
77 {
78 	thread_t tid;
79 
80 	VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
81 	    &tid) == 0);
82 
83 	return ((void *)(uintptr_t)tid);
84 }
85 
86 /*
87  * =========================================================================
88  * kstats
89  * =========================================================================
90  */
91 /*ARGSUSED*/
92 kstat_t *
kstat_create(char * module,int instance,char * name,char * class,uchar_t type,ulong_t ndata,uchar_t ks_flag)93 kstat_create(char *module, int instance, char *name, char *class,
94     uchar_t type, ulong_t ndata, uchar_t ks_flag)
95 {
96 	return (NULL);
97 }
98 
99 /*ARGSUSED*/
100 void
kstat_named_init(kstat_named_t * knp,const char * name,uchar_t type)101 kstat_named_init(kstat_named_t *knp, const char *name, uchar_t type)
102 {}
103 
104 /*ARGSUSED*/
105 void
kstat_install(kstat_t * ksp)106 kstat_install(kstat_t *ksp)
107 {}
108 
109 /*ARGSUSED*/
110 void
kstat_delete(kstat_t * ksp)111 kstat_delete(kstat_t *ksp)
112 {}
113 
114 /*
115  * =========================================================================
116  * mutexes
117  * =========================================================================
118  */
119 void
zmutex_init(kmutex_t * mp)120 zmutex_init(kmutex_t *mp)
121 {
122 	mp->m_owner = NULL;
123 	mp->initialized = B_TRUE;
124 	(void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
125 }
126 
/*
 * Tear down an emulated kernel mutex.  The mutex must be initialized and
 * unowned.  Afterwards the owner is poisoned with -1 so that the asserts
 * in mutex_enter()/mutex_tryenter() catch use after destroy.
 */
void
zmutex_destroy(kmutex_t *mp)
{
	ASSERT(mp->initialized == B_TRUE);
	ASSERT(mp->m_owner == NULL);
	(void) _mutex_destroy(&(mp)->m_lock);
	mp->m_owner = (void *)-1UL;
	mp->initialized = B_FALSE;
}
136 
/*
 * Return non-zero if the calling thread is recorded as the owner of mp.
 */
int
zmutex_owned(kmutex_t *mp)
{
	ASSERT(mp->initialized == B_TRUE);

	return (mp->m_owner == curthread);
}
144 
/*
 * Acquire mp, blocking if necessary, and record the caller as owner.
 * Asserts that the mutex is initialized, not destroyed, and not already
 * held by the caller (no recursive entry).
 */
void
mutex_enter(kmutex_t *mp)
{
	ASSERT(mp->initialized == B_TRUE);
	ASSERT(mp->m_owner != (void *)-1UL);
	ASSERT(mp->m_owner != curthread);
	VERIFY(mutex_lock(&mp->m_lock) == 0);
	/* Once the lock is ours, the previous holder must have cleared m_owner. */
	ASSERT(mp->m_owner == NULL);
	mp->m_owner = curthread;
}
155 
156 int
mutex_tryenter(kmutex_t * mp)157 mutex_tryenter(kmutex_t *mp)
158 {
159 	ASSERT(mp->initialized == B_TRUE);
160 	ASSERT(mp->m_owner != (void *)-1UL);
161 	if (0 == mutex_trylock(&mp->m_lock)) {
162 		ASSERT(mp->m_owner == NULL);
163 		mp->m_owner = curthread;
164 		return (1);
165 	} else {
166 		return (0);
167 	}
168 }
169 
/*
 * Release mp.  The caller must be the recorded owner.  Ownership is
 * cleared before the underlying lock is dropped so the next holder
 * observes m_owner == NULL.
 */
void
mutex_exit(kmutex_t *mp)
{
	ASSERT(mp->initialized == B_TRUE);
	ASSERT(mutex_owner(mp) == curthread);
	mp->m_owner = NULL;
	VERIFY(mutex_unlock(&mp->m_lock) == 0);
}
178 
/*
 * Return the recorded owner of mp (NULL when unowned).
 */
void *
mutex_owner(kmutex_t *mp)
{
	ASSERT(mp->initialized == B_TRUE);
	return (mp->m_owner);
}
185 
186 /*
187  * =========================================================================
188  * rwlocks
189  * =========================================================================
190  */
191 /*ARGSUSED*/
192 void
rw_init(krwlock_t * rwlp,char * name,int type,void * arg)193 rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
194 {
195 	rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
196 	rwlp->rw_owner = NULL;
197 	rwlp->initialized = B_TRUE;
198 	rwlp->rw_count = 0;
199 }
200 
201 void
rw_destroy(krwlock_t * rwlp)202 rw_destroy(krwlock_t *rwlp)
203 {
204 	ASSERT(rwlp->rw_count == 0);
205 	rwlock_destroy(&rwlp->rw_lock);
206 	rwlp->rw_owner = (void *)-1UL;
207 	rwlp->initialized = B_FALSE;
208 }
209 
210 void
rw_enter(krwlock_t * rwlp,krw_t rw)211 rw_enter(krwlock_t *rwlp, krw_t rw)
212 {
213 	//ASSERT(!RW_LOCK_HELD(rwlp));
214 	ASSERT(rwlp->initialized == B_TRUE);
215 	ASSERT(rwlp->rw_owner != (void *)-1UL);
216 	ASSERT(rwlp->rw_owner != curthread);
217 
218 	if (rw == RW_READER) {
219 		VERIFY(rw_rdlock(&rwlp->rw_lock) == 0);
220 		ASSERT(rwlp->rw_count >= 0);
221 		atomic_add_int(&rwlp->rw_count, 1);
222 	} else {
223 		VERIFY(rw_wrlock(&rwlp->rw_lock) == 0);
224 		ASSERT(rwlp->rw_count == 0);
225 		rwlp->rw_count = -1;
226 		rwlp->rw_owner = curthread;
227 	}
228 }
229 
230 void
rw_exit(krwlock_t * rwlp)231 rw_exit(krwlock_t *rwlp)
232 {
233 	ASSERT(rwlp->initialized == B_TRUE);
234 	ASSERT(rwlp->rw_owner != (void *)-1UL);
235 
236 	if (rwlp->rw_owner == curthread) {
237 		/* Write locked. */
238 		ASSERT(rwlp->rw_count == -1);
239 		rwlp->rw_count = 0;
240 		rwlp->rw_owner = NULL;
241 	} else {
242 		/* Read locked. */
243 		ASSERT(rwlp->rw_count > 0);
244 		atomic_add_int(&rwlp->rw_count, -1);
245 	}
246 	VERIFY(rw_unlock(&rwlp->rw_lock) == 0);
247 }
248 
249 int
rw_tryenter(krwlock_t * rwlp,krw_t rw)250 rw_tryenter(krwlock_t *rwlp, krw_t rw)
251 {
252 	int rv;
253 
254 	ASSERT(rwlp->initialized == B_TRUE);
255 	ASSERT(rwlp->rw_owner != (void *)-1UL);
256 	ASSERT(rwlp->rw_owner != curthread);
257 
258 	if (rw == RW_READER)
259 		rv = rw_tryrdlock(&rwlp->rw_lock);
260 	else
261 		rv = rw_trywrlock(&rwlp->rw_lock);
262 
263 	if (rv == 0) {
264 		ASSERT(rwlp->rw_owner == NULL);
265 		if (rw == RW_READER) {
266 			ASSERT(rwlp->rw_count >= 0);
267 			atomic_add_int(&rwlp->rw_count, 1);
268 		} else {
269 			ASSERT(rwlp->rw_count == 0);
270 			rwlp->rw_count = -1;
271 			rwlp->rw_owner = curthread;
272 		}
273 		return (1);
274 	}
275 
276 	return (0);
277 }
278 
/*
 * Reader-to-writer upgrade is not emulated: after sanity checks this
 * always returns 0 (upgrade failed), leaving the lock state untouched.
 */
/*ARGSUSED*/
int
rw_tryupgrade(krwlock_t *rwlp)
{
	ASSERT(rwlp->initialized == B_TRUE);
	ASSERT(rwlp->rw_owner != (void *)-1UL);

	return (0);
}
288 
289 int
rw_lock_held(krwlock_t * rwlp)290 rw_lock_held(krwlock_t *rwlp)
291 {
292 
293 	return (rwlp->rw_count != 0);
294 }
295 
296 /*
297  * =========================================================================
298  * condition variables
299  * =========================================================================
300  */
301 /*ARGSUSED*/
302 void
cv_init(kcondvar_t * cv,char * name,int type,void * arg)303 cv_init(kcondvar_t *cv, char *name, int type, void *arg)
304 {
305 	VERIFY(cond_init(cv, name, NULL) == 0);
306 }
307 
/*
 * Destroy a condition variable; failure of cond_destroy() is fatal.
 */
void
cv_destroy(kcondvar_t *cv)
{
	VERIFY(cond_destroy(cv) == 0);
}
313 
314 void
cv_wait(kcondvar_t * cv,kmutex_t * mp)315 cv_wait(kcondvar_t *cv, kmutex_t *mp)
316 {
317 	ASSERT(mutex_owner(mp) == curthread);
318 	mp->m_owner = NULL;
319 	int ret = cond_wait(cv, &mp->m_lock);
320 	VERIFY(ret == 0 || ret == EINTR);
321 	mp->m_owner = curthread;
322 }
323 
324 clock_t
cv_timedwait(kcondvar_t * cv,kmutex_t * mp,clock_t abstime)325 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
326 {
327 	int error;
328 	struct timespec ts;
329 	struct timeval tv;
330 	clock_t delta;
331 
332 	abstime += ddi_get_lbolt();
333 top:
334 	delta = abstime - ddi_get_lbolt();
335 	if (delta <= 0)
336 		return (-1);
337 
338 	if (gettimeofday(&tv, NULL) != 0)
339 		assert(!"gettimeofday() failed");
340 
341 	ts.tv_sec = tv.tv_sec + delta / hz;
342 	ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
343 	ASSERT(ts.tv_nsec >= 0);
344 
345 	if (ts.tv_nsec >= NANOSEC) {
346 		ts.tv_sec++;
347 		ts.tv_nsec -= NANOSEC;
348 	}
349 
350 	ASSERT(mutex_owner(mp) == curthread);
351 	mp->m_owner = NULL;
352 	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
353 	mp->m_owner = curthread;
354 
355 	if (error == EINTR)
356 		goto top;
357 
358 	if (error == ETIMEDOUT)
359 		return (-1);
360 
361 	ASSERT(error == 0);
362 
363 	return (1);
364 }
365 
366 /*ARGSUSED*/
367 clock_t
cv_timedwait_hires(kcondvar_t * cv,kmutex_t * mp,hrtime_t tim,hrtime_t res,int flag)368 cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
369     int flag)
370 {
371 	int error;
372 	timespec_t ts;
373 	hrtime_t delta;
374 
375 	ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
376 
377 top:
378 	delta = tim;
379 	if (flag & CALLOUT_FLAG_ABSOLUTE)
380 		delta -= gethrtime();
381 
382 	if (delta <= 0)
383 		return (-1);
384 
385 	clock_gettime(CLOCK_REALTIME, &ts);
386 	ts.tv_sec += delta / NANOSEC;
387 	ts.tv_nsec += delta % NANOSEC;
388 	if (ts.tv_nsec >= NANOSEC) {
389 		ts.tv_sec++;
390 		ts.tv_nsec -= NANOSEC;
391 	}
392 
393 	ASSERT(mutex_owner(mp) == curthread);
394 	mp->m_owner = NULL;
395 	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
396 	mp->m_owner = curthread;
397 
398 	if (error == ETIMEDOUT)
399 		return (-1);
400 
401 	if (error == EINTR)
402 		goto top;
403 
404 	ASSERT(error == 0);
405 
406 	return (1);
407 }
408 
/*
 * Signal cv via cond_signal(); failure is fatal.
 */
void
cv_signal(kcondvar_t *cv)
{
	VERIFY(cond_signal(cv) == 0);
}
414 
/*
 * Broadcast on cv via cond_broadcast(); failure is fatal.
 */
void
cv_broadcast(kcondvar_t *cv)
{
	VERIFY(cond_broadcast(cv) == 0);
}
420 
421 /*
422  * =========================================================================
423  * vnode operations
424  * =========================================================================
425  */
426 /*
427  * Note: for the xxxat() versions of these functions, we assume that the
428  * starting vp is always rootdir (which is true for spa_directory.c, the only
429  * ZFS consumer of these interfaces).  We assert this is true, and then emulate
430  * them by adding '/' in front of the path.
431  */
432 
433 /*ARGSUSED*/
434 int
vn_open(char * path,int x1,int flags,int mode,vnode_t ** vpp,int x2,int x3)435 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
436 {
437 	int fd;
438 	int dump_fd;
439 	vnode_t *vp;
440 	int old_umask;
441 	char realpath[MAXPATHLEN];
442 	struct stat64 st;
443 
444 	/*
445 	 * If we're accessing a real disk from userland, we need to use
446 	 * the character interface to avoid caching.  This is particularly
447 	 * important if we're trying to look at a real in-kernel storage
448 	 * pool from userland, e.g. via zdb, because otherwise we won't
449 	 * see the changes occurring under the segmap cache.
450 	 * On the other hand, the stupid character device returns zero
451 	 * for its size.  So -- gag -- we open the block device to get
452 	 * its size, and remember it for subsequent VOP_GETATTR().
453 	 */
454 	if (strncmp(path, "/dev/", 5) == 0) {
455 		char *dsk;
456 		fd = open64(path, O_RDONLY);
457 		if (fd == -1)
458 			return (errno);
459 		if (fstat64(fd, &st) == -1) {
460 			close(fd);
461 			return (errno);
462 		}
463 		close(fd);
464 		(void) sprintf(realpath, "%s", path);
465 		dsk = strstr(path, "/dsk/");
466 		if (dsk != NULL)
467 			(void) sprintf(realpath + (dsk - path) + 1, "r%s",
468 			    dsk + 1);
469 	} else {
470 		(void) sprintf(realpath, "%s", path);
471 		if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
472 			return (errno);
473 	}
474 
475 	if (flags & FCREAT)
476 		old_umask = umask(0);
477 
478 	/*
479 	 * The construct 'flags - FREAD' conveniently maps combinations of
480 	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
481 	 */
482 	fd = open64(realpath, flags - FREAD, mode);
483 
484 	if (flags & FCREAT)
485 		(void) umask(old_umask);
486 
487 	if (vn_dumpdir != NULL) {
488 		char dumppath[MAXPATHLEN];
489 		(void) snprintf(dumppath, sizeof (dumppath),
490 		    "%s/%s", vn_dumpdir, basename(realpath));
491 		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
492 		if (dump_fd == -1)
493 			return (errno);
494 	} else {
495 		dump_fd = -1;
496 	}
497 
498 	if (fd == -1)
499 		return (errno);
500 
501 	if (fstat64(fd, &st) == -1) {
502 		close(fd);
503 		return (errno);
504 	}
505 
506 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
507 
508 	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
509 
510 	vp->v_fd = fd;
511 	vp->v_size = st.st_size;
512 	vp->v_path = spa_strdup(path);
513 	vp->v_dump_fd = dump_fd;
514 
515 	return (0);
516 }
517 
518 /*ARGSUSED*/
519 int
vn_openat(char * path,int x1,int flags,int mode,vnode_t ** vpp,int x2,int x3,vnode_t * startvp,int fd)520 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
521     int x3, vnode_t *startvp, int fd)
522 {
523 	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
524 	int ret;
525 
526 	ASSERT(startvp == rootdir);
527 	(void) sprintf(realpath, "/%s", path);
528 
529 	/* fd ignored for now, need if want to simulate nbmand support */
530 	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
531 
532 	umem_free(realpath, strlen(path) + 2);
533 
534 	return (ret);
535 }
536 
537 /*ARGSUSED*/
538 int
vn_rdwr(int uio,vnode_t * vp,void * addr,ssize_t len,offset_t offset,int x1,int x2,rlim64_t x3,void * x4,ssize_t * residp)539 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
540     int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
541 {
542 	ssize_t iolen, split;
543 
544 	if (uio == UIO_READ) {
545 		iolen = pread64(vp->v_fd, addr, len, offset);
546 		if (vp->v_dump_fd != -1) {
547 			int status =
548 			    pwrite64(vp->v_dump_fd, addr, iolen, offset);
549 			ASSERT(status != -1);
550 		}
551 	} else {
552 		/*
553 		 * To simulate partial disk writes, we split writes into two
554 		 * system calls so that the process can be killed in between.
555 		 */
556 		int sectors = len >> SPA_MINBLOCKSHIFT;
557 		split = (sectors > 0 ? rand() % sectors : 0) <<
558 		    SPA_MINBLOCKSHIFT;
559 		iolen = pwrite64(vp->v_fd, addr, split, offset);
560 		iolen += pwrite64(vp->v_fd, (char *)addr + split,
561 		    len - split, offset + split);
562 	}
563 
564 	if (iolen == -1)
565 		return (errno);
566 	if (residp)
567 		*residp = len - iolen;
568 	else if (iolen != len)
569 		return (EIO);
570 	return (0);
571 }
572 
573 void
vn_close(vnode_t * vp,int openflag,cred_t * cr,kthread_t * td)574 vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td)
575 {
576 	close(vp->v_fd);
577 	if (vp->v_dump_fd != -1)
578 		close(vp->v_dump_fd);
579 	spa_strfree(vp->v_path);
580 	umem_free(vp, sizeof (vnode_t));
581 }
582 
583 /*
584  * At a minimum we need to update the size since vdev_reopen()
585  * will no longer call vn_openat().
586  */
587 int
fop_getattr(vnode_t * vp,vattr_t * vap)588 fop_getattr(vnode_t *vp, vattr_t *vap)
589 {
590 	struct stat64 st;
591 
592 	if (fstat64(vp->v_fd, &st) == -1) {
593 		close(vp->v_fd);
594 		return (errno);
595 	}
596 
597 	vap->va_size = st.st_size;
598 	return (0);
599 }
600 
601 #ifdef ZFS_DEBUG
602 
603 /*
604  * =========================================================================
605  * Figure out which debugging statements to print
606  * =========================================================================
607  */
608 
/* Comma-separated debug selector list; set by dprintf_setup(), may be NULL. */
static char *dprintf_string;
/* Non-zero when the selector list contains "on" (print everything). */
static int dprintf_print_all;
611 
612 int
dprintf_find_string(const char * string)613 dprintf_find_string(const char *string)
614 {
615 	char *tmp_str = dprintf_string;
616 	int len = strlen(string);
617 
618 	/*
619 	 * Find out if this is a string we want to print.
620 	 * String format: file1.c,function_name1,file2.c,file3.c
621 	 */
622 
623 	while (tmp_str != NULL) {
624 		if (strncmp(tmp_str, string, len) == 0 &&
625 		    (tmp_str[len] == ',' || tmp_str[len] == '\0'))
626 			return (1);
627 		tmp_str = strchr(tmp_str, ',');
628 		if (tmp_str != NULL)
629 			tmp_str++; /* Get rid of , */
630 	}
631 	return (0);
632 }
633 
634 void
dprintf_setup(int * argc,char ** argv)635 dprintf_setup(int *argc, char **argv)
636 {
637 	int i, j;
638 
639 	/*
640 	 * Debugging can be specified two ways: by setting the
641 	 * environment variable ZFS_DEBUG, or by including a
642 	 * "debug=..."  argument on the command line.  The command
643 	 * line setting overrides the environment variable.
644 	 */
645 
646 	for (i = 1; i < *argc; i++) {
647 		int len = strlen("debug=");
648 		/* First look for a command line argument */
649 		if (strncmp("debug=", argv[i], len) == 0) {
650 			dprintf_string = argv[i] + len;
651 			/* Remove from args */
652 			for (j = i; j < *argc; j++)
653 				argv[j] = argv[j+1];
654 			argv[j] = NULL;
655 			(*argc)--;
656 		}
657 	}
658 
659 	if (dprintf_string == NULL) {
660 		/* Look for ZFS_DEBUG environment variable */
661 		dprintf_string = getenv("ZFS_DEBUG");
662 	}
663 
664 	/*
665 	 * Are we just turning on all debugging?
666 	 */
667 	if (dprintf_find_string("on"))
668 		dprintf_print_all = 1;
669 
670 	if (dprintf_string != NULL)
671 		zfs_flags |= ZFS_DEBUG_DPRINTF;
672 }
673 
/*
 * sysctl handler stub: ignores its arguments and returns 0.
 */
int
sysctl_handle_64(SYSCTL_HANDLER_ARGS)
{
	return (0);
}
679 
680 /*
681  * =========================================================================
682  * debug printfs
683  * =========================================================================
684  */
685 void
__dprintf(const char * file,const char * func,int line,const char * fmt,...)686 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
687 {
688 	const char *newfile;
689 	va_list adx;
690 
691 	/*
692 	 * Get rid of annoying "../common/" prefix to filename.
693 	 */
694 	newfile = strrchr(file, '/');
695 	if (newfile != NULL) {
696 		newfile = newfile + 1; /* Get rid of leading / */
697 	} else {
698 		newfile = file;
699 	}
700 
701 	if (dprintf_print_all ||
702 	    dprintf_find_string(newfile) ||
703 	    dprintf_find_string(func)) {
704 		/* Print out just the function name if requested */
705 		flockfile(stdout);
706 		if (dprintf_find_string("pid"))
707 			(void) printf("%d ", getpid());
708 		if (dprintf_find_string("tid"))
709 			(void) printf("%lu ", thr_self());
710 #if 0
711 		if (dprintf_find_string("cpu"))
712 			(void) printf("%u ", getcpuid());
713 #endif
714 		if (dprintf_find_string("time"))
715 			(void) printf("%llu ", gethrtime());
716 		if (dprintf_find_string("long"))
717 			(void) printf("%s, line %d: ", newfile, line);
718 		(void) printf("%s: ", func);
719 		va_start(adx, fmt);
720 		(void) vprintf(fmt, adx);
721 		va_end(adx);
722 		funlockfile(stdout);
723 	}
724 }
725 
726 #endif /* ZFS_DEBUG */
727 
728 /*
729  * =========================================================================
730  * cmn_err() and panic()
731  * =========================================================================
732  */
/*
 * Text printed before and after a message by vcmn_err(), indexed by the
 * cmn_err level.  Both tables have CE_IGNORE entries.
 */
static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
735 
/*
 * Format the panic message (truncated to the local buffer) and hand it
 * to assfail().  Never returns.
 */
void
vpanic(const char *fmt, va_list adx)
{
	char buf[512];

	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
	assfail(buf, NULL, 0);

	/* necessary to make vpanic meet noreturn requirements */
	abort();
}
744 
/*
 * printf-style front end to vpanic().
 */
void
panic(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vpanic(fmt, ap);
	va_end(ap);
}
754 
755 void
vcmn_err(int ce,const char * fmt,va_list adx)756 vcmn_err(int ce, const char *fmt, va_list adx)
757 {
758 	if (ce == CE_PANIC)
759 		vpanic(fmt, adx);
760 	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
761 		(void) fprintf(stderr, "%s", ce_prefix[ce]);
762 		(void) vfprintf(stderr, fmt, adx);
763 		(void) fprintf(stderr, "%s", ce_suffix[ce]);
764 	}
765 }
766 
/*
 * printf-style front end to vcmn_err().
 */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vcmn_err(ce, fmt, ap);
	va_end(ap);
}
777 
778 /*
779  * =========================================================================
780  * kobj interfaces
781  * =========================================================================
782  */
783 struct _buf *
kobj_open_file(char * name)784 kobj_open_file(char *name)
785 {
786 	struct _buf *file;
787 	vnode_t *vp;
788 
789 	/* set vp as the _fd field of the file */
790 	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
791 	    -1) != 0)
792 		return ((void *)-1UL);
793 
794 	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
795 	file->_fd = (intptr_t)vp;
796 	return (file);
797 }
798 
799 int
kobj_read_file(struct _buf * file,char * buf,unsigned size,unsigned off)800 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
801 {
802 	ssize_t resid;
803 
804 	vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
805 	    UIO_SYSSPACE, 0, 0, 0, &resid);
806 
807 	return (size - resid);
808 }
809 
810 void
kobj_close_file(struct _buf * file)811 kobj_close_file(struct _buf *file)
812 {
813 	vn_close((vnode_t *)file->_fd, 0, NULL, NULL);
814 	umem_free(file, sizeof (struct _buf));
815 }
816 
817 int
kobj_get_filesize(struct _buf * file,uint64_t * size)818 kobj_get_filesize(struct _buf *file, uint64_t *size)
819 {
820 	struct stat64 st;
821 	vnode_t *vp = (vnode_t *)file->_fd;
822 
823 	if (fstat64(vp->v_fd, &st) == -1) {
824 		vn_close(vp, 0, NULL, NULL);
825 		return (errno);
826 	}
827 	*size = st.st_size;
828 	return (0);
829 }
830 
831 /*
832  * =========================================================================
833  * misc routines
834  * =========================================================================
835  */
836 
837 void
delay(clock_t ticks)838 delay(clock_t ticks)
839 {
840 	poll(0, 0, ticks * (1000 / hz));
841 }
842 
843 #if 0
/*
 * Find highest one bit set.
 *	Returns bit number + 1 of highest bit that is set, otherwise returns 0.
 *
 * The search halves the window (32, 16, 8, 4, 2, 1 bits) each step:
 * whenever anything is set above the window, the value is shifted down
 * and the window width is added to the result.
 */
int
highbit64(uint64_t i)
{
	int h = 1;
	int width;

	if (i == 0)
		return (0);

	for (width = 32; width != 0; width >>= 1) {
		if ((i >> width) != 0) {
			h += width;
			i >>= width;
		}
	}

	return (h);
}
875 #endif
876 
/* Descriptors for /dev/random and /dev/urandom; opened by kernel_init(), -1 when closed. */
static int random_fd = -1, urandom_fd = -1;
878 
879 static int
random_get_bytes_common(uint8_t * ptr,size_t len,int fd)880 random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
881 {
882 	size_t resid = len;
883 	ssize_t bytes;
884 
885 	ASSERT(fd != -1);
886 
887 	while (resid != 0) {
888 		bytes = read(fd, ptr, resid);
889 		ASSERT3S(bytes, >=, 0);
890 		ptr += bytes;
891 		resid -= bytes;
892 	}
893 
894 	return (0);
895 }
896 
897 int
random_get_bytes(uint8_t * ptr,size_t len)898 random_get_bytes(uint8_t *ptr, size_t len)
899 {
900 	return (random_get_bytes_common(ptr, len, random_fd));
901 }
902 
903 int
random_get_pseudo_bytes(uint8_t * ptr,size_t len)904 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
905 {
906 	return (random_get_bytes_common(ptr, len, urandom_fd));
907 }
908 
/*
 * Userland stand-in for ddi_strtoul(): parse an unsigned long from the
 * string 'hw_serial' in the given base.
 *
 * *result always receives the value produced by strtoul().  If nptr is
 * non-NULL, *nptr is set to the first unparsed character.
 *
 * Returns 0 on success, EINVAL if no digits could be parsed (or the base
 * is rejected by strtoul), and ERANGE on overflow.  errno is cleared
 * before the conversion so a stale value is never reported when the
 * parsed number is legitimately 0.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;
	int err;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	err = errno;

	if (nptr != NULL)
		*nptr = end;

	if (err != 0)
		return (err);
	if (end == hw_serial)
		return (EINVAL);
	return (0);
}
919 
920 int
ddi_strtoull(const char * str,char ** nptr,int base,u_longlong_t * result)921 ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
922 {
923 	char *end;
924 
925 	*result = strtoull(str, &end, base);
926 	if (*result == 0)
927 		return (errno);
928 	return (0);
929 }
930 
931 #ifdef illumos
932 /* ARGSUSED */
933 cyclic_id_t
cyclic_add(cyc_handler_t * hdlr,cyc_time_t * when)934 cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
935 {
936 	return (1);
937 }
938 
939 /* ARGSUSED */
940 void
cyclic_remove(cyclic_id_t id)941 cyclic_remove(cyclic_id_t id)
942 {
943 }
944 
945 /* ARGSUSED */
946 int
cyclic_reprogram(cyclic_id_t id,hrtime_t expiration)947 cyclic_reprogram(cyclic_id_t id, hrtime_t expiration)
948 {
949 	return (1);
950 }
951 #endif
952 
953 /*
954  * =========================================================================
955  * kernel emulation setup & teardown
956  * =========================================================================
957  */
/*
 * umem "nofail" callback: report the condition on stderr and abort so a
 * core dump is produced.  write() is used directly rather than stdio.
 * The return statement exists only to satisfy the callback's signature.
 */
static int
umem_out_of_memory(void)
{
	char errmsg[] = "out of memory -- generating core dump\n";

	/* sizeof counts the terminating NUL, which must not be written. */
	(void) write(fileno(stderr), errmsg, sizeof (errmsg) - 1);
	abort();
	return (0);
}
967 
/*
 * Bring up the emulated kernel environment.
 *
 * 'mode' is passed on to spa_init(); its FWRITE bit also decides whether
 * the real host id or 0 is recorded in hw_serial.
 */
void
kernel_init(int mode)
{
	extern uint_t rrw_tsd_key;

	/* Abort with a message instead of returning NULL from UMEM_NOFAIL. */
	umem_nofail_callback(umem_out_of_memory);

	physmem = sysconf(_SC_PHYS_PAGES);

	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));

	/* The host id is kept as a decimal string; see zone_get_hostid(). */
	(void) snprintf(hw_serial, sizeof (hw_serial), "%lu",
	    (mode & FWRITE) ? (unsigned long)gethostid() : 0);

	/* Both random sources must be available; failure is fatal. */
	VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
	VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);

	system_taskq_init();

#ifdef illumos
	mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
#endif

	spa_init(mode);

	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
}
996 
/*
 * Tear down what kernel_init() set up: the SPA layer, the system taskq,
 * and the two random-device descriptors (reset to -1 afterwards).
 */
void
kernel_fini(void)
{
	spa_fini();

	system_taskq_fini();

	close(random_fd);
	close(urandom_fd);

	random_fd = -1;
	urandom_fd = -1;
}
1010 
1011 /* ARGSUSED */
1012 uint32_t
zone_get_hostid(void * zonep)1013 zone_get_hostid(void *zonep)
1014 {
1015 	/*
1016 	 * We're emulating the system's hostid in userland.
1017 	 */
1018 	return (strtoul(hw_serial, NULL, 10));
1019 }
1020 
1021 int
z_uncompress(void * dst,size_t * dstlen,const void * src,size_t srclen)1022 z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
1023 {
1024 	int ret;
1025 	uLongf len = *dstlen;
1026 
1027 	if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
1028 		*dstlen = (size_t)len;
1029 
1030 	return (ret);
1031 }
1032 
1033 int
z_compress_level(void * dst,size_t * dstlen,const void * src,size_t srclen,int level)1034 z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
1035     int level)
1036 {
1037 	int ret;
1038 	uLongf len = *dstlen;
1039 
1040 	if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
1041 		*dstlen = (size_t)len;
1042 
1043 	return (ret);
1044 }
1045 
1046 uid_t
crgetuid(cred_t * cr)1047 crgetuid(cred_t *cr)
1048 {
1049 	return (0);
1050 }
1051 
1052 uid_t
crgetruid(cred_t * cr)1053 crgetruid(cred_t *cr)
1054 {
1055 	return (0);
1056 }
1057 
1058 gid_t
crgetgid(cred_t * cr)1059 crgetgid(cred_t *cr)
1060 {
1061 	return (0);
1062 }
1063 
1064 int
crgetngroups(cred_t * cr)1065 crgetngroups(cred_t *cr)
1066 {
1067 	return (0);
1068 }
1069 
1070 gid_t *
crgetgroups(cred_t * cr)1071 crgetgroups(cred_t *cr)
1072 {
1073 	return (NULL);
1074 }
1075 
1076 int
zfs_secpolicy_snapshot_perms(const char * name,cred_t * cr)1077 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1078 {
1079 	return (0);
1080 }
1081 
1082 int
zfs_secpolicy_rename_perms(const char * from,const char * to,cred_t * cr)1083 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1084 {
1085 	return (0);
1086 }
1087 
1088 int
zfs_secpolicy_destroy_perms(const char * name,cred_t * cr)1089 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1090 {
1091 	return (0);
1092 }
1093 
1094 ksiddomain_t *
ksid_lookupdomain(const char * dom)1095 ksid_lookupdomain(const char *dom)
1096 {
1097 	ksiddomain_t *kd;
1098 
1099 	kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1100 	kd->kd_name = spa_strdup(dom);
1101 	return (kd);
1102 }
1103 
1104 void
ksiddomain_rele(ksiddomain_t * ksid)1105 ksiddomain_rele(ksiddomain_t *ksid)
1106 {
1107 	spa_strfree(ksid->kd_name);
1108 	umem_free(ksid, sizeof (ksiddomain_t));
1109 }
1110 
1111 /*
1112  * Do not change the length of the returned string; it must be freed
1113  * with strfree().
1114  */
1115 char *
kmem_asprintf(const char * fmt,...)1116 kmem_asprintf(const char *fmt, ...)
1117 {
1118 	int size;
1119 	va_list adx;
1120 	char *buf;
1121 
1122 	va_start(adx, fmt);
1123 	size = vsnprintf(NULL, 0, fmt, adx) + 1;
1124 	va_end(adx);
1125 
1126 	buf = kmem_alloc(size, KM_SLEEP);
1127 
1128 	va_start(adx, fmt);
1129 	size = vsnprintf(buf, size, fmt, adx);
1130 	va_end(adx);
1131 
1132 	return (buf);
1133 }
1134 
1135 /* ARGSUSED */
1136 int
zfs_onexit_fd_hold(int fd,minor_t * minorp)1137 zfs_onexit_fd_hold(int fd, minor_t *minorp)
1138 {
1139 	*minorp = 0;
1140 	return (0);
1141 }
1142 
/*
 * No-op counterpart of zfs_onexit_fd_hold().
 */
void
zfs_onexit_fd_rele(int fd)
{
	(void) fd;
}
1148 
1149 /* ARGSUSED */
1150 int
zfs_onexit_add_cb(minor_t minor,void (* func)(void *),void * data,uint64_t * action_handle)1151 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1152     uint64_t *action_handle)
1153 {
1154 	return (0);
1155 }
1156 
1157 /* ARGSUSED */
1158 int
zfs_onexit_del_cb(minor_t minor,uint64_t action_handle,boolean_t fire)1159 zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1160 {
1161 	return (0);
1162 }
1163 
1164 /* ARGSUSED */
1165 int
zfs_onexit_cb_data(minor_t minor,uint64_t action_handle,void ** data)1166 zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1167 {
1168 	return (0);
1169 }
1170 
1171 #ifdef __FreeBSD__
/*
 * zvol minors are not emulated in userland: does nothing and returns 0.
 */
int
zvol_create_minors(const char *name)
{
	(void) name;

	return (0);
}
1178 #endif
1179 
1180 #ifdef illumos
1181 void
bioinit(buf_t * bp)1182 bioinit(buf_t *bp)
1183 {
1184 	bzero(bp, sizeof (buf_t));
1185 }
1186 
1187 void
biodone(buf_t * bp)1188 biodone(buf_t *bp)
1189 {
1190 	if (bp->b_iodone != NULL) {
1191 		(*(bp->b_iodone))(bp);
1192 		return;
1193 	}
1194 	ASSERT((bp->b_flags & B_DONE) == 0);
1195 	bp->b_flags |= B_DONE;
1196 }
1197 
1198 void
bioerror(buf_t * bp,int error)1199 bioerror(buf_t *bp, int error)
1200 {
1201 	ASSERT(bp != NULL);
1202 	ASSERT(error >= 0);
1203 
1204 	if (error != 0) {
1205 		bp->b_flags |= B_ERROR;
1206 	} else {
1207 		bp->b_flags &= ~B_ERROR;
1208 	}
1209 	bp->b_error = error;
1210 }
1211 
1212 
1213 int
geterror(struct buf * bp)1214 geterror(struct buf *bp)
1215 {
1216 	int error = 0;
1217 
1218 	if (bp->b_flags & B_ERROR) {
1219 		error = bp->b_error;
1220 		if (!error)
1221 			error = EIO;
1222 	}
1223 	return (error);
1224 }
1225 #endif
1226