1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25 */
26
27 #include <assert.h>
28 #include <fcntl.h>
29 #include <poll.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <zlib.h>
34 #include <libgen.h>
35 #include <sys/assfail.h>
36 #include <sys/spa.h>
37 #include <sys/stat.h>
38 #include <sys/processor.h>
39 #include <sys/zfs_context.h>
40 #include <sys/rrwlock.h>
41 #include <sys/zmod.h>
42 #include <sys/utsname.h>
43 #include <sys/systeminfo.h>
44
45 /*
46 * Emulation of kernel services in userland.
47 */
48
#ifndef __FreeBSD__
/* Defined here only on non-FreeBSD builds. */
int aok;
#endif
/* Page count; kernel_init() fills it in from sysconf(_SC_PHYS_PAGES). */
uint64_t physmem;
/*
 * Placeholder root vnode: an arbitrary non-NULL address that vn_openat()
 * compares its starting vnode against.
 */
vnode_t *rootdir = (vnode_t *)0xabcd1234;
/* Host id as a decimal string; written by kernel_init(). */
char hw_serial[HW_HOSTID_LEN];
#ifdef illumos
/* Initialized by kernel_init() on illumos builds. */
kmutex_t cpu_lock;
#endif

/* If set, all blocks read will be copied to the specified directory. */
char *vn_dumpdir = NULL;

/* Fixed utsname contents for the userland emulation. */
struct utsname utsname = {
	"userland", "libzpool", "1", "1", "na"
};

/* this only exists to have its address taken */
struct proc p0;
68
69 /*
70 * =========================================================================
71 * threads
72 * =========================================================================
73 */
74 /*ARGSUSED*/
75 kthread_t *
zk_thread_create(void (* func)(),void * arg)76 zk_thread_create(void (*func)(), void *arg)
77 {
78 thread_t tid;
79
80 VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
81 &tid) == 0);
82
83 return ((void *)(uintptr_t)tid);
84 }
85
86 /*
87 * =========================================================================
88 * kstats
89 * =========================================================================
90 */
91 /*ARGSUSED*/
92 kstat_t *
kstat_create(char * module,int instance,char * name,char * class,uchar_t type,ulong_t ndata,uchar_t ks_flag)93 kstat_create(char *module, int instance, char *name, char *class,
94 uchar_t type, ulong_t ndata, uchar_t ks_flag)
95 {
96 return (NULL);
97 }
98
99 /*ARGSUSED*/
100 void
kstat_named_init(kstat_named_t * knp,const char * name,uchar_t type)101 kstat_named_init(kstat_named_t *knp, const char *name, uchar_t type)
102 {}
103
104 /*ARGSUSED*/
105 void
kstat_install(kstat_t * ksp)106 kstat_install(kstat_t *ksp)
107 {}
108
109 /*ARGSUSED*/
110 void
kstat_delete(kstat_t * ksp)111 kstat_delete(kstat_t *ksp)
112 {}
113
114 /*
115 * =========================================================================
116 * mutexes
117 * =========================================================================
118 */
119 void
zmutex_init(kmutex_t * mp)120 zmutex_init(kmutex_t *mp)
121 {
122 mp->m_owner = NULL;
123 mp->initialized = B_TRUE;
124 (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
125 }
126
127 void
zmutex_destroy(kmutex_t * mp)128 zmutex_destroy(kmutex_t *mp)
129 {
130 ASSERT(mp->initialized == B_TRUE);
131 ASSERT(mp->m_owner == NULL);
132 (void) _mutex_destroy(&(mp)->m_lock);
133 mp->m_owner = (void *)-1UL;
134 mp->initialized = B_FALSE;
135 }
136
137 int
zmutex_owned(kmutex_t * mp)138 zmutex_owned(kmutex_t *mp)
139 {
140 ASSERT(mp->initialized == B_TRUE);
141
142 return (mp->m_owner == curthread);
143 }
144
145 void
mutex_enter(kmutex_t * mp)146 mutex_enter(kmutex_t *mp)
147 {
148 ASSERT(mp->initialized == B_TRUE);
149 ASSERT(mp->m_owner != (void *)-1UL);
150 ASSERT(mp->m_owner != curthread);
151 VERIFY(mutex_lock(&mp->m_lock) == 0);
152 ASSERT(mp->m_owner == NULL);
153 mp->m_owner = curthread;
154 }
155
156 int
mutex_tryenter(kmutex_t * mp)157 mutex_tryenter(kmutex_t *mp)
158 {
159 ASSERT(mp->initialized == B_TRUE);
160 ASSERT(mp->m_owner != (void *)-1UL);
161 if (0 == mutex_trylock(&mp->m_lock)) {
162 ASSERT(mp->m_owner == NULL);
163 mp->m_owner = curthread;
164 return (1);
165 } else {
166 return (0);
167 }
168 }
169
170 void
mutex_exit(kmutex_t * mp)171 mutex_exit(kmutex_t *mp)
172 {
173 ASSERT(mp->initialized == B_TRUE);
174 ASSERT(mutex_owner(mp) == curthread);
175 mp->m_owner = NULL;
176 VERIFY(mutex_unlock(&mp->m_lock) == 0);
177 }
178
179 void *
mutex_owner(kmutex_t * mp)180 mutex_owner(kmutex_t *mp)
181 {
182 ASSERT(mp->initialized == B_TRUE);
183 return (mp->m_owner);
184 }
185
186 /*
187 * =========================================================================
188 * rwlocks
189 * =========================================================================
190 */
191 /*ARGSUSED*/
192 void
rw_init(krwlock_t * rwlp,char * name,int type,void * arg)193 rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
194 {
195 rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
196 rwlp->rw_owner = NULL;
197 rwlp->initialized = B_TRUE;
198 rwlp->rw_count = 0;
199 }
200
201 void
rw_destroy(krwlock_t * rwlp)202 rw_destroy(krwlock_t *rwlp)
203 {
204 ASSERT(rwlp->rw_count == 0);
205 rwlock_destroy(&rwlp->rw_lock);
206 rwlp->rw_owner = (void *)-1UL;
207 rwlp->initialized = B_FALSE;
208 }
209
210 void
rw_enter(krwlock_t * rwlp,krw_t rw)211 rw_enter(krwlock_t *rwlp, krw_t rw)
212 {
213 //ASSERT(!RW_LOCK_HELD(rwlp));
214 ASSERT(rwlp->initialized == B_TRUE);
215 ASSERT(rwlp->rw_owner != (void *)-1UL);
216 ASSERT(rwlp->rw_owner != curthread);
217
218 if (rw == RW_READER) {
219 VERIFY(rw_rdlock(&rwlp->rw_lock) == 0);
220 ASSERT(rwlp->rw_count >= 0);
221 atomic_add_int(&rwlp->rw_count, 1);
222 } else {
223 VERIFY(rw_wrlock(&rwlp->rw_lock) == 0);
224 ASSERT(rwlp->rw_count == 0);
225 rwlp->rw_count = -1;
226 rwlp->rw_owner = curthread;
227 }
228 }
229
230 void
rw_exit(krwlock_t * rwlp)231 rw_exit(krwlock_t *rwlp)
232 {
233 ASSERT(rwlp->initialized == B_TRUE);
234 ASSERT(rwlp->rw_owner != (void *)-1UL);
235
236 if (rwlp->rw_owner == curthread) {
237 /* Write locked. */
238 ASSERT(rwlp->rw_count == -1);
239 rwlp->rw_count = 0;
240 rwlp->rw_owner = NULL;
241 } else {
242 /* Read locked. */
243 ASSERT(rwlp->rw_count > 0);
244 atomic_add_int(&rwlp->rw_count, -1);
245 }
246 VERIFY(rw_unlock(&rwlp->rw_lock) == 0);
247 }
248
249 int
rw_tryenter(krwlock_t * rwlp,krw_t rw)250 rw_tryenter(krwlock_t *rwlp, krw_t rw)
251 {
252 int rv;
253
254 ASSERT(rwlp->initialized == B_TRUE);
255 ASSERT(rwlp->rw_owner != (void *)-1UL);
256 ASSERT(rwlp->rw_owner != curthread);
257
258 if (rw == RW_READER)
259 rv = rw_tryrdlock(&rwlp->rw_lock);
260 else
261 rv = rw_trywrlock(&rwlp->rw_lock);
262
263 if (rv == 0) {
264 ASSERT(rwlp->rw_owner == NULL);
265 if (rw == RW_READER) {
266 ASSERT(rwlp->rw_count >= 0);
267 atomic_add_int(&rwlp->rw_count, 1);
268 } else {
269 ASSERT(rwlp->rw_count == 0);
270 rwlp->rw_count = -1;
271 rwlp->rw_owner = curthread;
272 }
273 return (1);
274 }
275
276 return (0);
277 }
278
279 /*ARGSUSED*/
280 int
rw_tryupgrade(krwlock_t * rwlp)281 rw_tryupgrade(krwlock_t *rwlp)
282 {
283 ASSERT(rwlp->initialized == B_TRUE);
284 ASSERT(rwlp->rw_owner != (void *)-1UL);
285
286 return (0);
287 }
288
289 int
rw_lock_held(krwlock_t * rwlp)290 rw_lock_held(krwlock_t *rwlp)
291 {
292
293 return (rwlp->rw_count != 0);
294 }
295
296 /*
297 * =========================================================================
298 * condition variables
299 * =========================================================================
300 */
301 /*ARGSUSED*/
302 void
cv_init(kcondvar_t * cv,char * name,int type,void * arg)303 cv_init(kcondvar_t *cv, char *name, int type, void *arg)
304 {
305 VERIFY(cond_init(cv, name, NULL) == 0);
306 }
307
308 void
cv_destroy(kcondvar_t * cv)309 cv_destroy(kcondvar_t *cv)
310 {
311 VERIFY(cond_destroy(cv) == 0);
312 }
313
314 void
cv_wait(kcondvar_t * cv,kmutex_t * mp)315 cv_wait(kcondvar_t *cv, kmutex_t *mp)
316 {
317 ASSERT(mutex_owner(mp) == curthread);
318 mp->m_owner = NULL;
319 int ret = cond_wait(cv, &mp->m_lock);
320 VERIFY(ret == 0 || ret == EINTR);
321 mp->m_owner = curthread;
322 }
323
324 clock_t
cv_timedwait(kcondvar_t * cv,kmutex_t * mp,clock_t abstime)325 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
326 {
327 int error;
328 struct timespec ts;
329 struct timeval tv;
330 clock_t delta;
331
332 abstime += ddi_get_lbolt();
333 top:
334 delta = abstime - ddi_get_lbolt();
335 if (delta <= 0)
336 return (-1);
337
338 if (gettimeofday(&tv, NULL) != 0)
339 assert(!"gettimeofday() failed");
340
341 ts.tv_sec = tv.tv_sec + delta / hz;
342 ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
343 ASSERT(ts.tv_nsec >= 0);
344
345 if (ts.tv_nsec >= NANOSEC) {
346 ts.tv_sec++;
347 ts.tv_nsec -= NANOSEC;
348 }
349
350 ASSERT(mutex_owner(mp) == curthread);
351 mp->m_owner = NULL;
352 error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
353 mp->m_owner = curthread;
354
355 if (error == EINTR)
356 goto top;
357
358 if (error == ETIMEDOUT)
359 return (-1);
360
361 ASSERT(error == 0);
362
363 return (1);
364 }
365
366 /*ARGSUSED*/
367 clock_t
cv_timedwait_hires(kcondvar_t * cv,kmutex_t * mp,hrtime_t tim,hrtime_t res,int flag)368 cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
369 int flag)
370 {
371 int error;
372 timespec_t ts;
373 hrtime_t delta;
374
375 ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
376
377 top:
378 delta = tim;
379 if (flag & CALLOUT_FLAG_ABSOLUTE)
380 delta -= gethrtime();
381
382 if (delta <= 0)
383 return (-1);
384
385 clock_gettime(CLOCK_REALTIME, &ts);
386 ts.tv_sec += delta / NANOSEC;
387 ts.tv_nsec += delta % NANOSEC;
388 if (ts.tv_nsec >= NANOSEC) {
389 ts.tv_sec++;
390 ts.tv_nsec -= NANOSEC;
391 }
392
393 ASSERT(mutex_owner(mp) == curthread);
394 mp->m_owner = NULL;
395 error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
396 mp->m_owner = curthread;
397
398 if (error == ETIMEDOUT)
399 return (-1);
400
401 if (error == EINTR)
402 goto top;
403
404 ASSERT(error == 0);
405
406 return (1);
407 }
408
409 void
cv_signal(kcondvar_t * cv)410 cv_signal(kcondvar_t *cv)
411 {
412 VERIFY(cond_signal(cv) == 0);
413 }
414
415 void
cv_broadcast(kcondvar_t * cv)416 cv_broadcast(kcondvar_t *cv)
417 {
418 VERIFY(cond_broadcast(cv) == 0);
419 }
420
421 /*
422 * =========================================================================
423 * vnode operations
424 * =========================================================================
425 */
426 /*
427 * Note: for the xxxat() versions of these functions, we assume that the
428 * starting vp is always rootdir (which is true for spa_directory.c, the only
429 * ZFS consumer of these interfaces). We assert this is true, and then emulate
430 * them by adding '/' in front of the path.
431 */
432
433 /*ARGSUSED*/
434 int
vn_open(char * path,int x1,int flags,int mode,vnode_t ** vpp,int x2,int x3)435 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
436 {
437 int fd;
438 int dump_fd;
439 vnode_t *vp;
440 int old_umask;
441 char realpath[MAXPATHLEN];
442 struct stat64 st;
443
444 /*
445 * If we're accessing a real disk from userland, we need to use
446 * the character interface to avoid caching. This is particularly
447 * important if we're trying to look at a real in-kernel storage
448 * pool from userland, e.g. via zdb, because otherwise we won't
449 * see the changes occurring under the segmap cache.
450 * On the other hand, the stupid character device returns zero
451 * for its size. So -- gag -- we open the block device to get
452 * its size, and remember it for subsequent VOP_GETATTR().
453 */
454 if (strncmp(path, "/dev/", 5) == 0) {
455 char *dsk;
456 fd = open64(path, O_RDONLY);
457 if (fd == -1)
458 return (errno);
459 if (fstat64(fd, &st) == -1) {
460 close(fd);
461 return (errno);
462 }
463 close(fd);
464 (void) sprintf(realpath, "%s", path);
465 dsk = strstr(path, "/dsk/");
466 if (dsk != NULL)
467 (void) sprintf(realpath + (dsk - path) + 1, "r%s",
468 dsk + 1);
469 } else {
470 (void) sprintf(realpath, "%s", path);
471 if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
472 return (errno);
473 }
474
475 if (flags & FCREAT)
476 old_umask = umask(0);
477
478 /*
479 * The construct 'flags - FREAD' conveniently maps combinations of
480 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
481 */
482 fd = open64(realpath, flags - FREAD, mode);
483
484 if (flags & FCREAT)
485 (void) umask(old_umask);
486
487 if (vn_dumpdir != NULL) {
488 char dumppath[MAXPATHLEN];
489 (void) snprintf(dumppath, sizeof (dumppath),
490 "%s/%s", vn_dumpdir, basename(realpath));
491 dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
492 if (dump_fd == -1)
493 return (errno);
494 } else {
495 dump_fd = -1;
496 }
497
498 if (fd == -1)
499 return (errno);
500
501 if (fstat64(fd, &st) == -1) {
502 close(fd);
503 return (errno);
504 }
505
506 (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
507
508 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
509
510 vp->v_fd = fd;
511 vp->v_size = st.st_size;
512 vp->v_path = spa_strdup(path);
513 vp->v_dump_fd = dump_fd;
514
515 return (0);
516 }
517
518 /*ARGSUSED*/
519 int
vn_openat(char * path,int x1,int flags,int mode,vnode_t ** vpp,int x2,int x3,vnode_t * startvp,int fd)520 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
521 int x3, vnode_t *startvp, int fd)
522 {
523 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
524 int ret;
525
526 ASSERT(startvp == rootdir);
527 (void) sprintf(realpath, "/%s", path);
528
529 /* fd ignored for now, need if want to simulate nbmand support */
530 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
531
532 umem_free(realpath, strlen(path) + 2);
533
534 return (ret);
535 }
536
537 /*ARGSUSED*/
538 int
vn_rdwr(int uio,vnode_t * vp,void * addr,ssize_t len,offset_t offset,int x1,int x2,rlim64_t x3,void * x4,ssize_t * residp)539 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
540 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
541 {
542 ssize_t iolen, split;
543
544 if (uio == UIO_READ) {
545 iolen = pread64(vp->v_fd, addr, len, offset);
546 if (vp->v_dump_fd != -1) {
547 int status =
548 pwrite64(vp->v_dump_fd, addr, iolen, offset);
549 ASSERT(status != -1);
550 }
551 } else {
552 /*
553 * To simulate partial disk writes, we split writes into two
554 * system calls so that the process can be killed in between.
555 */
556 int sectors = len >> SPA_MINBLOCKSHIFT;
557 split = (sectors > 0 ? rand() % sectors : 0) <<
558 SPA_MINBLOCKSHIFT;
559 iolen = pwrite64(vp->v_fd, addr, split, offset);
560 iolen += pwrite64(vp->v_fd, (char *)addr + split,
561 len - split, offset + split);
562 }
563
564 if (iolen == -1)
565 return (errno);
566 if (residp)
567 *residp = len - iolen;
568 else if (iolen != len)
569 return (EIO);
570 return (0);
571 }
572
573 void
vn_close(vnode_t * vp,int openflag,cred_t * cr,kthread_t * td)574 vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td)
575 {
576 close(vp->v_fd);
577 if (vp->v_dump_fd != -1)
578 close(vp->v_dump_fd);
579 spa_strfree(vp->v_path);
580 umem_free(vp, sizeof (vnode_t));
581 }
582
583 /*
584 * At a minimum we need to update the size since vdev_reopen()
585 * will no longer call vn_openat().
586 */
587 int
fop_getattr(vnode_t * vp,vattr_t * vap)588 fop_getattr(vnode_t *vp, vattr_t *vap)
589 {
590 struct stat64 st;
591
592 if (fstat64(vp->v_fd, &st) == -1) {
593 close(vp->v_fd);
594 return (errno);
595 }
596
597 vap->va_size = st.st_size;
598 return (0);
599 }
600
601 #ifdef ZFS_DEBUG
602
603 /*
604 * =========================================================================
605 * Figure out which debugging statements to print
606 * =========================================================================
607 */
608
/* Comma-separated list of debug selectors; NULL when debugging is off. */
static char *dprintf_string;
/* Non-zero once the "on" selector has been seen by dprintf_setup(). */
static int dprintf_print_all;

/*
 * Return 1 if 'string' appears as a complete comma-delimited entry in
 * dprintf_string, 0 otherwise (including when dprintf_string is NULL).
 * String format: file1.c,function_name1,file2.c,file3.c
 */
int
dprintf_find_string(const char *string)
{
	size_t want = strlen(string);
	const char *entry;
	const char *comma;

	for (entry = dprintf_string; entry != NULL;
	    entry = (comma != NULL) ? comma + 1 : NULL) {
		if (strncmp(entry, string, want) == 0) {
			char term = entry[want];

			/* Must match the whole entry, not just a prefix. */
			if (term == ',' || term == '\0')
				return (1);
		}
		comma = strchr(entry, ',');
	}

	return (0);
}
633
634 void
dprintf_setup(int * argc,char ** argv)635 dprintf_setup(int *argc, char **argv)
636 {
637 int i, j;
638
639 /*
640 * Debugging can be specified two ways: by setting the
641 * environment variable ZFS_DEBUG, or by including a
642 * "debug=..." argument on the command line. The command
643 * line setting overrides the environment variable.
644 */
645
646 for (i = 1; i < *argc; i++) {
647 int len = strlen("debug=");
648 /* First look for a command line argument */
649 if (strncmp("debug=", argv[i], len) == 0) {
650 dprintf_string = argv[i] + len;
651 /* Remove from args */
652 for (j = i; j < *argc; j++)
653 argv[j] = argv[j+1];
654 argv[j] = NULL;
655 (*argc)--;
656 }
657 }
658
659 if (dprintf_string == NULL) {
660 /* Look for ZFS_DEBUG environment variable */
661 dprintf_string = getenv("ZFS_DEBUG");
662 }
663
664 /*
665 * Are we just turning on all debugging?
666 */
667 if (dprintf_find_string("on"))
668 dprintf_print_all = 1;
669
670 if (dprintf_string != NULL)
671 zfs_flags |= ZFS_DEBUG_DPRINTF;
672 }
673
/* Stub sysctl handler: ignores its arguments and reports success (0). */
int
sysctl_handle_64(SYSCTL_HANDLER_ARGS)
{
	const int ok = 0;

	return (ok);
}
679
680 /*
681 * =========================================================================
682 * debug printfs
683 * =========================================================================
684 */
685 void
__dprintf(const char * file,const char * func,int line,const char * fmt,...)686 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
687 {
688 const char *newfile;
689 va_list adx;
690
691 /*
692 * Get rid of annoying "../common/" prefix to filename.
693 */
694 newfile = strrchr(file, '/');
695 if (newfile != NULL) {
696 newfile = newfile + 1; /* Get rid of leading / */
697 } else {
698 newfile = file;
699 }
700
701 if (dprintf_print_all ||
702 dprintf_find_string(newfile) ||
703 dprintf_find_string(func)) {
704 /* Print out just the function name if requested */
705 flockfile(stdout);
706 if (dprintf_find_string("pid"))
707 (void) printf("%d ", getpid());
708 if (dprintf_find_string("tid"))
709 (void) printf("%lu ", thr_self());
710 #if 0
711 if (dprintf_find_string("cpu"))
712 (void) printf("%u ", getcpuid());
713 #endif
714 if (dprintf_find_string("time"))
715 (void) printf("%llu ", gethrtime());
716 if (dprintf_find_string("long"))
717 (void) printf("%s, line %d: ", newfile, line);
718 (void) printf("%s: ", func);
719 va_start(adx, fmt);
720 (void) vprintf(fmt, adx);
721 va_end(adx);
722 funlockfile(stdout);
723 }
724 }
725
726 #endif /* ZFS_DEBUG */
727
728 /*
729 * =========================================================================
730 * cmn_err() and panic()
731 * =========================================================================
732 */
/*
 * Text printed before and after a cmn_err() message, indexed by the 'ce'
 * level passed to vcmn_err().  Each table has CE_IGNORE rows.
 */
static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
735
/*
 * Format the message into a fixed 512-byte buffer (longer text is
 * truncated), hand it to assfail(), and abort.
 */
void
vpanic(const char *fmt, va_list adx)
{
	char msg[512];

	(void) vsnprintf(msg, sizeof (msg), fmt, adx);
	assfail(msg, NULL, 0);
	abort();	/* necessary to make vpanic meet noreturn requirements */
}
744
/* printf-style front end to vpanic(). */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
754
755 void
vcmn_err(int ce,const char * fmt,va_list adx)756 vcmn_err(int ce, const char *fmt, va_list adx)
757 {
758 if (ce == CE_PANIC)
759 vpanic(fmt, adx);
760 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */
761 (void) fprintf(stderr, "%s", ce_prefix[ce]);
762 (void) vfprintf(stderr, fmt, adx);
763 (void) fprintf(stderr, "%s", ce_suffix[ce]);
764 }
765 }
766
/* printf-style front end to vcmn_err(). */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
777
778 /*
779 * =========================================================================
780 * kobj interfaces
781 * =========================================================================
782 */
783 struct _buf *
kobj_open_file(char * name)784 kobj_open_file(char *name)
785 {
786 struct _buf *file;
787 vnode_t *vp;
788
789 /* set vp as the _fd field of the file */
790 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
791 -1) != 0)
792 return ((void *)-1UL);
793
794 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
795 file->_fd = (intptr_t)vp;
796 return (file);
797 }
798
799 int
kobj_read_file(struct _buf * file,char * buf,unsigned size,unsigned off)800 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
801 {
802 ssize_t resid;
803
804 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
805 UIO_SYSSPACE, 0, 0, 0, &resid);
806
807 return (size - resid);
808 }
809
810 void
kobj_close_file(struct _buf * file)811 kobj_close_file(struct _buf *file)
812 {
813 vn_close((vnode_t *)file->_fd, 0, NULL, NULL);
814 umem_free(file, sizeof (struct _buf));
815 }
816
817 int
kobj_get_filesize(struct _buf * file,uint64_t * size)818 kobj_get_filesize(struct _buf *file, uint64_t *size)
819 {
820 struct stat64 st;
821 vnode_t *vp = (vnode_t *)file->_fd;
822
823 if (fstat64(vp->v_fd, &st) == -1) {
824 vn_close(vp, 0, NULL, NULL);
825 return (errno);
826 }
827 *size = st.st_size;
828 return (0);
829 }
830
831 /*
832 * =========================================================================
833 * misc routines
834 * =========================================================================
835 */
836
837 void
delay(clock_t ticks)838 delay(clock_t ticks)
839 {
840 poll(0, 0, ticks * (1000 / hz));
841 }
842
843 #if 0
844 /*
845 * Find highest one bit set.
846 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
847 */
int
highbit64(uint64_t i)
{
	static const int halves[] = { 32, 16, 8, 4, 2, 1 };
	int pos = 1;
	size_t k;

	if (i == 0)
		return (0);

	/*
	 * Binary search for the top set bit: whenever anything remains
	 * above the current half-width, skip past that half.
	 */
	for (k = 0; k < sizeof (halves) / sizeof (halves[0]); k++) {
		if ((i >> halves[k]) != 0) {
			pos += halves[k];
			i >>= halves[k];
		}
	}

	return (pos);
}
875 #endif
876
/*
 * Descriptors for /dev/random and /dev/urandom; opened by kernel_init(),
 * closed and reset to -1 by kernel_fini().
 */
static int random_fd = -1, urandom_fd = -1;
878
879 static int
random_get_bytes_common(uint8_t * ptr,size_t len,int fd)880 random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
881 {
882 size_t resid = len;
883 ssize_t bytes;
884
885 ASSERT(fd != -1);
886
887 while (resid != 0) {
888 bytes = read(fd, ptr, resid);
889 ASSERT3S(bytes, >=, 0);
890 ptr += bytes;
891 resid -= bytes;
892 }
893
894 return (0);
895 }
896
897 int
random_get_bytes(uint8_t * ptr,size_t len)898 random_get_bytes(uint8_t *ptr, size_t len)
899 {
900 return (random_get_bytes_common(ptr, len, random_fd));
901 }
902
903 int
random_get_pseudo_bytes(uint8_t * ptr,size_t len)904 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
905 {
906 return (random_get_bytes_common(ptr, len, urandom_fd));
907 }
908
/*
 * Convert the string hw_serial to an unsigned long using strtoul().
 *
 * The converted value is stored in *result.  If nptr is not NULL, *nptr is
 * set to the first character that was not consumed.  Returns 0 on success,
 * EINVAL if no digits could be converted, or the errno set by strtoul()
 * (e.g. ERANGE on overflow).
 *
 * errno is cleared before the conversion so that a stale value is never
 * returned for a legitimately zero result.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	if (nptr != NULL)
		*nptr = end;

	if (errno != 0)
		return (errno);
	if (end == hw_serial)
		return (EINVAL);	/* no number formed */
	return (0);
}
919
920 int
ddi_strtoull(const char * str,char ** nptr,int base,u_longlong_t * result)921 ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
922 {
923 char *end;
924
925 *result = strtoull(str, &end, base);
926 if (*result == 0)
927 return (errno);
928 return (0);
929 }
930
931 #ifdef illumos
932 /* ARGSUSED */
933 cyclic_id_t
cyclic_add(cyc_handler_t * hdlr,cyc_time_t * when)934 cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
935 {
936 return (1);
937 }
938
939 /* ARGSUSED */
940 void
cyclic_remove(cyclic_id_t id)941 cyclic_remove(cyclic_id_t id)
942 {
943 }
944
945 /* ARGSUSED */
946 int
cyclic_reprogram(cyclic_id_t id,hrtime_t expiration)947 cyclic_reprogram(cyclic_id_t id, hrtime_t expiration)
948 {
949 return (1);
950 }
951 #endif
952
953 /*
954 * =========================================================================
955 * kernel emulation setup & teardown
956 * =========================================================================
957 */
/*
 * umem nofail callback (registered in kernel_init()): report the
 * condition on stderr and abort().
 *
 * Only the message text is written; the array's terminating NUL is not
 * part of the output.
 */
static int
umem_out_of_memory(void)
{
	char errmsg[] = "out of memory -- generating core dump\n";

	(void) write(fileno(stderr), errmsg, sizeof (errmsg) - 1);
	abort();
	return (0);
}
967
/*
 * Bring up the userland kernel emulation.  'mode' is passed to spa_init()
 * and also decides whether hw_serial is set to the real host id (FWRITE
 * set) or to "0".
 */
void
kernel_init(int mode)
{
	extern uint_t rrw_tsd_key;

	/* Register umem_out_of_memory() as the umem nofail callback. */
	umem_nofail_callback(umem_out_of_memory);

	physmem = sysconf(_SC_PHYS_PAGES);

	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));

	/* hw_serial holds the host id as a decimal string. */
	(void) snprintf(hw_serial, sizeof (hw_serial), "%lu",
	    (mode & FWRITE) ? (unsigned long)gethostid() : 0);

	/* Both random devices must open; failure trips VERIFY. */
	VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
	VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);

	system_taskq_init();

#ifdef illumos
	mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
#endif

	spa_init(mode);

	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
}
996
/*
 * Tear down the emulation started by kernel_init(): spa_fini() and
 * system_taskq_fini() run first, then the random devices are closed.
 */
void
kernel_fini(void)
{
	spa_fini();

	system_taskq_fini();

	close(random_fd);
	close(urandom_fd);

	/* Reset to -1, the "not open" value random_get_bytes_common() asserts on. */
	random_fd = -1;
	urandom_fd = -1;
}
1010
1011 /* ARGSUSED */
1012 uint32_t
zone_get_hostid(void * zonep)1013 zone_get_hostid(void *zonep)
1014 {
1015 /*
1016 * We're emulating the system's hostid in userland.
1017 */
1018 return (strtoul(hw_serial, NULL, 10));
1019 }
1020
1021 int
z_uncompress(void * dst,size_t * dstlen,const void * src,size_t srclen)1022 z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
1023 {
1024 int ret;
1025 uLongf len = *dstlen;
1026
1027 if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
1028 *dstlen = (size_t)len;
1029
1030 return (ret);
1031 }
1032
1033 int
z_compress_level(void * dst,size_t * dstlen,const void * src,size_t srclen,int level)1034 z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
1035 int level)
1036 {
1037 int ret;
1038 uLongf len = *dstlen;
1039
1040 if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
1041 *dstlen = (size_t)len;
1042
1043 return (ret);
1044 }
1045
1046 uid_t
crgetuid(cred_t * cr)1047 crgetuid(cred_t *cr)
1048 {
1049 return (0);
1050 }
1051
1052 uid_t
crgetruid(cred_t * cr)1053 crgetruid(cred_t *cr)
1054 {
1055 return (0);
1056 }
1057
1058 gid_t
crgetgid(cred_t * cr)1059 crgetgid(cred_t *cr)
1060 {
1061 return (0);
1062 }
1063
1064 int
crgetngroups(cred_t * cr)1065 crgetngroups(cred_t *cr)
1066 {
1067 return (0);
1068 }
1069
1070 gid_t *
crgetgroups(cred_t * cr)1071 crgetgroups(cred_t *cr)
1072 {
1073 return (NULL);
1074 }
1075
1076 int
zfs_secpolicy_snapshot_perms(const char * name,cred_t * cr)1077 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1078 {
1079 return (0);
1080 }
1081
1082 int
zfs_secpolicy_rename_perms(const char * from,const char * to,cred_t * cr)1083 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1084 {
1085 return (0);
1086 }
1087
1088 int
zfs_secpolicy_destroy_perms(const char * name,cred_t * cr)1089 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1090 {
1091 return (0);
1092 }
1093
1094 ksiddomain_t *
ksid_lookupdomain(const char * dom)1095 ksid_lookupdomain(const char *dom)
1096 {
1097 ksiddomain_t *kd;
1098
1099 kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1100 kd->kd_name = spa_strdup(dom);
1101 return (kd);
1102 }
1103
1104 void
ksiddomain_rele(ksiddomain_t * ksid)1105 ksiddomain_rele(ksiddomain_t *ksid)
1106 {
1107 spa_strfree(ksid->kd_name);
1108 umem_free(ksid, sizeof (ksiddomain_t));
1109 }
1110
1111 /*
1112 * Do not change the length of the returned string; it must be freed
1113 * with strfree().
1114 */
1115 char *
kmem_asprintf(const char * fmt,...)1116 kmem_asprintf(const char *fmt, ...)
1117 {
1118 int size;
1119 va_list adx;
1120 char *buf;
1121
1122 va_start(adx, fmt);
1123 size = vsnprintf(NULL, 0, fmt, adx) + 1;
1124 va_end(adx);
1125
1126 buf = kmem_alloc(size, KM_SLEEP);
1127
1128 va_start(adx, fmt);
1129 size = vsnprintf(buf, size, fmt, adx);
1130 va_end(adx);
1131
1132 return (buf);
1133 }
1134
1135 /* ARGSUSED */
1136 int
zfs_onexit_fd_hold(int fd,minor_t * minorp)1137 zfs_onexit_fd_hold(int fd, minor_t *minorp)
1138 {
1139 *minorp = 0;
1140 return (0);
1141 }
1142
/* Stub: ignores fd and does nothing. */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	/* intentionally empty */
}
1148
1149 /* ARGSUSED */
1150 int
zfs_onexit_add_cb(minor_t minor,void (* func)(void *),void * data,uint64_t * action_handle)1151 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1152 uint64_t *action_handle)
1153 {
1154 return (0);
1155 }
1156
1157 /* ARGSUSED */
1158 int
zfs_onexit_del_cb(minor_t minor,uint64_t action_handle,boolean_t fire)1159 zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1160 {
1161 return (0);
1162 }
1163
1164 /* ARGSUSED */
1165 int
zfs_onexit_cb_data(minor_t minor,uint64_t action_handle,void ** data)1166 zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1167 {
1168 return (0);
1169 }
1170
1171 #ifdef __FreeBSD__
/* Stub: name is ignored, nothing is created, and 0 is returned. */
/* ARGSUSED */
int
zvol_create_minors(const char *name)
{
	const int rc = 0;

	return (rc);
}
1178 #endif
1179
1180 #ifdef illumos
1181 void
bioinit(buf_t * bp)1182 bioinit(buf_t *bp)
1183 {
1184 bzero(bp, sizeof (buf_t));
1185 }
1186
1187 void
biodone(buf_t * bp)1188 biodone(buf_t *bp)
1189 {
1190 if (bp->b_iodone != NULL) {
1191 (*(bp->b_iodone))(bp);
1192 return;
1193 }
1194 ASSERT((bp->b_flags & B_DONE) == 0);
1195 bp->b_flags |= B_DONE;
1196 }
1197
1198 void
bioerror(buf_t * bp,int error)1199 bioerror(buf_t *bp, int error)
1200 {
1201 ASSERT(bp != NULL);
1202 ASSERT(error >= 0);
1203
1204 if (error != 0) {
1205 bp->b_flags |= B_ERROR;
1206 } else {
1207 bp->b_flags &= ~B_ERROR;
1208 }
1209 bp->b_error = error;
1210 }
1211
1212
1213 int
geterror(struct buf * bp)1214 geterror(struct buf *bp)
1215 {
1216 int error = 0;
1217
1218 if (bp->b_flags & B_ERROR) {
1219 error = bp->b_error;
1220 if (!error)
1221 error = EIO;
1222 }
1223 return (error);
1224 }
1225 #endif
1226