1 /*-
2 * Copyright (c) 2007 Doug Rabson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 /*
33 * Stand-alone file reading package.
34 */
35
36 #include <stand.h>
37 #include <sys/disk.h>
38 #include <sys/param.h>
39 #include <sys/time.h>
40 #include <sys/queue.h>
41 #include <part.h>
42 #include <stddef.h>
43 #include <stdarg.h>
44 #include <string.h>
45 #include <bootstrap.h>
46
47 #include "libzfs.h"
48
49 #include "zfsimpl.c"
50
/*
 * Define the range of indexes to be populated with ZFS Boot Environments.
 * NOTE(review): the consumers of these two bounds are not visible in this
 * part of the file; whether the range is inclusive should be confirmed
 * against them.
 */
#define ZFS_BE_FIRST 4
#define ZFS_BE_LAST 8
54
/*
 * File-system entry points.  They are forward-declared so that the
 * zfs_fsops table can reference them before their definitions.
 */
static int zfs_open(const char *path, struct open_file *f);
static int zfs_close(struct open_file *f);
static int zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
static off_t zfs_seek(struct open_file *f, off_t offset, int where);
static int zfs_stat(struct open_file *f, struct stat *sb);
static int zfs_readdir(struct open_file *f, struct dirent *d);
static int zfs_mount(const char *dev, const char *path, void **data);
static int zfs_unmount(const char *dev, void *data);

/* Boot environment and checkpoint helpers. */
static void zfs_bootenv_initial(const char *envname, spa_t *spa,
    const char *name, const char *dsname, int checkpoint);
static void zfs_checkpoints_initial(spa_t *spa, const char *name,
    const char *dsname);
68
/*
 * Device switch entry for ZFS.  Only tentatively defined here; zfs_open()
 * compares an open file's f_dev against its address.
 */
struct devsw zfs_dev;

/*
 * File-system operations vector for ZFS.  Every operation is implemented in
 * this file except writing, which is routed to null_write.
 */
struct fs_ops zfs_fsops = {
	.fs_name = "zfs",
	.fo_open = zfs_open,
	.fo_close = zfs_close,
	.fo_read = zfs_read,
	.fo_write = null_write,
	.fo_seek = zfs_seek,
	.fo_stat = zfs_stat,
	.fo_readdir = zfs_readdir,
	.fo_mount = zfs_mount,
	.fo_unmount = zfs_unmount
};
83
/*
 * In-core open file.
 *
 * One instance is allocated (zeroed) by zfs_open() and hung off the
 * open_file's f_fsdata.  The last three members are only used by
 * zfs_readdir().
 */
struct file {
	off_t f_seekp; /* seek pointer; doubles as the readdir cursor */
	dnode_phys_t f_dnode; /* dnode filled in by zfs_lookup() */
	uint64_t f_zap_type; /* zap type for readdir */
	uint64_t f_num_leafs; /* number of fzap leaf blocks */
	zap_leaf_phys_t *f_zap_leaf; /* zap leaf buffer, malloc'ed by readdir */
};
94
/*
 * Boot environment bookkeeping.
 * NOTE(review): none of these objects is used in the part of the file
 * visible here; the descriptions below are taken from the names only and
 * should be confirmed against the code that uses them.
 */
static int zfs_env_index;
static int zfs_env_count;

/* Singly-linked list of boot environment names. */
SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
struct zfs_be_list *zfs_be_headp;
struct zfs_be_entry {
	char *name;
	SLIST_ENTRY(zfs_be_entry) entries;
} *zfs_be, *zfs_be_tmp;
104
105 /*
106 * Open a file.
107 */
108 static int
zfs_open(const char * upath,struct open_file * f)109 zfs_open(const char *upath, struct open_file *f)
110 {
111 struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
112 struct file *fp;
113 int rc;
114
115 if (f->f_dev != &zfs_dev)
116 return (EINVAL);
117
118 /* allocate file system specific data structure */
119 fp = calloc(1, sizeof(struct file));
120 if (fp == NULL)
121 return (ENOMEM);
122 f->f_fsdata = fp;
123
124 rc = zfs_lookup(mount, upath, &fp->f_dnode);
125 fp->f_seekp = 0;
126 if (rc) {
127 f->f_fsdata = NULL;
128 free(fp);
129 }
130 return (rc);
131 }
132
133 static int
zfs_close(struct open_file * f)134 zfs_close(struct open_file *f)
135 {
136 struct file *fp = (struct file *)f->f_fsdata;
137
138 dnode_cache_obj = NULL;
139 f->f_fsdata = NULL;
140
141 free(fp);
142 return (0);
143 }
144
145 /*
146 * Copy a portion of a file into kernel memory.
147 * Cross block boundaries when necessary.
148 */
149 static int
zfs_read(struct open_file * f,void * start,size_t size,size_t * resid)150 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */)
151 {
152 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
153 struct file *fp = (struct file *)f->f_fsdata;
154 struct stat sb;
155 size_t n;
156 int rc;
157
158 rc = zfs_stat(f, &sb);
159 if (rc)
160 return (rc);
161 n = size;
162 if (fp->f_seekp + n > sb.st_size)
163 n = sb.st_size - fp->f_seekp;
164
165 rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
166 if (rc)
167 return (rc);
168
169 if (0) {
170 int i;
171 for (i = 0; i < n; i++)
172 putchar(((char*) start)[i]);
173 }
174 fp->f_seekp += n;
175 if (resid)
176 *resid = size - n;
177
178 return (0);
179 }
180
181 static off_t
zfs_seek(struct open_file * f,off_t offset,int where)182 zfs_seek(struct open_file *f, off_t offset, int where)
183 {
184 struct file *fp = (struct file *)f->f_fsdata;
185
186 switch (where) {
187 case SEEK_SET:
188 fp->f_seekp = offset;
189 break;
190 case SEEK_CUR:
191 fp->f_seekp += offset;
192 break;
193 case SEEK_END:
194 {
195 struct stat sb;
196 int error;
197
198 error = zfs_stat(f, &sb);
199 if (error != 0) {
200 errno = error;
201 return (-1);
202 }
203 fp->f_seekp = sb.st_size - offset;
204 break;
205 }
206 default:
207 errno = EINVAL;
208 return (-1);
209 }
210 return (fp->f_seekp);
211 }
212
213 static int
zfs_stat(struct open_file * f,struct stat * sb)214 zfs_stat(struct open_file *f, struct stat *sb)
215 {
216 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
217 struct file *fp = (struct file *)f->f_fsdata;
218
219 return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
220 }
221
/*
 * Return the next entry of the directory open on f in *d.
 *
 * The file's seek pointer is used as the iteration cursor.  On the first
 * call (seek pointer 0) the ZAP type is read from the start of the object:
 *  - micro zap: the cursor is a byte offset into the single block and
 *    advances one mzap_ent_phys_t at a time;
 *  - otherwise (fat zap): the leaf count is read, a leaf-sized buffer is
 *    allocated, and the cursor encodes "leaf byte offset + chunk index".
 *
 * Returns 0 with *d filled in, ENOENT when there are no more entries,
 * ENOTDIR if f is not a directory, ENOMEM if the leaf buffer cannot be
 * allocated, or the error from zfs_stat()/dnode_read().
 */
static int
zfs_readdir(struct open_file *f, struct dirent *d)
{
	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
	struct file *fp = (struct file *)f->f_fsdata;
	mzap_ent_phys_t mze;
	struct stat sb;
	/* Data block size of the directory object, in bytes. */
	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
	int rc;

	rc = zfs_stat(f, &sb);
	if (rc)
		return (rc);
	if (!S_ISDIR(sb.st_mode))
		return (ENOTDIR);

	/*
	 * If this is the first read, get the zap type.
	 */
	if (fp->f_seekp == 0) {
		rc = dnode_read(spa, &fp->f_dnode,
		    0, &fp->f_zap_type, sizeof(fp->f_zap_type));
		if (rc)
			return (rc);

		if (fp->f_zap_type == ZBT_MICRO) {
			/* Skip the micro zap header; entries start here. */
			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
		} else {
			rc = dnode_read(spa, &fp->f_dnode,
			    offsetof(zap_phys_t, zap_num_leafs),
			    &fp->f_num_leafs,
			    sizeof(fp->f_num_leafs));
			if (rc)
				return (rc);

			/*
			 * Start at the second block and load it as the
			 * first leaf.
			 * NOTE(review): a previously allocated f_zap_leaf is
			 * overwritten here if the cursor is ever reset to 0,
			 * and nothing in this function frees the buffer.
			 */
			fp->f_seekp = bsize;
			fp->f_zap_leaf = malloc(bsize);
			if (fp->f_zap_leaf == NULL)
				return (ENOMEM);
			rc = dnode_read(spa, &fp->f_dnode,
			    fp->f_seekp,
			    fp->f_zap_leaf,
			    bsize);
			if (rc)
				return (rc);
		}
	}

	if (fp->f_zap_type == ZBT_MICRO) {
	mzap_next:
		/* A micro zap occupies a single block. */
		if (fp->f_seekp >= bsize)
			return (ENOENT);

		rc = dnode_read(spa, &fp->f_dnode,
		    fp->f_seekp, &mze, sizeof(mze));
		if (rc)
			return (rc);
		fp->f_seekp += sizeof(mze);

		/* An empty name marks an unused slot; try the next one. */
		if (!mze.mze_name[0])
			goto mzap_next;

		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
		/*
		 * NOTE(review): strcpy() trusts the on-disk name to be
		 * NUL-terminated within mze_name -- confirm.
		 */
		strcpy(d->d_name, mze.mze_name);
		d->d_namlen = strlen(d->d_name);
		return (0);
	} else {
		zap_leaf_t zl;
		zap_leaf_chunk_t *zc, *nc;
		int chunk;
		size_t namelen;
		char *p;
		uint64_t value;

		/*
		 * Initialise this so we can use the ZAP size
		 * calculating macros.
		 */
		zl.l_bs = ilog2(bsize);
		zl.l_phys = fp->f_zap_leaf;

		/*
		 * Figure out which chunk we are currently looking at
		 * and consider seeking to the next leaf. We use the
		 * low bits of f_seekp as a simple chunk index.
		 */
	fzap_next:
		chunk = fp->f_seekp & (bsize - 1);
		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
			/* Past the last chunk: move on to the next leaf. */
			fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
			chunk = 0;

			/*
			 * Check for EOF and read the new leaf.
			 */
			if (fp->f_seekp >= bsize * fp->f_num_leafs)
				return (ENOENT);

			rc = dnode_read(spa, &fp->f_dnode,
			    fp->f_seekp,
			    fp->f_zap_leaf,
			    bsize);
			if (rc)
				return (rc);
		}

		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
		fp->f_seekp++;
		/* Only entry chunks start a directory entry. */
		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
			goto fzap_next;

		/* Clamp the name length to what fits in d_name. */
		namelen = zc->l_entry.le_name_numints;
		if (namelen > sizeof(d->d_name))
			namelen = sizeof(d->d_name);

		/*
		 * Paste the name back together.
		 */
		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
		p = d->d_name;
		while (namelen > 0) {
			int len;
			len = namelen;
			if (len > ZAP_LEAF_ARRAY_BYTES)
				len = ZAP_LEAF_ARRAY_BYTES;
			memcpy(p, nc->l_array.la_array, len);
			p += len;
			namelen -= len;
			/* Follow the chain to the next array chunk. */
			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
		}
		/* Guarantee termination even if the name was clamped. */
		d->d_name[sizeof(d->d_name) - 1] = 0;

		/*
		 * Assume the first eight bytes of the value are
		 * a uint64_t.
		 */
		value = fzap_leaf_value(&zl, zc);

		d->d_fileno = ZFS_DIRENT_OBJ(value);
		d->d_type = ZFS_DIRENT_TYPE(value);
		d->d_namlen = strlen(d->d_name);

		return (0);
	}
}
368
369 /*
370 * if path is NULL, create mount structure, but do not add it to list.
371 */
372 static int
zfs_mount(const char * dev,const char * path,void ** data)373 zfs_mount(const char *dev, const char *path, void **data)
374 {
375 struct zfs_devdesc *zfsdev;
376 spa_t *spa;
377 struct zfsmount *mnt;
378 int rv;
379
380 errno = 0;
381 zfsdev = malloc(sizeof(*zfsdev));
382 if (zfsdev == NULL)
383 return (errno);
384
385 rv = zfs_parsedev(zfsdev, dev + 3, NULL);
386 if (rv != 0) {
387 free(zfsdev);
388 return (rv);
389 }
390
391 spa = spa_find_by_dev(zfsdev);
392 if (spa == NULL)
393 return (ENXIO);
394
395 mnt = calloc(1, sizeof(*mnt));
396 if (mnt != NULL && path != NULL)
397 mnt->path = strdup(path);
398 rv = errno;
399
400 if (mnt != NULL)
401 rv = zfs_mount_impl(spa, zfsdev->root_guid, mnt);
402 free(zfsdev);
403
404 if (rv == 0 && mnt != NULL && mnt->objset.os_type != DMU_OST_ZFS) {
405 printf("Unexpected object set type %ju\n",
406 (uintmax_t)mnt->objset.os_type);
407 rv = EIO;
408 }
409
410 if (rv != 0) {
411 if (mnt != NULL)
412 free(mnt->path);
413 free(mnt);
414 return (rv);
415 }
416
417 if (mnt != NULL) {
418 *data = mnt;
419 if (path != NULL)
420 STAILQ_INSERT_TAIL(&zfsmount, mnt, next);
421 }
422
423 return (rv);
424 }
425
426 static int
zfs_unmount(const char * dev,void * data)427 zfs_unmount(const char *dev, void *data)
428 {
429 struct zfsmount *mnt = data;
430
431 STAILQ_REMOVE(&zfsmount, mnt, zfsmount, next);
432 free(mnt->path);
433 free(mnt);
434 return (0);
435 }
436
/*
 * Read bytes bytes starting at byte offset offset from the device whose
 * file descriptor is carried in priv, into buf.
 *
 * offset and bytes do not have to be sector aligned: partial first and
 * last sectors are read through a one-sector bounce buffer and only the
 * requested part is copied out.  The vdev argument is not used.
 *
 * Returns 0 on success, the ioctl() result if the sector size cannot be
 * obtained, ENOMEM if the bounce buffer cannot be allocated, errno if the
 * initial lseek() fails, or EIO on a short read.
 */
static int
vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
{
	int fd, ret;
	size_t res, head, tail, total_size, full_sec_size;
	unsigned secsz, do_tail_read;
	off_t start_sec;
	char *outbuf, *bouncebuf;

	fd = (uintptr_t) priv;
	outbuf = (char *) buf;
	bouncebuf = NULL;

	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
	if (ret != 0)
		return (ret);

	/*
	 * Handling reads of arbitrary offset and size - multi-sector case
	 * and single-sector case.
	 *
	 *                        Multi-sector Case
	 *                (do_tail_read = true if tail > 0)
	 *
	 *   |<----------------------total_size--------------------->|
	 *   |                                                       |
	 *   |<--head-->|<--------------bytes------------>|<--tail-->|
	 *   |          |                                 |          |
	 *   |          |       |<~full_sec_size~>|       |          |
	 *   +------------------+                 +------------------+
	 *   |          |0101010|     .  .  .     |0101011|          |
	 *   +------------------+                 +------------------+
	 *         start_sec                         start_sec + n
	 *
	 *
	 *                      Single-sector Case
	 *                    (do_tail_read = false)
	 *
	 *              |<------total_size = secsz----->|
	 *              |                               |
	 *              |<-head->|<---bytes--->|<-tail->|
	 *              +-------------------------------+
	 *              |        |0101010101010|        |
	 *              +-------------------------------+
	 *                          start_sec
	 */
	start_sec = offset / secsz;
	head = offset % secsz;
	total_size = roundup2(head + bytes, secsz);
	tail = total_size - (head + bytes);
	/* A separate tail read is only needed when more than one sector is spanned. */
	do_tail_read = ((tail > 0) && (head + bytes > secsz));
	/* What remains after the partial first/last sectors are taken off. */
	full_sec_size = total_size;
	if (head > 0)
		full_sec_size -= secsz;
	if (do_tail_read)
		full_sec_size -= secsz;

	/* Return of partial sector data requires a bounce buffer. */
	if ((head > 0) || do_tail_read || bytes < secsz) {
		bouncebuf = malloc(secsz);
		if (bouncebuf == NULL) {
			printf("vdev_read: out of memory\n");
			return (ENOMEM);
		}
	}

	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
		ret = errno;
		goto error;
	}

	/* Partial data return from first sector */
	if (head > 0) {
		res = read(fd, bouncebuf, secsz);
		if (res != secsz) {
			ret = EIO;
			goto error;
		}
		memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
		outbuf += min(secsz - head, bytes);
	}

	/*
	 * Full data return from read sectors.
	 * Note, there is still corner case where we read
	 * from sector boundary, but less than sector size, e.g. reading 512B
	 * from 4k sector.
	 */
	if (full_sec_size > 0) {
		if (bytes < full_sec_size) {
			/* Aligned but shorter than a sector: go via the bounce buffer. */
			res = read(fd, bouncebuf, secsz);
			if (res != secsz) {
				ret = EIO;
				goto error;
			}
			memcpy(outbuf, bouncebuf, bytes);
		} else {
			/* Whole sectors can be read straight into the caller's buffer. */
			res = read(fd, outbuf, full_sec_size);
			if (res != full_sec_size) {
				ret = EIO;
				goto error;
			}
			outbuf += full_sec_size;
		}
	}

	/* Partial data return from last sector */
	if (do_tail_read) {
		res = read(fd, bouncebuf, secsz);
		if (res != secsz) {
			ret = EIO;
			goto error;
		}
		memcpy(outbuf, bouncebuf, secsz - tail);
	}

	ret = 0;
error:
	/* bouncebuf is NULL when no bounce buffer was needed. */
	free(bouncebuf);
	return (ret);
}
558
559 static int
vdev_write(vdev_t * vdev,off_t offset,void * buf,size_t bytes)560 vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes)
561 {
562 int fd, ret;
563 size_t head, tail, total_size, full_sec_size;
564 unsigned secsz, do_tail_write;
565 off_t start_sec;
566 ssize_t res;
567 char *outbuf, *bouncebuf;
568
569 fd = (uintptr_t)vdev->v_priv;
570 outbuf = (char *)buf;
571 bouncebuf = NULL;
572
573 ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
574 if (ret != 0)
575 return (ret);
576
577 start_sec = offset / secsz;
578 head = offset % secsz;
579 total_size = roundup2(head + bytes, secsz);
580 tail = total_size - (head + bytes);
581 do_tail_write = ((tail > 0) && (head + bytes > secsz));
582 full_sec_size = total_size;
583 if (head > 0)
584 full_sec_size -= secsz;
585 if (do_tail_write)
586 full_sec_size -= secsz;
587
588 /* Partial sector write requires a bounce buffer. */
589 if ((head > 0) || do_tail_write || bytes < secsz) {
590 bouncebuf = malloc(secsz);
591 if (bouncebuf == NULL) {
592 printf("vdev_write: out of memory\n");
593 return (ENOMEM);
594 }
595 }
596
597 if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
598 ret = errno;
599 goto error;
600 }
601
602 /* Partial data for first sector */
603 if (head > 0) {
604 res = read(fd, bouncebuf, secsz);
605 if ((unsigned)res != secsz) {
606 ret = EIO;
607 goto error;
608 }
609 memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes));
610 (void) lseek(fd, -secsz, SEEK_CUR);
611 res = write(fd, bouncebuf, secsz);
612 if ((unsigned)res != secsz) {
613 ret = EIO;
614 goto error;
615 }
616 outbuf += min(secsz - head, bytes);
617 }
618
619 /*
620 * Full data write to sectors.
621 * Note, there is still corner case where we write
622 * to sector boundary, but less than sector size, e.g. write 512B
623 * to 4k sector.
624 */
625 if (full_sec_size > 0) {
626 if (bytes < full_sec_size) {
627 res = read(fd, bouncebuf, secsz);
628 if ((unsigned)res != secsz) {
629 ret = EIO;
630 goto error;
631 }
632 memcpy(bouncebuf, outbuf, bytes);
633 (void) lseek(fd, -secsz, SEEK_CUR);
634 res = write(fd, bouncebuf, secsz);
635 if ((unsigned)res != secsz) {
636 ret = EIO;
637 goto error;
638 }
639 } else {
640 res = write(fd, outbuf, full_sec_size);
641 if ((unsigned)res != full_sec_size) {
642 ret = EIO;
643 goto error;
644 }
645 outbuf += full_sec_size;
646 }
647 }
648
649 /* Partial data write to last sector */
650 if (do_tail_write) {
651 res = read(fd, bouncebuf, secsz);
652 if ((unsigned)res != secsz) {
653 ret = EIO;
654 goto error;
655 }
656 memcpy(bouncebuf, outbuf, secsz - tail);
657 (void) lseek(fd, -secsz, SEEK_CUR);
658 res = write(fd, bouncebuf, secsz);
659 if ((unsigned)res != secsz) {
660 ret = EIO;
661 goto error;
662 }
663 }
664
665 ret = 0;
666 error:
667 free(bouncebuf);
668 return (ret);
669 }
670
671 static int
zfs_dev_init(void)672 zfs_dev_init(void)
673 {
674 spa_t *spa;
675 spa_t *next;
676 spa_t *prev;
677
678 zfs_init();
679 if (archsw.arch_zfs_probe == NULL)
680 return (ENXIO);
681 archsw.arch_zfs_probe();
682
683 prev = NULL;
684 spa = STAILQ_FIRST(&zfs_pools);
685 while (spa != NULL) {
686 next = STAILQ_NEXT(spa, spa_link);
687 if (zfs_spa_init(spa)) {
688 if (prev == NULL)
689 STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
690 else
691 STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
692 } else
693 prev = spa;
694 spa = next;
695 }
696 return (0);
697 }
698
/*
 * Context handed to the partition-table callbacks (zfs_diskread() and
 * zfs_probe_partition()) while probing a disk.
 */
struct zfs_probe_args {
	int fd; /* descriptor zfs_diskread() reads from */
	const char *devname; /* device name partition names are appended to */
	uint64_t *pool_guid; /* passed through to zfs_probe(); may be NULL */
	u_int secsz; /* sector size; scales block counts and offsets */
};
705
706 static int
zfs_diskread(void * arg,void * buf,size_t blocks,uint64_t offset)707 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
708 {
709 struct zfs_probe_args *ppa;
710
711 ppa = (struct zfs_probe_args *)arg;
712 return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
713 offset * ppa->secsz, buf, blocks * ppa->secsz));
714 }
715
716 static int
zfs_probe(int fd,uint64_t * pool_guid)717 zfs_probe(int fd, uint64_t *pool_guid)
718 {
719 spa_t *spa;
720 int ret;
721
722 spa = NULL;
723 ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa);
724 if (ret == 0 && pool_guid != NULL)
725 if (*pool_guid == 0)
726 *pool_guid = spa->spa_guid;
727 return (ret);
728 }
729
730 static int
zfs_probe_partition(void * arg,const char * partname,const struct ptable_entry * part)731 zfs_probe_partition(void *arg, const char *partname,
732 const struct ptable_entry *part)
733 {
734 struct zfs_probe_args *ppa, pa;
735 struct ptable *table;
736 char devname[32];
737 int ret;
738
739 /* Probe only freebsd-zfs and freebsd partitions */
740 if (part->type != PART_FREEBSD &&
741 part->type != PART_FREEBSD_ZFS)
742 return (0);
743
744 ppa = (struct zfs_probe_args *)arg;
745 strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
746 devname[strlen(ppa->devname) - 1] = '\0';
747 snprintf(devname, sizeof(devname), "%s%s:", devname, partname);
748 pa.fd = open(devname, O_RDWR);
749 if (pa.fd == -1)
750 return (0);
751 ret = zfs_probe(pa.fd, ppa->pool_guid);
752 if (ret == 0)
753 return (0);
754 /* Do we have BSD label here? */
755 if (part->type == PART_FREEBSD) {
756 pa.devname = devname;
757 pa.pool_guid = ppa->pool_guid;
758 pa.secsz = ppa->secsz;
759 table = ptable_open(&pa, part->end - part->start + 1,
760 ppa->secsz, zfs_diskread);
761 if (table != NULL) {
762 ptable_iterate(table, &pa, zfs_probe_partition);
763 ptable_close(table);
764 }
765 }
766 close(pa.fd);
767 return (0);
768 }
769
770 /*
771 * Return bootenv nvlist from pool label.
772 */
773 int
zfs_get_bootenv(void * vdev,nvlist_t ** benvp)774 zfs_get_bootenv(void *vdev, nvlist_t **benvp)
775 {
776 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
777 nvlist_t *benv = NULL;
778 vdev_t *vd;
779 spa_t *spa;
780
781 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
782 return (ENOTSUP);
783
784 if ((spa = spa_find_by_dev(dev)) == NULL)
785 return (ENXIO);
786
787 if (spa->spa_bootenv == NULL) {
788 STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children,
789 v_childlink) {
790 benv = vdev_read_bootenv(vd);
791
792 if (benv != NULL)
793 break;
794 }
795 spa->spa_bootenv = benv;
796 } else {
797 benv = spa->spa_bootenv;
798 }
799
800 if (benv == NULL)
801 return (ENOENT);
802
803 *benvp = benv;
804 return (0);
805 }
806
807 /*
808 * Store nvlist to pool label bootenv area. Also updates cached pointer in spa.
809 */
810 int
zfs_set_bootenv(void * vdev,nvlist_t * benv)811 zfs_set_bootenv(void *vdev, nvlist_t *benv)
812 {
813 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
814 spa_t *spa;
815 vdev_t *vd;
816
817 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
818 return (ENOTSUP);
819
820 if ((spa = spa_find_by_dev(dev)) == NULL)
821 return (ENXIO);
822
823 STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) {
824 vdev_write_bootenv(vd, benv);
825 }
826
827 spa->spa_bootenv = benv;
828 return (0);
829 }
830
831 /*
832 * Get bootonce value by key. The bootonce <key, value> pair is removed
833 * from the bootenv nvlist and the remaining nvlist is committed back to disk.
834 */
835 int
zfs_get_bootonce(void * vdev,const char * key,char * buf,size_t size)836 zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size)
837 {
838 nvlist_t *benv;
839 char *result = NULL;
840 int result_size, rv;
841
842 if ((rv = zfs_get_bootenv(vdev, &benv)) != 0)
843 return (rv);
844
845 if ((rv = nvlist_find(benv, key, DATA_TYPE_STRING, NULL,
846 &result, &result_size)) == 0) {
847 if (result_size == 0) {
848 /* ignore empty string */
849 rv = ENOENT;
850 } else {
851 size = MIN((size_t)result_size + 1, size);
852 strlcpy(buf, result, size);
853 }
854 (void) nvlist_remove(benv, key, DATA_TYPE_STRING);
855 (void) zfs_set_bootenv(vdev, benv);
856 }
857
858 return (rv);
859 }
860
/*
 * nvstore backend.
 *
 * Forward declarations for the helpers that are used before they are
 * defined.
 */

static int zfs_nvstore_setter(void *, int, const char *,
    const void *, size_t);
static int zfs_nvstore_setter_str(void *, const char *, const char *,
    const char *);
static int zfs_nvstore_unset_impl(void *, const char *, bool);
static int zfs_nvstore_setenv(void *, void *);
871
872 /*
873 * nvstore is only present for current rootfs pool.
874 */
875 static int
zfs_nvstore_sethook(struct env_var * ev,int flags __unused,const void * value)876 zfs_nvstore_sethook(struct env_var *ev, int flags __unused, const void *value)
877 {
878 struct zfs_devdesc *dev;
879 int rv;
880
881 archsw.arch_getdev((void **)&dev, NULL, NULL);
882 if (dev == NULL)
883 return (ENXIO);
884
885 rv = zfs_nvstore_setter_str(dev, NULL, ev->ev_name, value);
886
887 free(dev);
888 return (rv);
889 }
890
891 /*
892 * nvstore is only present for current rootfs pool.
893 */
894 static int
zfs_nvstore_unsethook(struct env_var * ev)895 zfs_nvstore_unsethook(struct env_var *ev)
896 {
897 struct zfs_devdesc *dev;
898 int rv;
899
900 archsw.arch_getdev((void **)&dev, NULL, NULL);
901 if (dev == NULL)
902 return (ENXIO);
903
904 rv = zfs_nvstore_unset_impl(dev, ev->ev_name, false);
905
906 free(dev);
907 return (rv);
908 }
909
910 static int
zfs_nvstore_getter(void * vdev,const char * name,void ** data)911 zfs_nvstore_getter(void *vdev, const char *name, void **data)
912 {
913 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
914 spa_t *spa;
915 nvlist_t *nv;
916 char *str, **ptr;
917 int size;
918 int rv;
919
920 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
921 return (ENOTSUP);
922
923 if ((spa = spa_find_by_dev(dev)) == NULL)
924 return (ENXIO);
925
926 if (spa->spa_bootenv == NULL)
927 return (ENXIO);
928
929 if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
930 NULL, &nv, NULL) != 0)
931 return (ENOENT);
932
933 rv = nvlist_find(nv, name, DATA_TYPE_STRING, NULL, &str, &size);
934 if (rv == 0) {
935 ptr = (char **)data;
936 asprintf(ptr, "%.*s", size, str);
937 if (*data == NULL)
938 rv = ENOMEM;
939 }
940 nvlist_destroy(nv);
941 return (rv);
942 }
943
944 static int
zfs_nvstore_setter(void * vdev,int type,const char * name,const void * data,size_t size)945 zfs_nvstore_setter(void *vdev, int type, const char *name,
946 const void *data, size_t size)
947 {
948 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
949 spa_t *spa;
950 nvlist_t *nv;
951 int rv;
952 bool env_set = true;
953
954 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
955 return (ENOTSUP);
956
957 if ((spa = spa_find_by_dev(dev)) == NULL)
958 return (ENXIO);
959
960 if (spa->spa_bootenv == NULL)
961 return (ENXIO);
962
963 if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
964 NULL, &nv, NULL) != 0) {
965 nv = nvlist_create(NV_UNIQUE_NAME);
966 if (nv == NULL)
967 return (ENOMEM);
968 }
969
970 rv = 0;
971 switch (type) {
972 case DATA_TYPE_INT8:
973 if (size != sizeof (int8_t)) {
974 rv = EINVAL;
975 break;
976 }
977 rv = nvlist_add_int8(nv, name, *(int8_t *)data);
978 break;
979
980 case DATA_TYPE_INT16:
981 if (size != sizeof (int16_t)) {
982 rv = EINVAL;
983 break;
984 }
985 rv = nvlist_add_int16(nv, name, *(int16_t *)data);
986 break;
987
988 case DATA_TYPE_INT32:
989 if (size != sizeof (int32_t)) {
990 rv = EINVAL;
991 break;
992 }
993 rv = nvlist_add_int32(nv, name, *(int32_t *)data);
994 break;
995
996 case DATA_TYPE_INT64:
997 if (size != sizeof (int64_t)) {
998 rv = EINVAL;
999 break;
1000 }
1001 rv = nvlist_add_int64(nv, name, *(int64_t *)data);
1002 break;
1003
1004 case DATA_TYPE_BYTE:
1005 if (size != sizeof (uint8_t)) {
1006 rv = EINVAL;
1007 break;
1008 }
1009 rv = nvlist_add_byte(nv, name, *(int8_t *)data);
1010 break;
1011
1012 case DATA_TYPE_UINT8:
1013 if (size != sizeof (uint8_t)) {
1014 rv = EINVAL;
1015 break;
1016 }
1017 rv = nvlist_add_uint8(nv, name, *(int8_t *)data);
1018 break;
1019
1020 case DATA_TYPE_UINT16:
1021 if (size != sizeof (uint16_t)) {
1022 rv = EINVAL;
1023 break;
1024 }
1025 rv = nvlist_add_uint16(nv, name, *(uint16_t *)data);
1026 break;
1027
1028 case DATA_TYPE_UINT32:
1029 if (size != sizeof (uint32_t)) {
1030 rv = EINVAL;
1031 break;
1032 }
1033 rv = nvlist_add_uint32(nv, name, *(uint32_t *)data);
1034 break;
1035
1036 case DATA_TYPE_UINT64:
1037 if (size != sizeof (uint64_t)) {
1038 rv = EINVAL;
1039 break;
1040 }
1041 rv = nvlist_add_uint64(nv, name, *(uint64_t *)data);
1042 break;
1043
1044 case DATA_TYPE_STRING:
1045 rv = nvlist_add_string(nv, name, data);
1046 break;
1047
1048 case DATA_TYPE_BOOLEAN_VALUE:
1049 if (size != sizeof (boolean_t)) {
1050 rv = EINVAL;
1051 break;
1052 }
1053 rv = nvlist_add_boolean_value(nv, name, *(boolean_t *)data);
1054 break;
1055
1056 default:
1057 rv = EINVAL;
1058 break;
1059 }
1060
1061 if (rv == 0) {
1062 rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv);
1063 if (rv == 0) {
1064 rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1065 }
1066 if (rv == 0) {
1067 if (env_set) {
1068 rv = zfs_nvstore_setenv(vdev,
1069 nvpair_find(nv, name));
1070 } else {
1071 env_discard(env_getenv(name));
1072 rv = 0;
1073 }
1074 }
1075 }
1076
1077 nvlist_destroy(nv);
1078 return (rv);
1079 }
1080
/*
 * Parse the whole of data as a signed integer with strtoll() in base 0
 * (so "0x" and leading-"0" prefixes are honoured).
 *
 * Returns 0 and stores the value in *ip on success.  Returns EINVAL, and
 * leaves *ip untouched, if data is empty, has trailing characters that are
 * not part of the number, or strtoll() reports an error through errno.
 * errno is reset to 0 before the conversion.
 */
static int
get_int64(const char *data, int64_t *ip)
{
	char *stop;
	int64_t parsed;

	errno = 0;
	parsed = strtoll(data, &stop, 0);

	if (errno != 0)
		return (EINVAL);
	if (*data == '\0')
		return (EINVAL);
	if (*stop != '\0')
		return (EINVAL);

	*ip = parsed;
	return (0);
}
1095
/*
 * Parse the whole of data as an unsigned integer with strtoull() in base 0
 * (so "0x" and leading-"0" prefixes are honoured).
 *
 * Returns 0 and stores the value in *ip on success.  Returns EINVAL, and
 * leaves *ip untouched, if data is empty, has trailing characters that are
 * not part of the number, or strtoull() reports an error through errno.
 * errno is reset to 0 before the conversion.
 *
 * NOTE(review): strtoull() accepts a leading '-' and returns the negated
 * value converted to unsigned, so input such as "-1" is accepted here.
 */
static int
get_uint64(const char *data, uint64_t *ip)
{
	char *stop;
	uint64_t parsed;

	errno = 0;
	parsed = strtoull(data, &stop, 0);

	if (errno != 0)
		return (EINVAL);
	if (*data == '\0')
		return (EINVAL);
	if (*stop != '\0')
		return (EINVAL);

	*ip = parsed;
	return (0);
}
1110
1111 /*
1112 * Translate textual data to data type. If type is not set, and we are
1113 * creating new pair, use DATA_TYPE_STRING.
1114 */
1115 static int
zfs_nvstore_setter_str(void * vdev,const char * type,const char * name,const char * data)1116 zfs_nvstore_setter_str(void *vdev, const char *type, const char *name,
1117 const char *data)
1118 {
1119 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1120 spa_t *spa;
1121 nvlist_t *nv;
1122 int rv;
1123 data_type_t dt;
1124 int64_t val;
1125 uint64_t uval;
1126
1127 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1128 return (ENOTSUP);
1129
1130 if ((spa = spa_find_by_dev(dev)) == NULL)
1131 return (ENXIO);
1132
1133 if (spa->spa_bootenv == NULL)
1134 return (ENXIO);
1135
1136 if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1137 NULL, &nv, NULL) != 0) {
1138 nv = NULL;
1139 }
1140
1141 if (type == NULL) {
1142 nvp_header_t *nvh;
1143
1144 /*
1145 * if there is no existing pair, default to string.
1146 * Otherwise, use type from existing pair.
1147 */
1148 nvh = nvpair_find(nv, name);
1149 if (nvh == NULL) {
1150 dt = DATA_TYPE_STRING;
1151 } else {
1152 nv_string_t *nvp_name;
1153 nv_pair_data_t *nvp_data;
1154
1155 nvp_name = (nv_string_t *)(nvh + 1);
1156 nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1157 NV_ALIGN4(nvp_name->nv_size));
1158 dt = nvp_data->nv_type;
1159 }
1160 } else {
1161 dt = nvpair_type_from_name(type);
1162 }
1163 nvlist_destroy(nv);
1164
1165 rv = 0;
1166 switch (dt) {
1167 case DATA_TYPE_INT8:
1168 rv = get_int64(data, &val);
1169 if (rv == 0) {
1170 int8_t v = val;
1171
1172 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1173 }
1174 break;
1175 case DATA_TYPE_INT16:
1176 rv = get_int64(data, &val);
1177 if (rv == 0) {
1178 int16_t v = val;
1179
1180 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1181 }
1182 break;
1183 case DATA_TYPE_INT32:
1184 rv = get_int64(data, &val);
1185 if (rv == 0) {
1186 int32_t v = val;
1187
1188 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1189 }
1190 break;
1191 case DATA_TYPE_INT64:
1192 rv = get_int64(data, &val);
1193 if (rv == 0) {
1194 rv = zfs_nvstore_setter(vdev, dt, name, &val,
1195 sizeof (val));
1196 }
1197 break;
1198
1199 case DATA_TYPE_BYTE:
1200 rv = get_uint64(data, &uval);
1201 if (rv == 0) {
1202 uint8_t v = uval;
1203
1204 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1205 }
1206 break;
1207
1208 case DATA_TYPE_UINT8:
1209 rv = get_uint64(data, &uval);
1210 if (rv == 0) {
1211 uint8_t v = uval;
1212
1213 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1214 }
1215 break;
1216
1217 case DATA_TYPE_UINT16:
1218 rv = get_uint64(data, &uval);
1219 if (rv == 0) {
1220 uint16_t v = uval;
1221
1222 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1223 }
1224 break;
1225
1226 case DATA_TYPE_UINT32:
1227 rv = get_uint64(data, &uval);
1228 if (rv == 0) {
1229 uint32_t v = uval;
1230
1231 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1232 }
1233 break;
1234
1235 case DATA_TYPE_UINT64:
1236 rv = get_uint64(data, &uval);
1237 if (rv == 0) {
1238 rv = zfs_nvstore_setter(vdev, dt, name, &uval,
1239 sizeof (uval));
1240 }
1241 break;
1242
1243 case DATA_TYPE_STRING:
1244 rv = zfs_nvstore_setter(vdev, dt, name, data, strlen(data) + 1);
1245 break;
1246
1247 case DATA_TYPE_BOOLEAN_VALUE:
1248 rv = get_int64(data, &val);
1249 if (rv == 0) {
1250 boolean_t v = val;
1251
1252 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1253 }
1254
1255 default:
1256 rv = EINVAL;
1257 }
1258 return (rv);
1259 }
1260
1261 static int
zfs_nvstore_unset_impl(void * vdev,const char * name,bool unset_env)1262 zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env)
1263 {
1264 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1265 spa_t *spa;
1266 nvlist_t *nv;
1267 int rv;
1268
1269 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1270 return (ENOTSUP);
1271
1272 if ((spa = spa_find_by_dev(dev)) == NULL)
1273 return (ENXIO);
1274
1275 if (spa->spa_bootenv == NULL)
1276 return (ENXIO);
1277
1278 if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1279 NULL, &nv, NULL) != 0)
1280 return (ENOENT);
1281
1282 rv = nvlist_remove(nv, name, DATA_TYPE_UNKNOWN);
1283 if (rv == 0) {
1284 if (nvlist_next_nvpair(nv, NULL) == NULL) {
1285 rv = nvlist_remove(spa->spa_bootenv, OS_NVSTORE,
1286 DATA_TYPE_NVLIST);
1287 } else {
1288 rv = nvlist_add_nvlist(spa->spa_bootenv,
1289 OS_NVSTORE, nv);
1290 }
1291 if (rv == 0)
1292 rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1293 }
1294
1295 if (unset_env)
1296 env_discard(env_getenv(name));
1297 return (rv);
1298 }
1299
/*
 * nvstore "unset" callback: remove the variable from the pool's nvstore and
 * ask the implementation to discard the loader environment variable as well.
 */
static int
zfs_nvstore_unset(void *vdev, const char *name)
{
	const bool discard_env = true;

	return (zfs_nvstore_unset_impl(vdev, name, discard_env));
}
1305
1306 static int
zfs_nvstore_print(void * vdev __unused,void * ptr)1307 zfs_nvstore_print(void *vdev __unused, void *ptr)
1308 {
1309
1310 nvpair_print(ptr, 0);
1311 return (0);
1312 }
1313
1314 /*
1315 * Create environment variable from nvpair.
1316 * set hook will update nvstore with new value, unset hook will remove
1317 * variable from nvstore.
1318 */
1319 static int
zfs_nvstore_setenv(void * vdev __unused,void * ptr)1320 zfs_nvstore_setenv(void *vdev __unused, void *ptr)
1321 {
1322 nvp_header_t *nvh = ptr;
1323 nv_string_t *nvp_name, *nvp_value;
1324 nv_pair_data_t *nvp_data;
1325 char *name, *value;
1326 int rv = 0;
1327
1328 if (nvh == NULL)
1329 return (ENOENT);
1330
1331 nvp_name = (nv_string_t *)(nvh + 1);
1332 nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1333 NV_ALIGN4(nvp_name->nv_size));
1334
1335 if ((name = nvstring_get(nvp_name)) == NULL)
1336 return (ENOMEM);
1337
1338 value = NULL;
1339 switch (nvp_data->nv_type) {
1340 case DATA_TYPE_BYTE:
1341 case DATA_TYPE_UINT8:
1342 (void) asprintf(&value, "%uc",
1343 *(unsigned *)&nvp_data->nv_data[0]);
1344 if (value == NULL)
1345 rv = ENOMEM;
1346 break;
1347
1348 case DATA_TYPE_INT8:
1349 (void) asprintf(&value, "%c", *(int *)&nvp_data->nv_data[0]);
1350 if (value == NULL)
1351 rv = ENOMEM;
1352 break;
1353
1354 case DATA_TYPE_INT16:
1355 (void) asprintf(&value, "%hd", *(short *)&nvp_data->nv_data[0]);
1356 if (value == NULL)
1357 rv = ENOMEM;
1358 break;
1359
1360 case DATA_TYPE_UINT16:
1361 (void) asprintf(&value, "%hu",
1362 *(unsigned short *)&nvp_data->nv_data[0]);
1363 if (value == NULL)
1364 rv = ENOMEM;
1365 break;
1366
1367 case DATA_TYPE_BOOLEAN_VALUE:
1368 case DATA_TYPE_INT32:
1369 (void) asprintf(&value, "%d", *(int *)&nvp_data->nv_data[0]);
1370 if (value == NULL)
1371 rv = ENOMEM;
1372 break;
1373
1374 case DATA_TYPE_UINT32:
1375 (void) asprintf(&value, "%u",
1376 *(unsigned *)&nvp_data->nv_data[0]);
1377 if (value == NULL)
1378 rv = ENOMEM;
1379 break;
1380
1381 case DATA_TYPE_INT64:
1382 (void) asprintf(&value, "%jd",
1383 (intmax_t)*(int64_t *)&nvp_data->nv_data[0]);
1384 if (value == NULL)
1385 rv = ENOMEM;
1386 break;
1387
1388 case DATA_TYPE_UINT64:
1389 (void) asprintf(&value, "%ju",
1390 (uintmax_t)*(uint64_t *)&nvp_data->nv_data[0]);
1391 if (value == NULL)
1392 rv = ENOMEM;
1393 break;
1394
1395 case DATA_TYPE_STRING:
1396 nvp_value = (nv_string_t *)&nvp_data->nv_data[0];
1397 if ((value = nvstring_get(nvp_value)) == NULL) {
1398 rv = ENOMEM;
1399 break;
1400 }
1401 break;
1402
1403 default:
1404 rv = EINVAL;
1405 break;
1406 }
1407
1408 if (value != NULL) {
1409 rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, value,
1410 zfs_nvstore_sethook, zfs_nvstore_unsethook);
1411 free(value);
1412 }
1413 free(name);
1414 return (rv);
1415 }
1416
1417 static int
zfs_nvstore_iterate(void * vdev,int (* cb)(void *,void *))1418 zfs_nvstore_iterate(void *vdev, int (*cb)(void *, void *))
1419 {
1420 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1421 spa_t *spa;
1422 nvlist_t *nv;
1423 nvp_header_t *nvh;
1424 int rv;
1425
1426 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1427 return (ENOTSUP);
1428
1429 if ((spa = spa_find_by_dev(dev)) == NULL)
1430 return (ENXIO);
1431
1432 if (spa->spa_bootenv == NULL)
1433 return (ENXIO);
1434
1435 if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1436 NULL, &nv, NULL) != 0)
1437 return (ENOENT);
1438
1439 rv = 0;
1440 nvh = NULL;
1441 while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) {
1442 rv = cb(vdev, nvh);
1443 if (rv != 0)
1444 break;
1445 }
1446 return (rv);
1447 }
1448
1449 nvs_callbacks_t nvstore_zfs_cb = {
1450 .nvs_getter = zfs_nvstore_getter,
1451 .nvs_setter = zfs_nvstore_setter,
1452 .nvs_setter_str = zfs_nvstore_setter_str,
1453 .nvs_unset = zfs_nvstore_unset,
1454 .nvs_print = zfs_nvstore_print,
1455 .nvs_iterate = zfs_nvstore_iterate
1456 };
1457
1458 int
zfs_attach_nvstore(void * vdev)1459 zfs_attach_nvstore(void *vdev)
1460 {
1461 struct zfs_devdesc *dev = vdev;
1462 spa_t *spa;
1463 uint64_t version;
1464 int rv;
1465
1466 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1467 return (ENOTSUP);
1468
1469 if ((spa = spa_find_by_dev(dev)) == NULL)
1470 return (ENXIO);
1471
1472 rv = nvlist_find(spa->spa_bootenv, BOOTENV_VERSION, DATA_TYPE_UINT64,
1473 NULL, &version, NULL);
1474
1475 if (rv != 0 || version != VB_NVLIST) {
1476 return (ENXIO);
1477 }
1478
1479 dev = malloc(sizeof (*dev));
1480 if (dev == NULL)
1481 return (ENOMEM);
1482 memcpy(dev, vdev, sizeof (*dev));
1483
1484 rv = nvstore_init(spa->spa_name, &nvstore_zfs_cb, dev);
1485 if (rv != 0)
1486 free(dev);
1487 else
1488 rv = zfs_nvstore_iterate(dev, zfs_nvstore_setenv);
1489 return (rv);
1490 }
1491
1492 int
zfs_probe_dev(const char * devname,uint64_t * pool_guid)1493 zfs_probe_dev(const char *devname, uint64_t *pool_guid)
1494 {
1495 struct ptable *table;
1496 struct zfs_probe_args pa;
1497 uint64_t mediasz;
1498 int ret;
1499
1500 if (pool_guid)
1501 *pool_guid = 0;
1502 pa.fd = open(devname, O_RDWR);
1503 if (pa.fd == -1)
1504 return (ENXIO);
1505 /* Probe the whole disk */
1506 ret = zfs_probe(pa.fd, pool_guid);
1507 if (ret == 0)
1508 return (0);
1509
1510 /* Probe each partition */
1511 ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
1512 if (ret == 0)
1513 ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
1514 if (ret == 0) {
1515 pa.devname = devname;
1516 pa.pool_guid = pool_guid;
1517 table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
1518 zfs_diskread);
1519 if (table != NULL) {
1520 ptable_iterate(table, &pa, zfs_probe_partition);
1521 ptable_close(table);
1522 }
1523 }
1524 close(pa.fd);
1525 if (pool_guid && *pool_guid == 0)
1526 ret = ENXIO;
1527 return (ret);
1528 }
1529
1530 /*
1531 * Print information about ZFS pools
1532 */
1533 static int
zfs_dev_print(int verbose)1534 zfs_dev_print(int verbose)
1535 {
1536 spa_t *spa;
1537 char line[80];
1538 int ret = 0;
1539
1540 if (STAILQ_EMPTY(&zfs_pools))
1541 return (0);
1542
1543 printf("%s devices:", zfs_dev.dv_name);
1544 if ((ret = pager_output("\n")) != 0)
1545 return (ret);
1546
1547 if (verbose) {
1548 return (spa_all_status());
1549 }
1550 STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
1551 snprintf(line, sizeof(line), " zfs:%s\n", spa->spa_name);
1552 ret = pager_output(line);
1553 if (ret != 0)
1554 break;
1555 }
1556 return (ret);
1557 }
1558
1559 /*
1560 * Attempt to open the pool described by (dev) for use by (f).
1561 */
1562 static int
zfs_dev_open(struct open_file * f,...)1563 zfs_dev_open(struct open_file *f, ...)
1564 {
1565 va_list args;
1566 struct zfs_devdesc *dev;
1567 struct zfsmount *mount;
1568 spa_t *spa;
1569 int rv;
1570
1571 va_start(args, f);
1572 dev = va_arg(args, struct zfs_devdesc *);
1573 va_end(args);
1574
1575 if ((spa = spa_find_by_dev(dev)) == NULL)
1576 return (ENXIO);
1577
1578 STAILQ_FOREACH(mount, &zfsmount, next) {
1579 if (spa->spa_guid == mount->spa->spa_guid)
1580 break;
1581 }
1582
1583 rv = 0;
1584 /* This device is not set as currdev, mount us private copy. */
1585 if (mount == NULL)
1586 rv = zfs_mount(zfs_fmtdev(dev), NULL, (void **)&mount);
1587
1588 if (rv == 0) {
1589 f->f_devdata = mount;
1590 free(dev);
1591 }
1592 return (rv);
1593 }
1594
1595 static int
zfs_dev_close(struct open_file * f)1596 zfs_dev_close(struct open_file *f)
1597 {
1598 struct zfsmount *mnt, *mount;
1599
1600 mnt = f->f_devdata;
1601
1602 STAILQ_FOREACH(mount, &zfsmount, next) {
1603 if (mnt->spa->spa_guid == mount->spa->spa_guid)
1604 break;
1605 }
1606
1607 /*
1608 * devclose() will free f->f_devdata, but since we do have
1609 * pointer to zfsmount structure in f->f_devdata, and
1610 * zfs_unmount() will also free the zfsmount structure,
1611 * we will get double free. To prevent double free,
1612 * we must set f_devdata to NULL there.
1613 */
1614 if (mount != NULL)
1615 f->f_devdata = NULL;
1616
1617 return (0);
1618 }
1619
1620 static int
zfs_dev_strategy(void * devdata,int rw,daddr_t dblk,size_t size,char * buf,size_t * rsize)1621 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
1622 {
1623
1624 return (ENOSYS);
1625 }
1626
1627 struct devsw zfs_dev = {
1628 .dv_name = "zfs",
1629 .dv_type = DEVT_ZFS,
1630 .dv_init = zfs_dev_init,
1631 .dv_strategy = zfs_dev_strategy,
1632 .dv_open = zfs_dev_open,
1633 .dv_close = zfs_dev_close,
1634 .dv_ioctl = noioctl,
1635 .dv_print = zfs_dev_print,
1636 .dv_cleanup = NULL
1637 };
1638
1639 int
zfs_parsedev(struct zfs_devdesc * dev,const char * devspec,const char ** path)1640 zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
1641 {
1642 static char rootname[ZFS_MAXNAMELEN];
1643 static char poolname[ZFS_MAXNAMELEN];
1644 spa_t *spa;
1645 const char *end;
1646 const char *np;
1647 const char *sep;
1648 int rv;
1649
1650 np = devspec;
1651 if (*np != ':')
1652 return (EINVAL);
1653 np++;
1654 end = strrchr(np, ':');
1655 if (end == NULL)
1656 return (EINVAL);
1657 sep = strchr(np, '/');
1658 if (sep == NULL || sep >= end)
1659 sep = end;
1660 memcpy(poolname, np, sep - np);
1661 poolname[sep - np] = '\0';
1662 if (sep < end) {
1663 sep++;
1664 memcpy(rootname, sep, end - sep);
1665 rootname[end - sep] = '\0';
1666 }
1667 else
1668 rootname[0] = '\0';
1669
1670 spa = spa_find_by_name(poolname);
1671 if (!spa)
1672 return (ENXIO);
1673 dev->pool_guid = spa->spa_guid;
1674 rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
1675 if (rv != 0)
1676 return (rv);
1677 if (path != NULL)
1678 *path = (*end == '\0') ? end : end + 1;
1679 dev->dd.d_dev = &zfs_dev;
1680 return (0);
1681 }
1682
1683 char *
zfs_fmtdev(void * vdev)1684 zfs_fmtdev(void *vdev)
1685 {
1686 static char rootname[ZFS_MAXNAMELEN];
1687 static char buf[2 * ZFS_MAXNAMELEN + 8];
1688 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1689 spa_t *spa;
1690
1691 buf[0] = '\0';
1692 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1693 return (buf);
1694
1695 /* Do we have any pools? */
1696 spa = STAILQ_FIRST(&zfs_pools);
1697 if (spa == NULL)
1698 return (buf);
1699
1700 if (dev->pool_guid == 0)
1701 dev->pool_guid = spa->spa_guid;
1702 else
1703 spa = spa_find_by_guid(dev->pool_guid);
1704
1705 if (spa == NULL) {
1706 printf("ZFS: can't find pool by guid\n");
1707 return (buf);
1708 }
1709 if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
1710 printf("ZFS: can't find root filesystem\n");
1711 return (buf);
1712 }
1713 if (zfs_rlookup(spa, dev->root_guid, rootname)) {
1714 printf("ZFS: can't find filesystem by guid\n");
1715 return (buf);
1716 }
1717
1718 if (rootname[0] == '\0')
1719 snprintf(buf, sizeof(buf), "%s:%s:", dev->dd.d_dev->dv_name,
1720 spa->spa_name);
1721 else
1722 snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->dd.d_dev->dv_name,
1723 spa->spa_name, rootname);
1724 return (buf);
1725 }
1726
/*
 * Split "pool[/dataset]" into its pool and dataset parts.
 *
 * The pool part (everything before the first '/', or the whole string) is
 * copied NUL-terminated into poolname, which holds size bytes.  When dsnamep
 * is non-NULL it receives a pointer to the text after the first '/', or to
 * an empty string when name contains no '/'.
 *
 * Returns 0 on success or EINVAL when the pool part plus its terminator does
 * not fit in size bytes (poolname is left untouched in that case).
 */
static int
split_devname(const char *name, char *poolname, size_t size,
    const char **dsnamep)
{
	const char *slash, *dataset;
	size_t poollen;

	ASSERT(name != NULL);
	ASSERT(poolname != NULL);

	slash = strchr(name, '/');
	if (slash == NULL) {
		poollen = strlen(name);
		dataset = "";
	} else {
		poollen = (size_t)(slash - name);
		dataset = slash + 1;
	}

	if (size < poollen + 1)
		return (EINVAL);

	/* poollen never exceeds strlen(name), so this copies no NUL early. */
	memcpy(poolname, name, poollen);
	poolname[poollen] = '\0';

	if (dsnamep != NULL)
		*dsnamep = dataset;

	return (0);
}
1755
1756 int
zfs_list(const char * name)1757 zfs_list(const char *name)
1758 {
1759 static char poolname[ZFS_MAXNAMELEN];
1760 uint64_t objid;
1761 spa_t *spa;
1762 const char *dsname;
1763 int rv;
1764
1765 if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1766 return (EINVAL);
1767
1768 spa = spa_find_by_name(poolname);
1769 if (!spa)
1770 return (ENXIO);
1771 rv = zfs_lookup_dataset(spa, dsname, &objid);
1772 if (rv != 0)
1773 return (rv);
1774
1775 return (zfs_list_dataset(spa, objid));
1776 }
1777
1778 void
init_zfs_boot_options(const char * currdev_in)1779 init_zfs_boot_options(const char *currdev_in)
1780 {
1781 char poolname[ZFS_MAXNAMELEN];
1782 char *beroot, *currdev;
1783 spa_t *spa;
1784 int currdev_len;
1785 const char *dsname;
1786
1787 currdev = NULL;
1788 currdev_len = strlen(currdev_in);
1789 if (currdev_len == 0)
1790 return;
1791 if (strncmp(currdev_in, "zfs:", 4) != 0)
1792 return;
1793 currdev = strdup(currdev_in);
1794 if (currdev == NULL)
1795 return;
1796 /* Remove the trailing : */
1797 currdev[currdev_len - 1] = '\0';
1798
1799 setenv("zfs_be_active", currdev, 1);
1800 setenv("zfs_be_currpage", "1", 1);
1801 /* Remove the last element (current bootenv) */
1802 beroot = strrchr(currdev, '/');
1803 if (beroot != NULL)
1804 beroot[0] = '\0';
1805 beroot = strchr(currdev, ':') + 1;
1806 setenv("zfs_be_root", beroot, 1);
1807
1808 if (split_devname(beroot, poolname, sizeof(poolname), &dsname) != 0)
1809 return;
1810
1811 spa = spa_find_by_name(poolname);
1812 if (spa == NULL)
1813 return;
1814
1815 zfs_bootenv_initial("bootenvs", spa, beroot, dsname, 0);
1816 zfs_checkpoints_initial(spa, beroot, dsname);
1817
1818 free(currdev);
1819 }
1820
1821 static void
zfs_checkpoints_initial(spa_t * spa,const char * name,const char * dsname)1822 zfs_checkpoints_initial(spa_t *spa, const char *name, const char *dsname)
1823 {
1824 char envname[32];
1825
1826 if (spa->spa_uberblock_checkpoint.ub_checkpoint_txg != 0) {
1827 snprintf(envname, sizeof(envname), "zpool_checkpoint");
1828 setenv(envname, name, 1);
1829
1830 spa->spa_uberblock = &spa->spa_uberblock_checkpoint;
1831 spa->spa_mos = &spa->spa_mos_checkpoint;
1832
1833 zfs_bootenv_initial("bootenvs_check", spa, name, dsname, 1);
1834
1835 spa->spa_uberblock = &spa->spa_uberblock_master;
1836 spa->spa_mos = &spa->spa_mos_master;
1837 }
1838 }
1839
1840 static void
zfs_bootenv_initial(const char * envprefix,spa_t * spa,const char * rootname,const char * dsname,int checkpoint)1841 zfs_bootenv_initial(const char *envprefix, spa_t *spa, const char *rootname,
1842 const char *dsname, int checkpoint)
1843 {
1844 char envname[32], envval[256];
1845 uint64_t objid;
1846 int bootenvs_idx, rv;
1847
1848 SLIST_INIT(&zfs_be_head);
1849 zfs_env_count = 0;
1850
1851 rv = zfs_lookup_dataset(spa, dsname, &objid);
1852 if (rv != 0)
1853 return;
1854
1855 rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1856 bootenvs_idx = 0;
1857 /* Populate the initial environment variables */
1858 SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1859 /* Enumerate all bootenvs for general usage */
1860 snprintf(envname, sizeof(envname), "%s[%d]",
1861 envprefix, bootenvs_idx);
1862 snprintf(envval, sizeof(envval), "zfs:%s%s/%s",
1863 checkpoint ? "!" : "", rootname, zfs_be->name);
1864 rv = setenv(envname, envval, 1);
1865 if (rv != 0)
1866 break;
1867 bootenvs_idx++;
1868 }
1869 snprintf(envname, sizeof(envname), "%s_count", envprefix);
1870 snprintf(envval, sizeof(envval), "%d", bootenvs_idx);
1871 setenv(envname, envval, 1);
1872
1873 /* Clean up the SLIST of ZFS BEs */
1874 while (!SLIST_EMPTY(&zfs_be_head)) {
1875 zfs_be = SLIST_FIRST(&zfs_be_head);
1876 SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1877 free(zfs_be->name);
1878 free(zfs_be);
1879 }
1880 }
1881
1882 int
zfs_bootenv(const char * name)1883 zfs_bootenv(const char *name)
1884 {
1885 char poolname[ZFS_MAXNAMELEN], *root;
1886 const char *dsname;
1887 char becount[4];
1888 uint64_t objid;
1889 spa_t *spa;
1890 int rv, pages, perpage, currpage;
1891
1892 if (name == NULL)
1893 return (EINVAL);
1894 if ((root = getenv("zfs_be_root")) == NULL)
1895 return (EINVAL);
1896
1897 if (strcmp(name, root) != 0) {
1898 if (setenv("zfs_be_root", name, 1) != 0)
1899 return (ENOMEM);
1900 }
1901
1902 SLIST_INIT(&zfs_be_head);
1903 zfs_env_count = 0;
1904
1905 if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1906 return (EINVAL);
1907
1908 spa = spa_find_by_name(poolname);
1909 if (!spa)
1910 return (ENXIO);
1911 rv = zfs_lookup_dataset(spa, dsname, &objid);
1912 if (rv != 0)
1913 return (rv);
1914 rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1915
1916 /* Calculate and store the number of pages of BEs */
1917 perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
1918 pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
1919 snprintf(becount, 4, "%d", pages);
1920 if (setenv("zfs_be_pages", becount, 1) != 0)
1921 return (ENOMEM);
1922
1923 /* Roll over the page counter if it has exceeded the maximum */
1924 currpage = strtol(getenv("zfs_be_currpage"), NULL, 10);
1925 if (currpage > pages) {
1926 if (setenv("zfs_be_currpage", "1", 1) != 0)
1927 return (ENOMEM);
1928 }
1929
1930 /* Populate the menu environment variables */
1931 zfs_set_env();
1932
1933 /* Clean up the SLIST of ZFS BEs */
1934 while (!SLIST_EMPTY(&zfs_be_head)) {
1935 zfs_be = SLIST_FIRST(&zfs_be_head);
1936 SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1937 free(zfs_be->name);
1938 free(zfs_be);
1939 }
1940
1941 return (rv);
1942 }
1943
1944 int
zfs_belist_add(const char * name,uint64_t value __unused)1945 zfs_belist_add(const char *name, uint64_t value __unused)
1946 {
1947
1948 /* Skip special datasets that start with a $ character */
1949 if (strncmp(name, "$", 1) == 0) {
1950 return (0);
1951 }
1952 /* Add the boot environment to the head of the SLIST */
1953 zfs_be = malloc(sizeof(struct zfs_be_entry));
1954 if (zfs_be == NULL) {
1955 return (ENOMEM);
1956 }
1957 zfs_be->name = strdup(name);
1958 if (zfs_be->name == NULL) {
1959 free(zfs_be);
1960 return (ENOMEM);
1961 }
1962 SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
1963 zfs_env_count++;
1964
1965 return (0);
1966 }
1967
1968 int
zfs_set_env(void)1969 zfs_set_env(void)
1970 {
1971 char envname[32], envval[256];
1972 char *beroot, *pagenum;
1973 int rv, page, ctr;
1974
1975 beroot = getenv("zfs_be_root");
1976 if (beroot == NULL) {
1977 return (1);
1978 }
1979
1980 pagenum = getenv("zfs_be_currpage");
1981 if (pagenum != NULL) {
1982 page = strtol(pagenum, NULL, 10);
1983 } else {
1984 page = 1;
1985 }
1986
1987 ctr = 1;
1988 rv = 0;
1989 zfs_env_index = ZFS_BE_FIRST;
1990 SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1991 /* Skip to the requested page number */
1992 if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
1993 ctr++;
1994 continue;
1995 }
1996
1997 snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1998 snprintf(envval, sizeof(envval), "%s", zfs_be->name);
1999 rv = setenv(envname, envval, 1);
2000 if (rv != 0) {
2001 break;
2002 }
2003
2004 snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
2005 rv = setenv(envname, envval, 1);
2006 if (rv != 0){
2007 break;
2008 }
2009
2010 snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
2011 rv = setenv(envname, "set_bootenv", 1);
2012 if (rv != 0){
2013 break;
2014 }
2015
2016 snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
2017 snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
2018 rv = setenv(envname, envval, 1);
2019 if (rv != 0){
2020 break;
2021 }
2022
2023 zfs_env_index++;
2024 if (zfs_env_index > ZFS_BE_LAST) {
2025 break;
2026 }
2027
2028 }
2029
2030 for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
2031 snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
2032 (void)unsetenv(envname);
2033 snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
2034 (void)unsetenv(envname);
2035 snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
2036 (void)unsetenv(envname);
2037 snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
2038 (void)unsetenv(envname);
2039 }
2040
2041 return (rv);
2042 }
2043