1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
25 * Copyright 2015 RackTop Systems.
26 * Copyright (c) 2016, Intel Corporation.
27 */
28
29 /*
30 * Pool import support functions.
31 *
32 * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
33 * these commands are expected to run in the global zone, we can assume
34 * that the devices are all readable when called.
35 *
36 * To import a pool, we rely on reading the configuration information from the
37 * ZFS label of each device. If we successfully read the label, then we
38 * organize the configuration information in the following hierarchy:
39 *
40 * pool guid -> toplevel vdev guid -> label txg
41 *
42 * Duplicate entries matching this same tuple will be discarded. Once we have
43 * examined every device, we pick the best label txg config for each toplevel
44 * vdev. We then arrange these toplevel vdevs into a complete pool config, and
45 * update any paths that have changed. Finally, we attempt to import the pool
46 * using our derived config, and record the results.
47 */
48
49 #include <ctype.h>
50 #include <dirent.h>
51 #include <errno.h>
52 #include <libintl.h>
53 #include <libgen.h>
54 #include <stddef.h>
55 #include <stdlib.h>
56 #include <stdio.h>
57 #include <string.h>
58 #include <sys/stat.h>
59 #include <unistd.h>
60 #include <fcntl.h>
61 #include <sys/dktp/fdisk.h>
62 #include <sys/vdev_impl.h>
63 #include <sys/fs/zfs.h>
64 #include <sys/vdev_impl.h>
65
66 #include <thread_pool.h>
67 #include <libzutil.h>
68 #include <libnvpair.h>
69
70 #include "zutil_import.h"
71
72 #ifdef HAVE_LIBUDEV
73 #include <libudev.h>
74 #include <sched.h>
75 #endif
76 #include <blkid/blkid.h>
77
78 #define DEFAULT_IMPORT_PATH_SIZE 9
79 #define DEV_BYID_PATH "/dev/disk/by-id/"
80
81 static boolean_t
is_watchdog_dev(char * dev)82 is_watchdog_dev(char *dev)
83 {
84 /* For 'watchdog' dev */
85 if (strcmp(dev, "watchdog") == 0)
86 return (B_TRUE);
87
88 /* For 'watchdog<digit><whatever> */
89 if (strstr(dev, "watchdog") == dev && isdigit(dev[8]))
90 return (B_TRUE);
91
92 return (B_FALSE);
93 }
94
/*
 * Flush the block-device buffer cache for 'fd' via the Linux BLKFLSBUF
 * ioctl so subsequent label reads observe on-disk state.  Returns the
 * ioctl(2) result: 0 on success, -1 with errno set on failure.
 */
int
zfs_dev_flush(int fd)
{
	return (ioctl(fd, BLKFLSBUF));
}
100
/*
 * Thread-pool worker: probe one candidate device for ZFS labels.
 *
 * 'arg' is an rdsk_node_t describing the device.  On success the parsed
 * label config and the count of valid labels are attached to the node
 * (rn_config, rn_num_labels).  When rn_labelpaths is set, extra candidate
 * nodes are built from the path/devid strings stored in the label, inserted
 * into the shared AVL tree under rn_lock, and probed recursively.
 */
void
zpool_open_func(void *arg)
{
	rdsk_node_t *rn = arg;
	libpc_handle_t *hdl = rn->rn_hdl;
	struct stat64 statbuf;
	nvlist_t *config;
	char *bname, *dupname;
	uint64_t vdev_guid = 0;
	int error;
	int num_labels = 0;
	int fd;

	/*
	 * Skip devices with well known prefixes there can be side effects
	 * when opening devices which need to be avoided.
	 *
	 * hpet     - High Precision Event Timer
	 * watchdog - Watchdog must be closed in a special way.
	 */
	/* basename() may modify its argument, so operate on a copy. */
	dupname = zutil_strdup(hdl, rn->rn_name);
	bname = basename(dupname);
	error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname));
	free(dupname);
	if (error)
		return;

	/*
	 * Ignore failed stats.  We only want regular files and block devices.
	 */
	if (stat64(rn->rn_name, &statbuf) != 0 ||
	    (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)))
		return;

	/*
	 * Preferentially open using O_DIRECT to bypass the block device
	 * cache which may be stale for multipath devices.  An EINVAL errno
	 * indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
	 */
	fd = open(rn->rn_name, O_RDONLY | O_DIRECT);
	if ((fd < 0) && (errno == EINVAL))
		fd = open(rn->rn_name, O_RDONLY);
	/* Remember permission failures so the caller can report them. */
	if ((fd < 0) && (errno == EACCES))
		hdl->lpc_open_access_error = B_TRUE;
	if (fd < 0)
		return;

	/*
	 * This file is too small to hold a zpool
	 */
	if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) {
		(void) close(fd);
		return;
	}

	error = zpool_read_label(fd, &config, &num_labels);
	if (error != 0) {
		(void) close(fd);
		return;
	}

	/* A config with no valid labels is useless; discard it. */
	if (num_labels == 0) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	/*
	 * Check that the vdev is for the expected guid.  Additional entries
	 * are speculatively added based on the paths stored in the labels.
	 * Entries with valid paths but incorrect guids must be removed.
	 */
	error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
	if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	(void) close(fd);

	/* Ownership of 'config' passes to the node here. */
	rn->rn_config = config;
	rn->rn_num_labels = num_labels;

	/*
	 * Add additional entries for paths described by this label.
	 */
	if (rn->rn_labelpaths) {
		char *path = NULL;
		char *devid = NULL;
		char *env = NULL;
		rdsk_node_t *slice;
		avl_index_t where;
		int timeout;
		int error;	/* NOTE(review): shadows outer 'error' */

		if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
			return;

		/* Optional env override of the udev settle timeout (ms). */
		env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS");
		if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 ||
		    timeout < 0) {
			timeout = DISK_LABEL_WAIT;
		}

		/*
		 * Allow devlinks to stabilize so all paths are available.
		 */
		zpool_label_disk_wait(rn->rn_name, timeout);

		if (path != NULL) {
			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			slice->rn_name = zutil_strdup(hdl, path);
			slice->rn_vdev_guid = vdev_guid;
			slice->rn_avl = rn->rn_avl;
			slice->rn_hdl = hdl;
			slice->rn_order = IMPORT_ORDER_PREFERRED_1;
			slice->rn_labelpaths = B_FALSE;
			/* Insert only if not already present in the tree. */
			pthread_mutex_lock(rn->rn_lock);
			if (avl_find(rn->rn_avl, slice, &where)) {
				pthread_mutex_unlock(rn->rn_lock);
				free(slice->rn_name);
				free(slice);
			} else {
				avl_insert(rn->rn_avl, slice, where);
				pthread_mutex_unlock(rn->rn_lock);
				/* Recurse to probe the new candidate. */
				zpool_open_func(slice);
			}
		}

		if (devid != NULL) {
			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			error = asprintf(&slice->rn_name, "%s%s",
			    DEV_BYID_PATH, devid);
			if (error == -1) {
				free(slice);
				return;
			}

			slice->rn_vdev_guid = vdev_guid;
			slice->rn_avl = rn->rn_avl;
			slice->rn_hdl = hdl;
			slice->rn_order = IMPORT_ORDER_PREFERRED_2;
			slice->rn_labelpaths = B_FALSE;
			pthread_mutex_lock(rn->rn_lock);
			if (avl_find(rn->rn_avl, slice, &where)) {
				pthread_mutex_unlock(rn->rn_lock);
				free(slice->rn_name);
				free(slice);
			} else {
				avl_insert(rn->rn_avl, slice, where);
				pthread_mutex_unlock(rn->rn_lock);
				/* Recurse to probe the new candidate. */
				zpool_open_func(slice);
			}
		}
	}
}
258
/*
 * Default device search directories, ordered from most to least
 * preferred.  Consulted by zfs_path_order() when the ZPOOL_IMPORT_PATH
 * environment variable is not set.
 */
static char *
zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = {
	"/dev/disk/by-vdev",	/* Custom rules, use first if they exist */
	"/dev/mapper",		/* Use multipath devices before components */
	"/dev/disk/by-partlabel", /* Single unique entry set by user */
	"/dev/disk/by-partuuid", /* Generated partition uuid */
	"/dev/disk/by-label",	/* Custom persistent labels */
	"/dev/disk/by-uuid",	/* Single unique entry and persistent */
	"/dev/disk/by-id",	/* May be multiple entries and persistent */
	"/dev/disk/by-path",	/* Encodes physical location and persistent */
	"/dev"			/* UNSAFE device names will change */
};
271
272 const char * const *
zpool_default_search_paths(size_t * count)273 zpool_default_search_paths(size_t *count)
274 {
275 *count = DEFAULT_IMPORT_PATH_SIZE;
276 return ((const char * const *)zpool_default_import_path);
277 }
278
279 /*
280 * Given a full path to a device determine if that device appears in the
281 * import search path. If it does return the first match and store the
282 * index in the passed 'order' variable, otherwise return an error.
283 */
284 static int
zfs_path_order(char * name,int * order)285 zfs_path_order(char *name, int *order)
286 {
287 int i = 0, error = ENOENT;
288 char *dir, *env, *envdup;
289
290 env = getenv("ZPOOL_IMPORT_PATH");
291 if (env) {
292 envdup = strdup(env);
293 dir = strtok(envdup, ":");
294 while (dir) {
295 if (strncmp(name, dir, strlen(dir)) == 0) {
296 *order = i;
297 error = 0;
298 break;
299 }
300 dir = strtok(NULL, ":");
301 i++;
302 }
303 free(envdup);
304 } else {
305 for (i = 0; i < DEFAULT_IMPORT_PATH_SIZE; i++) {
306 if (strncmp(name, zpool_default_import_path[i],
307 strlen(zpool_default_import_path[i])) == 0) {
308 *order = i;
309 error = 0;
310 break;
311 }
312 }
313 }
314
315 return (error);
316 }
317
318 /*
319 * Use libblkid to quickly enumerate all known zfs devices.
320 */
321 int
zpool_find_import_blkid(libpc_handle_t * hdl,pthread_mutex_t * lock,avl_tree_t ** slice_cache)322 zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
323 avl_tree_t **slice_cache)
324 {
325 rdsk_node_t *slice;
326 blkid_cache cache;
327 blkid_dev_iterate iter;
328 blkid_dev dev;
329 avl_index_t where;
330 int error;
331
332 *slice_cache = NULL;
333
334 error = blkid_get_cache(&cache, NULL);
335 if (error != 0)
336 return (error);
337
338 error = blkid_probe_all_new(cache);
339 if (error != 0) {
340 blkid_put_cache(cache);
341 return (error);
342 }
343
344 iter = blkid_dev_iterate_begin(cache);
345 if (iter == NULL) {
346 blkid_put_cache(cache);
347 return (EINVAL);
348 }
349
350 error = blkid_dev_set_search(iter, "TYPE", "zfs_member");
351 if (error != 0) {
352 blkid_dev_iterate_end(iter);
353 blkid_put_cache(cache);
354 return (error);
355 }
356
357 *slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
358 avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
359 offsetof(rdsk_node_t, rn_node));
360
361 while (blkid_dev_next(iter, &dev) == 0) {
362 slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
363 slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
364 slice->rn_vdev_guid = 0;
365 slice->rn_lock = lock;
366 slice->rn_avl = *slice_cache;
367 slice->rn_hdl = hdl;
368 slice->rn_labelpaths = B_TRUE;
369
370 error = zfs_path_order(slice->rn_name, &slice->rn_order);
371 if (error == 0)
372 slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
373 else
374 slice->rn_order = IMPORT_ORDER_DEFAULT;
375
376 pthread_mutex_lock(lock);
377 if (avl_find(*slice_cache, slice, &where)) {
378 free(slice->rn_name);
379 free(slice);
380 } else {
381 avl_insert(*slice_cache, slice, where);
382 }
383 pthread_mutex_unlock(lock);
384 }
385
386 blkid_dev_iterate_end(iter);
387 blkid_put_cache(cache);
388
389 return (0);
390 }
391
392 /*
393 * Linux persistent device strings for vdev labels
394 *
395 * based on libudev for consistency with libudev disk add/remove events
396 */
397
/* Persistent identity strings encoded into a leaf vdev's label. */
typedef struct vdev_dev_strs {
	char vds_devid[128];	/* device id: describes WHAT the device is */
	char vds_devphys[128];	/* physical path: describes WHERE it lives */
} vdev_dev_strs_t;
402
403 #ifdef HAVE_LIBUDEV
404
405 /*
406 * Obtain the persistent device id string (describes what)
407 *
408 * used by ZED vdev matching for auto-{online,expand,replace}
409 */
410 int
zfs_device_get_devid(struct udev_device * dev,char * bufptr,size_t buflen)411 zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
412 {
413 struct udev_list_entry *entry;
414 const char *bus;
415 char devbyid[MAXPATHLEN];
416
417 /* The bus based by-id path is preferred */
418 bus = udev_device_get_property_value(dev, "ID_BUS");
419
420 if (bus == NULL) {
421 const char *dm_uuid;
422
423 /*
424 * For multipath nodes use the persistent uuid based identifier
425 *
426 * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
427 */
428 dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
429 if (dm_uuid != NULL) {
430 (void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
431 return (0);
432 }
433
434 /*
435 * For volumes use the persistent /dev/zvol/dataset identifier
436 */
437 entry = udev_device_get_devlinks_list_entry(dev);
438 while (entry != NULL) {
439 const char *name;
440
441 name = udev_list_entry_get_name(entry);
442 if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
443 (void) strlcpy(bufptr, name, buflen);
444 return (0);
445 }
446 entry = udev_list_entry_get_next(entry);
447 }
448
449 /*
450 * NVME 'by-id' symlinks are similar to bus case
451 */
452 struct udev_device *parent;
453
454 parent = udev_device_get_parent_with_subsystem_devtype(dev,
455 "nvme", NULL);
456 if (parent != NULL)
457 bus = "nvme"; /* continue with bus symlink search */
458 else
459 return (ENODATA);
460 }
461
462 /*
463 * locate the bus specific by-id link
464 */
465 (void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
466 entry = udev_device_get_devlinks_list_entry(dev);
467 while (entry != NULL) {
468 const char *name;
469
470 name = udev_list_entry_get_name(entry);
471 if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
472 name += strlen(DEV_BYID_PATH);
473 (void) strlcpy(bufptr, name, buflen);
474 return (0);
475 }
476 entry = udev_list_entry_get_next(entry);
477 }
478
479 return (ENODATA);
480 }
481
482 /*
483 * Obtain the persistent physical location string (describes where)
484 *
485 * used by ZED vdev matching for auto-{online,expand,replace}
486 */
487 int
zfs_device_get_physical(struct udev_device * dev,char * bufptr,size_t buflen)488 zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
489 {
490 const char *physpath = NULL;
491 struct udev_list_entry *entry;
492
493 /*
494 * Normal disks use ID_PATH for their physical path.
495 */
496 physpath = udev_device_get_property_value(dev, "ID_PATH");
497 if (physpath != NULL && strlen(physpath) > 0) {
498 (void) strlcpy(bufptr, physpath, buflen);
499 return (0);
500 }
501
502 /*
503 * Device mapper devices are virtual and don't have a physical
504 * path. For them we use ID_VDEV instead, which is setup via the
505 * /etc/vdev_id.conf file. ID_VDEV provides a persistent path
506 * to a virtual device. If you don't have vdev_id.conf setup,
507 * you cannot use multipath autoreplace with device mapper.
508 */
509 physpath = udev_device_get_property_value(dev, "ID_VDEV");
510 if (physpath != NULL && strlen(physpath) > 0) {
511 (void) strlcpy(bufptr, physpath, buflen);
512 return (0);
513 }
514
515 /*
516 * For ZFS volumes use the persistent /dev/zvol/dataset identifier
517 */
518 entry = udev_device_get_devlinks_list_entry(dev);
519 while (entry != NULL) {
520 physpath = udev_list_entry_get_name(entry);
521 if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
522 (void) strlcpy(bufptr, physpath, buflen);
523 return (0);
524 }
525 entry = udev_list_entry_get_next(entry);
526 }
527
528 /*
529 * For all other devices fallback to using the by-uuid name.
530 */
531 entry = udev_device_get_devlinks_list_entry(dev);
532 while (entry != NULL) {
533 physpath = udev_list_entry_get_name(entry);
534 if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
535 (void) strlcpy(bufptr, physpath, buflen);
536 return (0);
537 }
538 entry = udev_list_entry_get_next(entry);
539 }
540
541 return (ENODATA);
542 }
543
544 /*
545 * A disk is considered a multipath whole disk when:
546 * DEVNAME key value has "dm-"
547 * DM_NAME key value has "mpath" prefix
548 * DM_UUID key exists
549 * ID_PART_TABLE_TYPE key does not exist or is not gpt
550 */
551 static boolean_t
udev_mpath_whole_disk(struct udev_device * dev)552 udev_mpath_whole_disk(struct udev_device *dev)
553 {
554 const char *devname, *type, *uuid;
555
556 devname = udev_device_get_property_value(dev, "DEVNAME");
557 type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
558 uuid = udev_device_get_property_value(dev, "DM_UUID");
559
560 if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
561 ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
562 (uuid != NULL)) {
563 return (B_TRUE);
564 }
565
566 return (B_FALSE);
567 }
568
/*
 * Return nonzero once udev has finished initializing 'dev'.  Newer
 * libudev exposes the initialized flag directly; older versions are
 * handled by checking for the DEVLINKS property, which udev sets as
 * part of initialization.
 */
static int
udev_device_is_ready(struct udev_device *dev)
{
#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
	return (udev_device_get_is_initialized(dev));
#else
	/* wait for DEVLINKS property to be initialized */
	return (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
#endif
}
579
580 #else
581
/* Stub for builds without libudev: no devid can be derived. */
/* ARGSUSED */
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	return (ENODATA);
}
588
/* Stub for builds without libudev: no physical path can be derived. */
/* ARGSUSED */
int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	return (ENODATA);
}
595
596 #endif /* HAVE_LIBUDEV */
597
598 /*
599 * Wait up to timeout_ms for udev to set up the device node. The device is
600 * considered ready when libudev determines it has been initialized, all of
601 * the device links have been verified to exist, and it has been allowed to
602 * settle. At this point the device the device can be accessed reliably.
603 * Depending on the complexity of the udev rules this process could take
604 * several seconds.
605 */
606 int
zpool_label_disk_wait(const char * path,int timeout_ms)607 zpool_label_disk_wait(const char *path, int timeout_ms)
608 {
609 #ifdef HAVE_LIBUDEV
610 struct udev *udev;
611 struct udev_device *dev = NULL;
612 char nodepath[MAXPATHLEN];
613 char *sysname = NULL;
614 int ret = ENODEV;
615 int settle_ms = 50;
616 long sleep_ms = 10;
617 hrtime_t start, settle;
618
619 if ((udev = udev_new()) == NULL)
620 return (ENXIO);
621
622 start = gethrtime();
623 settle = 0;
624
625 do {
626 if (sysname == NULL) {
627 if (realpath(path, nodepath) != NULL) {
628 sysname = strrchr(nodepath, '/') + 1;
629 } else {
630 (void) usleep(sleep_ms * MILLISEC);
631 continue;
632 }
633 }
634
635 dev = udev_device_new_from_subsystem_sysname(udev,
636 "block", sysname);
637 if ((dev != NULL) && udev_device_is_ready(dev)) {
638 struct udev_list_entry *links, *link = NULL;
639
640 ret = 0;
641 links = udev_device_get_devlinks_list_entry(dev);
642
643 udev_list_entry_foreach(link, links) {
644 struct stat64 statbuf;
645 const char *name;
646
647 name = udev_list_entry_get_name(link);
648 errno = 0;
649 if (stat64(name, &statbuf) == 0 && errno == 0)
650 continue;
651
652 settle = 0;
653 ret = ENODEV;
654 break;
655 }
656
657 if (ret == 0) {
658 if (settle == 0) {
659 settle = gethrtime();
660 } else if (NSEC2MSEC(gethrtime() - settle) >=
661 settle_ms) {
662 udev_device_unref(dev);
663 break;
664 }
665 }
666 }
667
668 udev_device_unref(dev);
669 (void) usleep(sleep_ms * MILLISEC);
670
671 } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
672
673 udev_unref(udev);
674
675 return (ret);
676 #else
677 int settle_ms = 50;
678 long sleep_ms = 10;
679 hrtime_t start, settle;
680 struct stat64 statbuf;
681
682 start = gethrtime();
683 settle = 0;
684
685 do {
686 errno = 0;
687 if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
688 if (settle == 0)
689 settle = gethrtime();
690 else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
691 return (0);
692 } else if (errno != ENOENT) {
693 return (errno);
694 }
695
696 usleep(sleep_ms * MILLISEC);
697 } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
698
699 return (ENODEV);
700 #endif /* HAVE_LIBUDEV */
701 }
702
703 /*
704 * Encode the persistent devices strings
705 * used for the vdev disk label
706 */
707 static int
encode_device_strings(const char * path,vdev_dev_strs_t * ds,boolean_t wholedisk)708 encode_device_strings(const char *path, vdev_dev_strs_t *ds,
709 boolean_t wholedisk)
710 {
711 #ifdef HAVE_LIBUDEV
712 struct udev *udev;
713 struct udev_device *dev = NULL;
714 char nodepath[MAXPATHLEN];
715 char *sysname;
716 int ret = ENODEV;
717 hrtime_t start;
718
719 if ((udev = udev_new()) == NULL)
720 return (ENXIO);
721
722 /* resolve path to a runtime device node instance */
723 if (realpath(path, nodepath) == NULL)
724 goto no_dev;
725
726 sysname = strrchr(nodepath, '/') + 1;
727
728 /*
729 * Wait up to 3 seconds for udev to set up the device node context
730 */
731 start = gethrtime();
732 do {
733 dev = udev_device_new_from_subsystem_sysname(udev, "block",
734 sysname);
735 if (dev == NULL)
736 goto no_dev;
737 if (udev_device_is_ready(dev))
738 break; /* udev ready */
739
740 udev_device_unref(dev);
741 dev = NULL;
742
743 if (NSEC2MSEC(gethrtime() - start) < 10)
744 (void) sched_yield(); /* yield/busy wait up to 10ms */
745 else
746 (void) usleep(10 * MILLISEC);
747
748 } while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));
749
750 if (dev == NULL)
751 goto no_dev;
752
753 /*
754 * Only whole disks require extra device strings
755 */
756 if (!wholedisk && !udev_mpath_whole_disk(dev))
757 goto no_dev;
758
759 ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
760 if (ret != 0)
761 goto no_dev_ref;
762
763 /* physical location string (optional) */
764 if (zfs_device_get_physical(dev, ds->vds_devphys,
765 sizeof (ds->vds_devphys)) != 0) {
766 ds->vds_devphys[0] = '\0'; /* empty string --> not available */
767 }
768
769 no_dev_ref:
770 udev_device_unref(dev);
771 no_dev:
772 udev_unref(udev);
773
774 return (ret);
775 #else
776 return (ENOENT);
777 #endif
778 }
779
780 /*
781 * Update a leaf vdev's persistent device strings
782 *
783 * - only applies for a dedicated leaf vdev (aka whole disk)
784 * - updated during pool create|add|attach|import
785 * - used for matching device matching during auto-{online,expand,replace}
786 * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
787 * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
788 *
789 * single device node example:
790 * devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1'
791 * phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
792 *
793 * multipath device node example:
794 * devid: 'dm-uuid-mpath-35000c5006304de3f'
795 *
796 * We also store the enclosure sysfs path for turning on enclosure LEDs
797 * (if applicable):
798 * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
799 */
800 void
update_vdev_config_dev_strs(nvlist_t * nv)801 update_vdev_config_dev_strs(nvlist_t *nv)
802 {
803 vdev_dev_strs_t vds;
804 char *env, *type, *path;
805 uint64_t wholedisk = 0;
806 char *upath, *spath;
807
808 /*
809 * For the benefit of legacy ZFS implementations, allow
810 * for opting out of devid strings in the vdev label.
811 *
812 * example use:
813 * env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
814 *
815 * explanation:
816 * Older OpenZFS implementations had issues when attempting to
817 * display pool config VDEV names if a "devid" NVP value is
818 * present in the pool's config.
819 *
820 * For example, a pool that originated on illumos platform would
821 * have a devid value in the config and "zpool status" would fail
822 * when listing the config.
823 *
824 * A pool can be stripped of any "devid" values on import or
825 * prevented from adding them on zpool create|add by setting
826 * ZFS_VDEV_DEVID_OPT_OUT.
827 */
828 env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
829 if (env && (strtoul(env, NULL, 0) > 0 ||
830 !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
831 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
832 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
833 return;
834 }
835
836 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
837 strcmp(type, VDEV_TYPE_DISK) != 0) {
838 return;
839 }
840 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
841 return;
842 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
843
844 /*
845 * Update device string values in the config nvlist.
846 */
847 if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
848 (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
849 if (vds.vds_devphys[0] != '\0') {
850 (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
851 vds.vds_devphys);
852 }
853
854 /* Add enclosure sysfs path (if disk is in an enclosure). */
855 upath = zfs_get_underlying_path(path);
856 spath = zfs_get_enclosure_sysfs_path(upath);
857 if (spath)
858 nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
859 spath);
860 else
861 nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
862
863 free(upath);
864 free(spath);
865 } else {
866 /* Clear out any stale entries. */
867 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
868 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
869 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
870 }
871 }
872