1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <ctype.h>
27 #include <dirent.h>
28 #include <fcntl.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <sys/efi_partition.h>
33 
34 #ifdef HAVE_LIBUDEV
35 #include <libudev.h>
36 #endif
37 
38 #include <libzutil.h>
39 
40 /*
41  * Append partition suffix to an otherwise fully qualified device path.
42  * This is used to generate the name the full path as its stored in
43  * ZPOOL_CONFIG_PATH for whole disk devices.  On success the new length
44  * of 'path' will be returned on error a negative value is returned.
45  */
46 int
zfs_append_partition(char * path,size_t max_len)47 zfs_append_partition(char *path, size_t max_len)
48 {
49 	int len = strlen(path);
50 
51 	if ((strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) ||
52 	    (strncmp(path, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0)) {
53 		if (len + 6 >= max_len)
54 			return (-1);
55 
56 		(void) strcat(path, "-part1");
57 		len += 6;
58 	} else {
59 		if (len + 2 >= max_len)
60 			return (-1);
61 
62 		if (isdigit(path[len-1])) {
63 			(void) strcat(path, "p1");
64 			len += 2;
65 		} else {
66 			(void) strcat(path, "1");
67 			len += 1;
68 		}
69 	}
70 
71 	return (len);
72 }
73 
/*
 * Remove the partition suffix from a vdev name.  A suffix takes one of three
 * forms, where X is a non-empty string of digits running to the end of the
 * name:
 *
 *	"-partX"  anywhere except at the very start of the name
 *	"pX"      only when the 'p' is preceded by a digit, i.e. "md0p0"
 *	"X"       only after a name matching "^([hsv]|xv)d" followed by at
 *	          least one letter, i.e. a scsi, ide, virtio or xen disk
 *
 * Names that match none of these forms are returned unchanged.
 *
 * 'path' itself is never modified.  Returns a newly allocated string that
 * the caller must free, or NULL if the copy could not be allocated.
 */
char *
zfs_strip_partition(char *path)
{
	char *tmp = strdup(path);
	char *part = NULL, *d = NULL;
	char *next;

	if (tmp == NULL)
		return (NULL);

	/*
	 * Only the last "-part" can be the suffix; an earlier occurrence
	 * (as in "wwn-partner-part1") is part of the device name proper.
	 */
	part = strstr(tmp, "-part");
	while (part != NULL && (next = strstr(part + 1, "-part")) != NULL)
		part = next;

	if (part != NULL && part != tmp) {
		d = part + 5;
	} else if ((part = strrchr(tmp, 'p')) != NULL &&
	    part > tmp + 1 && isdigit((unsigned char)*(part - 1))) {
		d = part + 1;
	} else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') &&
	    tmp[1] == 'd') {
		/* 'part' is only set once at least one letter is consumed */
		part = NULL;
		for (d = &tmp[2]; isalpha((unsigned char)*d); part = ++d) { }
	} else if (strncmp("xvd", tmp, 3) == 0) {
		part = NULL;
		for (d = &tmp[3]; isalpha((unsigned char)*d); part = ++d) { }
	}

	/*
	 * 'part' marks where the suffix begins and 'd' where its digits
	 * begin.  Truncate only if the digits are non-empty and run all
	 * the way to the end of the name.
	 */
	if (part != NULL && d != NULL && *d != '\0') {
		while (isdigit((unsigned char)*d))
			d++;
		if (*d == '\0')
			*part = '\0';
	}

	return (tmp);
}
110 
/*
 * Same as zfs_strip_partition(), but allows a directory prefix such as
 * "/dev/" in front of the device name.  Only the component after the last
 * '/' is examined; a path without any '/' is treated as a bare device name.
 *
 * path:	/dev/sda1
 * returns:	/dev/sda
 *
 * Returns a newly allocated string that the caller must free, or NULL if
 * an allocation failed.
 */
static char *
zfs_strip_partition_path(char *path)
{
	char *newpath = strdup(path);
	char *slash;
	char *sd_offset;
	char *new_sd;

	if (newpath == NULL)
		return (NULL);

	/*
	 * Point to the "sda1" part of "/dev/sda1".  Without a '/' the whole
	 * string is the device name, so never form a pointer from NULL + 1.
	 */
	slash = strrchr(newpath, '/');
	sd_offset = (slash != NULL) ? slash + 1 : newpath;

	/* Get our new name "sda" */
	new_sd = zfs_strip_partition(sd_offset);
	if (new_sd == NULL) {
		free(newpath);
		return (NULL);
	}

	/* Paste the "sda" where "sda1" was, bounded by the original name */
	(void) strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);

	/* Free temporary "sda" */
	free(new_sd);

	return (newpath);
}
147 
/*
 * Strip the directory portion of a device path.
 *
 * Returns a pointer into 'path' just past its last '/', or 'path' itself
 * when it contains no '/' at all.  Nothing is allocated; the result shares
 * storage with 'path'.
 */
char *
zfs_strip_path(char *path)
{
	char *slash = strrchr(path, '/');

	return (slash != NULL ? slash + 1 : path);
}
156 
157 /*
158  * Allocate and return the underlying device name for a device mapper device.
159  * If a device mapper device maps to multiple devices, return the first device.
160  *
161  * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a
162  * DM device (like /dev/disk/by-vdev/A0) are also allowed.
163  *
164  * Returns device name, or NULL on error or no match.  If dm_name is not a DM
165  * device then return NULL.
166  *
167  * NOTE: The returned name string must be *freed*.
168  */
169 static char *
dm_get_underlying_path(const char * dm_name)170 dm_get_underlying_path(const char *dm_name)
171 {
172 	DIR *dp = NULL;
173 	struct dirent *ep;
174 	char *realp;
175 	char *tmp = NULL;
176 	char *path = NULL;
177 	char *dev_str;
178 	int size;
179 
180 	if (dm_name == NULL)
181 		return (NULL);
182 
183 	/* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
184 	realp = realpath(dm_name, NULL);
185 	if (realp == NULL)
186 		return (NULL);
187 
188 	/*
189 	 * If they preface 'dev' with a path (like "/dev") then strip it off.
190 	 * We just want the 'dm-N' part.
191 	 */
192 	tmp = strrchr(realp, '/');
193 	if (tmp != NULL)
194 		dev_str = tmp + 1;    /* +1 since we want the chr after '/' */
195 	else
196 		dev_str = tmp;
197 
198 	size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str);
199 	if (size == -1 || !tmp)
200 		goto end;
201 
202 	dp = opendir(tmp);
203 	if (dp == NULL)
204 		goto end;
205 
206 	/*
207 	 * Return first entry (that isn't itself a directory) in the
208 	 * directory containing device-mapper dependent (underlying)
209 	 * devices.
210 	 */
211 	while ((ep = readdir(dp))) {
212 		if (ep->d_type != DT_DIR) {	/* skip "." and ".." dirs */
213 			size = asprintf(&path, "/dev/%s", ep->d_name);
214 			break;
215 		}
216 	}
217 
218 end:
219 	if (dp != NULL)
220 		closedir(dp);
221 	free(tmp);
222 	free(realp);
223 	return (path);
224 }
225 
226 /*
227  * Return B_TRUE if device is a device mapper or multipath device.
228  * Return B_FALSE if not.
229  */
230 boolean_t
zfs_dev_is_dm(const char * dev_name)231 zfs_dev_is_dm(const char *dev_name)
232 {
233 
234 	char *tmp;
235 	tmp = dm_get_underlying_path(dev_name);
236 	if (tmp == NULL)
237 		return (B_FALSE);
238 
239 	free(tmp);
240 	return (B_TRUE);
241 }
242 
243 /*
244  * By "whole disk" we mean an entire physical disk (something we can
245  * label, toggle the write cache on, etc.) as opposed to the full
246  * capacity of a pseudo-device such as lofi or did.  We act as if we
247  * are labeling the disk, which should be a pretty good test of whether
248  * it's a viable device or not.  Returns B_TRUE if it is and B_FALSE if
249  * it isn't.
250  */
251 boolean_t
zfs_dev_is_whole_disk(const char * dev_name)252 zfs_dev_is_whole_disk(const char *dev_name)
253 {
254 	struct dk_gpt *label;
255 	int fd;
256 
257 	if ((fd = open(dev_name, O_RDONLY | O_DIRECT)) < 0)
258 		return (B_FALSE);
259 
260 	if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
261 		(void) close(fd);
262 		return (B_FALSE);
263 	}
264 
265 	efi_free(label);
266 	(void) close(fd);
267 
268 	return (B_TRUE);
269 }
270 
/*
 * Look up the underlying device for a device name.
 *
 * The name handed in is often a symlink to a device, a partition device,
 * or a multipath device.  This function resolves it to the underlying
 * device name with any partition suffix removed.  A name that already is
 * the underlying device comes back unchanged.  For a DM device with several
 * underlying devices the first one is used.
 *
 * For example:
 *
 * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
 * dev_name:	/dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
 * returns:	/dev/sda
 *
 * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
 * dev_name:	/dev/mapper/mpatha
 * returns:	/dev/sda (first device)
 *
 * 3. /dev/sda (already the underlying device)
 * dev_name:	/dev/sda
 * returns:	/dev/sda
 *
 * 4. /dev/dm-3 (mapped to /dev/sda)
 * dev_name:	/dev/dm-3
 * returns:	/dev/sda
 *
 * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
 * dev_name:	/dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
 * returns:	/dev/sdb
 *
 * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../../sda2
 * dev_name:	/dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
 * returns:	/dev/sda
 *
 * Returns the underlying device name, or NULL on error or no match.
 *
 * NOTE: The returned name string must be *freed*.
 */
char *
zfs_get_underlying_path(const char *dev_name)
{
	char *resolved;
	char *stripped;

	if (dev_name == NULL)
		return (NULL);

	/* Try the device mapper lookup first */
	resolved = dm_get_underlying_path(dev_name);
	if (resolved == NULL) {
		/* dev_name not a DM device, so just un-symlinkize it */
		resolved = realpath(dev_name, NULL);
		if (resolved == NULL)
			return (NULL);
	}

	stripped = zfs_strip_partition_path(resolved);
	free(resolved);

	return (stripped);
}
333 
/*
 * Given a dev name like "sda", return the full enclosure sysfs path to
 * the disk.  You can also pass in the name with "/dev" prepended
 * to it (like /dev/sda); everything up to the last '/' is ignored.
 *
 * For example, disk "sda" in enclosure slot 1:
 *     dev:            "sda"
 *     returns:        "/sys/class/enclosure/1:0:3:0/Slot 1"
 *
 * 'dev' must be a non-devicemapper device.
 *
 * The path is derived from the target of an "enclosure_device" symlink in
 * /sys/block/<dev>/device.  If several such links exist, the last one that
 * readdir() reports wins.  Returns NULL when 'dev_name' is NULL, when the
 * sysfs directory cannot be opened, when no usable link is found, or when
 * an allocation fails.
 *
 * Returned string must be freed.
 */
char *
zfs_get_enclosure_sysfs_path(const char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char buf[MAXPATHLEN];
	const char *slash;
	char *dev_dir = NULL;
	char *link_path = NULL;
	char *encl;
	char *path = NULL;
	ssize_t len;

	if (dev_name == NULL)
		return (NULL);

	/* If they preface 'dev' with a path (like "/dev") then strip it off */
	slash = strrchr(dev_name, '/');
	if (slash != NULL)
		dev_name = slash + 1;    /* +1 since we want the chr after '/' */

	/*
	 * asprintf() leaves its output pointer undefined on failure, so
	 * reset it before the common cleanup calls free() on it.
	 */
	if (asprintf(&dev_dir, "/sys/block/%s/device", dev_name) == -1) {
		dev_dir = NULL;
		goto end;
	}

	/* On failure 'dev_dir' is still released by the common cleanup */
	dp = opendir(dev_dir);
	if (dp == NULL)
		goto end;

	/*
	 * Look though all sysfs entries in /sys/block/<dev>/device for
	 * the enclosure symlink.
	 */
	while ((ep = readdir(dp)) != NULL) {
		/* Ignore everything that's not our enclosure_device link */
		if (strstr(ep->d_name, "enclosure_device") == NULL)
			continue;

		/* Drop the link name left over from an earlier match */
		free(link_path);
		link_path = NULL;
		if (asprintf(&link_path, "%s/%s", dev_dir, ep->d_name) == -1) {
			link_path = NULL;
			break;
		}

		len = readlink(link_path, buf, sizeof (buf));

		/* Did readlink fail or crop the link name? */
		if (len == -1 || (size_t)len >= sizeof (buf))
			break;

		/*
		 * We got a valid link.  readlink() doesn't terminate strings
		 * so we have to do it.
		 */
		buf[len] = '\0';

		/*
		 * Our link will look like:
		 *
		 * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
		 *
		 * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
		 */
		encl = strstr(buf, "enclosure");
		if (encl == NULL)
			break;

		/* Release the result of an earlier match before replacing it */
		free(path);
		path = NULL;
		if (asprintf(&path, "/sys/class/%s", encl) == -1) {
			/* If asprintf() fails, 'path' is undefined */
			path = NULL;
			break;
		}
	}

end:
	free(link_path);
	free(dev_dir);

	if (dp != NULL)
		(void) closedir(dp);

	return (path);
}
438 
439 #ifdef HAVE_LIBUDEV
440 
441 /*
442  * A disk is considered a multipath whole disk when:
443  *	DEVNAME key value has "dm-"
444  *	DM_NAME key value has "mpath" prefix
445  *	DM_UUID key exists
446  *	ID_PART_TABLE_TYPE key does not exist or is not gpt
447  */
448 static boolean_t
udev_mpath_whole_disk(struct udev_device * dev)449 udev_mpath_whole_disk(struct udev_device *dev)
450 {
451 	const char *devname, *type, *uuid;
452 
453 	devname = udev_device_get_property_value(dev, "DEVNAME");
454 	type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
455 	uuid = udev_device_get_property_value(dev, "DM_UUID");
456 
457 	if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
458 	    ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
459 	    (uuid != NULL)) {
460 		return (B_TRUE);
461 	}
462 
463 	return (B_FALSE);
464 }
465 
466 /*
467  * Check if a disk is effectively a multipath whole disk
468  */
469 boolean_t
is_mpath_whole_disk(const char * path)470 is_mpath_whole_disk(const char *path)
471 {
472 	struct udev *udev;
473 	struct udev_device *dev = NULL;
474 	char nodepath[MAXPATHLEN];
475 	char *sysname;
476 	boolean_t wholedisk = B_FALSE;
477 
478 	if (realpath(path, nodepath) == NULL)
479 		return (B_FALSE);
480 	sysname = strrchr(nodepath, '/') + 1;
481 	if (strncmp(sysname, "dm-", 3) != 0)
482 		return (B_FALSE);
483 	if ((udev = udev_new()) == NULL)
484 		return (B_FALSE);
485 	if ((dev = udev_device_new_from_subsystem_sysname(udev, "block",
486 	    sysname)) == NULL) {
487 		udev_device_unref(dev);
488 		return (B_FALSE);
489 	}
490 
491 	wholedisk = udev_mpath_whole_disk(dev);
492 
493 	udev_device_unref(dev);
494 	return (wholedisk);
495 }
496 
497 #else /* HAVE_LIBUDEV */
498 
499 /* ARGSUSED */
500 boolean_t
is_mpath_whole_disk(const char * path)501 is_mpath_whole_disk(const char *path)
502 {
503 	return (B_FALSE);
504 }
505 
506 #endif /* HAVE_LIBUDEV */
507