1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <ctype.h>
27 #include <dirent.h>
28 #include <fcntl.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <sys/efi_partition.h>
33
34 #ifdef HAVE_LIBUDEV
35 #include <libudev.h>
36 #endif
37
38 #include <libzutil.h>
39
40 /*
41 * Append partition suffix to an otherwise fully qualified device path.
42 * This is used to generate the name the full path as its stored in
43 * ZPOOL_CONFIG_PATH for whole disk devices. On success the new length
44 * of 'path' will be returned on error a negative value is returned.
45 */
46 int
zfs_append_partition(char * path,size_t max_len)47 zfs_append_partition(char *path, size_t max_len)
48 {
49 int len = strlen(path);
50
51 if ((strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) ||
52 (strncmp(path, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0)) {
53 if (len + 6 >= max_len)
54 return (-1);
55
56 (void) strcat(path, "-part1");
57 len += 6;
58 } else {
59 if (len + 2 >= max_len)
60 return (-1);
61
62 if (isdigit(path[len-1])) {
63 (void) strcat(path, "p1");
64 len += 2;
65 } else {
66 (void) strcat(path, "1");
67 len += 1;
68 }
69 }
70
71 return (len);
72 }
73
/*
 * Remove the partition suffix from a vdev name.  Partition suffixes may take
 * three forms, where X is a non-empty string of digits that runs to the end
 * of the name:
 *
 *   "-partX"  the last "-part" in the name is used, and it must not be at
 *             the very start of the name.
 *   "pX"      only when the 'p' is preceded by a digit, i.e. "md0p0".
 *   "X"       only when preceded by "hd", "sd", "vd" or "xvd" followed by
 *             one or more letters, i.e. a scsi, ide, virtio or xen disk.
 *
 * The forms are tried in that order and only the first one whose prefix test
 * matches is considered.  If no suffix is recognized an unmodified copy of
 * 'path' is returned.
 *
 * Returns a newly allocated string, or NULL if 'path' is NULL or the
 * allocation fails.  The caller must free the returned string.
 */
char *
zfs_strip_partition(char *path)
{
	char *tmp;
	char *part = NULL;	/* where the suffix starts (cut point) */
	char *d = NULL;		/* first character that must be a digit */
	char *hit;

	if (path == NULL)
		return (NULL);

	tmp = strdup(path);
	if (tmp == NULL)
		return (NULL);

	/*
	 * A suffix is by definition at the end, so look at the last "-part".
	 * Names such as "foo-partner-part1" contain an earlier match that
	 * must not hide the real suffix.
	 */
	for (hit = strstr(tmp, "-part"); hit != NULL;
	    hit = strstr(hit + 1, "-part"))
		part = hit;

	if (part != NULL && part != tmp) {
		d = part + 5;
	} else if ((part = strrchr(tmp, 'p')) != NULL &&
	    part > tmp + 1 && isdigit((unsigned char)*(part - 1))) {
		d = part + 1;
	} else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') &&
	    tmp[1] == 'd') {
		/* 'part' stays NULL unless at least one letter follows. */
		part = NULL;
		for (d = &tmp[2]; isalpha((unsigned char)*d); part = ++d) { }
	} else if (strncmp("xvd", tmp, 3) == 0) {
		part = NULL;
		for (d = &tmp[3]; isalpha((unsigned char)*d); part = ++d) { }
	}

	/* Cut only when one or more digits run all the way to the end. */
	if (part != NULL && d != NULL && *d != '\0') {
		while (isdigit((unsigned char)*d))
			d++;
		if (*d == '\0')
			*part = '\0';
	}

	return (tmp);
}
110
/*
 * Same as zfs_strip_partition(), but allows a directory prefix such as
 * "/dev/" to be part of the pathname.  Only the component after the last
 * '/' is examined; a path without any '/' is handled as a bare device name.
 *
 * path:    /dev/sda1
 * returns: /dev/sda
 *
 * Returns a newly allocated string, or NULL if an allocation fails.
 * The returned string must be freed.
 */
static char *
zfs_strip_partition_path(char *path)
{
	char *newpath = strdup(path);
	char *sd_offset;
	char *new_sd;

	if (!newpath)
		return (NULL);

	/*
	 * Point to the "sda1" part of "/dev/sda1".  Without a '/' the whole
	 * string is the device name; adding 1 to strrchr()'s NULL result
	 * would be undefined.
	 */
	sd_offset = strrchr(newpath, '/');
	sd_offset = (sd_offset != NULL) ? sd_offset + 1 : newpath;

	/* Get our new name "sda" */
	new_sd = zfs_strip_partition(sd_offset);
	if (!new_sd) {
		free(newpath);
		return (NULL);
	}

	/*
	 * Paste the "sda" where "sda1" was.  The copy is bounded by the
	 * space the old name occupied.
	 */
	(void) strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);

	/* Free temporary "sda" */
	free(new_sd);

	return (newpath);
}
147
/*
 * Strip the unwanted (directory) portion of a device path.
 *
 * Returns a pointer into 'path' just past its last '/', or 'path' itself
 * when it contains no '/'.  Nothing is allocated or modified.
 */
char *
zfs_strip_path(char *path)
{
	char *slash = strrchr(path, '/');

	/* strrchr() returns NULL when there is no '/'; never do NULL + 1. */
	return (slash != NULL ? slash + 1 : path);
}
156
/*
 * Allocate and return the underlying device name for a device mapper device.
 * If a device mapper device maps to multiple devices, return the first one
 * that readdir() reports.
 *
 * For example, dm_name = "/dev/dm-0" could return "/dev/sda".  Symlinks to a
 * DM device (like /dev/disk/by-vdev/A0) are also allowed; the name is
 * resolved with realpath() first.
 *
 * Returns the device name, or NULL on error or no match.  If dm_name has no
 * readable /sys/block/<name>/slaves/ directory (i.e. it is not a DM device)
 * NULL is returned.
 *
 * NOTE: The returned name string must be *freed*.
 */
static char *
dm_get_underlying_path(const char *dm_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char *realp;
	char *slash;
	char *slaves = NULL;
	char *path = NULL;
	const char *dev_str;

	if (dm_name == NULL)
		return (NULL);

	/* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
	realp = realpath(dm_name, NULL);
	if (realp == NULL)
		return (NULL);

	/*
	 * If they preface 'dev' with a path (like "/dev") then strip it off.
	 * We just want the 'dm-N' part.
	 */
	slash = strrchr(realp, '/');
	dev_str = (slash != NULL) ? slash + 1 : realp;

	/*
	 * asprintf() leaves its output pointer undefined on failure, so it
	 * gets a variable of its own that is reset to NULL before the
	 * free() at 'end'.
	 */
	if (asprintf(&slaves, "/sys/block/%s/slaves/", dev_str) == -1) {
		slaves = NULL;
		goto end;
	}

	dp = opendir(slaves);
	if (dp == NULL)
		goto end;

	/*
	 * Return first entry (that isn't itself a directory) in the
	 * directory containing device-mapper dependent (underlying)
	 * devices.
	 */
	while ((ep = readdir(dp)) != NULL) {
		if (ep->d_type != DT_DIR) {	/* skip "." and ".." dirs */
			if (asprintf(&path, "/dev/%s", ep->d_name) == -1)
				path = NULL;
			break;
		}
	}

end:
	if (dp != NULL)
		(void) closedir(dp);
	free(slaves);
	free(realp);
	return (path);
}
225
226 /*
227 * Return B_TRUE if device is a device mapper or multipath device.
228 * Return B_FALSE if not.
229 */
230 boolean_t
zfs_dev_is_dm(const char * dev_name)231 zfs_dev_is_dm(const char *dev_name)
232 {
233
234 char *tmp;
235 tmp = dm_get_underlying_path(dev_name);
236 if (tmp == NULL)
237 return (B_FALSE);
238
239 free(tmp);
240 return (B_TRUE);
241 }
242
243 /*
244 * By "whole disk" we mean an entire physical disk (something we can
245 * label, toggle the write cache on, etc.) as opposed to the full
246 * capacity of a pseudo-device such as lofi or did. We act as if we
247 * are labeling the disk, which should be a pretty good test of whether
248 * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if
249 * it isn't.
250 */
251 boolean_t
zfs_dev_is_whole_disk(const char * dev_name)252 zfs_dev_is_whole_disk(const char *dev_name)
253 {
254 struct dk_gpt *label;
255 int fd;
256
257 if ((fd = open(dev_name, O_RDONLY | O_DIRECT)) < 0)
258 return (B_FALSE);
259
260 if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
261 (void) close(fd);
262 return (B_FALSE);
263 }
264
265 efi_free(label);
266 (void) close(fd);
267
268 return (B_TRUE);
269 }
270
/*
 * Look up the underlying device for a device name.
 *
 * Often you'll have a symlink to a device, a partition device, or a
 * multipath device, and want to know the device underneath it.  The name is
 * first tried as a device mapper device; if that yields nothing it is simply
 * resolved with realpath().  Either way any partition suffix is then removed
 * from the result.  If the name already is the underlying device the same
 * name comes back.  For a DM device with several underlying devices the
 * first one is used.
 *
 * For example:
 *
 * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
 *    dev_name: /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
 *    returns:  /dev/sda
 *
 * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
 *    dev_name: /dev/mapper/mpatha
 *    returns:  /dev/sda (first device)
 *
 * 3. /dev/sda (already the underlying device)
 *    dev_name: /dev/sda
 *    returns:  /dev/sda
 *
 * 4. /dev/dm-3 (mapped to /dev/sda)
 *    dev_name: /dev/dm-3
 *    returns:  /dev/sda
 *
 * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
 *    dev_name: /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
 *    returns:  /dev/sdb
 *
 * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../../sda2
 *    dev_name: /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
 *    returns:  /dev/sda
 *
 * Returns the underlying device name, or NULL on error or no match.
 *
 * NOTE: The returned name string must be *freed*.
 */
char *
zfs_get_underlying_path(const char *dev_name)
{
	char *resolved;
	char *stripped;

	if (dev_name == NULL)
		return (NULL);

	resolved = dm_get_underlying_path(dev_name);
	if (resolved == NULL) {
		/* dev_name not a DM device, so just un-symlinkize it */
		resolved = realpath(dev_name, NULL);
		if (resolved == NULL)
			return (NULL);
	}

	stripped = zfs_strip_partition_path(resolved);
	free(resolved);

	return (stripped);
}
333
/*
 * Given a dev name like "sda", return the full enclosure sysfs path to
 * the disk.  You can also pass in the name with "/dev" prepended
 * to it (like /dev/sda); everything up to the last '/' is ignored.
 *
 * For example, disk "sda" in enclosure slot 1:
 *    dev:     "sda"
 *    returns: "/sys/class/enclosure/1:0:3:0/Slot 1"
 *
 * 'dev' must be a non-devicemapper device.
 *
 * Returns NULL if 'dev_name' is NULL, if /sys/block/<dev>/device cannot be
 * opened, if no usable "enclosure_device" link is found there, or if an
 * allocation fails.  If several entries match, the one read last wins.
 *
 * Returned string must be freed.
 */
char *
zfs_get_enclosure_sysfs_path(const char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char buf[MAXPATHLEN];
	const char *slash;
	char *devdir = NULL;	/* "/sys/block/<dev>/device" */
	char *linkpath = NULL;	/* full path of the enclosure_device link */
	char *encl;
	char *path = NULL;
	ssize_t size;

	if (dev_name == NULL)
		return (NULL);

	/* If they preface 'dev' with a path (like "/dev") then strip it off */
	slash = strrchr(dev_name, '/');
	if (slash != NULL)
		dev_name = slash + 1;	/* we want the chr after '/' */

	if (asprintf(&devdir, "/sys/block/%s/device", dev_name) == -1) {
		/* If asprintf() fails, 'devdir' is undefined */
		devdir = NULL;
		goto end;
	}

	/* On failure 'devdir' is still released at 'end'. */
	dp = opendir(devdir);
	if (dp == NULL)
		goto end;

	/*
	 * Look though all sysfs entries in /sys/block/<dev>/device for
	 * the enclosure symlink.
	 */
	while ((ep = readdir(dp)) != NULL) {
		/* Ignore everything that's not our enclosure_device link */
		if (strstr(ep->d_name, "enclosure_device") == NULL)
			continue;

		/* Drop the link name left over from an earlier match. */
		free(linkpath);
		if (asprintf(&linkpath, "%s/%s", devdir, ep->d_name) == -1) {
			linkpath = NULL;
			break;
		}

		size = readlink(linkpath, buf, sizeof (buf));

		/* Did readlink fail or crop the link name? */
		if (size == -1 || (size_t)size >= sizeof (buf))
			break;

		/*
		 * We got a valid link.  readlink() doesn't terminate strings
		 * so we have to do it.
		 */
		buf[size] = '\0';

		/*
		 * Our link will look like:
		 *
		 * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
		 *
		 * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
		 */
		encl = strstr(buf, "enclosure");
		if (encl == NULL)
			break;

		/* Release the result of an earlier match before replacing it. */
		free(path);
		if (asprintf(&path, "/sys/class/%s", encl) == -1) {
			/* If asprintf() fails, 'path' is undefined */
			path = NULL;
			break;
		}
	}

end:
	free(linkpath);
	free(devdir);

	if (dp != NULL)
		(void) closedir(dp);

	return (path);
}
438
439 #ifdef HAVE_LIBUDEV
440
441 /*
442 * A disk is considered a multipath whole disk when:
443 * DEVNAME key value has "dm-"
444 * DM_NAME key value has "mpath" prefix
445 * DM_UUID key exists
446 * ID_PART_TABLE_TYPE key does not exist or is not gpt
447 */
448 static boolean_t
udev_mpath_whole_disk(struct udev_device * dev)449 udev_mpath_whole_disk(struct udev_device *dev)
450 {
451 const char *devname, *type, *uuid;
452
453 devname = udev_device_get_property_value(dev, "DEVNAME");
454 type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
455 uuid = udev_device_get_property_value(dev, "DM_UUID");
456
457 if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
458 ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
459 (uuid != NULL)) {
460 return (B_TRUE);
461 }
462
463 return (B_FALSE);
464 }
465
466 /*
467 * Check if a disk is effectively a multipath whole disk
468 */
469 boolean_t
is_mpath_whole_disk(const char * path)470 is_mpath_whole_disk(const char *path)
471 {
472 struct udev *udev;
473 struct udev_device *dev = NULL;
474 char nodepath[MAXPATHLEN];
475 char *sysname;
476 boolean_t wholedisk = B_FALSE;
477
478 if (realpath(path, nodepath) == NULL)
479 return (B_FALSE);
480 sysname = strrchr(nodepath, '/') + 1;
481 if (strncmp(sysname, "dm-", 3) != 0)
482 return (B_FALSE);
483 if ((udev = udev_new()) == NULL)
484 return (B_FALSE);
485 if ((dev = udev_device_new_from_subsystem_sysname(udev, "block",
486 sysname)) == NULL) {
487 udev_device_unref(dev);
488 return (B_FALSE);
489 }
490
491 wholedisk = udev_mpath_whole_disk(dev);
492
493 udev_device_unref(dev);
494 return (wholedisk);
495 }
496
497 #else /* HAVE_LIBUDEV */
498
499 /* ARGSUSED */
500 boolean_t
is_mpath_whole_disk(const char * path)501 is_mpath_whole_disk(const char *path)
502 {
503 return (B_FALSE);
504 }
505
506 #endif /* HAVE_LIBUDEV */
507