1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
25  * Copyright (c) 2017 Datto Inc.
26  * Copyright 2017 RackTop Systems.
27  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
28  * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
29  */
30 
31 /*
32  * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
33  * It has the following characteristics:
34  *
35  *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
36  *  threads.  This is accomplished primarily by avoiding global data
37  *  (e.g. caching).  Since it's thread-safe, there is no reason for a
38  *  process to have multiple libzfs "instances".  Therefore, we store
39  *  our few pieces of data (e.g. the file descriptor) in global
40  *  variables.  The fd is reference-counted so that the libzfs_core
41  *  library can be "initialized" multiple times (e.g. by different
42  *  consumers within the same process).
43  *
44  *  - Committed Interface.  The libzfs_core interface will be committed,
45  *  therefore consumers can compile against it and be confident that
46  *  their code will continue to work on future releases of this code.
47  *  Currently, the interface is Evolving (not Committed), but we intend
48  *  to commit to it once it is more complete and we determine that it
49  *  meets the needs of all consumers.
50  *
51  *  - Programmatic Error Handling.  libzfs_core communicates errors with
52  *  defined error numbers, and doesn't print anything to stdout/stderr.
53  *
54  *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
55  *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
56  *  between libzfs_core functions and ioctls to ZFS_DEV.
57  *
58  *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
 *  with kernel ioctls, and kernel ioctls are generally atomic, each
60  *  libzfs_core function is atomic.  For example, creating multiple
61  *  snapshots with a single call to lzc_snapshot() is atomic -- it
62  *  can't fail with only some of the requested snapshots created, even
63  *  in the event of power loss or system crash.
64  *
65  *  - Continued libzfs Support.  Some higher-level operations (e.g.
66  *  support for "zfs send -R") are too complicated to fit the scope of
67  *  libzfs_core.  This functionality will continue to live in libzfs.
68  *  Where appropriate, libzfs will use the underlying atomic operations
69  *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
70  *  zfs receive" by using individual "send one snapshot", rename,
71  *  destroy, and "receive one snapshot" operations in libzfs_core.
72  *  /sbin/zfs and /sbin/zpool will link with both libzfs and
73  *  libzfs_core.  Other consumers should aim to use only libzfs_core,
74  *  since that will be the supported, stable interface going forwards.
75  */
76 
77 #include <libzfs_core.h>
78 #include <ctype.h>
79 #include <unistd.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #ifdef ZFS_DEBUG
83 #include <stdio.h>
84 #endif
85 #include <errno.h>
86 #include <fcntl.h>
87 #include <pthread.h>
88 #include <libzutil.h>
89 #include <sys/nvpair.h>
90 #include <sys/param.h>
91 #include <sys/types.h>
92 #include <sys/stat.h>
93 #include <sys/zfs_ioctl.h>
94 
/*
 * Library-wide state.  libzfs_core_init() and libzfs_core_fini() update
 * these while holding g_lock.
 *
 * g_refcount counts init calls not yet matched by a fini call; g_fd is the
 * descriptor obtained by opening ZFS_DEV, or -1 while no descriptor is open.
 */
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount = 0;
static int g_fd = -1;
98 
#ifdef ZFS_DEBUG
/*
 * Fault-injection state consulted by lzc_ioctl(): the ioctl command to fail
 * (ZFS_IOC_LAST while nothing has been configured) and the error returned
 * in place of issuing that ioctl.
 */
static zfs_ioc_t fail_ioc_cmd = ZFS_IOC_LAST;
static zfs_errno_t fail_ioc_err;

/*
 * Load the fault-injection settings from the ZFS_IOC_TEST environment
 * variable.  Nothing is changed when the variable is unset, malformed, or
 * names a command outside [0, ZFS_IOC_LAST), or when a command has already
 * been configured.
 */
static void
libzfs_core_debug_ioc(void)
{
	/*
	 * To test running newer user space binaries with kernels
	 * that don't yet support an ioctl or a new ioctl arg we
	 * provide an override to intentionally fail an ioctl.
	 *
	 * USAGE:
	 * The override variable, ZFS_IOC_TEST, is of the form "cmd:err"
	 *
	 * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a
	 * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029"
	 *
	 * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank"
	 * cannot checkpoint 'tank': the loaded zfs module does not support
	 * this operation. A reboot may be required to enable this operation.
	 */
	if (fail_ioc_cmd == ZFS_IOC_LAST) {
		const char *ioc_test = getenv("ZFS_IOC_TEST");
		int ioc_num = 0, ioc_err = 0;

		/*
		 * The "%i" conversion stores through a pointer to int, so
		 * parse into signed ints and reject negative command
		 * numbers explicitly.
		 */
		if (ioc_test != NULL &&
		    sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) == 2 &&
		    ioc_num >= 0 && ioc_num < ZFS_IOC_LAST)  {
			fail_ioc_cmd = ioc_num;
			fail_ioc_err = ioc_err;
		}
	}
}
#endif
134 
135 int
libzfs_core_init(void)136 libzfs_core_init(void)
137 {
138 	(void) pthread_mutex_lock(&g_lock);
139 	if (g_refcount == 0) {
140 		g_fd = open(ZFS_DEV, O_RDWR);
141 		if (g_fd < 0) {
142 			(void) pthread_mutex_unlock(&g_lock);
143 			return (errno);
144 		}
145 	}
146 	g_refcount++;
147 
148 #ifdef ZFS_DEBUG
149 	libzfs_core_debug_ioc();
150 #endif
151 	(void) pthread_mutex_unlock(&g_lock);
152 	return (0);
153 }
154 
155 void
libzfs_core_fini(void)156 libzfs_core_fini(void)
157 {
158 	(void) pthread_mutex_lock(&g_lock);
159 	ASSERT3S(g_refcount, >, 0);
160 
161 	if (g_refcount > 0)
162 		g_refcount--;
163 
164 	if (g_refcount == 0 && g_fd != -1) {
165 		(void) close(g_fd);
166 		g_fd = -1;
167 	}
168 	(void) pthread_mutex_unlock(&g_lock);
169 }
170 
171 static int
lzc_ioctl(zfs_ioc_t ioc,const char * name,nvlist_t * source,nvlist_t ** resultp)172 lzc_ioctl(zfs_ioc_t ioc, const char *name,
173     nvlist_t *source, nvlist_t **resultp)
174 {
175 	zfs_cmd_t zc = {"\0"};
176 	int error = 0;
177 	char *packed = NULL;
178 	size_t size = 0;
179 
180 	ASSERT3S(g_refcount, >, 0);
181 	VERIFY3S(g_fd, !=, -1);
182 
183 #ifdef ZFS_DEBUG
184 	if (ioc == fail_ioc_cmd)
185 		return (fail_ioc_err);
186 #endif
187 
188 	if (name != NULL)
189 		(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
190 
191 	if (source != NULL) {
192 		packed = fnvlist_pack(source, &size);
193 		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
194 		zc.zc_nvlist_src_size = size;
195 	}
196 
197 	if (resultp != NULL) {
198 		*resultp = NULL;
199 		if (ioc == ZFS_IOC_CHANNEL_PROGRAM) {
200 			zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source,
201 			    ZCP_ARG_MEMLIMIT);
202 		} else {
203 			zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
204 		}
205 		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
206 		    malloc(zc.zc_nvlist_dst_size);
207 		if (zc.zc_nvlist_dst == (uint64_t)0) {
208 			error = ENOMEM;
209 			goto out;
210 		}
211 	}
212 
213 	while (zfs_ioctl_fd(g_fd, ioc, &zc) != 0) {
214 		/*
215 		 * If ioctl exited with ENOMEM, we retry the ioctl after
216 		 * increasing the size of the destination nvlist.
217 		 *
218 		 * Channel programs that exit with ENOMEM ran over the
219 		 * lua memory sandbox; they should not be retried.
220 		 */
221 		if (errno == ENOMEM && resultp != NULL &&
222 		    ioc != ZFS_IOC_CHANNEL_PROGRAM) {
223 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
224 			zc.zc_nvlist_dst_size *= 2;
225 			zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
226 			    malloc(zc.zc_nvlist_dst_size);
227 			if (zc.zc_nvlist_dst == (uint64_t)0) {
228 				error = ENOMEM;
229 				goto out;
230 			}
231 		} else {
232 			error = errno;
233 			break;
234 		}
235 	}
236 	if (zc.zc_nvlist_dst_filled) {
237 		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
238 		    zc.zc_nvlist_dst_size);
239 	}
240 
241 out:
242 	if (packed != NULL)
243 		fnvlist_pack_free(packed, size);
244 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
245 	return (error);
246 }
247 
248 int
lzc_create(const char * fsname,enum lzc_dataset_type type,nvlist_t * props,uint8_t * wkeydata,uint_t wkeylen)249 lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props,
250     uint8_t *wkeydata, uint_t wkeylen)
251 {
252 	int error;
253 	nvlist_t *hidden_args = NULL;
254 	nvlist_t *args = fnvlist_alloc();
255 
256 	fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
257 	if (props != NULL)
258 		fnvlist_add_nvlist(args, "props", props);
259 
260 	if (wkeydata != NULL) {
261 		hidden_args = fnvlist_alloc();
262 		fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
263 		    wkeylen);
264 		fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args);
265 	}
266 
267 	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
268 	nvlist_free(hidden_args);
269 	nvlist_free(args);
270 	return (error);
271 }
272 
273 int
lzc_clone(const char * fsname,const char * origin,nvlist_t * props)274 lzc_clone(const char *fsname, const char *origin, nvlist_t *props)
275 {
276 	int error;
277 	nvlist_t *hidden_args = NULL;
278 	nvlist_t *args = fnvlist_alloc();
279 
280 	fnvlist_add_string(args, "origin", origin);
281 	if (props != NULL)
282 		fnvlist_add_nvlist(args, "props", props);
283 	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
284 	nvlist_free(hidden_args);
285 	nvlist_free(args);
286 	return (error);
287 }
288 
289 int
lzc_promote(const char * fsname,char * snapnamebuf,int snapnamelen)290 lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
291 {
292 	/*
293 	 * The promote ioctl is still legacy, so we need to construct our
294 	 * own zfs_cmd_t rather than using lzc_ioctl().
295 	 */
296 	zfs_cmd_t zc = {"\0"};
297 
298 	ASSERT3S(g_refcount, >, 0);
299 	VERIFY3S(g_fd, !=, -1);
300 
301 	(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
302 	if (zfs_ioctl_fd(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
303 		int error = errno;
304 		if (error == EEXIST && snapnamebuf != NULL)
305 			(void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
306 		return (error);
307 	}
308 	return (0);
309 }
310 
311 int
lzc_rename(const char * source,const char * target)312 lzc_rename(const char *source, const char *target)
313 {
314 	zfs_cmd_t zc = {"\0"};
315 	int error;
316 
317 	ASSERT3S(g_refcount, >, 0);
318 	VERIFY3S(g_fd, !=, -1);
319 	(void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name));
320 	(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
321 	error = zfs_ioctl_fd(g_fd, ZFS_IOC_RENAME, &zc);
322 	if (error != 0)
323 		error = errno;
324 	return (error);
325 }
326 int
lzc_destroy(const char * fsname)327 lzc_destroy(const char *fsname)
328 {
329 	int error;
330 	nvlist_t *args = fnvlist_alloc();
331 	error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL);
332 	nvlist_free(args);
333 	return (error);
334 }
335 
336 /*
337  * Creates snapshots.
338  *
339  * The keys in the snaps nvlist are the snapshots to be created.
340  * They must all be in the same pool.
341  *
342  * The props nvlist is properties to set.  Currently only user properties
343  * are supported.  { user:prop_name -> string value }
344  *
345  * The returned results nvlist will have an entry for each snapshot that failed.
346  * The value will be the (int32) error code.
347  *
348  * The return value will be 0 if all snapshots were created, otherwise it will
349  * be the errno of a (unspecified) snapshot that failed.
350  */
351 int
lzc_snapshot(nvlist_t * snaps,nvlist_t * props,nvlist_t ** errlist)352 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
353 {
354 	nvpair_t *elem;
355 	nvlist_t *args;
356 	int error;
357 	char pool[ZFS_MAX_DATASET_NAME_LEN];
358 
359 	*errlist = NULL;
360 
361 	/* determine the pool name */
362 	elem = nvlist_next_nvpair(snaps, NULL);
363 	if (elem == NULL)
364 		return (0);
365 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
366 	pool[strcspn(pool, "/@")] = '\0';
367 
368 	args = fnvlist_alloc();
369 	fnvlist_add_nvlist(args, "snaps", snaps);
370 	if (props != NULL)
371 		fnvlist_add_nvlist(args, "props", props);
372 
373 	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
374 	nvlist_free(args);
375 
376 	return (error);
377 }
378 
379 /*
380  * Destroys snapshots.
381  *
382  * The keys in the snaps nvlist are the snapshots to be destroyed.
383  * They must all be in the same pool.
384  *
385  * Snapshots that do not exist will be silently ignored.
386  *
387  * If 'defer' is not set, and a snapshot has user holds or clones, the
388  * destroy operation will fail and none of the snapshots will be
389  * destroyed.
390  *
391  * If 'defer' is set, and a snapshot has user holds or clones, it will be
392  * marked for deferred destruction, and will be destroyed when the last hold
393  * or clone is removed/destroyed.
394  *
395  * The return value will be 0 if all snapshots were destroyed (or marked for
396  * later destruction if 'defer' is set) or didn't exist to begin with.
397  *
398  * Otherwise the return value will be the errno of a (unspecified) snapshot
399  * that failed, no snapshots will be destroyed, and the errlist will have an
400  * entry for each snapshot that failed.  The value in the errlist will be
401  * the (int32) error code.
402  */
403 int
lzc_destroy_snaps(nvlist_t * snaps,boolean_t defer,nvlist_t ** errlist)404 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
405 {
406 	nvpair_t *elem;
407 	nvlist_t *args;
408 	int error;
409 	char pool[ZFS_MAX_DATASET_NAME_LEN];
410 
411 	/* determine the pool name */
412 	elem = nvlist_next_nvpair(snaps, NULL);
413 	if (elem == NULL)
414 		return (0);
415 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
416 	pool[strcspn(pool, "/@")] = '\0';
417 
418 	args = fnvlist_alloc();
419 	fnvlist_add_nvlist(args, "snaps", snaps);
420 	if (defer)
421 		fnvlist_add_boolean(args, "defer");
422 
423 	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
424 	nvlist_free(args);
425 
426 	return (error);
427 }
428 
429 int
lzc_snaprange_space(const char * firstsnap,const char * lastsnap,uint64_t * usedp)430 lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
431     uint64_t *usedp)
432 {
433 	nvlist_t *args;
434 	nvlist_t *result;
435 	int err;
436 	char fs[ZFS_MAX_DATASET_NAME_LEN];
437 	char *atp;
438 
439 	/* determine the fs name */
440 	(void) strlcpy(fs, firstsnap, sizeof (fs));
441 	atp = strchr(fs, '@');
442 	if (atp == NULL)
443 		return (EINVAL);
444 	*atp = '\0';
445 
446 	args = fnvlist_alloc();
447 	fnvlist_add_string(args, "firstsnap", firstsnap);
448 
449 	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
450 	nvlist_free(args);
451 	if (err == 0)
452 		*usedp = fnvlist_lookup_uint64(result, "used");
453 	fnvlist_free(result);
454 
455 	return (err);
456 }
457 
458 boolean_t
lzc_exists(const char * dataset)459 lzc_exists(const char *dataset)
460 {
461 	/*
462 	 * The objset_stats ioctl is still legacy, so we need to construct our
463 	 * own zfs_cmd_t rather than using lzc_ioctl().
464 	 */
465 	zfs_cmd_t zc = {"\0"};
466 
467 	ASSERT3S(g_refcount, >, 0);
468 	VERIFY3S(g_fd, !=, -1);
469 
470 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
471 	return (zfs_ioctl_fd(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
472 }
473 
474 /*
475  * outnvl is unused.
476  * It was added to preserve the function signature in case it is
477  * needed in the future.
478  */
479 /*ARGSUSED*/
480 int
lzc_sync(const char * pool_name,nvlist_t * innvl,nvlist_t ** outnvl)481 lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl)
482 {
483 	return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL));
484 }
485 
486 /*
487  * Create "user holds" on snapshots.  If there is a hold on a snapshot,
488  * the snapshot can not be destroyed.  (However, it can be marked for deletion
489  * by lzc_destroy_snaps(defer=B_TRUE).)
490  *
491  * The keys in the nvlist are snapshot names.
492  * The snapshots must all be in the same pool.
493  * The value is the name of the hold (string type).
494  *
495  * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL).
496  * In this case, when the cleanup_fd is closed (including on process
497  * termination), the holds will be released.  If the system is shut down
498  * uncleanly, the holds will be released when the pool is next opened
499  * or imported.
500  *
501  * Holds for snapshots which don't exist will be skipped and have an entry
502  * added to errlist, but will not cause an overall failure.
503  *
504  * The return value will be 0 if all holds, for snapshots that existed,
505  * were successfully created.
506  *
507  * Otherwise the return value will be the errno of a (unspecified) hold that
508  * failed and no holds will be created.
509  *
510  * In all cases the errlist will have an entry for each hold that failed
511  * (name = snapshot), with its value being the error code (int32).
512  */
513 int
lzc_hold(nvlist_t * holds,int cleanup_fd,nvlist_t ** errlist)514 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
515 {
516 	char pool[ZFS_MAX_DATASET_NAME_LEN];
517 	nvlist_t *args;
518 	nvpair_t *elem;
519 	int error;
520 
521 	/* determine the pool name */
522 	elem = nvlist_next_nvpair(holds, NULL);
523 	if (elem == NULL)
524 		return (0);
525 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
526 	pool[strcspn(pool, "/@")] = '\0';
527 
528 	args = fnvlist_alloc();
529 	fnvlist_add_nvlist(args, "holds", holds);
530 	if (cleanup_fd != -1)
531 		fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
532 
533 	error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
534 	nvlist_free(args);
535 	return (error);
536 }
537 
538 /*
539  * Release "user holds" on snapshots.  If the snapshot has been marked for
540  * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
541  * any clones, and all the user holds are removed, then the snapshot will be
542  * destroyed.
543  *
544  * The keys in the nvlist are snapshot names.
545  * The snapshots must all be in the same pool.
546  * The value is an nvlist whose keys are the holds to remove.
547  *
548  * Holds which failed to release because they didn't exist will have an entry
549  * added to errlist, but will not cause an overall failure.
550  *
551  * The return value will be 0 if the nvl holds was empty or all holds that
552  * existed, were successfully removed.
553  *
554  * Otherwise the return value will be the errno of a (unspecified) hold that
555  * failed to release and no holds will be released.
556  *
557  * In all cases the errlist will have an entry for each hold that failed to
558  * to release.
559  */
560 int
lzc_release(nvlist_t * holds,nvlist_t ** errlist)561 lzc_release(nvlist_t *holds, nvlist_t **errlist)
562 {
563 	char pool[ZFS_MAX_DATASET_NAME_LEN];
564 	nvpair_t *elem;
565 
566 	/* determine the pool name */
567 	elem = nvlist_next_nvpair(holds, NULL);
568 	if (elem == NULL)
569 		return (0);
570 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
571 	pool[strcspn(pool, "/@")] = '\0';
572 
573 	return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
574 }
575 
576 /*
577  * Retrieve list of user holds on the specified snapshot.
578  *
579  * On success, *holdsp will be set to an nvlist which the caller must free.
580  * The keys are the names of the holds, and the value is the creation time
581  * of the hold (uint64) in seconds since the epoch.
582  */
583 int
lzc_get_holds(const char * snapname,nvlist_t ** holdsp)584 lzc_get_holds(const char *snapname, nvlist_t **holdsp)
585 {
586 	return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp));
587 }
588 
589 /*
590  * Generate a zfs send stream for the specified snapshot and write it to
591  * the specified file descriptor.
592  *
593  * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
594  *
595  * If "from" is NULL, a full (non-incremental) stream will be sent.
596  * If "from" is non-NULL, it must be the full name of a snapshot or
597  * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
598  * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
599  * bookmark must represent an earlier point in the history of "snapname").
600  * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
601  * or it can be the origin of "snapname"'s filesystem, or an earlier
602  * snapshot in the origin, etc.
603  *
604  * "fd" is the file descriptor to write the send stream to.
605  *
606  * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
607  * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
608  * records with drr_blksz > 128K.
609  *
610  * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
611  * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
612  * which the receiving system must support (as indicated by support
613  * for the "embedded_data" feature).
614  *
615  * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using
616  * compressed WRITE records for blocks which are compressed on disk and in
617  * memory.  If the lz4_compress feature is active on the sending system, then
618  * the receiving system must have that feature enabled as well.
619  *
620  * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted
621  * datasets, by sending data exactly as it exists on disk.  This allows backups
622  * to be taken even if encryption keys are not currently loaded.
623  */
624 int
lzc_send(const char * snapname,const char * from,int fd,enum lzc_send_flags flags)625 lzc_send(const char *snapname, const char *from, int fd,
626     enum lzc_send_flags flags)
627 {
628 	return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
629 	    NULL));
630 }
631 
632 int
lzc_send_redacted(const char * snapname,const char * from,int fd,enum lzc_send_flags flags,const char * redactbook)633 lzc_send_redacted(const char *snapname, const char *from, int fd,
634     enum lzc_send_flags flags, const char *redactbook)
635 {
636 	return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
637 	    redactbook));
638 }
639 
640 int
lzc_send_resume(const char * snapname,const char * from,int fd,enum lzc_send_flags flags,uint64_t resumeobj,uint64_t resumeoff)641 lzc_send_resume(const char *snapname, const char *from, int fd,
642     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
643 {
644 	return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj,
645 	    resumeoff, NULL));
646 }
647 
648 /*
649  * snapname: The name of the "tosnap", or the snapshot whose contents we are
650  * sending.
651  * from: The name of the "fromsnap", or the incremental source.
652  * fd: File descriptor to write the stream to.
653  * flags: flags that determine features to be used by the stream.
654  * resumeobj: Object to resume from, for resuming send
655  * resumeoff: Offset to resume from, for resuming send.
656  * redactnv: nvlist of string -> boolean(ignored) containing the names of all
657  * the snapshots that we should redact with respect to.
658  * redactbook: Name of the redaction bookmark to create.
659  */
660 int
lzc_send_resume_redacted(const char * snapname,const char * from,int fd,enum lzc_send_flags flags,uint64_t resumeobj,uint64_t resumeoff,const char * redactbook)661 lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
662     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
663     const char *redactbook)
664 {
665 	nvlist_t *args;
666 	int err;
667 
668 	args = fnvlist_alloc();
669 	fnvlist_add_int32(args, "fd", fd);
670 	if (from != NULL)
671 		fnvlist_add_string(args, "fromsnap", from);
672 	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
673 		fnvlist_add_boolean(args, "largeblockok");
674 	if (flags & LZC_SEND_FLAG_EMBED_DATA)
675 		fnvlist_add_boolean(args, "embedok");
676 	if (flags & LZC_SEND_FLAG_COMPRESS)
677 		fnvlist_add_boolean(args, "compressok");
678 	if (flags & LZC_SEND_FLAG_RAW)
679 		fnvlist_add_boolean(args, "rawok");
680 	if (flags & LZC_SEND_FLAG_SAVED)
681 		fnvlist_add_boolean(args, "savedok");
682 	if (resumeobj != 0 || resumeoff != 0) {
683 		fnvlist_add_uint64(args, "resume_object", resumeobj);
684 		fnvlist_add_uint64(args, "resume_offset", resumeoff);
685 	}
686 	if (redactbook != NULL)
687 		fnvlist_add_string(args, "redactbook", redactbook);
688 
689 	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
690 	nvlist_free(args);
691 	return (err);
692 }
693 
694 /*
695  * "from" can be NULL, a snapshot, or a bookmark.
696  *
697  * If from is NULL, a full (non-incremental) stream will be estimated.  This
698  * is calculated very efficiently.
699  *
700  * If from is a snapshot, lzc_send_space uses the deadlists attached to
701  * each snapshot to efficiently estimate the stream size.
702  *
703  * If from is a bookmark, the indirect blocks in the destination snapshot
704  * are traversed, looking for blocks with a birth time since the creation TXG of
705  * the snapshot this bookmark was created from.  This will result in
706  * significantly more I/O and be less efficient than a send space estimation on
707  * an equivalent snapshot. This process is also used if redact_snaps is
708  * non-null.
709  */
710 int
lzc_send_space_resume_redacted(const char * snapname,const char * from,enum lzc_send_flags flags,uint64_t resumeobj,uint64_t resumeoff,uint64_t resume_bytes,const char * redactbook,int fd,uint64_t * spacep)711 lzc_send_space_resume_redacted(const char *snapname, const char *from,
712     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
713     uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
714 {
715 	nvlist_t *args;
716 	nvlist_t *result;
717 	int err;
718 
719 	args = fnvlist_alloc();
720 	if (from != NULL)
721 		fnvlist_add_string(args, "from", from);
722 	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
723 		fnvlist_add_boolean(args, "largeblockok");
724 	if (flags & LZC_SEND_FLAG_EMBED_DATA)
725 		fnvlist_add_boolean(args, "embedok");
726 	if (flags & LZC_SEND_FLAG_COMPRESS)
727 		fnvlist_add_boolean(args, "compressok");
728 	if (flags & LZC_SEND_FLAG_RAW)
729 		fnvlist_add_boolean(args, "rawok");
730 	if (resumeobj != 0 || resumeoff != 0) {
731 		fnvlist_add_uint64(args, "resume_object", resumeobj);
732 		fnvlist_add_uint64(args, "resume_offset", resumeoff);
733 		fnvlist_add_uint64(args, "bytes", resume_bytes);
734 	}
735 	if (redactbook != NULL)
736 		fnvlist_add_string(args, "redactbook", redactbook);
737 	if (fd != -1)
738 		fnvlist_add_int32(args, "fd", fd);
739 
740 	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
741 	nvlist_free(args);
742 	if (err == 0)
743 		*spacep = fnvlist_lookup_uint64(result, "space");
744 	nvlist_free(result);
745 	return (err);
746 }
747 
748 int
lzc_send_space(const char * snapname,const char * from,enum lzc_send_flags flags,uint64_t * spacep)749 lzc_send_space(const char *snapname, const char *from,
750     enum lzc_send_flags flags, uint64_t *spacep)
751 {
752 	return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0,
753 	    NULL, -1, spacep));
754 }
755 
/*
 * Read exactly ilen bytes from fd into buf, looping over short reads.
 *
 * Returns 0 once all ilen bytes have been read.  Returns EIO if read()
 * fails, or if end-of-file is reached before ilen bytes have arrived.
 * A read interrupted by a signal (EINTR) is retried rather than treated
 * as a failure.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	while (len > 0) {
		ssize_t rv = read(fd, cp, len);

		if (rv < 0) {
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0)
			break;

		/* Only advance by what was actually transferred. */
		cp += rv;
		len -= (int)rv;
	}

	return (len == 0 ? 0 : EIO);
}
774 
775 /*
776  * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the
777  * legacy ZFS_IOC_RECV user/kernel interface.  The new interface supports all
778  * stream options but is currently only used for resumable streams.  This way
779  * updated user space utilities will interoperate with older kernel modules.
780  *
781  * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
782  */
783 static int
recv_impl(const char * snapname,nvlist_t * recvdprops,nvlist_t * localprops,uint8_t * wkeydata,uint_t wkeylen,const char * origin,boolean_t force,boolean_t resumable,boolean_t raw,int input_fd,const dmu_replay_record_t * begin_record,uint64_t * read_bytes,uint64_t * errflags,nvlist_t ** errors)784 recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
785     uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
786     boolean_t resumable, boolean_t raw, int input_fd,
787     const dmu_replay_record_t *begin_record, uint64_t *read_bytes,
788     uint64_t *errflags, nvlist_t **errors)
789 {
790 	dmu_replay_record_t drr;
791 	char fsname[MAXPATHLEN];
792 	char *atp;
793 	int error;
794 	boolean_t payload = B_FALSE;
795 
796 	ASSERT3S(g_refcount, >, 0);
797 	VERIFY3S(g_fd, !=, -1);
798 
799 	/* Set 'fsname' to the name of containing filesystem */
800 	(void) strlcpy(fsname, snapname, sizeof (fsname));
801 	atp = strchr(fsname, '@');
802 	if (atp == NULL)
803 		return (EINVAL);
804 	*atp = '\0';
805 
806 	/* If the fs does not exist, try its parent. */
807 	if (!lzc_exists(fsname)) {
808 		char *slashp = strrchr(fsname, '/');
809 		if (slashp == NULL)
810 			return (ENOENT);
811 		*slashp = '\0';
812 	}
813 
814 	/*
815 	 * The begin_record is normally a non-byteswapped BEGIN record.
816 	 * For resumable streams it may be set to any non-byteswapped
817 	 * dmu_replay_record_t.
818 	 */
819 	if (begin_record == NULL) {
820 		error = recv_read(input_fd, &drr, sizeof (drr));
821 		if (error != 0)
822 			return (error);
823 	} else {
824 		drr = *begin_record;
825 		payload = (begin_record->drr_payloadlen != 0);
826 	}
827 
828 	/*
829 	 * All receives with a payload should use the new interface.
830 	 */
831 	if (resumable || raw || wkeydata != NULL || payload) {
832 		nvlist_t *outnvl = NULL;
833 		nvlist_t *innvl = fnvlist_alloc();
834 
835 		fnvlist_add_string(innvl, "snapname", snapname);
836 
837 		if (recvdprops != NULL)
838 			fnvlist_add_nvlist(innvl, "props", recvdprops);
839 
840 		if (localprops != NULL)
841 			fnvlist_add_nvlist(innvl, "localprops", localprops);
842 
843 		if (wkeydata != NULL) {
844 			/*
845 			 * wkeydata must be placed in the special
846 			 * ZPOOL_HIDDEN_ARGS nvlist so that it
847 			 * will not be printed to the zpool history.
848 			 */
849 			nvlist_t *hidden_args = fnvlist_alloc();
850 			fnvlist_add_uint8_array(hidden_args, "wkeydata",
851 			    wkeydata, wkeylen);
852 			fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS,
853 			    hidden_args);
854 			nvlist_free(hidden_args);
855 		}
856 
857 		if (origin != NULL && strlen(origin))
858 			fnvlist_add_string(innvl, "origin", origin);
859 
860 		fnvlist_add_byte_array(innvl, "begin_record",
861 		    (uchar_t *)&drr, sizeof (drr));
862 
863 		fnvlist_add_int32(innvl, "input_fd", input_fd);
864 
865 		if (force)
866 			fnvlist_add_boolean(innvl, "force");
867 
868 		if (resumable)
869 			fnvlist_add_boolean(innvl, "resumable");
870 
871 
872 		error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
873 
874 		if (error == 0 && read_bytes != NULL)
875 			error = nvlist_lookup_uint64(outnvl, "read_bytes",
876 			    read_bytes);
877 
878 		if (error == 0 && errflags != NULL)
879 			error = nvlist_lookup_uint64(outnvl, "error_flags",
880 			    errflags);
881 
882 		if (error == 0 && errors != NULL) {
883 			nvlist_t *nvl;
884 			error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
885 			if (error == 0)
886 				*errors = fnvlist_dup(nvl);
887 		}
888 
889 		fnvlist_free(innvl);
890 		fnvlist_free(outnvl);
891 	} else {
892 		zfs_cmd_t zc = {"\0"};
893 		char *packed = NULL;
894 		size_t size;
895 
896 		ASSERT3S(g_refcount, >, 0);
897 
898 		(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
899 		(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
900 
901 		if (recvdprops != NULL) {
902 			packed = fnvlist_pack(recvdprops, &size);
903 			zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
904 			zc.zc_nvlist_src_size = size;
905 		}
906 
907 		if (localprops != NULL) {
908 			packed = fnvlist_pack(localprops, &size);
909 			zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed;
910 			zc.zc_nvlist_conf_size = size;
911 		}
912 
913 		if (origin != NULL)
914 			(void) strlcpy(zc.zc_string, origin,
915 			    sizeof (zc.zc_string));
916 
917 		ASSERT3S(drr.drr_type, ==, DRR_BEGIN);
918 		zc.zc_begin_record = drr.drr_u.drr_begin;
919 		zc.zc_guid = force;
920 		zc.zc_cookie = input_fd;
921 		zc.zc_cleanup_fd = -1;
922 		zc.zc_action_handle = 0;
923 
924 		zc.zc_nvlist_dst_size = 128 * 1024;
925 		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
926 		    malloc(zc.zc_nvlist_dst_size);
927 
928 		error = zfs_ioctl_fd(g_fd, ZFS_IOC_RECV, &zc);
929 		if (error != 0) {
930 			error = errno;
931 		} else {
932 			if (read_bytes != NULL)
933 				*read_bytes = zc.zc_cookie;
934 
935 			if (errflags != NULL)
936 				*errflags = zc.zc_obj;
937 
938 			if (errors != NULL)
939 				VERIFY0(nvlist_unpack(
940 				    (void *)(uintptr_t)zc.zc_nvlist_dst,
941 				    zc.zc_nvlist_dst_size, errors, KM_SLEEP));
942 		}
943 
944 		if (packed != NULL)
945 			fnvlist_pack_free(packed, size);
946 		free((void *)(uintptr_t)zc.zc_nvlist_dst);
947 	}
948 
949 	return (error);
950 }
951 
952 /*
953  * The simplest receive case: receive from the specified fd, creating the
954  * specified snapshot.  Apply the specified properties as "received" properties
955  * (which can be overridden by locally-set properties).  If the stream is a
956  * clone, its origin snapshot must be specified by 'origin'.  The 'force'
957  * flag will cause the target filesystem to be rolled back or destroyed if
958  * necessary to receive.
959  *
960  * Return 0 on success or an errno on failure.
961  *
962  * Note: this interface does not work on dedup'd streams
963  * (those with DMU_BACKUP_FEATURE_DEDUP).
964  */
965 int
lzc_receive(const char * snapname,nvlist_t * props,const char * origin,boolean_t force,boolean_t raw,int fd)966 lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
967     boolean_t force, boolean_t raw, int fd)
968 {
969 	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
970 	    B_FALSE, raw, fd, NULL, NULL, NULL, NULL));
971 }
972 
973 /*
974  * Like lzc_receive, but if the receive fails due to premature stream
975  * termination, the intermediate state will be preserved on disk.  In this
976  * case, ECKSUM will be returned.  The receive may subsequently be resumed
977  * with a resuming send stream generated by lzc_send_resume().
978  */
979 int
lzc_receive_resumable(const char * snapname,nvlist_t * props,const char * origin,boolean_t force,boolean_t raw,int fd)980 lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
981     boolean_t force, boolean_t raw, int fd)
982 {
983 	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
984 	    B_TRUE, raw, fd, NULL, NULL, NULL, NULL));
985 }
986 
987 /*
988  * Like lzc_receive, but allows the caller to read the begin record and then to
989  * pass it in.  That could be useful if the caller wants to derive, for example,
990  * the snapname or the origin parameters based on the information contained in
991  * the begin record.
992  * The begin record must be in its original form as read from the stream,
993  * in other words, it should not be byteswapped.
994  *
995  * The 'resumable' parameter allows to obtain the same behavior as with
996  * lzc_receive_resumable.
997  */
998 int
lzc_receive_with_header(const char * snapname,nvlist_t * props,const char * origin,boolean_t force,boolean_t resumable,boolean_t raw,int fd,const dmu_replay_record_t * begin_record)999 lzc_receive_with_header(const char *snapname, nvlist_t *props,
1000     const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
1001     int fd, const dmu_replay_record_t *begin_record)
1002 {
1003 	if (begin_record == NULL)
1004 		return (EINVAL);
1005 
1006 	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1007 	    resumable, raw, fd, begin_record, NULL, NULL, NULL));
1008 }
1009 
1010 /*
1011  * Like lzc_receive, but allows the caller to pass all supported arguments
1012  * and retrieve all values returned.  The only additional input parameter
 * is 'cleanup_fd', which was used to set a cleanup-on-exit file descriptor.
1014  *
1015  * The following parameters all provide return values.  Several may be set
1016  * in the failure case and will contain additional information.
1017  *
1018  * The 'read_bytes' value will be set to the total number of bytes read.
1019  *
1020  * The 'errflags' value will contain zprop_errflags_t flags which are
1021  * used to describe any failures.
1022  *
1023  * The 'action_handle' and 'cleanup_fd' are no longer used, and are ignored.
1024  *
1025  * The 'errors' nvlist contains an entry for each unapplied received
1026  * property.  Callers are responsible for freeing this nvlist.
1027  */
int
lzc_receive_one(const char *snapname, nvlist_t *props,
    const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
    int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
    uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
    nvlist_t **errors)
{
	/*
	 * 'cleanup_fd' and 'action_handle' remain in the signature for
	 * interface compatibility only; neither is forwarded to recv_impl()
	 * and '*action_handle' is never written.
	 */
	(void) cleanup_fd;
	(void) action_handle;

	/*
	 * No command-line properties and no wrapping key; the three output
	 * pointers are passed through unchanged.
	 */
	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
	    resumable, raw, input_fd, begin_record,
	    read_bytes, errflags, errors));
}
1038 
1039 /*
1040  * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops'
1041  * argument.
1042  *
1043  * The 'cmdprops' nvlist contains both override ('zfs receive -o') and
1044  * exclude ('zfs receive -x') properties. Callers are responsible for freeing
 * this nvlist.
1046  */
int
lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
    nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
    boolean_t force, boolean_t resumable, boolean_t raw, int input_fd,
    const dmu_replay_record_t *begin_record, int cleanup_fd,
    uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
    nvlist_t **errors)
{
	/*
	 * 'cleanup_fd' and 'action_handle' remain in the signature for
	 * interface compatibility only; neither is forwarded to recv_impl()
	 * and '*action_handle' is never written.
	 */
	(void) cleanup_fd;
	(void) action_handle;

	/* Every remaining argument is handed to recv_impl() as-is. */
	return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
	    force, resumable, raw, input_fd, begin_record,
	    read_bytes, errflags, errors));
}
1058 
1059 /*
1060  * Roll back this filesystem or volume to its most recent snapshot.
1061  * If snapnamebuf is not NULL, it will be filled in with the name
1062  * of the most recent snapshot.
1063  * Note that the latest snapshot may change if a new one is concurrently
1064  * created or the current one is destroyed.  lzc_rollback_to can be used
1065  * to roll back to a specific latest snapshot.
1066  *
1067  * Return 0 on success or an errno on failure.
1068  */
1069 int
lzc_rollback(const char * fsname,char * snapnamebuf,int snapnamelen)1070 lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
1071 {
1072 	nvlist_t *args;
1073 	nvlist_t *result;
1074 	int err;
1075 
1076 	args = fnvlist_alloc();
1077 	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1078 	nvlist_free(args);
1079 	if (err == 0 && snapnamebuf != NULL) {
1080 		const char *snapname = fnvlist_lookup_string(result, "target");
1081 		(void) strlcpy(snapnamebuf, snapname, snapnamelen);
1082 	}
1083 	nvlist_free(result);
1084 
1085 	return (err);
1086 }
1087 
1088 /*
1089  * Roll back this filesystem or volume to the specified snapshot,
1090  * if possible.
1091  *
1092  * Return 0 on success or an errno on failure.
1093  */
1094 int
lzc_rollback_to(const char * fsname,const char * snapname)1095 lzc_rollback_to(const char *fsname, const char *snapname)
1096 {
1097 	nvlist_t *args;
1098 	nvlist_t *result;
1099 	int err;
1100 
1101 	args = fnvlist_alloc();
1102 	fnvlist_add_string(args, "target", snapname);
1103 	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1104 	nvlist_free(args);
1105 	nvlist_free(result);
1106 	return (err);
1107 }
1108 
1109 /*
1110  * Creates new bookmarks from existing snapshot or bookmark.
1111  *
1112  * The bookmarks nvlist maps from the full name of the new bookmark to
1113  * the full name of the source snapshot or bookmark.
1114  * All the bookmarks and snapshots must be in the same pool.
1115  * The new bookmarks names must be unique.
1116  * => see function dsl_bookmark_create_nvl_validate
1117  *
1118  * The returned results nvlist will have an entry for each bookmark that failed.
1119  * The value will be the (int32) error code.
1120  *
1121  * The return value will be 0 if all bookmarks were created, otherwise it will
 * be the errno of an (undetermined) bookmark that failed.
1123  */
1124 int
lzc_bookmark(nvlist_t * bookmarks,nvlist_t ** errlist)1125 lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
1126 {
1127 	nvpair_t *elem;
1128 	int error;
1129 	char pool[ZFS_MAX_DATASET_NAME_LEN];
1130 
1131 	/* determine pool name from first bookmark */
1132 	elem = nvlist_next_nvpair(bookmarks, NULL);
1133 	if (elem == NULL)
1134 		return (0);
1135 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1136 	pool[strcspn(pool, "/#")] = '\0';
1137 
1138 	error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
1139 
1140 	return (error);
1141 }
1142 
1143 /*
1144  * Retrieve bookmarks.
1145  *
1146  * Retrieve the list of bookmarks for the given file system. The props
1147  * parameter is an nvlist of property names (with no values) that will be
1148  * returned for each bookmark.
1149  *
1150  * The following are valid properties on bookmarks, most of which are numbers
1151  * (represented as uint64 in the nvlist), except redact_snaps, which is a
 * uint64 array, and redact_complete, which is a boolean.
1153  *
1154  * "guid" - globally unique identifier of the snapshot it refers to
1155  * "createtxg" - txg when the snapshot it refers to was created
1156  * "creation" - timestamp when the snapshot it refers to was created
1157  * "ivsetguid" - IVset guid for identifying encrypted snapshots
1158  * "redact_snaps" - list of guids of the redaction snapshots for the specified
1159  *     bookmark.  If the bookmark is not a redaction bookmark, the nvlist will
1160  *     not contain an entry for this value.  If it is redacted with respect to
1161  *     no snapshots, it will contain value -> NULL uint64 array
1162  * "redact_complete" - boolean value; true if the redaction bookmark is
1163  *     complete, false otherwise.
1164  *
1165  * The format of the returned nvlist as follows:
1166  * <short name of bookmark> -> {
1167  *     <name of property> -> {
1168  *         "value" -> uint64
1169  *     }
1170  *     ...
1171  *     "redact_snaps" -> {
1172  *         "value" -> uint64 array
1173  *     }
1174  *     "redact_complete" -> {
1175  *         "value" -> boolean value
1176  *     }
1177  *  }
1178  */
1179 int
lzc_get_bookmarks(const char * fsname,nvlist_t * props,nvlist_t ** bmarks)1180 lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
1181 {
1182 	return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
1183 }
1184 
1185 /*
1186  * Get bookmark properties.
1187  *
1188  * Given a bookmark's full name, retrieve all properties for the bookmark.
1189  *
1190  * The format of the returned property list is as follows:
1191  * {
1192  *     <name of property> -> {
1193  *         "value" -> uint64
1194  *     }
1195  *     ...
 *     "redact_snaps" -> {
 *         "value" -> uint64 array
 *     }
 * }
1199  */
1200 int
lzc_get_bookmark_props(const char * bookmark,nvlist_t ** props)1201 lzc_get_bookmark_props(const char *bookmark, nvlist_t **props)
1202 {
1203 	int error;
1204 
1205 	nvlist_t *innvl = fnvlist_alloc();
1206 	error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props);
1207 	fnvlist_free(innvl);
1208 
1209 	return (error);
1210 }
1211 
1212 /*
1213  * Destroys bookmarks.
1214  *
1215  * The keys in the bmarks nvlist are the bookmarks to be destroyed.
1216  * They must all be in the same pool.  Bookmarks are specified as
1217  * <fs>#<bmark>.
1218  *
1219  * Bookmarks that do not exist will be silently ignored.
1220  *
1221  * The return value will be 0 if all bookmarks that existed were destroyed.
1222  *
 * Otherwise the return value will be the errno of an (undetermined) bookmark
 * that failed, no bookmarks will be destroyed, and the errlist will have an
 * entry for each bookmark that failed.  The value in the errlist will be
1226  * the (int32) error code.
1227  */
1228 int
lzc_destroy_bookmarks(nvlist_t * bmarks,nvlist_t ** errlist)1229 lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
1230 {
1231 	nvpair_t *elem;
1232 	int error;
1233 	char pool[ZFS_MAX_DATASET_NAME_LEN];
1234 
1235 	/* determine the pool name */
1236 	elem = nvlist_next_nvpair(bmarks, NULL);
1237 	if (elem == NULL)
1238 		return (0);
1239 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1240 	pool[strcspn(pool, "/#")] = '\0';
1241 
1242 	error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
1243 
1244 	return (error);
1245 }
1246 
1247 static int
lzc_channel_program_impl(const char * pool,const char * program,boolean_t sync,uint64_t instrlimit,uint64_t memlimit,nvlist_t * argnvl,nvlist_t ** outnvl)1248 lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync,
1249     uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1250 {
1251 	int error;
1252 	nvlist_t *args;
1253 
1254 	args = fnvlist_alloc();
1255 	fnvlist_add_string(args, ZCP_ARG_PROGRAM, program);
1256 	fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl);
1257 	fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync);
1258 	fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit);
1259 	fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit);
1260 	error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl);
1261 	fnvlist_free(args);
1262 
1263 	return (error);
1264 }
1265 
1266 /*
1267  * Executes a channel program.
1268  *
1269  * If this function returns 0 the channel program was successfully loaded and
1270  * ran without failing. Note that individual commands the channel program ran
1271  * may have failed and the channel program is responsible for reporting such
1272  * errors through outnvl if they are important.
1273  *
1274  * This method may also return:
1275  *
1276  * EINVAL   The program contains syntax errors, or an invalid memory or time
1277  *          limit was given. No part of the channel program was executed.
1278  *          If caused by syntax errors, 'outnvl' contains information about the
1279  *          errors.
1280  *
1281  * ECHRNG   The program was executed, but encountered a runtime error, such as
1282  *          calling a function with incorrect arguments, invoking the error()
1283  *          function directly, failing an assert() command, etc. Some portion
1284  *          of the channel program may have executed and committed changes.
1285  *          Information about the failure can be found in 'outnvl'.
1286  *
1287  * ENOMEM   The program fully executed, but the output buffer was not large
1288  *          enough to store the returned value. No output is returned through
1289  *          'outnvl'.
1290  *
1291  * ENOSPC   The program was terminated because it exceeded its memory usage
1292  *          limit. Some portion of the channel program may have executed and
1293  *          committed changes to disk. No output is returned through 'outnvl'.
1294  *
1295  * ETIME    The program was terminated because it exceeded its Lua instruction
1296  *          limit. Some portion of the channel program may have executed and
1297  *          committed changes to disk. No output is returned through 'outnvl'.
1298  */
1299 int
lzc_channel_program(const char * pool,const char * program,uint64_t instrlimit,uint64_t memlimit,nvlist_t * argnvl,nvlist_t ** outnvl)1300 lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit,
1301     uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1302 {
1303 	return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit,
1304 	    memlimit, argnvl, outnvl));
1305 }
1306 
1307 /*
1308  * Creates a checkpoint for the specified pool.
1309  *
1310  * If this function returns 0 the pool was successfully checkpointed.
1311  *
1312  * This method may also return:
1313  *
1314  * ZFS_ERR_CHECKPOINT_EXISTS
 *	The pool already has a checkpoint. A pool can only have one
1316  *	checkpoint at most, at any given time.
1317  *
1318  * ZFS_ERR_DISCARDING_CHECKPOINT
1319  * 	ZFS is in the middle of discarding a checkpoint for this pool.
1320  * 	The pool can be checkpointed again once the discard is done.
1321  *
1322  * ZFS_DEVRM_IN_PROGRESS
1323  * 	A vdev is currently being removed. The pool cannot be
1324  * 	checkpointed until the device removal is done.
1325  *
1326  * ZFS_VDEV_TOO_BIG
1327  * 	One or more top-level vdevs exceed the maximum vdev size
1328  * 	supported for this feature.
1329  */
1330 int
lzc_pool_checkpoint(const char * pool)1331 lzc_pool_checkpoint(const char *pool)
1332 {
1333 	int error;
1334 
1335 	nvlist_t *result = NULL;
1336 	nvlist_t *args = fnvlist_alloc();
1337 
1338 	error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result);
1339 
1340 	fnvlist_free(args);
1341 	fnvlist_free(result);
1342 
1343 	return (error);
1344 }
1345 
1346 /*
1347  * Discard the checkpoint from the specified pool.
1348  *
1349  * If this function returns 0 the checkpoint was successfully discarded.
1350  *
1351  * This method may also return:
1352  *
1353  * ZFS_ERR_NO_CHECKPOINT
1354  * 	The pool does not have a checkpoint.
1355  *
1356  * ZFS_ERR_DISCARDING_CHECKPOINT
1357  * 	ZFS is already in the middle of discarding the checkpoint.
1358  */
1359 int
lzc_pool_checkpoint_discard(const char * pool)1360 lzc_pool_checkpoint_discard(const char *pool)
1361 {
1362 	int error;
1363 
1364 	nvlist_t *result = NULL;
1365 	nvlist_t *args = fnvlist_alloc();
1366 
1367 	error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result);
1368 
1369 	fnvlist_free(args);
1370 	fnvlist_free(result);
1371 
1372 	return (error);
1373 }
1374 
1375 /*
1376  * Executes a read-only channel program.
1377  *
1378  * A read-only channel program works programmatically the same way as a
1379  * normal channel program executed with lzc_channel_program(). The only
1380  * difference is it runs exclusively in open-context and therefore can
1381  * return faster. The downside to that, is that the program cannot change
1382  * on-disk state by calling functions from the zfs.sync submodule.
1383  *
1384  * The return values of this function (and their meaning) are exactly the
1385  * same as the ones described in lzc_channel_program().
1386  */
1387 int
lzc_channel_program_nosync(const char * pool,const char * program,uint64_t timeout,uint64_t memlimit,nvlist_t * argnvl,nvlist_t ** outnvl)1388 lzc_channel_program_nosync(const char *pool, const char *program,
1389     uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1390 {
1391 	return (lzc_channel_program_impl(pool, program, B_FALSE, timeout,
1392 	    memlimit, argnvl, outnvl));
1393 }
1394 
/*
 * Performs key management functions.
 *
 * For lzc_change_key(), crypt_cmd should be a value from dcp_cmd_t.  When
 * a wrapping key is loaded or changed, the key is passed in the hidden_args
 * nvlist so that it is not logged.
 */
1402 int
lzc_load_key(const char * fsname,boolean_t noop,uint8_t * wkeydata,uint_t wkeylen)1403 lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata,
1404     uint_t wkeylen)
1405 {
1406 	int error;
1407 	nvlist_t *ioc_args;
1408 	nvlist_t *hidden_args;
1409 
1410 	if (wkeydata == NULL)
1411 		return (EINVAL);
1412 
1413 	ioc_args = fnvlist_alloc();
1414 	hidden_args = fnvlist_alloc();
1415 	fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen);
1416 	fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1417 	if (noop)
1418 		fnvlist_add_boolean(ioc_args, "noop");
1419 	error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL);
1420 	nvlist_free(hidden_args);
1421 	nvlist_free(ioc_args);
1422 
1423 	return (error);
1424 }
1425 
1426 int
lzc_unload_key(const char * fsname)1427 lzc_unload_key(const char *fsname)
1428 {
1429 	return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL));
1430 }
1431 
1432 int
lzc_change_key(const char * fsname,uint64_t crypt_cmd,nvlist_t * props,uint8_t * wkeydata,uint_t wkeylen)1433 lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props,
1434     uint8_t *wkeydata, uint_t wkeylen)
1435 {
1436 	int error;
1437 	nvlist_t *ioc_args = fnvlist_alloc();
1438 	nvlist_t *hidden_args = NULL;
1439 
1440 	fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd);
1441 
1442 	if (wkeydata != NULL) {
1443 		hidden_args = fnvlist_alloc();
1444 		fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
1445 		    wkeylen);
1446 		fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1447 	}
1448 
1449 	if (props != NULL)
1450 		fnvlist_add_nvlist(ioc_args, "props", props);
1451 
1452 	error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL);
1453 	nvlist_free(hidden_args);
1454 	nvlist_free(ioc_args);
1455 
1456 	return (error);
1457 }
1458 
1459 int
lzc_reopen(const char * pool_name,boolean_t scrub_restart)1460 lzc_reopen(const char *pool_name, boolean_t scrub_restart)
1461 {
1462 	nvlist_t *args = fnvlist_alloc();
1463 	int error;
1464 
1465 	fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart);
1466 
1467 	error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL);
1468 	nvlist_free(args);
1469 	return (error);
1470 }
1471 
1472 /*
1473  * Changes initializing state.
1474  *
1475  * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1476  * The key is ignored.
1477  *
1478  * If there are errors related to vdev arguments, per-vdev errors are returned
1479  * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1480  * guid is stringified with PRIu64, and errno is one of the following as
1481  * an int64_t:
1482  *	- ENODEV if the device was not found
 *	- EINVAL if the device is not a leaf or is not concrete (e.g. missing)
1484  *	- EROFS if the device is not writeable
1485  *	- EBUSY start requested but the device is already being either
1486  *	        initialized or trimmed
1487  *	- ESRCH cancel/suspend requested but device is not being initialized
1488  *
1489  * If the errlist is empty, then return value will be:
1490  *	- EINVAL if one or more arguments was invalid
1491  *	- Other spa_open failures
1492  *	- 0 if the operation succeeded
1493  */
1494 int
lzc_initialize(const char * poolname,pool_initialize_func_t cmd_type,nvlist_t * vdevs,nvlist_t ** errlist)1495 lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type,
1496     nvlist_t *vdevs, nvlist_t **errlist)
1497 {
1498 	int error;
1499 
1500 	nvlist_t *args = fnvlist_alloc();
1501 	fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type);
1502 	fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs);
1503 
1504 	error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist);
1505 
1506 	fnvlist_free(args);
1507 
1508 	return (error);
1509 }
1510 
1511 /*
1512  * Changes TRIM state.
1513  *
1514  * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1515  * The key is ignored.
1516  *
1517  * If there are errors related to vdev arguments, per-vdev errors are returned
1518  * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1519  * guid is stringified with PRIu64, and errno is one of the following as
1520  * an int64_t:
1521  *	- ENODEV if the device was not found
 *	- EINVAL if the device is not a leaf or is not concrete (e.g. missing)
1523  *	- EROFS if the device is not writeable
1524  *	- EBUSY start requested but the device is already being either trimmed
1525  *	        or initialized
 *	- ESRCH cancel/suspend requested but device is not being trimmed
1527  *	- EOPNOTSUPP if the device does not support TRIM (or secure TRIM)
1528  *
1529  * If the errlist is empty, then return value will be:
1530  *	- EINVAL if one or more arguments was invalid
1531  *	- Other spa_open failures
1532  *	- 0 if the operation succeeded
1533  */
1534 int
lzc_trim(const char * poolname,pool_trim_func_t cmd_type,uint64_t rate,boolean_t secure,nvlist_t * vdevs,nvlist_t ** errlist)1535 lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate,
1536     boolean_t secure, nvlist_t *vdevs, nvlist_t **errlist)
1537 {
1538 	int error;
1539 
1540 	nvlist_t *args = fnvlist_alloc();
1541 	fnvlist_add_uint64(args, ZPOOL_TRIM_COMMAND, (uint64_t)cmd_type);
1542 	fnvlist_add_nvlist(args, ZPOOL_TRIM_VDEVS, vdevs);
1543 	fnvlist_add_uint64(args, ZPOOL_TRIM_RATE, rate);
1544 	fnvlist_add_boolean_value(args, ZPOOL_TRIM_SECURE, secure);
1545 
1546 	error = lzc_ioctl(ZFS_IOC_POOL_TRIM, poolname, args, errlist);
1547 
1548 	fnvlist_free(args);
1549 
1550 	return (error);
1551 }
1552 
1553 /*
1554  * Create a redaction bookmark named bookname by redacting snapshot with respect
1555  * to all the snapshots in snapnv.
1556  */
1557 int
lzc_redact(const char * snapshot,const char * bookname,nvlist_t * snapnv)1558 lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv)
1559 {
1560 	nvlist_t *args = fnvlist_alloc();
1561 	fnvlist_add_string(args, "bookname", bookname);
1562 	fnvlist_add_nvlist(args, "snapnv", snapnv);
1563 	int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL);
1564 	fnvlist_free(args);
1565 	return (error);
1566 }
1567 
1568 static int
wait_common(const char * pool,zpool_wait_activity_t activity,boolean_t use_tag,uint64_t tag,boolean_t * waited)1569 wait_common(const char *pool, zpool_wait_activity_t activity, boolean_t use_tag,
1570     uint64_t tag, boolean_t *waited)
1571 {
1572 	nvlist_t *args = fnvlist_alloc();
1573 	nvlist_t *result = NULL;
1574 
1575 	fnvlist_add_int32(args, ZPOOL_WAIT_ACTIVITY, activity);
1576 	if (use_tag)
1577 		fnvlist_add_uint64(args, ZPOOL_WAIT_TAG, tag);
1578 
1579 	int error = lzc_ioctl(ZFS_IOC_WAIT, pool, args, &result);
1580 
1581 	if (error == 0 && waited != NULL)
1582 		*waited = fnvlist_lookup_boolean_value(result,
1583 		    ZPOOL_WAIT_WAITED);
1584 
1585 	fnvlist_free(args);
1586 	fnvlist_free(result);
1587 
1588 	return (error);
1589 }
1590 
1591 int
lzc_wait(const char * pool,zpool_wait_activity_t activity,boolean_t * waited)1592 lzc_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited)
1593 {
1594 	return (wait_common(pool, activity, B_FALSE, 0, waited));
1595 }
1596 
1597 int
lzc_wait_tag(const char * pool,zpool_wait_activity_t activity,uint64_t tag,boolean_t * waited)1598 lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
1599     boolean_t *waited)
1600 {
1601 	return (wait_common(pool, activity, B_TRUE, tag, waited));
1602 }
1603 
1604 int
lzc_wait_fs(const char * fs,zfs_wait_activity_t activity,boolean_t * waited)1605 lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited)
1606 {
1607 	nvlist_t *args = fnvlist_alloc();
1608 	nvlist_t *result = NULL;
1609 
1610 	fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity);
1611 
1612 	int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result);
1613 
1614 	if (error == 0 && waited != NULL)
1615 		*waited = fnvlist_lookup_boolean_value(result,
1616 		    ZFS_WAIT_WAITED);
1617 
1618 	fnvlist_free(args);
1619 	fnvlist_free(result);
1620 
1621 	return (error);
1622 }
1623 
1624 /*
1625  * Set the bootenv contents for the given pool.
1626  */
1627 int
lzc_set_bootenv(const char * pool,const nvlist_t * env)1628 lzc_set_bootenv(const char *pool, const nvlist_t *env)
1629 {
1630 	return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL));
1631 }
1632 
1633 /*
1634  * Get the contents of the bootenv of the given pool.
1635  */
1636 int
lzc_get_bootenv(const char * pool,nvlist_t ** outnvl)1637 lzc_get_bootenv(const char *pool, nvlist_t **outnvl)
1638 {
1639 	return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl));
1640 }
1641