1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2011 Pawel Jakub Dawidek <[email protected]>.
24  * All rights reserved.
25  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
26  * Copyright (c) 2014 Integros [integros.com]
27  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
28  */
29 
30 /* Portions Copyright 2010 Robert Milkowski */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/sysmacros.h>
37 #include <sys/kmem.h>
38 #include <sys/acl.h>
39 #include <sys/vnode.h>
40 #include <sys/vfs.h>
41 #include <sys/mntent.h>
42 #include <sys/mount.h>
43 #include <sys/cmn_err.h>
44 #include <sys/zfs_znode.h>
45 #include <sys/zfs_vnops.h>
46 #include <sys/zfs_dir.h>
47 #include <sys/zil.h>
48 #include <sys/fs/zfs.h>
49 #include <sys/dmu.h>
50 #include <sys/dsl_prop.h>
51 #include <sys/dsl_dataset.h>
52 #include <sys/dsl_deleg.h>
53 #include <sys/spa.h>
54 #include <sys/zap.h>
55 #include <sys/sa.h>
56 #include <sys/sa_impl.h>
57 #include <sys/policy.h>
58 #include <sys/atomic.h>
59 #include <sys/zfs_ioctl.h>
60 #include <sys/zfs_ctldir.h>
61 #include <sys/zfs_fuid.h>
62 #include <sys/sunddi.h>
63 #include <sys/dmu_objset.h>
64 #include <sys/dsl_dir.h>
65 #include <sys/spa_boot.h>
66 #include <sys/jail.h>
67 #include <ufs/ufs/quota.h>
68 #include <sys/zfs_quota.h>
69 
70 #include "zfs_comutil.h"
71 
72 #ifndef	MNTK_VMSETSIZE_BUG
73 #define	MNTK_VMSETSIZE_BUG	0
74 #endif
75 #ifndef	MNTK_NOMSYNC
76 #define	MNTK_NOMSYNC	8
77 #endif
78 
/* BEGIN CSTYLED */
/* Serializes ZFS debug printing; initialized at boot via SYSINIT. */
struct mtx zfs_debug_mtx;
MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);

SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");

/* When non-zero, a dataset's owner may run privileged ops on it (jails). */
int zfs_super_owner;
SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
    "File system owner can perform privileged operation on his file systems");

int zfs_debug_level;
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
	"Debug level");

/* Read-only version reporting under vfs.zfs.version.* */
SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
static int zfs_version_acl = ZFS_ACL_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
    "ZFS_ACL_VERSION");
static int zfs_version_spa = SPA_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
    "SPA_VERSION");
static int zfs_version_zpl = ZPL_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
    "ZPL_VERSION");
/* END CSTYLED */

/* Forward declarations for the vfsops table below. */
static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
static int zfs_mount(vfs_t *vfsp);
static int zfs_umount(vfs_t *vfsp, int fflag);
static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
static int zfs_sync(vfs_t *vfsp, int waitfor);
#if __FreeBSD_version >= 1300098
static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int *secflavors);
#else
static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors);
#endif
static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
static void zfs_freevfs(vfs_t *vfsp);

/* VFS operations vector registered with the FreeBSD VFS layer. */
struct vfsops zfs_vfsops = {
	.vfs_mount =		zfs_mount,
	.vfs_unmount =		zfs_umount,
#if __FreeBSD_version >= 1300049
	/* Newer kernels cache the root vnode; zfs_root becomes the filler. */
	.vfs_root =		vfs_cache_root,
	.vfs_cachedroot = zfs_root,
#else
	.vfs_root =		zfs_root,
#endif
	.vfs_statfs =		zfs_statfs,
	.vfs_vget =		zfs_vget,
	.vfs_sync =		zfs_sync,
	.vfs_checkexp =		zfs_checkexp,
	.vfs_fhtovp =		zfs_fhtovp,
	.vfs_quotactl =		zfs_quotactl,
};

VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);

/*
 * We need to keep a count of active fs's.
 * This is necessary to prevent our module
 * from being unloaded after a umount -f
 */
static uint32_t	zfs_active_fs_count = 0;
147 
148 int
zfs_get_temporary_prop(dsl_dataset_t * ds,zfs_prop_t zfs_prop,uint64_t * val,char * setpoint)149 zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
150     char *setpoint)
151 {
152 	int error;
153 	zfsvfs_t *zfvp;
154 	vfs_t *vfsp;
155 	objset_t *os;
156 	uint64_t tmp = *val;
157 
158 	error = dmu_objset_from_ds(ds, &os);
159 	if (error != 0)
160 		return (error);
161 
162 	error = getzfsvfs_impl(os, &zfvp);
163 	if (error != 0)
164 		return (error);
165 	if (zfvp == NULL)
166 		return (ENOENT);
167 	vfsp = zfvp->z_vfs;
168 	switch (zfs_prop) {
169 	case ZFS_PROP_ATIME:
170 		if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
171 			tmp = 0;
172 		if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL))
173 			tmp = 1;
174 		break;
175 	case ZFS_PROP_DEVICES:
176 		if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
177 			tmp = 0;
178 		if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL))
179 			tmp = 1;
180 		break;
181 	case ZFS_PROP_EXEC:
182 		if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
183 			tmp = 0;
184 		if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL))
185 			tmp = 1;
186 		break;
187 	case ZFS_PROP_SETUID:
188 		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
189 			tmp = 0;
190 		if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL))
191 			tmp = 1;
192 		break;
193 	case ZFS_PROP_READONLY:
194 		if (vfs_optionisset(vfsp, MNTOPT_RW, NULL))
195 			tmp = 0;
196 		if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
197 			tmp = 1;
198 		break;
199 	case ZFS_PROP_XATTR:
200 		if (zfvp->z_flags & ZSB_XATTR)
201 			tmp = zfvp->z_xattr;
202 		break;
203 	case ZFS_PROP_NBMAND:
204 		if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
205 			tmp = 0;
206 		if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL))
207 			tmp = 1;
208 		break;
209 	default:
210 		vfs_unbusy(vfsp);
211 		return (ENOENT);
212 	}
213 
214 	vfs_unbusy(vfsp);
215 	if (tmp != *val) {
216 		(void) strcpy(setpoint, "temporary");
217 		*val = tmp;
218 	}
219 	return (0);
220 }
221 
222 static int
zfs_getquota(zfsvfs_t * zfsvfs,uid_t id,int isgroup,struct dqblk64 * dqp)223 zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp)
224 {
225 	int error = 0;
226 	char buf[32];
227 	uint64_t usedobj, quotaobj;
228 	uint64_t quota, used = 0;
229 	timespec_t now;
230 
231 	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
232 	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
233 
234 	if (quotaobj == 0 || zfsvfs->z_replay) {
235 		error = ENOENT;
236 		goto done;
237 	}
238 	(void) sprintf(buf, "%llx", (longlong_t)id);
239 	if ((error = zap_lookup(zfsvfs->z_os, quotaobj,
240 	    buf, sizeof (quota), 1, &quota)) != 0) {
241 		dprintf("%s(%d): quotaobj lookup failed\n",
242 		    __FUNCTION__, __LINE__);
243 		goto done;
244 	}
245 	/*
246 	 * quota(8) uses bsoftlimit as "quoota", and hardlimit as "limit".
247 	 * So we set them to be the same.
248 	 */
249 	dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota);
250 	error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used);
251 	if (error && error != ENOENT) {
252 		dprintf("%s(%d):  usedobj failed; %d\n",
253 		    __FUNCTION__, __LINE__, error);
254 		goto done;
255 	}
256 	dqp->dqb_curblocks = btodb(used);
257 	dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0;
258 	vfs_timestamp(&now);
259 	/*
260 	 * Setting this to 0 causes FreeBSD quota(8) to print
261 	 * the number of days since the epoch, which isn't
262 	 * particularly useful.
263 	 */
264 	dqp->dqb_btime = dqp->dqb_itime = now.tv_sec;
265 done:
266 	return (error);
267 }
268 
/*
 * VFS_QUOTACTL entry point: translate FreeBSD quotactl(2) sub-commands
 * onto ZFS user/group quota properties.
 *
 * NOTE(review): the Q_QUOTAON/Q_QUOTAOFF paths call vfs_unbusy(), and the
 * EINVAL path mirrors that only for those two commands — presumably the
 * FreeBSD quotactl(2) syscall layer leaves the mount busied for on/off and
 * expects the filesystem to drop it; confirm against kern_quotactl().
 */
static int
zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	struct thread *td;
	int cmd, type, error = 0;
	int bitsize;
	zfs_userquota_prop_t quota_type;
	struct dqblk64 dqblk = { 0 };

	td = curthread;
	/* cmds packs the command and the quota type (USRQUOTA/GRPQUOTA). */
	cmd = cmds >> SUBCMDSHIFT;
	type = cmds & SUBCMDMASK;

	ZFS_ENTER(zfsvfs);
	/* id == -1 means "the calling thread's own uid/gid". */
	if (id == -1) {
		switch (type) {
		case USRQUOTA:
			id = td->td_ucred->cr_ruid;
			break;
		case GRPQUOTA:
			id = td->td_ucred->cr_rgid;
			break;
		default:
			error = EINVAL;
			if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
				vfs_unbusy(vfsp);
			goto done;
		}
	}
	/*
	 * Map BSD type to:
	 * ZFS_PROP_USERUSED,
	 * ZFS_PROP_USERQUOTA,
	 * ZFS_PROP_GROUPUSED,
	 * ZFS_PROP_GROUPQUOTA
	 */
	switch (cmd) {
	case Q_SETQUOTA:
	case Q_SETQUOTA32:
		if (type == USRQUOTA)
			quota_type = ZFS_PROP_USERQUOTA;
		else if (type == GRPQUOTA)
			quota_type = ZFS_PROP_GROUPQUOTA;
		else
			error = EINVAL;
		break;
	case Q_GETQUOTA:
	case Q_GETQUOTA32:
		if (type == USRQUOTA)
			quota_type = ZFS_PROP_USERUSED;
		else if (type == GRPQUOTA)
			quota_type = ZFS_PROP_GROUPUSED;
		else
			error = EINVAL;
		break;
	}

	/*
	 * Depending on the cmd, we may need to get
	 * the ruid and domain (see fuidstr_to_sid?),
	 * the fuid (how?), or other information.
	 * Create fuid using zfs_fuid_create(zfsvfs, id,
	 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)?
	 * I think I can use just the id?
	 *
	 * Look at zfs_id_overquota() to look up a quota.
	 * zap_lookup(something, quotaobj, fuidstring,
	 *     sizeof (long long), 1, &quota)
	 *
	 * See zfs_set_userquota() to set a quota.
	 */
	/* Reject quota types beyond USRQUOTA/GRPQUOTA. */
	if ((uint32_t)type >= MAXQUOTAS) {
		error = EINVAL;
		goto done;
	}

	switch (cmd) {
	case Q_GETQUOTASIZE:
		/* ZFS quotas are always 64-bit. */
		bitsize = 64;
		error = copyout(&bitsize, arg, sizeof (int));
		break;
	case Q_QUOTAON:
		// As far as I can tell, you can't turn quotas on or off on zfs
		error = 0;
		vfs_unbusy(vfsp);
		break;
	case Q_QUOTAOFF:
		error = ENOTSUP;
		vfs_unbusy(vfsp);
		break;
	case Q_SETQUOTA:
		/* Only the block hard limit maps onto a ZFS quota. */
		error = copyin(arg, &dqblk, sizeof (dqblk));
		if (error == 0)
			error = zfs_set_userquota(zfsvfs, quota_type,
			    "", id, dbtob(dqblk.dqb_bhardlimit));
		break;
	case Q_GETQUOTA:
		error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk);
		if (error == 0)
			error = copyout(&dqblk, arg, sizeof (dqblk));
		break;
	default:
		error = EINVAL;
		break;
	}
done:
	ZFS_EXIT(zfsvfs);
	return (error);
}
379 
380 
381 boolean_t
zfs_is_readonly(zfsvfs_t * zfsvfs)382 zfs_is_readonly(zfsvfs_t *zfsvfs)
383 {
384 	return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY));
385 }
386 
387 /*ARGSUSED*/
388 static int
zfs_sync(vfs_t * vfsp,int waitfor)389 zfs_sync(vfs_t *vfsp, int waitfor)
390 {
391 
392 	/*
393 	 * Data integrity is job one.  We don't want a compromised kernel
394 	 * writing to the storage pool, so we never sync during panic.
395 	 */
396 	if (panicstr)
397 		return (0);
398 
399 	/*
400 	 * Ignore the system syncher.  ZFS already commits async data
401 	 * at zfs_txg_timeout intervals.
402 	 */
403 	if (waitfor == MNT_LAZY)
404 		return (0);
405 
406 	if (vfsp != NULL) {
407 		/*
408 		 * Sync a specific filesystem.
409 		 */
410 		zfsvfs_t *zfsvfs = vfsp->vfs_data;
411 		dsl_pool_t *dp;
412 		int error;
413 
414 		error = vfs_stdsync(vfsp, waitfor);
415 		if (error != 0)
416 			return (error);
417 
418 		ZFS_ENTER(zfsvfs);
419 		dp = dmu_objset_pool(zfsvfs->z_os);
420 
421 		/*
422 		 * If the system is shutting down, then skip any
423 		 * filesystems which may exist on a suspended pool.
424 		 */
425 		if (rebooting && spa_suspended(dp->dp_spa)) {
426 			ZFS_EXIT(zfsvfs);
427 			return (0);
428 		}
429 
430 		if (zfsvfs->z_log != NULL)
431 			zil_commit(zfsvfs->z_log, 0);
432 
433 		ZFS_EXIT(zfsvfs);
434 	} else {
435 		/*
436 		 * Sync all ZFS filesystems.  This is what happens when you
437 		 * run sync(8).  Unlike other filesystems, ZFS honors the
438 		 * request by waiting for all pools to commit all dirty data.
439 		 */
440 		spa_sync_allpools();
441 	}
442 
443 	return (0);
444 }
445 
446 static void
atime_changed_cb(void * arg,uint64_t newval)447 atime_changed_cb(void *arg, uint64_t newval)
448 {
449 	zfsvfs_t *zfsvfs = arg;
450 
451 	if (newval == TRUE) {
452 		zfsvfs->z_atime = TRUE;
453 		zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
454 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
455 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
456 	} else {
457 		zfsvfs->z_atime = FALSE;
458 		zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
459 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
460 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
461 	}
462 }
463 
464 static void
xattr_changed_cb(void * arg,uint64_t newval)465 xattr_changed_cb(void *arg, uint64_t newval)
466 {
467 	zfsvfs_t *zfsvfs = arg;
468 
469 	if (newval == ZFS_XATTR_OFF) {
470 		zfsvfs->z_flags &= ~ZSB_XATTR;
471 	} else {
472 		zfsvfs->z_flags |= ZSB_XATTR;
473 
474 		if (newval == ZFS_XATTR_SA)
475 			zfsvfs->z_xattr_sa = B_TRUE;
476 		else
477 			zfsvfs->z_xattr_sa = B_FALSE;
478 	}
479 }
480 
481 static void
blksz_changed_cb(void * arg,uint64_t newval)482 blksz_changed_cb(void *arg, uint64_t newval)
483 {
484 	zfsvfs_t *zfsvfs = arg;
485 	ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
486 	ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
487 	ASSERT(ISP2(newval));
488 
489 	zfsvfs->z_max_blksz = newval;
490 	zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
491 }
492 
493 static void
readonly_changed_cb(void * arg,uint64_t newval)494 readonly_changed_cb(void *arg, uint64_t newval)
495 {
496 	zfsvfs_t *zfsvfs = arg;
497 
498 	if (newval) {
499 		/* XXX locking on vfs_flag? */
500 		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
501 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
502 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
503 	} else {
504 		/* XXX locking on vfs_flag? */
505 		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
506 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
507 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
508 	}
509 }
510 
511 static void
setuid_changed_cb(void * arg,uint64_t newval)512 setuid_changed_cb(void *arg, uint64_t newval)
513 {
514 	zfsvfs_t *zfsvfs = arg;
515 
516 	if (newval == FALSE) {
517 		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
518 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
519 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
520 	} else {
521 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
522 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
523 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
524 	}
525 }
526 
527 static void
exec_changed_cb(void * arg,uint64_t newval)528 exec_changed_cb(void *arg, uint64_t newval)
529 {
530 	zfsvfs_t *zfsvfs = arg;
531 
532 	if (newval == FALSE) {
533 		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
534 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
535 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
536 	} else {
537 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
538 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
539 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
540 	}
541 }
542 
543 /*
544  * The nbmand mount option can be changed at mount time.
545  * We can't allow it to be toggled on live file systems or incorrect
546  * behavior may be seen from cifs clients
547  *
548  * This property isn't registered via dsl_prop_register(), but this callback
549  * will be called when a file system is first mounted
550  */
551 static void
nbmand_changed_cb(void * arg,uint64_t newval)552 nbmand_changed_cb(void *arg, uint64_t newval)
553 {
554 	zfsvfs_t *zfsvfs = arg;
555 	if (newval == FALSE) {
556 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
557 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
558 	} else {
559 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
560 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
561 	}
562 }
563 
564 static void
snapdir_changed_cb(void * arg,uint64_t newval)565 snapdir_changed_cb(void *arg, uint64_t newval)
566 {
567 	zfsvfs_t *zfsvfs = arg;
568 
569 	zfsvfs->z_show_ctldir = newval;
570 }
571 
572 static void
vscan_changed_cb(void * arg,uint64_t newval)573 vscan_changed_cb(void *arg, uint64_t newval)
574 {
575 	zfsvfs_t *zfsvfs = arg;
576 
577 	zfsvfs->z_vscan = newval;
578 }
579 
580 static void
acl_mode_changed_cb(void * arg,uint64_t newval)581 acl_mode_changed_cb(void *arg, uint64_t newval)
582 {
583 	zfsvfs_t *zfsvfs = arg;
584 
585 	zfsvfs->z_acl_mode = newval;
586 }
587 
588 static void
acl_inherit_changed_cb(void * arg,uint64_t newval)589 acl_inherit_changed_cb(void *arg, uint64_t newval)
590 {
591 	zfsvfs_t *zfsvfs = arg;
592 
593 	zfsvfs->z_acl_inherit = newval;
594 }
595 
596 static void
acl_type_changed_cb(void * arg,uint64_t newval)597 acl_type_changed_cb(void *arg, uint64_t newval)
598 {
599 	zfsvfs_t *zfsvfs = arg;
600 
601 	zfsvfs->z_acl_type = newval;
602 }
603 
604 static int
zfs_register_callbacks(vfs_t * vfsp)605 zfs_register_callbacks(vfs_t *vfsp)
606 {
607 	struct dsl_dataset *ds = NULL;
608 	objset_t *os = NULL;
609 	zfsvfs_t *zfsvfs = NULL;
610 	uint64_t nbmand;
611 	boolean_t readonly = B_FALSE;
612 	boolean_t do_readonly = B_FALSE;
613 	boolean_t setuid = B_FALSE;
614 	boolean_t do_setuid = B_FALSE;
615 	boolean_t exec = B_FALSE;
616 	boolean_t do_exec = B_FALSE;
617 	boolean_t xattr = B_FALSE;
618 	boolean_t atime = B_FALSE;
619 	boolean_t do_atime = B_FALSE;
620 	boolean_t do_xattr = B_FALSE;
621 	int error = 0;
622 
623 	ASSERT(vfsp);
624 	zfsvfs = vfsp->vfs_data;
625 	ASSERT(zfsvfs);
626 	os = zfsvfs->z_os;
627 
628 	/*
629 	 * This function can be called for a snapshot when we update snapshot's
630 	 * mount point, which isn't really supported.
631 	 */
632 	if (dmu_objset_is_snapshot(os))
633 		return (EOPNOTSUPP);
634 
635 	/*
636 	 * The act of registering our callbacks will destroy any mount
637 	 * options we may have.  In order to enable temporary overrides
638 	 * of mount options, we stash away the current values and
639 	 * restore them after we register the callbacks.
640 	 */
641 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
642 	    !spa_writeable(dmu_objset_spa(os))) {
643 		readonly = B_TRUE;
644 		do_readonly = B_TRUE;
645 	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
646 		readonly = B_FALSE;
647 		do_readonly = B_TRUE;
648 	}
649 	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
650 		setuid = B_FALSE;
651 		do_setuid = B_TRUE;
652 	} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
653 		setuid = B_TRUE;
654 		do_setuid = B_TRUE;
655 	}
656 	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
657 		exec = B_FALSE;
658 		do_exec = B_TRUE;
659 	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
660 		exec = B_TRUE;
661 		do_exec = B_TRUE;
662 	}
663 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
664 		zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF;
665 		do_xattr = B_TRUE;
666 	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
667 		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
668 		do_xattr = B_TRUE;
669 	} else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) {
670 		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
671 		do_xattr = B_TRUE;
672 	} else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) {
673 		zfsvfs->z_xattr = xattr = ZFS_XATTR_SA;
674 		do_xattr = B_TRUE;
675 	}
676 	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
677 		atime = B_FALSE;
678 		do_atime = B_TRUE;
679 	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
680 		atime = B_TRUE;
681 		do_atime = B_TRUE;
682 	}
683 
684 	/*
685 	 * We need to enter pool configuration here, so that we can use
686 	 * dsl_prop_get_int_ds() to handle the special nbmand property below.
687 	 * dsl_prop_get_integer() can not be used, because it has to acquire
688 	 * spa_namespace_lock and we can not do that because we already hold
689 	 * z_teardown_lock.  The problem is that spa_write_cachefile() is called
690 	 * with spa_namespace_lock held and the function calls ZFS vnode
691 	 * operations to write the cache file and thus z_teardown_lock is
692 	 * acquired after spa_namespace_lock.
693 	 */
694 	ds = dmu_objset_ds(os);
695 	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
696 
697 	/*
698 	 * nbmand is a special property.  It can only be changed at
699 	 * mount time.
700 	 *
701 	 * This is weird, but it is documented to only be changeable
702 	 * at mount time.
703 	 */
704 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
705 		nbmand = B_FALSE;
706 	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
707 		nbmand = B_TRUE;
708 	} else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand) != 0)) {
709 		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
710 		return (error);
711 	}
712 
713 	/*
714 	 * Register property callbacks.
715 	 *
716 	 * It would probably be fine to just check for i/o error from
717 	 * the first prop_register(), but I guess I like to go
718 	 * overboard...
719 	 */
720 	error = dsl_prop_register(ds,
721 	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
722 	error = error ? error : dsl_prop_register(ds,
723 	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
724 	error = error ? error : dsl_prop_register(ds,
725 	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
726 	error = error ? error : dsl_prop_register(ds,
727 	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
728 	error = error ? error : dsl_prop_register(ds,
729 	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
730 	error = error ? error : dsl_prop_register(ds,
731 	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
732 	error = error ? error : dsl_prop_register(ds,
733 	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
734 	error = error ? error : dsl_prop_register(ds,
735 	    zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs);
736 	error = error ? error : dsl_prop_register(ds,
737 	    zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
738 	error = error ? error : dsl_prop_register(ds,
739 	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
740 	    zfsvfs);
741 	error = error ? error : dsl_prop_register(ds,
742 	    zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
743 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
744 	if (error)
745 		goto unregister;
746 
747 	/*
748 	 * Invoke our callbacks to restore temporary mount options.
749 	 */
750 	if (do_readonly)
751 		readonly_changed_cb(zfsvfs, readonly);
752 	if (do_setuid)
753 		setuid_changed_cb(zfsvfs, setuid);
754 	if (do_exec)
755 		exec_changed_cb(zfsvfs, exec);
756 	if (do_xattr)
757 		xattr_changed_cb(zfsvfs, xattr);
758 	if (do_atime)
759 		atime_changed_cb(zfsvfs, atime);
760 
761 	nbmand_changed_cb(zfsvfs, nbmand);
762 
763 	return (0);
764 
765 unregister:
766 	dsl_prop_unregister_all(ds, zfsvfs);
767 	return (error);
768 }
769 
/*
 * Associate this zfsvfs with the given objset, which must be owned.
 * This will cache a bunch of on-disk state from the objset in the
 * zfsvfs.
 */
static int
zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
{
	int error;
	uint64_t val;

	zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
	zfsvfs->z_os = os;

	/* Refuse to mount a ZPL version newer than this pool supports. */
	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
	if (error != 0)
		return (error);
	if (zfsvfs->z_version >
	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
		(void) printf("Can't mount a version %lld file system "
		    "on a version %lld pool\n. Pool must be upgraded to mount "
		    "this file system.", (u_longlong_t)zfsvfs->z_version,
		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
		return (SET_ERROR(ENOTSUP));
	}
	/* Cache the name-handling zplprops: normalization, utf8, case. */
	error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_norm = (int)val;

	error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_utf8 = (val != 0);

	error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_case = (uint_t)val;

	error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_acl_type = (uint_t)val;

	/*
	 * Fold case on file systems that are always or sometimes case
	 * insensitive.
	 */
	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
	    zfsvfs->z_case == ZFS_CASE_MIXED)
		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;

	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);

	/* Locate the system-attribute registry if this version uses SA. */
	uint64_t sa_obj = 0;
	if (zfsvfs->z_use_sa) {
		/* should either have both of these objects or none */
		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
		    &sa_obj);
		if (error != 0)
			return (error);
	}

	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
	    &zfsvfs->z_attr_table);
	if (error != 0)
		return (error);

	if (zfsvfs->z_version >= ZPL_VERSION_SA)
		sa_register_update_callback(os, zfs_sa_upgrade);

	/* Root directory object — must exist on every ZPL objset. */
	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
	    &zfsvfs->z_root);
	if (error != 0)
		return (error);
	ASSERT(zfsvfs->z_root != 0);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
	    &zfsvfs->z_unlinkedobj);
	if (error != 0)
		return (error);

	/*
	 * The quota/FUID/shares objects below are all optional: ENOENT
	 * means "feature not in use" and maps to an object id of 0.
	 */
	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
	    8, 1, &zfsvfs->z_userquota_obj);
	if (error == ENOENT)
		zfsvfs->z_userquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
	    8, 1, &zfsvfs->z_groupquota_obj);
	if (error == ENOENT)
		zfsvfs->z_groupquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],
	    8, 1, &zfsvfs->z_projectquota_obj);
	if (error == ENOENT)
		zfsvfs->z_projectquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
	    8, 1, &zfsvfs->z_userobjquota_obj);
	if (error == ENOENT)
		zfsvfs->z_userobjquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
	    8, 1, &zfsvfs->z_groupobjquota_obj);
	if (error == ENOENT)
		zfsvfs->z_groupobjquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],
	    8, 1, &zfsvfs->z_projectobjquota_obj);
	if (error == ENOENT)
		zfsvfs->z_projectobjquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
	    &zfsvfs->z_fuid_obj);
	if (error == ENOENT)
		zfsvfs->z_fuid_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
	    &zfsvfs->z_shares_dir);
	if (error == ENOENT)
		zfsvfs->z_shares_dir = 0;
	else if (error != 0)
		return (error);

	/*
	 * Only use the name cache if we are looking for a
	 * name on a file system that does not require normalization
	 * or case folding.  We can also look there if we happen to be
	 * on a non-normalizing, mixed sensitivity file system IF we
	 * are looking for the exact name (which is always the case on
	 * FreeBSD).
	 */
	zfsvfs->z_use_namecache = !zfsvfs->z_norm ||
	    ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
	    !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER));

	return (0);
}
931 
932 taskq_t *zfsvfs_taskq;
933 
934 static void
zfsvfs_task_unlinked_drain(void * context,int pending __unused)935 zfsvfs_task_unlinked_drain(void *context, int pending __unused)
936 {
937 
938 	zfs_unlinked_drain((zfsvfs_t *)context);
939 }
940 
941 int
zfsvfs_create(const char * osname,boolean_t readonly,zfsvfs_t ** zfvp)942 zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
943 {
944 	objset_t *os;
945 	zfsvfs_t *zfsvfs;
946 	int error;
947 	boolean_t ro = (readonly || (strchr(osname, '@') != NULL));
948 
949 	/*
950 	 * XXX: Fix struct statfs so this isn't necessary!
951 	 *
952 	 * The 'osname' is used as the filesystem's special node, which means
953 	 * it must fit in statfs.f_mntfromname, or else it can't be
954 	 * enumerated, so libzfs_mnttab_find() returns NULL, which causes
955 	 * 'zfs unmount' to think it's not mounted when it is.
956 	 */
957 	if (strlen(osname) >= MNAMELEN)
958 		return (SET_ERROR(ENAMETOOLONG));
959 
960 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
961 
962 	error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs,
963 	    &os);
964 	if (error != 0) {
965 		kmem_free(zfsvfs, sizeof (zfsvfs_t));
966 		return (error);
967 	}
968 
969 	error = zfsvfs_create_impl(zfvp, zfsvfs, os);
970 
971 	return (error);
972 }
973 
974 
975 int
zfsvfs_create_impl(zfsvfs_t ** zfvp,zfsvfs_t * zfsvfs,objset_t * os)976 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
977 {
978 	int error;
979 
980 	zfsvfs->z_vfs = NULL;
981 	zfsvfs->z_parent = zfsvfs;
982 
983 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
984 	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
985 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
986 	    offsetof(znode_t, z_link_node));
987 	TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
988 	    zfsvfs_task_unlinked_drain, zfsvfs);
989 	ZFS_TEARDOWN_INIT(zfsvfs);
990 	ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs);
991 	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
992 	for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
993 		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
994 
995 	error = zfsvfs_init(zfsvfs, os);
996 	if (error != 0) {
997 		dmu_objset_disown(os, B_TRUE, zfsvfs);
998 		*zfvp = NULL;
999 		kmem_free(zfsvfs, sizeof (zfsvfs_t));
1000 		return (error);
1001 	}
1002 
1003 	*zfvp = zfsvfs;
1004 	return (0);
1005 }
1006 
/*
 * Finish setting up a zfsvfs: register property callbacks, open the
 * ZIL, and (at mount time) drain the unlinked set and replay the
 * intent log.  Finally publish the zfsvfs as the objset's user
 * pointer.
 *
 * 'mounting' is B_TRUE for an initial mount and B_FALSE when resuming
 * a suspended filesystem (e.g. rollback or online recv), in which
 * case the unlinked-drain/replay work is skipped -- see the comment
 * below.  Returns 0 or an errno-style error.
 */
static int
zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
{
	int error;

	/*
	 * Check for a bad on-disk format version now since we
	 * lied about owning the dataset readonly before.
	 */
	if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
	    dmu_objset_incompatible_encryption_version(zfsvfs->z_os))
		return (SET_ERROR(EROFS));

	error = zfs_register_callbacks(zfsvfs->z_vfs);
	if (error)
		return (error);

	/* zfs_get_data is the ZIL's callback for fetching file data. */
	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);

	/*
	 * If we are not mounting (ie: online recv), then we don't
	 * have to worry about replaying the log as we blocked all
	 * operations out since we closed the ZIL.
	 */
	if (mounting) {
		boolean_t readonly;

		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
		dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);

		/*
		 * During replay we remove the read only flag to
		 * allow replays to succeed.
		 */
		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
		if (readonly != 0) {
			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
		} else {
			dsl_dir_t *dd;
			zap_stats_t zs;

			/* Seed the unlinked-set kstat; best effort only. */
			if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
			    &zs) == 0) {
				dataset_kstats_update_nunlinks_kstat(
				    &zfsvfs->z_kstat, zs.zs_num_entries);
				dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
				    "num_entries in unlinked set: %llu",
				    zs.zs_num_entries);
			}

			zfs_unlinked_drain(zfsvfs);
			/* Re-enable dsl_dir activity waiters after any prior
			 * cancellation done during teardown. */
			dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
			dd->dd_activity_cancelled = B_FALSE;
		}

		/*
		 * Parse and replay the intent log.
		 *
		 * Because of ziltest, this must be done after
		 * zfs_unlinked_drain().  (Further note: ziltest
		 * doesn't use readonly mounts, where
		 * zfs_unlinked_drain() isn't called.)  This is because
		 * ziltest causes spa_sync() to think it's committed,
		 * but actually it is not, so the intent log contains
		 * many txg's worth of changes.
		 *
		 * In particular, if object N is in the unlinked set in
		 * the last txg to actually sync, then it could be
		 * actually freed in a later txg and then reallocated
		 * in a yet later txg.  This would write a "create
		 * object N" record to the intent log.  Normally, this
		 * would be fine because the spa_sync() would have
		 * written out the fact that object N is free, before
		 * we could write the "create object N" intent log
		 * record.
		 *
		 * But when we are in ziltest mode, we advance the "open
		 * txg" without actually spa_sync()-ing the changes to
		 * disk.  So we would see that object N is still
		 * allocated and in the unlinked set, and there is an
		 * intent log record saying to allocate it.
		 */
		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
			if (zil_replay_disable) {
				zil_destroy(zfsvfs->z_log, B_FALSE);
			} else {
				/*
				 * Disable the namecache while z_replay is
				 * set; restore the previous setting after.
				 */
				boolean_t use_nc = zfsvfs->z_use_namecache;
				zfsvfs->z_use_namecache = B_FALSE;
				zfsvfs->z_replay = B_TRUE;
				zil_replay(zfsvfs->z_os, zfsvfs,
				    zfs_replay_vector);
				zfsvfs->z_replay = B_FALSE;
				zfsvfs->z_use_namecache = use_nc;
			}
		}

		/* restore readonly bit */
		if (readonly != 0)
			zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
	}

	/*
	 * Set the objset user_ptr to track its zfsvfs.
	 */
	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);

	return (0);
}
1117 
/*
 * Release everything initialized by zfsvfs_create_impl()/zfsvfs_init()
 * and free the zfsvfs itself.  The objset must already have been torn
 * down and disowned, and all znodes must be gone (asserted below).
 */
void
zfsvfs_free(zfsvfs_t *zfsvfs)
{
	int i;

	zfs_fuid_destroy(zfsvfs);

	mutex_destroy(&zfsvfs->z_znodes_lock);
	mutex_destroy(&zfsvfs->z_lock);
	/* Any surviving znode would reference the list we destroy next. */
	ASSERT(zfsvfs->z_nr_znodes == 0);
	list_destroy(&zfsvfs->z_all_znodes);
	ZFS_TEARDOWN_DESTROY(zfsvfs);
	ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs);
	rw_destroy(&zfsvfs->z_fuid_lock);
	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
	dataset_kstats_destroy(&zfsvfs->z_kstat);
	kmem_free(zfsvfs, sizeof (zfsvfs_t));
}
1137 
1138 static void
zfs_set_fuid_feature(zfsvfs_t * zfsvfs)1139 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
1140 {
1141 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
1142 	if (zfsvfs->z_vfs) {
1143 		if (zfsvfs->z_use_fuids) {
1144 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1145 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1146 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1147 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1148 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1149 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1150 		} else {
1151 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1152 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1153 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1154 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1155 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1156 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1157 		}
1158 	}
1159 	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
1160 }
1161 
/*
 * Attach the dataset 'osname' to 'vfsp': construct the zfsvfs, wire up
 * the FreeBSD mount flags, fsid, and feature bits, then either
 * configure a read-only snapshot mount or run the full zfsvfs_setup()
 * path.  On any error the objset is disowned and the zfsvfs freed; on
 * success the global active-filesystem count is bumped.
 */
static int
zfs_domount(vfs_t *vfsp, char *osname)
{
	uint64_t recordsize, fsid_guid;
	int error = 0;
	zfsvfs_t *zfsvfs;

	ASSERT(vfsp);
	ASSERT(osname);

	error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs);
	if (error)
		return (error);
	zfsvfs->z_vfs = vfsp;

	/* Report the dataset's recordsize as the optimal I/O size. */
	if ((error = dsl_prop_get_integer(osname,
	    "recordsize", &recordsize, NULL)))
		goto out;
	zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
	zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;

	vfsp->vfs_data = zfsvfs;
	vfsp->mnt_flag |= MNT_LOCAL;
	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
	vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
	/*
	 * This can cause a loss of coherence between ARC and page cache
	 * on ZoF - unclear if the problem is in FreeBSD or ZoF
	 */
	vfsp->mnt_kern_flag |= MNTK_NO_IOPF;	/* vn_io_fault can be used */
	vfsp->mnt_kern_flag |= MNTK_NOMSYNC;
	vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG;

#if defined(_KERNEL) && !defined(KMEM_DEBUG)
	/* Lockless path lookup; disabled under KMEM_DEBUG builds. */
	vfsp->mnt_kern_flag |= MNTK_FPLOOKUP;
#endif
	/*
	 * The fsid is 64 bits, composed of an 8-bit fs type, which
	 * separates our fsid from any other filesystem types, and a
	 * 56-bit objset unique ID.  The objset unique ID is unique to
	 * all objsets open on this system, provided by unique_create().
	 * The 8-bit fs type must be put in the low bits of fsid[1]
	 * because that's where other Solaris filesystems put it.
	 */
	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
	ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
	vfsp->vfs_fsid.val[0] = fsid_guid;
	vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
	    (vfsp->mnt_vfc->vfc_typenum & 0xFF);

	/*
	 * Set features for file system.
	 */
	zfs_set_fuid_feature(zfsvfs);
	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
	}
	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);

	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
		uint64_t pval;

		/* Snapshots are read-only, never update atime, never sync. */
		atime_changed_cb(zfsvfs, B_FALSE);
		readonly_changed_cb(zfsvfs, B_TRUE);
		if ((error = dsl_prop_get_integer(osname,
		    "xattr", &pval, NULL)))
			goto out;
		xattr_changed_cb(zfsvfs, pval);
		if ((error = dsl_prop_get_integer(osname,
		    "acltype", &pval, NULL)))
			goto out;
		acl_type_changed_cb(zfsvfs, pval);
		zfsvfs->z_issnap = B_TRUE;
		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;

		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
	} else {
		if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
			goto out;
	}

	vfs_mountedfrom(vfsp, osname);

	if (!zfsvfs->z_issnap)
		zfsctl_create(zfsvfs);
out:
	if (error) {
		/* Undo zfsvfs_create(): drop objset ownership, free zfsvfs. */
		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
		zfsvfs_free(zfsvfs);
	} else {
		atomic_inc_32(&zfs_active_fs_count);
	}

	return (error);
}
1265 
1266 static void
zfs_unregister_callbacks(zfsvfs_t * zfsvfs)1267 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
1268 {
1269 	objset_t *os = zfsvfs->z_os;
1270 
1271 	if (!dmu_objset_is_snapshot(os))
1272 		dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
1273 }
1274 
1275 static int
getpoolname(const char * osname,char * poolname)1276 getpoolname(const char *osname, char *poolname)
1277 {
1278 	char *p;
1279 
1280 	p = strchr(osname, '/');
1281 	if (p == NULL) {
1282 		if (strlen(osname) >= MAXNAMELEN)
1283 			return (ENAMETOOLONG);
1284 		(void) strcpy(poolname, osname);
1285 	} else {
1286 		if (p - osname >= MAXNAMELEN)
1287 			return (ENAMETOOLONG);
1288 		(void) strncpy(poolname, osname, p - osname);
1289 		poolname[p - osname] = '\0';
1290 	}
1291 	return (0);
1292 }
1293 
/*
 * Parse and strip the optional leading '!' from a dataset name, which
 * requests a checkpoint-rewind pool import.  *checkpointrewind is set
 * to whether the marker was present; 'name' is shifted left in place
 * (terminator included) when it was.
 */
static void
fetch_osname_options(char *name, bool *checkpointrewind)
{
	*checkpointrewind = (name[0] == '!');
	if (*checkpointrewind) {
		/* strlen(name) bytes from name+1 includes the NUL. */
		memmove(name, name + 1, strlen(name));
	}
}
1305 
/*
 * VFS mount entry point.  Pulls the dataset name out of the "from"
 * mount option, enforces mount privilege (either via secpolicy or ZFS
 * delegated permissions plus mount-point ownership), handles remounts
 * by refreshing property callbacks, imports the root pool for an
 * initial root mount, and finally hands off to zfs_domount().
 */
/*ARGSUSED*/
static int
zfs_mount(vfs_t *vfsp)
{
	kthread_t	*td = curthread;
	vnode_t		*mvp = vfsp->mnt_vnodecovered;
	cred_t		*cr = td->td_ucred;
	char		*osname;
	int		error = 0;
	int		canwrite;
	bool		checkpointrewind;

	/* The dataset to mount is passed as the "from" option. */
	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
		return (SET_ERROR(EINVAL));

	/*
	 * If full-owner-access is enabled and delegated administration is
	 * turned on, we must set nosuid.
	 */
	if (zfs_super_owner &&
	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
		secpolicy_fs_mount_clearopts(cr, vfsp);
	}

	/* A leading '!' on the name requests a checkpoint-rewind import. */
	fetch_osname_options(osname, &checkpointrewind);

	/*
	 * Check for mount privilege?
	 *
	 * If we don't have privilege then see if
	 * we have local permission to allow it
	 */
	error = secpolicy_fs_mount(cr, mvp, vfsp);
	if (error) {
		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
			goto out;

		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
			vattr_t		vattr;

			/*
			 * Make sure user is the owner of the mount point
			 * or has sufficient privileges.
			 */

			vattr.va_mask = AT_UID;

			vn_lock(mvp, LK_SHARED | LK_RETRY);
			if (VOP_GETATTR(mvp, &vattr, cr)) {
				VOP_UNLOCK1(mvp);
				goto out;
			}

			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
				VOP_UNLOCK1(mvp);
				goto out;
			}
			VOP_UNLOCK1(mvp);
		}

		/* Delegated mount granted: still strip setuid-type options. */
		secpolicy_fs_mount_clearopts(cr, vfsp);
	}

	/*
	 * Refuse to mount a filesystem if we are in a local zone and the
	 * dataset is not visible.
	 */
	if (!INGLOBALZONE(curproc) &&
	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
		error = SET_ERROR(EPERM);
		goto out;
	}

	vfsp->vfs_flag |= MNT_NFS4ACLS;

	/*
	 * When doing a remount, we simply refresh our temporary properties
	 * according to those options set in the current VFS options.
	 */
	if (vfsp->vfs_flag & MS_REMOUNT) {
		zfsvfs_t *zfsvfs = vfsp->vfs_data;

		/*
		 * Refresh mount options with z_teardown_lock blocking I/O while
		 * the filesystem is in an inconsistent state.
		 * The lock also serializes this code with filesystem
		 * manipulations between entry to zfs_suspend_fs() and return
		 * from zfs_resume_fs().
		 */
		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
		zfs_unregister_callbacks(zfsvfs);
		error = zfs_register_callbacks(vfsp);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
		goto out;
	}

	/* Initial root mount: try hard to import the requested root pool. */
	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
		char pname[MAXNAMELEN];

		error = getpoolname(osname, pname);
		if (error == 0)
			error = spa_import_rootpool(pname, checkpointrewind);
		if (error)
			goto out;
	}
	DROP_GIANT();
	error = zfs_domount(vfsp, osname);
	PICKUP_GIANT();

out:
	return (error);
}
1421 
/*
 * VFS statfs entry point: fill 'statp' with space, inode, and naming
 * information for this filesystem.  Block counts are reported in units
 * of SPA_MINBLOCKSIZE so that used+free is stable even though the pool
 * uses multiple block sizes internally.
 */
static int
zfs_statfs(vfs_t *vfsp, struct statfs *statp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	uint64_t refdbytes, availbytes, usedobjs, availobjs;

	statp->f_version = STATFS_VERSION;

	/* NOTE(review): ZFS_ENTER presumably returns an error from here
	 * if the filesystem is torn down -- macro defined elsewhere. */
	ZFS_ENTER(zfsvfs);

	dmu_objset_space(zfsvfs->z_os,
	    &refdbytes, &availbytes, &usedobjs, &availobjs);

	/*
	 * The underlying storage pool actually uses multiple block sizes.
	 * We report the fragsize as the smallest block size we support,
	 * and we report our blocksize as the filesystem's maximum blocksize.
	 */
	statp->f_bsize = SPA_MINBLOCKSIZE;
	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;

	/*
	 * The following report "total" blocks of various kinds in the
	 * file system, but reported in terms of f_frsize - the
	 * "fragment" size.
	 */

	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
	/* f_bsize == SPA_MINBLOCKSIZE, so this matches the shift above. */
	statp->f_bfree = availbytes / statp->f_bsize;
	statp->f_bavail = statp->f_bfree; /* no root reservation */

	/*
	 * statvfs() should really be called statufs(), because it assumes
	 * static metadata.  ZFS doesn't preallocate files, so the best
	 * we can do is report the max that could possibly fit in f_files,
	 * and that minus the number actually used in f_ffree.
	 * For f_ffree, report the smaller of the number of object available
	 * and the number of blocks (each object will take at least a block).
	 */
	statp->f_ffree = MIN(availobjs, statp->f_bfree);
	statp->f_files = statp->f_ffree + usedobjs;

	/*
	 * We're a zfs filesystem.
	 */
	strlcpy(statp->f_fstypename, "zfs",
	    sizeof (statp->f_fstypename));

	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
	    sizeof (statp->f_mntfromname));
	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
	    sizeof (statp->f_mntonname));

	statp->f_namemax = MAXNAMELEN - 1;

	ZFS_EXIT(zfsvfs);
	return (0);
}
1480 
1481 static int
zfs_root(vfs_t * vfsp,int flags,vnode_t ** vpp)1482 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
1483 {
1484 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1485 	znode_t *rootzp;
1486 	int error;
1487 
1488 	ZFS_ENTER(zfsvfs);
1489 
1490 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
1491 	if (error == 0)
1492 		*vpp = ZTOV(rootzp);
1493 
1494 	ZFS_EXIT(zfsvfs);
1495 
1496 	if (error == 0) {
1497 		error = vn_lock(*vpp, flags);
1498 		if (error != 0) {
1499 			VN_RELE(*vpp);
1500 			*vpp = NULL;
1501 		}
1502 	}
1503 	return (error);
1504 }
1505 
1506 /*
1507  * Teardown the zfsvfs::z_os.
1508  *
1509  * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
1510  * and 'z_teardown_inactive_lock' held.
1511  */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t	*zp;
	dsl_dir_t *dd;

	/*
	 * If someone has not already unmounted this file system,
	 * drain the zrele_taskq to ensure all active references to the
	 * zfsvfs_t have been handled only then can it be safely destroyed.
	 */
	if (zfsvfs->z_os) {
		/*
		 * If we're unmounting we have to wait for the list to
		 * drain completely.
		 *
		 * If we're not unmounting there's no guarantee the list
		 * will drain completely, but zreles run from the taskq
		 * may add the parents of dir-based xattrs to the taskq
		 * so we want to wait for these.
		 *
		 * We can safely read z_nr_znodes without locking because the
		 * VFS has already blocked operations which add to the
		 * z_all_znodes list and thus increment z_nr_znodes.
		 */
		int round = 0;
		while (zfsvfs->z_nr_znodes > 0) {
			taskq_wait_outstanding(dsl_pool_zrele_taskq(
			    dmu_objset_pool(zfsvfs->z_os)), 0);
			/* When not unmounting, wait at most two rounds. */
			if (++round > 1 && !unmounting)
				break;
		}
	}
	/* Blocks out all new VOPs from here on (write-held on return
	 * when !unmounting -- see the comment above this function). */
	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
#ifdef FREEBSD_NAMECACHE
#if __FreeBSD_version >= 1300117
		cache_purgevfs(zfsvfs->z_parent->z_vfs);
#else
		cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
#endif
#endif
	}

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
		return (SET_ERROR(EIO));
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp))
		if (zp->z_sa_hdl) {
			ASSERT(ZTOV(zp)->v_count >= 0);
			zfs_znode_dmu_fini(zp);
		}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data
	 */
	if (!zfs_is_readonly(zfsvfs))
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	dmu_objset_evict_dbufs(zfsvfs->z_os);
	/* Wake anything blocked in dsl_dir activity waits. */
	dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
	dsl_dir_cancel_waiters(dd);

	return (0);
}
1636 
/*
 * VFS unmount entry point.  Checks unmount privilege (secpolicy or ZFS
 * delegated mount permission), unmounts any snapshots under .zfs,
 * flushes and tears down the filesystem, then releases the objset and
 * frees the zfsvfs.
 */
/*ARGSUSED*/
static int
zfs_umount(vfs_t *vfsp, int fflag)
{
	kthread_t *td = curthread;
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	objset_t *os;
	cred_t *cr = td->td_ucred;
	int ret;

	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		/* Fall back to delegated ZFS "mount" permission. */
		if (dsl_deleg_access((char *)vfsp->vfs_resource,
		    ZFS_DELEG_PERM_MOUNT, cr))
			return (ret);
	}

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL) {
		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
			return (ret);
	}

	if (fflag & MS_FORCE) {
		/*
		 * Mark file system as unmounted before calling
		 * vflush(FORCECLOSE). This way we ensure no future vnops
		 * will be called and risk operating on DOOMED vnodes.
		 */
		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
		zfsvfs->z_unmounted = B_TRUE;
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
	}

	/*
	 * Flush all the files.
	 */
	ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
	if (ret != 0)
		return (ret);
	/*
	 * Cancel the pending unlinked-drain task; if it is already
	 * running, wait for it to finish instead.
	 */
	while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
	    &zfsvfs->z_unlinked_drain_task, NULL) != 0)
		taskqueue_drain(zfsvfs_taskq->tq_queue,
		    &zfsvfs->z_unlinked_drain_task);

	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_disown(os, B_TRUE, zfsvfs);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
	zfs_freevfs(vfsp);

	return (0);
}
1715 
/*
 * VFS vget entry point: translate an inode number into a locked vnode
 * in *vpp.  Virtual .zfs entries are refused with EOPNOTSUPP so NFS
 * falls back to LOOKUP; unlinked-but-open files are refused with
 * EINVAL.
 */
static int
zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
{
	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
	znode_t		*zp;
	int 		err;

	/*
	 * zfs_zget() can't operate on virtual entries like .zfs/ or
	 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
	 * This will make NFS to switch to LOOKUP instead of using VGET.
	 */
	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
		return (EOPNOTSUPP);

	ZFS_ENTER(zfsvfs);
	err = zfs_zget(zfsvfs, ino, &zp);
	if (err == 0 && zp->z_unlinked) {
		/* Object exists but is on the unlinked list: drop it. */
		vrele(ZTOV(zp));
		err = EINVAL;
	}
	if (err == 0)
		*vpp = ZTOV(zp);
	ZFS_EXIT(zfsvfs);
	/* Lock the vnode after leaving the teardown section. */
	if (err == 0) {
		err = vn_lock(*vpp, flags);
		if (err != 0)
			vrele(*vpp);
	}
	if (err != 0)
		*vpp = NULL;
	return (err);
}
1750 
/*
 * VFS checkexp entry point for NFS exports.  The signature changed in
 * FreeBSD 1300098 (extflagsp widened, secflavors un-doubled), hence
 * the conditional prototypes.  Delegates to vfs_stdcheckexp() on the
 * parent filesystem's vfs_t.
 */
static int
#if __FreeBSD_version >= 1300098
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int *secflavors)
#else
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors)
#endif
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	/*
	 * If this is regular file system vfsp is the same as
	 * zfsvfs->z_parent->z_vfs, but if it is snapshot,
	 * zfsvfs->z_parent->z_vfs represents parent file system
	 * which we have to use here, because only this file system
	 * has mnt_export configured.
	 */
	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
	    credanonp, numsecflavors, secflavors));
}
1772 
/*
 * Both ZFS file-handle flavors must fit inside the generic struct fid
 * so they can round-trip through the NFS export code unchanged.
 */
CTASSERT(SHORT_FID_LEN <= sizeof (struct fid));
CTASSERT(LONG_FID_LEN <= sizeof (struct fid));
1775 
1776 static int
zfs_fhtovp(vfs_t * vfsp,fid_t * fidp,int flags,vnode_t ** vpp)1777 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
1778 {
1779 	struct componentname cn;
1780 	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
1781 	znode_t		*zp;
1782 	vnode_t		*dvp;
1783 	uint64_t	object = 0;
1784 	uint64_t	fid_gen = 0;
1785 	uint64_t	gen_mask;
1786 	uint64_t	zp_gen;
1787 	int 		i, err;
1788 
1789 	*vpp = NULL;
1790 
1791 	ZFS_ENTER(zfsvfs);
1792 
1793 	/*
1794 	 * On FreeBSD we can get snapshot's mount point or its parent file
1795 	 * system mount point depending if snapshot is already mounted or not.
1796 	 */
1797 	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
1798 		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
1799 		uint64_t	objsetid = 0;
1800 		uint64_t	setgen = 0;
1801 
1802 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
1803 			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
1804 
1805 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
1806 			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
1807 
1808 		ZFS_EXIT(zfsvfs);
1809 
1810 		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
1811 		if (err)
1812 			return (SET_ERROR(EINVAL));
1813 		ZFS_ENTER(zfsvfs);
1814 	}
1815 
1816 	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
1817 		zfid_short_t	*zfid = (zfid_short_t *)fidp;
1818 
1819 		for (i = 0; i < sizeof (zfid->zf_object); i++)
1820 			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
1821 
1822 		for (i = 0; i < sizeof (zfid->zf_gen); i++)
1823 			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
1824 	} else {
1825 		ZFS_EXIT(zfsvfs);
1826 		return (SET_ERROR(EINVAL));
1827 	}
1828 
1829 	/*
1830 	 * A zero fid_gen means we are in .zfs or the .zfs/snapshot
1831 	 * directory tree. If the object == zfsvfs->z_shares_dir, then
1832 	 * we are in the .zfs/shares directory tree.
1833 	 */
1834 	if ((fid_gen == 0 &&
1835 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
1836 	    (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
1837 		ZFS_EXIT(zfsvfs);
1838 		VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
1839 		if (object == ZFSCTL_INO_SNAPDIR) {
1840 			cn.cn_nameptr = "snapshot";
1841 			cn.cn_namelen = strlen(cn.cn_nameptr);
1842 			cn.cn_nameiop = LOOKUP;
1843 			cn.cn_flags = ISLASTCN | LOCKLEAF;
1844 			cn.cn_lkflags = flags;
1845 			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
1846 			vput(dvp);
1847 		} else if (object == zfsvfs->z_shares_dir) {
1848 			/*
1849 			 * XXX This branch must not be taken,
1850 			 * if it is, then the lookup below will
1851 			 * explode.
1852 			 */
1853 			cn.cn_nameptr = "shares";
1854 			cn.cn_namelen = strlen(cn.cn_nameptr);
1855 			cn.cn_nameiop = LOOKUP;
1856 			cn.cn_flags = ISLASTCN;
1857 			cn.cn_lkflags = flags;
1858 			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
1859 			vput(dvp);
1860 		} else {
1861 			*vpp = dvp;
1862 		}
1863 		return (err);
1864 	}
1865 
1866 	gen_mask = -1ULL >> (64 - 8 * i);
1867 
1868 	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
1869 	if ((err = zfs_zget(zfsvfs, object, &zp))) {
1870 		ZFS_EXIT(zfsvfs);
1871 		return (err);
1872 	}
1873 	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
1874 	    sizeof (uint64_t));
1875 	zp_gen = zp_gen & gen_mask;
1876 	if (zp_gen == 0)
1877 		zp_gen = 1;
1878 	if (zp->z_unlinked || zp_gen != fid_gen) {
1879 		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
1880 		vrele(ZTOV(zp));
1881 		ZFS_EXIT(zfsvfs);
1882 		return (SET_ERROR(EINVAL));
1883 	}
1884 
1885 	*vpp = ZTOV(zp);
1886 	ZFS_EXIT(zfsvfs);
1887 	err = vn_lock(*vpp, flags);
1888 	if (err == 0)
1889 		vnode_create_vobject(*vpp, zp->z_size, curthread);
1890 	else
1891 		*vpp = NULL;
1892 	return (err);
1893 }
1894 
1895 /*
1896  * Block out VOPs and close zfsvfs_t::z_os
1897  *
1898  * Note, if successful, then we return with the 'z_teardown_lock' and
1899  * 'z_teardown_inactive_lock' write held.  We leave ownership of the underlying
1900  * dataset and objset intact so that they can be atomically handed off during
1901  * a subsequent rollback or recv operation and the resume thereafter.
1902  */
1903 int
zfs_suspend_fs(zfsvfs_t * zfsvfs)1904 zfs_suspend_fs(zfsvfs_t *zfsvfs)
1905 {
1906 	int error;
1907 
1908 	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
1909 		return (error);
1910 
1911 	return (0);
1912 }
1913 
1914 /*
1915  * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
1916  * is an invariant across any of the operations that can be performed while the
1917  * filesystem was suspended.  Whether it succeeded or failed, the preconditions
1918  * are the same: the relevant objset and associated dataset are owned by
1919  * zfsvfs, held, and long held on entry.
1920  */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	int err;
	znode_t *zp;

	/* Both teardown locks must still be write-held from suspend. */
	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just update the objset_t, as the one we
	 * had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);

	err = zfsvfs_init(zfsvfs, os);
	if (err != 0)
		goto bail;

	/* Re-enable dsl_dir activity waiters cancelled during teardown. */
	ds->ds_dir->dd_activity_cancelled = B_FALSE;
	VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);

	zfs_set_fuid_feature(zfsvfs);

	/*
	 * Attempt to re-establish all the active znodes with
	 * their dbufs.  If a zfs_rezget() fails, then we'll let
	 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
	 * when they try to use their znode.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
		(void) zfs_rezget(zp);
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

bail:
	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	if (err) {
		/*
		 * Since we couldn't setup the sa framework, try to force
		 * unmount this file system.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
			vfs_ref(zfsvfs->z_vfs);
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
		}
	}
	return (err);
}
1981 
/*
 * VFS free callback: release the zfsvfs attached to 'vfsp' and drop
 * the global active-filesystem count taken in zfs_domount().
 */
static void
zfs_freevfs(vfs_t *vfsp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	zfsvfs_free(zfsvfs);

	atomic_dec_32(&zfs_active_fs_count);
}
1991 
1992 #ifdef __i386__
1993 static int desiredvnodes_backup;
1994 #include <sys/vmmeter.h>
1995 
1996 
1997 #include <vm/vm_page.h>
1998 #include <vm/vm_object.h>
1999 #include <vm/vm_kern.h>
2000 #include <vm/vm_map.h>
2001 #endif
2002 
/*
 * On i386 only: shrink the system-wide desiredvnodes target, since the
 * stock value is tuned for UFS inodes and is too large for ZFS's
 * bigger per-vnode footprint.  The original value is saved so
 * zfs_vnodes_adjust_back() can restore it on module unload.
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	int newdesiredvnodes;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * We calculate newdesiredvnodes the same way it is done in
	 * vntblinit(). If it is equal to desiredvnodes, it means that
	 * it wasn't tuned by the administrator and we can tune it down.
	 */
	newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof (struct vm_object) +
	    sizeof (struct vnode))));
	if (newdesiredvnodes == desiredvnodes)
		desiredvnodes = (3 * newdesiredvnodes) / 4;
#endif
}
2023 
/*
 * Undo zfs_vnodes_adjust(): restore the desiredvnodes value saved in
 * desiredvnodes_backup.  No-op on non-i386 platforms.
 */
static void
zfs_vnodes_adjust_back(void)
{

#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}
2032 
/*
 * Module-load initialization for the ZPL: print the version banner, set
 * up the .zfs control directory and znode subsystems, tune the system
 * vnode limit (i386 only), register the ZFS objset type with the DMU,
 * and create the zfsvfs taskq.
 */
void
zfs_init(void)
{

	printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");

	/*
	 * Initialize .zfs directory structures
	 */
	zfsctl_init();

	/*
	 * Initialize znode cache, vnode ops, etc...
	 */
	zfs_znode_init();

	/*
	 * Reduce number of vnodes. Originally number of vnodes is calculated
	 * with UFS inode in mind. We reduce it here, because it's too big for
	 * ZFS/i386.
	 */
	zfs_vnodes_adjust();

	/* Register zpl_get_file_info as the handler for ZFS objsets. */
	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);

	/* Single-threaded taskq used by the zfsvfs code. */
	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
}
2060 
/*
 * Module-unload teardown: release the resources acquired in zfs_init()
 * (taskq, .zfs control directory, znode subsystem, vnode-limit tuning).
 */
void
zfs_fini(void)
{
	taskq_destroy(zfsvfs_taskq);
	zfsctl_fini();
	zfs_znode_fini();
	zfs_vnodes_adjust_back();
}
2069 
2070 int
zfs_busy(void)2071 zfs_busy(void)
2072 {
2073 	return (zfs_active_fs_count != 0);
2074 }
2075 
/*
 * Release VOPs and unmount a suspended filesystem.
 *
 * Re-establishes a valid objset_t for the dataset (the old one may have
 * been evicted during the suspend), drops the teardown locks, and then
 * force-unmounts the vfs.  Always returns 0.
 */
int
zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	/* Caller must still hold both teardown locks from the suspend. */
	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just hold and rele it to update the
	 * objset_t, as the one we had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	/* Take the pool config lock around dmu_objset_from_ds(). */
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);
	zfsvfs->z_os = os;

	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	/*
	 * Try to force unmount this file system.
	 */
	(void) zfs_umount(zfsvfs->z_vfs, 0);
	zfsvfs->z_unmounted = B_TRUE;
	return (0);
}
2109 
2110 int
zfs_set_version(zfsvfs_t * zfsvfs,uint64_t newvers)2111 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
2112 {
2113 	int error;
2114 	objset_t *os = zfsvfs->z_os;
2115 	dmu_tx_t *tx;
2116 
2117 	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
2118 		return (SET_ERROR(EINVAL));
2119 
2120 	if (newvers < zfsvfs->z_version)
2121 		return (SET_ERROR(EINVAL));
2122 
2123 	if (zfs_spa_version_map(newvers) >
2124 	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
2125 		return (SET_ERROR(ENOTSUP));
2126 
2127 	tx = dmu_tx_create(os);
2128 	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
2129 	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2130 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
2131 		    ZFS_SA_ATTRS);
2132 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
2133 	}
2134 	error = dmu_tx_assign(tx, TXG_WAIT);
2135 	if (error) {
2136 		dmu_tx_abort(tx);
2137 		return (error);
2138 	}
2139 
2140 	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
2141 	    8, 1, &newvers, tx);
2142 
2143 	if (error) {
2144 		dmu_tx_commit(tx);
2145 		return (error);
2146 	}
2147 
2148 	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2149 		uint64_t sa_obj;
2150 
2151 		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
2152 		    SPA_VERSION_SA);
2153 		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
2154 		    DMU_OT_NONE, 0, tx);
2155 
2156 		error = zap_add(os, MASTER_NODE_OBJ,
2157 		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
2158 		ASSERT0(error);
2159 
2160 		VERIFY(0 == sa_set_sa_object(os, sa_obj));
2161 		sa_register_update_callback(os, zfs_sa_upgrade);
2162 	}
2163 
2164 	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
2165 	    "from %ju to %ju", (uintmax_t)zfsvfs->z_version,
2166 	    (uintmax_t)newvers);
2167 	dmu_tx_commit(tx);
2168 
2169 	zfsvfs->z_version = newvers;
2170 	os->os_version = newvers;
2171 
2172 	zfs_set_fuid_feature(zfsvfs);
2173 
2174 	return (0);
2175 }
2176 
2177 /*
2178  * Read a property stored within the master node.
2179  */
2180 int
zfs_get_zplprop(objset_t * os,zfs_prop_t prop,uint64_t * value)2181 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
2182 {
2183 	uint64_t *cached_copy = NULL;
2184 
2185 	/*
2186 	 * Figure out where in the objset_t the cached copy would live, if it
2187 	 * is available for the requested property.
2188 	 */
2189 	if (os != NULL) {
2190 		switch (prop) {
2191 		case ZFS_PROP_VERSION:
2192 			cached_copy = &os->os_version;
2193 			break;
2194 		case ZFS_PROP_NORMALIZE:
2195 			cached_copy = &os->os_normalization;
2196 			break;
2197 		case ZFS_PROP_UTF8ONLY:
2198 			cached_copy = &os->os_utf8only;
2199 			break;
2200 		case ZFS_PROP_CASE:
2201 			cached_copy = &os->os_casesensitivity;
2202 			break;
2203 		default:
2204 			break;
2205 		}
2206 	}
2207 	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
2208 		*value = *cached_copy;
2209 		return (0);
2210 	}
2211 
2212 	/*
2213 	 * If the property wasn't cached, look up the file system's value for
2214 	 * the property. For the version property, we look up a slightly
2215 	 * different string.
2216 	 */
2217 	const char *pname;
2218 	int error = ENOENT;
2219 	if (prop == ZFS_PROP_VERSION) {
2220 		pname = ZPL_VERSION_STR;
2221 	} else {
2222 		pname = zfs_prop_to_name(prop);
2223 	}
2224 
2225 	if (os != NULL) {
2226 		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
2227 		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
2228 	}
2229 
2230 	if (error == ENOENT) {
2231 		/* No value set, use the default value */
2232 		switch (prop) {
2233 		case ZFS_PROP_VERSION:
2234 			*value = ZPL_VERSION;
2235 			break;
2236 		case ZFS_PROP_NORMALIZE:
2237 		case ZFS_PROP_UTF8ONLY:
2238 			*value = 0;
2239 			break;
2240 		case ZFS_PROP_CASE:
2241 			*value = ZFS_CASE_SENSITIVE;
2242 			break;
2243 		case ZFS_PROP_ACLTYPE:
2244 			*value = ZFS_ACLTYPE_NFSV4;
2245 			break;
2246 		default:
2247 			return (error);
2248 		}
2249 		error = 0;
2250 	}
2251 
2252 	/*
2253 	 * If one of the methods for getting the property value above worked,
2254 	 * copy it into the objset_t's cache.
2255 	 */
2256 	if (error == 0 && cached_copy != NULL) {
2257 		*cached_copy = *value;
2258 	}
2259 
2260 	return (error);
2261 }
2262 
2263 /*
2264  * Return true if the corresponding vfs's unmounted flag is set.
2265  * Otherwise return false.
2266  * If this function returns true we know VFS unmount has been initiated.
2267  */
2268 boolean_t
zfs_get_vfs_flag_unmounted(objset_t * os)2269 zfs_get_vfs_flag_unmounted(objset_t *os)
2270 {
2271 	zfsvfs_t *zfvp;
2272 	boolean_t unmounted = B_FALSE;
2273 
2274 	ASSERT(dmu_objset_type(os) == DMU_OST_ZFS);
2275 
2276 	mutex_enter(&os->os_user_ptr_lock);
2277 	zfvp = dmu_objset_get_user(os);
2278 	if (zfvp != NULL && zfvp->z_vfs != NULL &&
2279 	    (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT))
2280 		unmounted = B_TRUE;
2281 	mutex_exit(&os->os_user_ptr_lock);
2282 
2283 	return (unmounted);
2284 }
2285 
2286 #ifdef _KERNEL
2287 void
zfsvfs_update_fromname(const char * oldname,const char * newname)2288 zfsvfs_update_fromname(const char *oldname, const char *newname)
2289 {
2290 	char tmpbuf[MAXPATHLEN];
2291 	struct mount *mp;
2292 	char *fromname;
2293 	size_t oldlen;
2294 
2295 	oldlen = strlen(oldname);
2296 
2297 	mtx_lock(&mountlist_mtx);
2298 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
2299 		fromname = mp->mnt_stat.f_mntfromname;
2300 		if (strcmp(fromname, oldname) == 0) {
2301 			(void) strlcpy(fromname, newname,
2302 			    sizeof (mp->mnt_stat.f_mntfromname));
2303 			continue;
2304 		}
2305 		if (strncmp(fromname, oldname, oldlen) == 0 &&
2306 		    (fromname[oldlen] == '/' || fromname[oldlen] == '@')) {
2307 			(void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s",
2308 			    newname, fromname + oldlen);
2309 			(void) strlcpy(fromname, tmpbuf,
2310 			    sizeof (mp->mnt_stat.f_mntfromname));
2311 			continue;
2312 		}
2313 	}
2314 	mtx_unlock(&mountlist_mtx);
2315 }
2316 #endif
2317