1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2014 Integros [integros.com]
26 * Copyright 2017 Nexenta Systems, Inc.
27 */
28
29 /* Portions Copyright 2007 Jeremy Teo */
30 /* Portions Copyright 2010 Robert Milkowski */
31
32
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/time.h>
36 #include <sys/systm.h>
37 #include <sys/sysmacros.h>
38 #include <sys/resource.h>
39 #include <sys/vfs.h>
40 #include <sys/endian.h>
41 #include <sys/vm.h>
42 #include <sys/vnode.h>
43 #if __FreeBSD_version >= 1300102
44 #include <sys/smr.h>
45 #endif
46 #include <sys/dirent.h>
47 #include <sys/file.h>
48 #include <sys/stat.h>
49 #include <sys/kmem.h>
50 #include <sys/taskq.h>
51 #include <sys/uio.h>
52 #include <sys/atomic.h>
53 #include <sys/namei.h>
54 #include <sys/mman.h>
55 #include <sys/cmn_err.h>
56 #include <sys/kdb.h>
57 #include <sys/sysproto.h>
58 #include <sys/errno.h>
59 #include <sys/unistd.h>
60 #include <sys/zfs_dir.h>
61 #include <sys/zfs_ioctl.h>
62 #include <sys/fs/zfs.h>
63 #include <sys/dmu.h>
64 #include <sys/dmu_objset.h>
65 #include <sys/spa.h>
66 #include <sys/txg.h>
67 #include <sys/dbuf.h>
68 #include <sys/zap.h>
69 #include <sys/sa.h>
70 #include <sys/policy.h>
71 #include <sys/sunddi.h>
72 #include <sys/filio.h>
73 #include <sys/sid.h>
74 #include <sys/zfs_ctldir.h>
75 #include <sys/zfs_fuid.h>
76 #include <sys/zfs_quota.h>
77 #include <sys/zfs_sa.h>
78 #include <sys/zfs_rlock.h>
79 #include <sys/extdirent.h>
80 #include <sys/bio.h>
81 #include <sys/buf.h>
82 #include <sys/sched.h>
83 #include <sys/acl.h>
84 #include <sys/vmmeter.h>
85 #include <vm/vm_param.h>
86 #include <sys/zil.h>
87 #include <sys/zfs_vnops.h>
88
89 #include <vm/vm_object.h>
90
91 #include <sys/extattr.h>
92 #include <sys/priv.h>
93
/* Provide a zero fallback when the kernel headers do not define it. */
#ifndef VN_OPEN_INVFS
#define	VN_OPEN_INVFS	0x0
#endif

VFS_SMR_DECLARE;

/*
 * vm_page_wire_lock()/vm_page_wire_unlock() bracket the page hold/unwire
 * calls in this file.  From __FreeBSD_version 1300047 on they expand to
 * nothing; on older kernels they take and drop the page lock.
 */
#if __FreeBSD_version >= 1300047
#define	vm_page_wire_lock(pp)
#define	vm_page_wire_unlock(pp)
#else
#define	vm_page_wire_lock(pp) vm_page_lock(pp)
#define	vm_page_wire_unlock(pp) vm_page_unlock(pp)
#endif

/*
 * VNCHECKREF(vp): when DEBUG_VFS_LOCKS is defined, assert that the vnode's
 * hold count and use count are both positive; otherwise it expands to
 * nothing.
 */
#ifdef DEBUG_VFS_LOCKS
#define	VNCHECKREF(vp)				  \
	VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp,	\
	    ("%s: wrong ref counts", __func__));
#else
#define	VNCHECKREF(vp)
#endif

/* cookie_t is uint64_t from __FreeBSD_version 1400045 on, ulong_t before. */
#if __FreeBSD_version >= 1400045
typedef uint64_t cookie_t;
#else
typedef ulong_t cookie_t;
#endif
121
122 /*
123 * Programming rules.
124 *
125 * Each vnode op performs some logical unit of work. To do this, the ZPL must
126 * properly lock its in-core state, create a DMU transaction, do the work,
127 * record this work in the intent log (ZIL), commit the DMU transaction,
128 * and wait for the intent log to commit if it is a synchronous operation.
129 * Moreover, the vnode ops must work in both normal and log replay context.
130 * The ordering of events is important to avoid deadlocks and references
131 * to freed memory. The example below illustrates the following Big Rules:
132 *
133 * (1) A check must be made in each zfs thread for a mounted file system.
134 * This is done avoiding races using ZFS_ENTER(zfsvfs).
135 * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes
136 * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros
137 * can return EIO from the calling function.
138 *
139 * (2) VN_RELE() should always be the last thing except for zil_commit()
140 * (if necessary) and ZFS_EXIT(). This is for 3 reasons:
141 * First, if it's the last reference, the vnode/znode
142 * can be freed, so the zp may point to freed memory. Second, the last
143 * reference will call zfs_zinactive(), which may induce a lot of work --
144 * pushing cached pages (which acquires range locks) and syncing out
145 * cached atime changes. Third, zfs_zinactive() may require a new tx,
146 * which could deadlock the system if you were already holding one.
147 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
148 *
149 * (3) All range locks must be grabbed before calling dmu_tx_assign(),
150 * as they can span dmu_tx_assign() calls.
151 *
152 * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
153 * dmu_tx_assign(). This is critical because we don't want to block
154 * while holding locks.
155 *
156 * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This
157 * reduces lock contention and CPU usage when we must wait (note that if
158 * throughput is constrained by the storage, nearly every transaction
159 * must wait).
160 *
161 * Note, in particular, that if a lock is sometimes acquired before
162 * the tx assigns, and sometimes after (e.g. z_lock), then failing
163 * to use a non-blocking assign can deadlock the system. The scenario:
164 *
165 * Thread A has grabbed a lock before calling dmu_tx_assign().
166 * Thread B is in an already-assigned tx, and blocks for this lock.
167 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
168 * forever, because the previous txg can't quiesce until B's tx commits.
169 *
170 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
171 * then drop all locks, call dmu_tx_wait(), and try again. On subsequent
172 * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
173 * to indicate that this operation has already called dmu_tx_wait().
174 * This will ensure that we don't retry forever, waiting a short bit
175 * each time.
176 *
177 * (5) If the operation succeeded, generate the intent log entry for it
178 * before dropping locks. This ensures that the ordering of events
179 * in the intent log matches the order in which they actually occurred.
180 * During ZIL replay the zfs_log_* functions will update the sequence
181 * number to indicate the zil transaction has replayed.
182 *
183 * (6) At the end of each vnode op, the DMU tx must always commit,
184 * regardless of whether there were any errors.
185 *
186 * (7) After dropping all locks, invoke zil_commit(zilog, foid)
187 * to ensure that synchronous semantics are provided when necessary.
188 *
189 * In general, this is how things should be ordered in each vnode op:
190 *
191 * ZFS_ENTER(zfsvfs); // exit if unmounted
192 * top:
193 * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD())
194 * rw_enter(...); // grab any other locks you need
195 * tx = dmu_tx_create(...); // get DMU tx
196 * dmu_tx_hold_*(); // hold each object you might modify
197 * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
198 * if (error) {
199 * rw_exit(...); // drop locks
200 * zfs_dirent_unlock(dl); // unlock directory entry
201 * VN_RELE(...); // release held vnodes
202 * if (error == ERESTART) {
203 * waited = B_TRUE;
204 * dmu_tx_wait(tx);
205 * dmu_tx_abort(tx);
206 * goto top;
207 * }
208 * dmu_tx_abort(tx); // abort DMU tx
209 * ZFS_EXIT(zfsvfs); // finished in zfs
210 * return (error); // really out of space
211 * }
212 * error = do_real_work(); // do whatever this VOP does
213 * if (error == 0)
214 * zfs_log_*(...); // on success, make ZIL entry
215 * dmu_tx_commit(tx); // commit DMU tx -- error or not
216 * rw_exit(...); // drop locks
217 * zfs_dirent_unlock(dl); // unlock directory entry
218 * VN_RELE(...); // release held vnodes
219 * zil_commit(zilog, foid); // synchronous when necessary
220 * ZFS_EXIT(zfsvfs); // finished in zfs
221 * return (error); // done, report error
222 */
223
224 /* ARGSUSED */
225 static int
zfs_open(vnode_t ** vpp,int flag,cred_t * cr)226 zfs_open(vnode_t **vpp, int flag, cred_t *cr)
227 {
228 znode_t *zp = VTOZ(*vpp);
229 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
230
231 ZFS_ENTER(zfsvfs);
232 ZFS_VERIFY_ZP(zp);
233
234 if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
235 ((flag & FAPPEND) == 0)) {
236 ZFS_EXIT(zfsvfs);
237 return (SET_ERROR(EPERM));
238 }
239
240 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
241 ZTOV(zp)->v_type == VREG &&
242 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
243 if (fs_vscan(*vpp, cr, 0) != 0) {
244 ZFS_EXIT(zfsvfs);
245 return (SET_ERROR(EACCES));
246 }
247 }
248
249 /* Keep a count of the synchronous opens in the znode */
250 if (flag & (FSYNC | FDSYNC))
251 atomic_inc_32(&zp->z_sync_cnt);
252
253 ZFS_EXIT(zfsvfs);
254 return (0);
255 }
256
257 /* ARGSUSED */
258 static int
zfs_close(vnode_t * vp,int flag,int count,offset_t offset,cred_t * cr)259 zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
260 {
261 znode_t *zp = VTOZ(vp);
262 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
263
264 ZFS_ENTER(zfsvfs);
265 ZFS_VERIFY_ZP(zp);
266
267 /* Decrement the synchronous opens in the znode */
268 if ((flag & (FSYNC | FDSYNC)) && (count == 1))
269 atomic_dec_32(&zp->z_sync_cnt);
270
271 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
272 ZTOV(zp)->v_type == VREG &&
273 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
274 VERIFY0(fs_vscan(vp, cr, 1));
275
276 ZFS_EXIT(zfsvfs);
277 return (0);
278 }
279
280 /* ARGSUSED */
281 static int
zfs_ioctl(vnode_t * vp,ulong_t com,intptr_t data,int flag,cred_t * cred,int * rvalp)282 zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
283 int *rvalp)
284 {
285 loff_t off;
286 int error;
287
288 switch (com) {
289 case _FIOFFS:
290 {
291 return (0);
292
293 /*
294 * The following two ioctls are used by bfu. Faking out,
295 * necessary to avoid bfu errors.
296 */
297 }
298 case _FIOGDIO:
299 case _FIOSDIO:
300 {
301 return (0);
302 }
303
304 case F_SEEK_DATA:
305 case F_SEEK_HOLE:
306 {
307 off = *(offset_t *)data;
308 /* offset parameter is in/out */
309 error = zfs_holey(VTOZ(vp), com, &off);
310 if (error)
311 return (error);
312 *(offset_t *)data = off;
313 return (0);
314 }
315 }
316 return (SET_ERROR(ENOTTY));
317 }
318
319 static vm_page_t
page_busy(vnode_t * vp,int64_t start,int64_t off,int64_t nbytes)320 page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
321 {
322 vm_object_t obj;
323 vm_page_t pp;
324 int64_t end;
325
326 /*
327 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
328 * aligned boundaries, if the range is not aligned. As a result a
329 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
330 * It may happen that all DEV_BSIZE subranges are marked clean and thus
331 * the whole page would be considered clean despite have some
332 * dirty data.
333 * For this reason we should shrink the range to DEV_BSIZE aligned
334 * boundaries before calling vm_page_clear_dirty.
335 */
336 end = rounddown2(off + nbytes, DEV_BSIZE);
337 off = roundup2(off, DEV_BSIZE);
338 nbytes = end - off;
339
340 obj = vp->v_object;
341 zfs_vmobject_assert_wlocked_12(obj);
342 #if __FreeBSD_version < 1300050
343 for (;;) {
344 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
345 pp->valid) {
346 if (vm_page_xbusied(pp)) {
347 /*
348 * Reference the page before unlocking and
349 * sleeping so that the page daemon is less
350 * likely to reclaim it.
351 */
352 vm_page_reference(pp);
353 vm_page_lock(pp);
354 zfs_vmobject_wunlock(obj);
355 vm_page_busy_sleep(pp, "zfsmwb", true);
356 zfs_vmobject_wlock(obj);
357 continue;
358 }
359 vm_page_sbusy(pp);
360 } else if (pp != NULL) {
361 ASSERT(!pp->valid);
362 pp = NULL;
363 }
364 if (pp != NULL) {
365 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
366 vm_object_pip_add(obj, 1);
367 pmap_remove_write(pp);
368 if (nbytes != 0)
369 vm_page_clear_dirty(pp, off, nbytes);
370 }
371 break;
372 }
373 #else
374 vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
375 VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
376 VM_ALLOC_IGN_SBUSY);
377 if (pp != NULL) {
378 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
379 vm_object_pip_add(obj, 1);
380 pmap_remove_write(pp);
381 if (nbytes != 0)
382 vm_page_clear_dirty(pp, off, nbytes);
383 }
384 #endif
385 return (pp);
386 }
387
388 static void
page_unbusy(vm_page_t pp)389 page_unbusy(vm_page_t pp)
390 {
391
392 vm_page_sunbusy(pp);
393 #if __FreeBSD_version >= 1300041
394 vm_object_pip_wakeup(pp->object);
395 #else
396 vm_object_pip_subtract(pp->object, 1);
397 #endif
398 }
399
400 #if __FreeBSD_version > 1300051
401 static vm_page_t
page_hold(vnode_t * vp,int64_t start)402 page_hold(vnode_t *vp, int64_t start)
403 {
404 vm_object_t obj;
405 vm_page_t m;
406
407 obj = vp->v_object;
408 vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
409 VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
410 VM_ALLOC_NOBUSY);
411 return (m);
412 }
413 #else
414 static vm_page_t
page_hold(vnode_t * vp,int64_t start)415 page_hold(vnode_t *vp, int64_t start)
416 {
417 vm_object_t obj;
418 vm_page_t pp;
419
420 obj = vp->v_object;
421 zfs_vmobject_assert_wlocked(obj);
422
423 for (;;) {
424 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
425 pp->valid) {
426 if (vm_page_xbusied(pp)) {
427 /*
428 * Reference the page before unlocking and
429 * sleeping so that the page daemon is less
430 * likely to reclaim it.
431 */
432 vm_page_reference(pp);
433 vm_page_lock(pp);
434 zfs_vmobject_wunlock(obj);
435 vm_page_busy_sleep(pp, "zfsmwb", true);
436 zfs_vmobject_wlock(obj);
437 continue;
438 }
439
440 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
441 vm_page_wire_lock(pp);
442 vm_page_hold(pp);
443 vm_page_wire_unlock(pp);
444
445 } else
446 pp = NULL;
447 break;
448 }
449 return (pp);
450 }
451 #endif
452
453 static void
page_unhold(vm_page_t pp)454 page_unhold(vm_page_t pp)
455 {
456
457 vm_page_wire_lock(pp);
458 #if __FreeBSD_version >= 1300035
459 vm_page_unwire(pp, PQ_ACTIVE);
460 #else
461 vm_page_unhold(pp);
462 #endif
463 vm_page_wire_unlock(pp);
464 }
465
466 /*
467 * When a file is memory mapped, we must keep the IO data synchronized
468 * between the DMU cache and the memory mapped pages. What this means:
469 *
470 * On Write: If we find a memory mapped page, we write to *both*
471 * the page and the dmu buffer.
472 */
473 void
update_pages(znode_t * zp,int64_t start,int len,objset_t * os)474 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
475 {
476 vm_object_t obj;
477 struct sf_buf *sf;
478 vnode_t *vp = ZTOV(zp);
479 caddr_t va;
480 int off;
481
482 ASSERT3P(vp->v_mount, !=, NULL);
483 obj = vp->v_object;
484 ASSERT3P(obj, !=, NULL);
485
486 off = start & PAGEOFFSET;
487 zfs_vmobject_wlock_12(obj);
488 #if __FreeBSD_version >= 1300041
489 vm_object_pip_add(obj, 1);
490 #endif
491 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
492 vm_page_t pp;
493 int nbytes = imin(PAGESIZE - off, len);
494
495 if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
496 zfs_vmobject_wunlock_12(obj);
497
498 va = zfs_map_page(pp, &sf);
499 (void) dmu_read(os, zp->z_id, start + off, nbytes,
500 va + off, DMU_READ_PREFETCH);
501 zfs_unmap_page(sf);
502
503 zfs_vmobject_wlock_12(obj);
504 page_unbusy(pp);
505 }
506 len -= nbytes;
507 off = 0;
508 }
509 #if __FreeBSD_version >= 1300041
510 vm_object_pip_wakeup(obj);
511 #else
512 vm_object_pip_wakeupn(obj, 0);
513 #endif
514 zfs_vmobject_wunlock_12(obj);
515 }
516
517 /*
518 * Read with UIO_NOCOPY flag means that sendfile(2) requests
519 * ZFS to populate a range of page cache pages with data.
520 *
521 * NOTE: this function could be optimized to pre-allocate
522 * all pages in advance, drain exclusive busy on all of them,
523 * map them into contiguous KVA region and populate them
524 * in one single dmu_read() call.
525 */
526 int
mappedread_sf(znode_t * zp,int nbytes,zfs_uio_t * uio)527 mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
528 {
529 vnode_t *vp = ZTOV(zp);
530 objset_t *os = zp->z_zfsvfs->z_os;
531 struct sf_buf *sf;
532 vm_object_t obj;
533 vm_page_t pp;
534 int64_t start;
535 caddr_t va;
536 int len = nbytes;
537 int error = 0;
538
539 ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
540 ASSERT3P(vp->v_mount, !=, NULL);
541 obj = vp->v_object;
542 ASSERT3P(obj, !=, NULL);
543 ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);
544
545 zfs_vmobject_wlock_12(obj);
546 for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
547 int bytes = MIN(PAGESIZE, len);
548
549 pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
550 VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
551 if (vm_page_none_valid(pp)) {
552 zfs_vmobject_wunlock_12(obj);
553 va = zfs_map_page(pp, &sf);
554 error = dmu_read(os, zp->z_id, start, bytes, va,
555 DMU_READ_PREFETCH);
556 if (bytes != PAGESIZE && error == 0)
557 bzero(va + bytes, PAGESIZE - bytes);
558 zfs_unmap_page(sf);
559 zfs_vmobject_wlock_12(obj);
560 #if __FreeBSD_version >= 1300081
561 if (error == 0) {
562 vm_page_valid(pp);
563 vm_page_activate(pp);
564 vm_page_do_sunbusy(pp);
565 } else {
566 zfs_vmobject_wlock(obj);
567 if (!vm_page_wired(pp) && pp->valid == 0 &&
568 vm_page_busy_tryupgrade(pp))
569 vm_page_free(pp);
570 else
571 vm_page_sunbusy(pp);
572 zfs_vmobject_wunlock(obj);
573 }
574 #else
575 vm_page_do_sunbusy(pp);
576 vm_page_lock(pp);
577 if (error) {
578 if (pp->wire_count == 0 && pp->valid == 0 &&
579 !vm_page_busied(pp))
580 vm_page_free(pp);
581 } else {
582 pp->valid = VM_PAGE_BITS_ALL;
583 vm_page_activate(pp);
584 }
585 vm_page_unlock(pp);
586 #endif
587 } else {
588 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
589 vm_page_do_sunbusy(pp);
590 }
591 if (error)
592 break;
593 zfs_uio_advance(uio, bytes);
594 len -= bytes;
595 }
596 zfs_vmobject_wunlock_12(obj);
597 return (error);
598 }
599
600 /*
601 * When a file is memory mapped, we must keep the IO data synchronized
602 * between the DMU cache and the memory mapped pages. What this means:
603 *
604 * On Read: We "read" preferentially from memory mapped pages,
605 * else we default from the dmu buffer.
606 *
607 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
608 * the file is memory mapped.
609 */
610 int
mappedread(znode_t * zp,int nbytes,zfs_uio_t * uio)611 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
612 {
613 vnode_t *vp = ZTOV(zp);
614 vm_object_t obj;
615 int64_t start;
616 int len = nbytes;
617 int off;
618 int error = 0;
619
620 ASSERT3P(vp->v_mount, !=, NULL);
621 obj = vp->v_object;
622 ASSERT3P(obj, !=, NULL);
623
624 start = zfs_uio_offset(uio);
625 off = start & PAGEOFFSET;
626 zfs_vmobject_wlock_12(obj);
627 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
628 vm_page_t pp;
629 uint64_t bytes = MIN(PAGESIZE - off, len);
630
631 if ((pp = page_hold(vp, start))) {
632 struct sf_buf *sf;
633 caddr_t va;
634
635 zfs_vmobject_wunlock_12(obj);
636 va = zfs_map_page(pp, &sf);
637 error = vn_io_fault_uiomove(va + off, bytes,
638 GET_UIO_STRUCT(uio));
639 zfs_unmap_page(sf);
640 zfs_vmobject_wlock_12(obj);
641 page_unhold(pp);
642 } else {
643 zfs_vmobject_wunlock_12(obj);
644 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
645 uio, bytes);
646 zfs_vmobject_wlock_12(obj);
647 }
648 len -= bytes;
649 off = 0;
650 if (error)
651 break;
652 }
653 zfs_vmobject_wunlock_12(obj);
654 return (error);
655 }
656
657 int
zfs_write_simple(znode_t * zp,const void * data,size_t len,loff_t pos,size_t * presid)658 zfs_write_simple(znode_t *zp, const void *data, size_t len,
659 loff_t pos, size_t *presid)
660 {
661 int error = 0;
662 ssize_t resid;
663
664 error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
665 UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);
666
667 if (error) {
668 return (SET_ERROR(error));
669 } else if (presid == NULL) {
670 if (resid != 0) {
671 error = SET_ERROR(EIO);
672 }
673 } else {
674 *presid = resid;
675 }
676 return (error);
677 }
678
679 void
zfs_zrele_async(znode_t * zp)680 zfs_zrele_async(znode_t *zp)
681 {
682 vnode_t *vp = ZTOV(zp);
683 objset_t *os = ITOZSB(vp)->z_os;
684
685 VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
686 }
687
/*
 * Callback handed to vn_vget_ino_gen() for ".." lookups.  `arg' is the
 * already referenced parent vnode: publish it through vpp and lock it with
 * lkflags.  If locking fails the reference is dropped and the error is
 * returned.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	struct vnode *parent = arg;
	int error;

	*vpp = parent;
	error = vn_lock(parent, lkflags);
	if (error != 0)
		vrele(parent);
	return (error);
}
699
700 static int
zfs_lookup_lock(vnode_t * dvp,vnode_t * vp,const char * name,int lkflags)701 zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
702 {
703 znode_t *zdp = VTOZ(dvp);
704 zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
705 int error;
706 int ltype;
707
708 if (zfsvfs->z_replay == B_FALSE)
709 ASSERT_VOP_LOCKED(dvp, __func__);
710
711 if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
712 ASSERT3P(dvp, ==, vp);
713 vref(dvp);
714 ltype = lkflags & LK_TYPE_MASK;
715 if (ltype != VOP_ISLOCKED(dvp)) {
716 if (ltype == LK_EXCLUSIVE)
717 vn_lock(dvp, LK_UPGRADE | LK_RETRY);
718 else /* if (ltype == LK_SHARED) */
719 vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
720
721 /*
722 * Relock for the "." case could leave us with
723 * reclaimed vnode.
724 */
725 if (VN_IS_DOOMED(dvp)) {
726 vrele(dvp);
727 return (SET_ERROR(ENOENT));
728 }
729 }
730 return (0);
731 } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
732 /*
733 * Note that in this case, dvp is the child vnode, and we
734 * are looking up the parent vnode - exactly reverse from
735 * normal operation. Unlocking dvp requires some rather
736 * tricky unlock/relock dance to prevent mp from being freed;
737 * use vn_vget_ino_gen() which takes care of all that.
738 *
739 * XXX Note that there is a time window when both vnodes are
740 * unlocked. It is possible, although highly unlikely, that
741 * during that window the parent-child relationship between
742 * the vnodes may change, for example, get reversed.
743 * In that case we would have a wrong lock order for the vnodes.
744 * All other filesystems seem to ignore this problem, so we
745 * do the same here.
746 * A potential solution could be implemented as follows:
747 * - using LK_NOWAIT when locking the second vnode and retrying
748 * if necessary
749 * - checking that the parent-child relationship still holds
750 * after locking both vnodes and retrying if it doesn't
751 */
752 error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
753 return (error);
754 } else {
755 error = vn_lock(vp, lkflags);
756 if (error != 0)
757 vrele(vp);
758 return (error);
759 }
760 }
761
/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held vnode reference for it.
 *
 *	IN:	dvp	- vnode of directory to search.
 *		nm	- name of entry to lookup.
 *		cnp	- componentname of the lookup; its cn_flags
 *			  (ISDOTDOT, ISLASTCN, MAKEENTRY) and cn_lkflags
 *			  are consulted, and SAVENAME may be set and
 *			  NOEXECCHECK / MAKEENTRY cleared in cn_flags.
 *		nameiop	- operation the lookup is for (LOOKUP, CREATE,
 *			  RENAME or DELETE are tested below).
 *		cr	- credentials of caller.
 *		flags	- LOOKUP_XATTR set if looking for an attribute.
 *		cached	- when true, the execute-permission check on dvp
 *			  is skipped.
 *
 *	OUT:	vpp	- vnode of located entry, NULL if not found.
 *
 *	RETURN:	0 on success, error code on failure.  For the last
 *		component of a CREATE or RENAME, ENOENT is translated to
 *		EJUSTRETURN.
 *
 * Timestamps:
 *	NA
 */
/* ARGSUSED */
static int
zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
    struct componentname *cnp, int nameiop, cred_t *cr, int flags,
    boolean_t cached)
{
	znode_t *zdp = VTOZ(dvp);
	znode_t *zp;
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
#if	__FreeBSD_version > 1300124
	seqc_t dvp_seqc;
#endif
	int error = 0;

	/*
	 * Fast path lookup, however we must skip DNLC lookup
	 * for case folding or normalizing lookups because the
	 * DNLC code only stores the passed in name.  This means
	 * creating 'a' and removing 'A' on a case insensitive
	 * file system would work, but DNLC still thinks 'a'
	 * exists and won't let you create it again on the next
	 * pass through fast path.
	 *
	 * What remains of the fast path here is a sanity check made
	 * before entering the file system: a non-xattr lookup needs a
	 * directory vnode whose znode still has an SA handle.
	 */
	if (!(flags & LOOKUP_XATTR)) {
		if (dvp->v_type != VDIR) {
			return (SET_ERROR(ENOTDIR));
		} else if (zdp->z_sa_hdl == NULL) {
			return (SET_ERROR(EIO));
		}
	}

	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
	    const char *, nm);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zdp);

#if	__FreeBSD_version > 1300124
	/* Sampled now; re-checked before a ".." result may be cached. */
	dvp_seqc = vn_seqc_read_notmodify(dvp);
#endif

	*vpp = NULL;

	if (flags & LOOKUP_XATTR) {
		/*
		 * If the xattr property is off, refuse the lookup request.
		 */
		if (!(zfsvfs->z_flags & ZSB_XATTR)) {
			ZFS_EXIT(zfsvfs);
			return (SET_ERROR(EOPNOTSUPP));
		}

		/*
		 * We don't allow recursive attributes..
		 * Maybe someday we will.
		 */
		if (zdp->z_pflags & ZFS_XATTR) {
			ZFS_EXIT(zfsvfs);
			return (SET_ERROR(EINVAL));
		}

		if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
		*vpp = ZTOV(zp);

		/*
		 * Do we have permission to get into attribute directory?
		 *
		 * NOTE(review): on failure the vnode is released but *vpp
		 * is left pointing at it; callers presumably ignore *vpp
		 * when an error is returned -- confirm.
		 */
		error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr);
		if (error) {
			vrele(ZTOV(zp));
		}

		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Check accessibility of directory if we're not coming in via
	 * VOP_CACHEDLOOKUP.
	 */
	if (!cached) {
#ifdef NOEXECCHECK
		/* NOEXECCHECK waives the check once and is then cleared. */
		if ((cnp->cn_flags & NOEXECCHECK) != 0) {
			cnp->cn_flags &= ~NOEXECCHECK;
		} else
#endif
		if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/* On UTF-8-only file systems the name must be valid UTF-8. */
	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}


	/*
	 * First handle the special cases.
	 */
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * If we are a snapshot mounted under .zfs, return
		 * the vp for the snapshot directory.
		 */
		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
			struct componentname cn;
			vnode_t *zfsctl_vp;
			int ltype;

			ZFS_EXIT(zfsvfs);
			/*
			 * Drop dvp's lock while the control directory of
			 * the parent file system is looked up, and retake
			 * the same lock type afterwards.
			 */
			ltype = VOP_ISLOCKED(dvp);
			VOP_UNLOCK1(dvp);
			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
			    &zfsctl_vp);
			if (error == 0) {
				cn.cn_nameptr = "snapshot";
				cn.cn_namelen = strlen(cn.cn_nameptr);
				cn.cn_nameiop = cnp->cn_nameiop;
				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
				cn.cn_lkflags = cnp->cn_lkflags;
				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
				vput(zfsctl_vp);
			}
			vn_lock(dvp, ltype | LK_RETRY);
			return (error);
		}
	}
	/* The control directory may only be looked up, never modified. */
	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
		ZFS_EXIT(zfsvfs);
		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
			return (SET_ERROR(ENOTSUP));
		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
		return (error);
	}

	/*
	 * The loop retries the lookup if the parent-child relationship
	 * changes during the dot-dot locking complexities.
	 */
	for (;;) {
		uint64_t parent;

		error = zfs_dirlook(zdp, nm, &zp);
		if (error == 0)
			*vpp = ZTOV(zp);

		ZFS_EXIT(zfsvfs);
		if (error != 0)
			break;

		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
		if (error != 0) {
			/*
			 * If we've got a locking error, then the vnode
			 * got reclaimed because of a force unmount.
			 * We never enter doomed vnodes into the name cache.
			 */
			*vpp = NULL;
			return (error);
		}

		/* Only ".." needs the parent re-validated below. */
		if ((cnp->cn_flags & ISDOTDOT) == 0)
			break;

		ZFS_ENTER(zfsvfs);
		if (zdp->z_sa_hdl == NULL) {
			error = SET_ERROR(EIO);
		} else {
			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
			    &parent, sizeof (parent));
		}
		if (error != 0) {
			ZFS_EXIT(zfsvfs);
			vput(ZTOV(zp));
			break;
		}
		/* Done if the vnode we locked is still dvp's parent. */
		if (zp->z_id == parent) {
			ZFS_EXIT(zfsvfs);
			break;
		}
		/* The parent changed while unlocked: drop it and retry. */
		vput(ZTOV(zp));
	}

	if (error != 0)
		*vpp = NULL;

	/* Translate errors and add SAVENAME when needed. */
	if (cnp->cn_flags & ISLASTCN) {
		switch (nameiop) {
		case CREATE:
		case RENAME:
			/* A missing target is what CREATE/RENAME hope for. */
			if (error == ENOENT) {
				error = EJUSTRETURN;
				cnp->cn_flags |= SAVENAME;
				break;
			}
			fallthrough;
		case DELETE:
			if (error == 0)
				cnp->cn_flags |= SAVENAME;
			break;
		}
	}

#if	__FreeBSD_version > 1300124
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
		 * handle races. In particular different callers may end up
		 * with different vnodes and will try to add conflicting
		 * entries to the namecache.
		 *
		 * While finding different result may be acceptable in face
		 * of concurrent modification, adding conflicting entries
		 * trips over an assert in the namecache.
		 *
		 * Ultimately let an entry through once everything settles.
		 */
		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
			cnp->cn_flags &= ~MAKEENTRY;
		}
	}
#endif

	/* Insert name into cache (as non-existent) if appropriate. */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(dvp, NULL, cnp);

	/*
	 * Insert name into cache if appropriate.  The last component of a
	 * DELETE or RENAME is not cached.
	 */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
		if (!(cnp->cn_flags & ISLASTCN) ||
		    (nameiop != DELETE && nameiop != RENAME)) {
			cache_enter(dvp, *vpp, cnp);
		}
	}

	return (error);
}
1027
1028 /*
1029 * Attempt to create a new entry in a directory. If the entry
1030 * already exists, truncate the file if permissible, else return
1031 * an error. Return the vp of the created or trunc'd file.
1032 *
1033 * IN: dvp - vnode of directory to put new file entry in.
1034 * name - name of new file entry.
1035 * vap - attributes of new file.
1036 * excl - flag indicating exclusive or non-exclusive mode.
1037 * mode - mode to open file with.
1038 * cr - credentials of caller.
1039 * flag - large file flag [UNUSED].
1040 * ct - caller context
1041 * vsecp - ACL to be set
1042 *
1043 * OUT: vpp - vnode of created or trunc'd entry.
1044 *
1045 * RETURN: 0 on success, error code on failure.
1046 *
1047 * Timestamps:
1048 * dvp - ctime|mtime updated if new entry created
1049 * vp - ctime|mtime always, atime if new
1050 */
1051
1052 /* ARGSUSED */
1053 int
zfs_create(znode_t * dzp,const char * name,vattr_t * vap,int excl,int mode,znode_t ** zpp,cred_t * cr,int flag,vsecattr_t * vsecp)1054 zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
1055 znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
1056 {
1057 znode_t *zp;
1058 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1059 zilog_t *zilog;
1060 objset_t *os;
1061 dmu_tx_t *tx;
1062 int error;
1063 uid_t uid = crgetuid(cr);
1064 gid_t gid = crgetgid(cr);
1065 uint64_t projid = ZFS_DEFAULT_PROJID;
1066 zfs_acl_ids_t acl_ids;
1067 boolean_t fuid_dirtied;
1068 uint64_t txtype;
1069 #ifdef DEBUG_VFS_LOCKS
1070 vnode_t *dvp = ZTOV(dzp);
1071 #endif
1072
1073 /*
1074 * If we have an ephemeral id, ACL, or XVATTR then
1075 * make sure file system is at proper version
1076 */
1077 if (zfsvfs->z_use_fuids == B_FALSE &&
1078 (vsecp || (vap->va_mask & AT_XVATTR) ||
1079 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1080 return (SET_ERROR(EINVAL));
1081
1082 ZFS_ENTER(zfsvfs);
1083 ZFS_VERIFY_ZP(dzp);
1084 os = zfsvfs->z_os;
1085 zilog = zfsvfs->z_log;
1086
1087 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
1088 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1089 ZFS_EXIT(zfsvfs);
1090 return (SET_ERROR(EILSEQ));
1091 }
1092
1093 if (vap->va_mask & AT_XVATTR) {
1094 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
1095 crgetuid(cr), cr, vap->va_type)) != 0) {
1096 ZFS_EXIT(zfsvfs);
1097 return (error);
1098 }
1099 }
1100
1101 *zpp = NULL;
1102
1103 if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
1104 vap->va_mode &= ~S_ISVTX;
1105
1106 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
1107 if (error) {
1108 ZFS_EXIT(zfsvfs);
1109 return (error);
1110 }
1111 ASSERT3P(zp, ==, NULL);
1112
1113 /*
1114 * Create a new file object and update the directory
1115 * to reference it.
1116 */
1117 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
1118 goto out;
1119 }
1120
1121 /*
1122 * We only support the creation of regular files in
1123 * extended attribute directories.
1124 */
1125
1126 if ((dzp->z_pflags & ZFS_XATTR) &&
1127 (vap->va_type != VREG)) {
1128 error = SET_ERROR(EINVAL);
1129 goto out;
1130 }
1131
1132 if ((error = zfs_acl_ids_create(dzp, 0, vap,
1133 cr, vsecp, &acl_ids)) != 0)
1134 goto out;
1135
1136 if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
1137 projid = zfs_inherit_projid(dzp);
1138 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
1139 zfs_acl_ids_free(&acl_ids);
1140 error = SET_ERROR(EDQUOT);
1141 goto out;
1142 }
1143
1144 getnewvnode_reserve_();
1145
1146 tx = dmu_tx_create(os);
1147
1148 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1149 ZFS_SA_BASE_ATTR_SIZE);
1150
1151 fuid_dirtied = zfsvfs->z_fuid_dirty;
1152 if (fuid_dirtied)
1153 zfs_fuid_txhold(zfsvfs, tx);
1154 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1155 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1156 if (!zfsvfs->z_use_sa &&
1157 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1158 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1159 0, acl_ids.z_aclp->z_acl_bytes);
1160 }
1161 error = dmu_tx_assign(tx, TXG_WAIT);
1162 if (error) {
1163 zfs_acl_ids_free(&acl_ids);
1164 dmu_tx_abort(tx);
1165 getnewvnode_drop_reserve();
1166 ZFS_EXIT(zfsvfs);
1167 return (error);
1168 }
1169 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1170 if (fuid_dirtied)
1171 zfs_fuid_sync(zfsvfs, tx);
1172
1173 (void) zfs_link_create(dzp, name, zp, tx, ZNEW);
1174 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1175 zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1176 vsecp, acl_ids.z_fuidp, vap);
1177 zfs_acl_ids_free(&acl_ids);
1178 dmu_tx_commit(tx);
1179
1180 getnewvnode_drop_reserve();
1181
1182 out:
1183 VNCHECKREF(dvp);
1184 if (error == 0) {
1185 *zpp = zp;
1186 }
1187
1188 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1189 zil_commit(zilog, 0);
1190
1191 ZFS_EXIT(zfsvfs);
1192 return (error);
1193 }
1194
1195 /*
1196 * Remove an entry from a directory.
1197 *
1198 * IN: dvp - vnode of directory to remove entry from.
1199 * name - name of entry to remove.
1200 * cr - credentials of caller.
1201 * ct - caller context
1202 * flags - case flags
1203 *
1204 * RETURN: 0 on success, error code on failure.
1205 *
1206 * Timestamps:
1207 * dvp - ctime|mtime
1208 * vp - ctime (if nlink > 0)
1209 */
1210
1211 /*ARGSUSED*/
1212 static int
zfs_remove_(vnode_t * dvp,vnode_t * vp,const char * name,cred_t * cr)1213 zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1214 {
1215 znode_t *dzp = VTOZ(dvp);
1216 znode_t *zp;
1217 znode_t *xzp;
1218 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1219 zilog_t *zilog;
1220 uint64_t xattr_obj;
1221 uint64_t obj = 0;
1222 dmu_tx_t *tx;
1223 boolean_t unlinked;
1224 uint64_t txtype;
1225 int error;
1226
1227
1228 ZFS_ENTER(zfsvfs);
1229 ZFS_VERIFY_ZP(dzp);
1230 zp = VTOZ(vp);
1231 ZFS_VERIFY_ZP(zp);
1232 zilog = zfsvfs->z_log;
1233
1234 xattr_obj = 0;
1235 xzp = NULL;
1236
1237 if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
1238 goto out;
1239 }
1240
1241 /*
1242 * Need to use rmdir for removing directories.
1243 */
1244 if (vp->v_type == VDIR) {
1245 error = SET_ERROR(EPERM);
1246 goto out;
1247 }
1248
1249 vnevent_remove(vp, dvp, name, ct);
1250
1251 obj = zp->z_id;
1252
1253 /* are there any extended attributes? */
1254 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1255 &xattr_obj, sizeof (xattr_obj));
1256 if (error == 0 && xattr_obj) {
1257 error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1258 ASSERT0(error);
1259 }
1260
1261 /*
1262 * We may delete the znode now, or we may put it in the unlinked set;
1263 * it depends on whether we're the last link, and on whether there are
1264 * other holds on the vnode. So we dmu_tx_hold() the right things to
1265 * allow for either case.
1266 */
1267 tx = dmu_tx_create(zfsvfs->z_os);
1268 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1269 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1270 zfs_sa_upgrade_txholds(tx, zp);
1271 zfs_sa_upgrade_txholds(tx, dzp);
1272
1273 if (xzp) {
1274 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1275 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1276 }
1277
1278 /* charge as an update -- would be nice not to charge at all */
1279 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1280
1281 /*
1282 * Mark this transaction as typically resulting in a net free of space
1283 */
1284 dmu_tx_mark_netfree(tx);
1285
1286 error = dmu_tx_assign(tx, TXG_WAIT);
1287 if (error) {
1288 dmu_tx_abort(tx);
1289 ZFS_EXIT(zfsvfs);
1290 return (error);
1291 }
1292
1293 /*
1294 * Remove the directory entry.
1295 */
1296 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
1297
1298 if (error) {
1299 dmu_tx_commit(tx);
1300 goto out;
1301 }
1302
1303 if (unlinked) {
1304 zfs_unlinked_add(zp, tx);
1305 vp->v_vflag |= VV_NOSYNC;
1306 }
1307 /* XXX check changes to linux vnops */
1308 txtype = TX_REMOVE;
1309 zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
1310
1311 dmu_tx_commit(tx);
1312 out:
1313
1314 if (xzp)
1315 vrele(ZTOV(xzp));
1316
1317 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1318 zil_commit(zilog, 0);
1319
1320
1321 ZFS_EXIT(zfsvfs);
1322 return (error);
1323 }
1324
1325
1326 static int
zfs_lookup_internal(znode_t * dzp,const char * name,vnode_t ** vpp,struct componentname * cnp,int nameiop)1327 zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
1328 struct componentname *cnp, int nameiop)
1329 {
1330 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1331 int error;
1332
1333 cnp->cn_nameptr = __DECONST(char *, name);
1334 cnp->cn_namelen = strlen(name);
1335 cnp->cn_nameiop = nameiop;
1336 cnp->cn_flags = ISLASTCN | SAVENAME;
1337 cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
1338 cnp->cn_cred = kcred;
1339 #if __FreeBSD_version < 1400037
1340 cnp->cn_thread = curthread;
1341 #endif
1342
1343 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
1344 struct vop_lookup_args a;
1345
1346 a.a_gen.a_desc = &vop_lookup_desc;
1347 a.a_dvp = ZTOV(dzp);
1348 a.a_vpp = vpp;
1349 a.a_cnp = cnp;
1350 error = vfs_cache_lookup(&a);
1351 } else {
1352 error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0,
1353 B_FALSE);
1354 }
1355 #ifdef ZFS_DEBUG
1356 if (error) {
1357 printf("got error %d on name %s on op %d\n", error, name,
1358 nameiop);
1359 kdb_backtrace();
1360 }
1361 #endif
1362 return (error);
1363 }
1364
1365 int
zfs_remove(znode_t * dzp,const char * name,cred_t * cr,int flags)1366 zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
1367 {
1368 vnode_t *vp;
1369 int error;
1370 struct componentname cn;
1371
1372 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1373 return (error);
1374
1375 error = zfs_remove_(ZTOV(dzp), vp, name, cr);
1376 vput(vp);
1377 return (error);
1378 }
1379 /*
1380 * Create a new directory and insert it into dvp using the name
1381 * provided. Return a pointer to the inserted directory.
1382 *
1383 * IN: dvp - vnode of directory to add subdir to.
1384 * dirname - name of new directory.
1385 * vap - attributes of new directory.
1386 * cr - credentials of caller.
1387 * ct - caller context
1388 * flags - case flags
1389 * vsecp - ACL to be set
1390 *
1391 * OUT: vpp - vnode of created directory.
1392 *
1393 * RETURN: 0 on success, error code on failure.
1394 *
1395 * Timestamps:
1396 * dvp - ctime|mtime updated
1397 * vp - ctime|mtime|atime updated
1398 */
1399 /*ARGSUSED*/
1400 int
zfs_mkdir(znode_t * dzp,const char * dirname,vattr_t * vap,znode_t ** zpp,cred_t * cr,int flags,vsecattr_t * vsecp)1401 zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
1402 cred_t *cr, int flags, vsecattr_t *vsecp)
1403 {
1404 znode_t *zp;
1405 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1406 zilog_t *zilog;
1407 uint64_t txtype;
1408 dmu_tx_t *tx;
1409 int error;
1410 uid_t uid = crgetuid(cr);
1411 gid_t gid = crgetgid(cr);
1412 zfs_acl_ids_t acl_ids;
1413 boolean_t fuid_dirtied;
1414
1415 ASSERT3U(vap->va_type, ==, VDIR);
1416
1417 /*
1418 * If we have an ephemeral id, ACL, or XVATTR then
1419 * make sure file system is at proper version
1420 */
1421 if (zfsvfs->z_use_fuids == B_FALSE &&
1422 ((vap->va_mask & AT_XVATTR) ||
1423 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1424 return (SET_ERROR(EINVAL));
1425
1426 ZFS_ENTER(zfsvfs);
1427 ZFS_VERIFY_ZP(dzp);
1428 zilog = zfsvfs->z_log;
1429
1430 if (dzp->z_pflags & ZFS_XATTR) {
1431 ZFS_EXIT(zfsvfs);
1432 return (SET_ERROR(EINVAL));
1433 }
1434
1435 if (zfsvfs->z_utf8 && u8_validate(dirname,
1436 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1437 ZFS_EXIT(zfsvfs);
1438 return (SET_ERROR(EILSEQ));
1439 }
1440
1441 if (vap->va_mask & AT_XVATTR) {
1442 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
1443 crgetuid(cr), cr, vap->va_type)) != 0) {
1444 ZFS_EXIT(zfsvfs);
1445 return (error);
1446 }
1447 }
1448
1449 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
1450 NULL, &acl_ids)) != 0) {
1451 ZFS_EXIT(zfsvfs);
1452 return (error);
1453 }
1454
1455 /*
1456 * First make sure the new directory doesn't exist.
1457 *
1458 * Existence is checked first to make sure we don't return
1459 * EACCES instead of EEXIST which can cause some applications
1460 * to fail.
1461 */
1462 *zpp = NULL;
1463
1464 if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
1465 zfs_acl_ids_free(&acl_ids);
1466 ZFS_EXIT(zfsvfs);
1467 return (error);
1468 }
1469 ASSERT3P(zp, ==, NULL);
1470
1471 if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
1472 zfs_acl_ids_free(&acl_ids);
1473 ZFS_EXIT(zfsvfs);
1474 return (error);
1475 }
1476
1477 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
1478 zfs_acl_ids_free(&acl_ids);
1479 ZFS_EXIT(zfsvfs);
1480 return (SET_ERROR(EDQUOT));
1481 }
1482
1483 /*
1484 * Add a new entry to the directory.
1485 */
1486 getnewvnode_reserve_();
1487 tx = dmu_tx_create(zfsvfs->z_os);
1488 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
1489 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1490 fuid_dirtied = zfsvfs->z_fuid_dirty;
1491 if (fuid_dirtied)
1492 zfs_fuid_txhold(zfsvfs, tx);
1493 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1494 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1495 acl_ids.z_aclp->z_acl_bytes);
1496 }
1497
1498 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1499 ZFS_SA_BASE_ATTR_SIZE);
1500
1501 error = dmu_tx_assign(tx, TXG_WAIT);
1502 if (error) {
1503 zfs_acl_ids_free(&acl_ids);
1504 dmu_tx_abort(tx);
1505 getnewvnode_drop_reserve();
1506 ZFS_EXIT(zfsvfs);
1507 return (error);
1508 }
1509
1510 /*
1511 * Create new node.
1512 */
1513 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1514
1515 if (fuid_dirtied)
1516 zfs_fuid_sync(zfsvfs, tx);
1517
1518 /*
1519 * Now put new name in parent dir.
1520 */
1521 (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW);
1522
1523 *zpp = zp;
1524
1525 txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
1526 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
1527 acl_ids.z_fuidp, vap);
1528
1529 zfs_acl_ids_free(&acl_ids);
1530
1531 dmu_tx_commit(tx);
1532
1533 getnewvnode_drop_reserve();
1534
1535 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1536 zil_commit(zilog, 0);
1537
1538 ZFS_EXIT(zfsvfs);
1539 return (0);
1540 }
1541
#if __FreeBSD_version < 1300124
/*
 * Local stand-in compiled only for FreeBSD versions below 1300124:
 * purge the name cache for the parent directory and then for the
 * directory being removed.
 */
static void
cache_vop_rmdir(struct vnode *dvp, struct vnode *vp)
{
	cache_purge(dvp);
	cache_purge(vp);
}
#endif
1551
1552 /*
1553 * Remove a directory subdir entry. If the current working
1554 * directory is the same as the subdir to be removed, the
1555 * remove will fail.
1556 *
1557 * IN: dvp - vnode of directory to remove from.
1558 * name - name of directory to be removed.
1559 * cwd - vnode of current working directory.
1560 * cr - credentials of caller.
1561 * ct - caller context
1562 * flags - case flags
1563 *
1564 * RETURN: 0 on success, error code on failure.
1565 *
1566 * Timestamps:
1567 * dvp - ctime|mtime updated
1568 */
1569 /*ARGSUSED*/
1570 static int
zfs_rmdir_(vnode_t * dvp,vnode_t * vp,const char * name,cred_t * cr)1571 zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1572 {
1573 znode_t *dzp = VTOZ(dvp);
1574 znode_t *zp = VTOZ(vp);
1575 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1576 zilog_t *zilog;
1577 dmu_tx_t *tx;
1578 int error;
1579
1580 ZFS_ENTER(zfsvfs);
1581 ZFS_VERIFY_ZP(dzp);
1582 ZFS_VERIFY_ZP(zp);
1583 zilog = zfsvfs->z_log;
1584
1585
1586 if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
1587 goto out;
1588 }
1589
1590 if (vp->v_type != VDIR) {
1591 error = SET_ERROR(ENOTDIR);
1592 goto out;
1593 }
1594
1595 vnevent_rmdir(vp, dvp, name, ct);
1596
1597 tx = dmu_tx_create(zfsvfs->z_os);
1598 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1599 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1600 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1601 zfs_sa_upgrade_txholds(tx, zp);
1602 zfs_sa_upgrade_txholds(tx, dzp);
1603 dmu_tx_mark_netfree(tx);
1604 error = dmu_tx_assign(tx, TXG_WAIT);
1605 if (error) {
1606 dmu_tx_abort(tx);
1607 ZFS_EXIT(zfsvfs);
1608 return (error);
1609 }
1610
1611 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
1612
1613 if (error == 0) {
1614 uint64_t txtype = TX_RMDIR;
1615 zfs_log_remove(zilog, tx, txtype, dzp, name,
1616 ZFS_NO_OBJECT, B_FALSE);
1617 }
1618
1619 dmu_tx_commit(tx);
1620
1621 cache_vop_rmdir(dvp, vp);
1622 out:
1623 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1624 zil_commit(zilog, 0);
1625
1626 ZFS_EXIT(zfsvfs);
1627 return (error);
1628 }
1629
1630 int
zfs_rmdir(znode_t * dzp,const char * name,znode_t * cwd,cred_t * cr,int flags)1631 zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
1632 {
1633 struct componentname cn;
1634 vnode_t *vp;
1635 int error;
1636
1637 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1638 return (error);
1639
1640 error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
1641 vput(vp);
1642 return (error);
1643 }
1644
1645 /*
1646 * Read as many directory entries as will fit into the provided
1647 * buffer from the given directory cursor position (specified in
1648 * the uio structure).
1649 *
1650 * IN: vp - vnode of directory to read.
1651 * uio - structure supplying read location, range info,
1652 * and return buffer.
1653 * cr - credentials of caller.
1654 * ct - caller context
1655 * flags - case flags
1656 *
1657 * OUT: uio - updated offset and range, buffer filled.
1658 * eofp - set to true if end-of-file detected.
1659 *
1660 * RETURN: 0 on success, error code on failure.
1661 *
1662 * Timestamps:
1663 * vp - atime updated
1664 *
1665 * Note that the low 4 bits of the cookie returned by zap is always zero.
1666 * This allows us to use the low range for "special" directory entries:
1667 * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem,
1668 * we use the offset 2 for the '.zfs' directory.
1669 */
1670 /* ARGSUSED */
1671 static int
zfs_readdir(vnode_t * vp,zfs_uio_t * uio,cred_t * cr,int * eofp,int * ncookies,cookie_t ** cookies)1672 zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
1673 int *ncookies, cookie_t **cookies)
1674 {
1675 znode_t *zp = VTOZ(vp);
1676 iovec_t *iovp;
1677 edirent_t *eodp;
1678 dirent64_t *odp;
1679 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1680 objset_t *os;
1681 caddr_t outbuf;
1682 size_t bufsize;
1683 zap_cursor_t zc;
1684 zap_attribute_t zap;
1685 uint_t bytes_wanted;
1686 uint64_t offset; /* must be unsigned; checks for < 1 */
1687 uint64_t parent;
1688 int local_eof;
1689 int outcount;
1690 int error;
1691 uint8_t prefetch;
1692 boolean_t check_sysattrs;
1693 uint8_t type;
1694 int ncooks;
1695 cookie_t *cooks = NULL;
1696 int flags = 0;
1697
1698 ZFS_ENTER(zfsvfs);
1699 ZFS_VERIFY_ZP(zp);
1700
1701 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
1702 &parent, sizeof (parent))) != 0) {
1703 ZFS_EXIT(zfsvfs);
1704 return (error);
1705 }
1706
1707 /*
1708 * If we are not given an eof variable,
1709 * use a local one.
1710 */
1711 if (eofp == NULL)
1712 eofp = &local_eof;
1713
1714 /*
1715 * Check for valid iov_len.
1716 */
1717 if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
1718 ZFS_EXIT(zfsvfs);
1719 return (SET_ERROR(EINVAL));
1720 }
1721
1722 /*
1723 * Quit if directory has been removed (posix)
1724 */
1725 if ((*eofp = zp->z_unlinked) != 0) {
1726 ZFS_EXIT(zfsvfs);
1727 return (0);
1728 }
1729
1730 error = 0;
1731 os = zfsvfs->z_os;
1732 offset = zfs_uio_offset(uio);
1733 prefetch = zp->z_zn_prefetch;
1734
1735 /*
1736 * Initialize the iterator cursor.
1737 */
1738 if (offset <= 3) {
1739 /*
1740 * Start iteration from the beginning of the directory.
1741 */
1742 zap_cursor_init(&zc, os, zp->z_id);
1743 } else {
1744 /*
1745 * The offset is a serialized cursor.
1746 */
1747 zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
1748 }
1749
1750 /*
1751 * Get space to change directory entries into fs independent format.
1752 */
1753 iovp = GET_UIO_STRUCT(uio)->uio_iov;
1754 bytes_wanted = iovp->iov_len;
1755 if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
1756 bufsize = bytes_wanted;
1757 outbuf = kmem_alloc(bufsize, KM_SLEEP);
1758 odp = (struct dirent64 *)outbuf;
1759 } else {
1760 bufsize = bytes_wanted;
1761 outbuf = NULL;
1762 odp = (struct dirent64 *)iovp->iov_base;
1763 }
1764 eodp = (struct edirent *)odp;
1765
1766 if (ncookies != NULL) {
1767 /*
1768 * Minimum entry size is dirent size and 1 byte for a file name.
1769 */
1770 ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
1771 sizeof (((struct dirent *)NULL)->d_name) + 1);
1772 cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
1773 *cookies = cooks;
1774 *ncookies = ncooks;
1775 }
1776 /*
1777 * If this VFS supports the system attribute view interface; and
1778 * we're looking at an extended attribute directory; and we care
1779 * about normalization conflicts on this vfs; then we must check
1780 * for normalization conflicts with the sysattr name space.
1781 */
1782 #ifdef TODO
1783 check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
1784 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
1785 (flags & V_RDDIR_ENTFLAGS);
1786 #else
1787 check_sysattrs = 0;
1788 #endif
1789
1790 /*
1791 * Transform to file-system independent format
1792 */
1793 outcount = 0;
1794 while (outcount < bytes_wanted) {
1795 ino64_t objnum;
1796 ushort_t reclen;
1797 off64_t *next = NULL;
1798
1799 /*
1800 * Special case `.', `..', and `.zfs'.
1801 */
1802 if (offset == 0) {
1803 (void) strcpy(zap.za_name, ".");
1804 zap.za_normalization_conflict = 0;
1805 objnum = zp->z_id;
1806 type = DT_DIR;
1807 } else if (offset == 1) {
1808 (void) strcpy(zap.za_name, "..");
1809 zap.za_normalization_conflict = 0;
1810 objnum = parent;
1811 type = DT_DIR;
1812 } else if (offset == 2 && zfs_show_ctldir(zp)) {
1813 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
1814 zap.za_normalization_conflict = 0;
1815 objnum = ZFSCTL_INO_ROOT;
1816 type = DT_DIR;
1817 } else {
1818 /*
1819 * Grab next entry.
1820 */
1821 if ((error = zap_cursor_retrieve(&zc, &zap))) {
1822 if ((*eofp = (error == ENOENT)) != 0)
1823 break;
1824 else
1825 goto update;
1826 }
1827
1828 if (zap.za_integer_length != 8 ||
1829 zap.za_num_integers != 1) {
1830 cmn_err(CE_WARN, "zap_readdir: bad directory "
1831 "entry, obj = %lld, offset = %lld\n",
1832 (u_longlong_t)zp->z_id,
1833 (u_longlong_t)offset);
1834 error = SET_ERROR(ENXIO);
1835 goto update;
1836 }
1837
1838 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
1839 /*
1840 * MacOS X can extract the object type here such as:
1841 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1842 */
1843 type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1844
1845 if (check_sysattrs && !zap.za_normalization_conflict) {
1846 #ifdef TODO
1847 zap.za_normalization_conflict =
1848 xattr_sysattr_casechk(zap.za_name);
1849 #else
1850 panic("%s:%u: TODO", __func__, __LINE__);
1851 #endif
1852 }
1853 }
1854
1855 if (flags & V_RDDIR_ACCFILTER) {
1856 /*
1857 * If we have no access at all, don't include
1858 * this entry in the returned information
1859 */
1860 znode_t *ezp;
1861 if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
1862 goto skip_entry;
1863 if (!zfs_has_access(ezp, cr)) {
1864 vrele(ZTOV(ezp));
1865 goto skip_entry;
1866 }
1867 vrele(ZTOV(ezp));
1868 }
1869
1870 if (flags & V_RDDIR_ENTFLAGS)
1871 reclen = EDIRENT_RECLEN(strlen(zap.za_name));
1872 else
1873 reclen = DIRENT64_RECLEN(strlen(zap.za_name));
1874
1875 /*
1876 * Will this entry fit in the buffer?
1877 */
1878 if (outcount + reclen > bufsize) {
1879 /*
1880 * Did we manage to fit anything in the buffer?
1881 */
1882 if (!outcount) {
1883 error = SET_ERROR(EINVAL);
1884 goto update;
1885 }
1886 break;
1887 }
1888 if (flags & V_RDDIR_ENTFLAGS) {
1889 /*
1890 * Add extended flag entry:
1891 */
1892 eodp->ed_ino = objnum;
1893 eodp->ed_reclen = reclen;
1894 /* NOTE: ed_off is the offset for the *next* entry */
1895 next = &(eodp->ed_off);
1896 eodp->ed_eflags = zap.za_normalization_conflict ?
1897 ED_CASE_CONFLICT : 0;
1898 (void) strncpy(eodp->ed_name, zap.za_name,
1899 EDIRENT_NAMELEN(reclen));
1900 eodp = (edirent_t *)((intptr_t)eodp + reclen);
1901 } else {
1902 /*
1903 * Add normal entry:
1904 */
1905 odp->d_ino = objnum;
1906 odp->d_reclen = reclen;
1907 odp->d_namlen = strlen(zap.za_name);
1908 /* NOTE: d_off is the offset for the *next* entry. */
1909 next = &odp->d_off;
1910 strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
1911 odp->d_type = type;
1912 dirent_terminate(odp);
1913 odp = (dirent64_t *)((intptr_t)odp + reclen);
1914 }
1915 outcount += reclen;
1916
1917 ASSERT3S(outcount, <=, bufsize);
1918
1919 /* Prefetch znode */
1920 if (prefetch)
1921 dmu_prefetch(os, objnum, 0, 0, 0,
1922 ZIO_PRIORITY_SYNC_READ);
1923
1924 skip_entry:
1925 /*
1926 * Move to the next entry, fill in the previous offset.
1927 */
1928 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
1929 zap_cursor_advance(&zc);
1930 offset = zap_cursor_serialize(&zc);
1931 } else {
1932 offset += 1;
1933 }
1934
1935 /* Fill the offset right after advancing the cursor. */
1936 if (next != NULL)
1937 *next = offset;
1938 if (cooks != NULL) {
1939 *cooks++ = offset;
1940 ncooks--;
1941 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
1942 }
1943 }
1944 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
1945
1946 /* Subtract unused cookies */
1947 if (ncookies != NULL)
1948 *ncookies -= ncooks;
1949
1950 if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
1951 iovp->iov_base += outcount;
1952 iovp->iov_len -= outcount;
1953 zfs_uio_resid(uio) -= outcount;
1954 } else if ((error =
1955 zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
1956 /*
1957 * Reset the pointer.
1958 */
1959 offset = zfs_uio_offset(uio);
1960 }
1961
1962 update:
1963 zap_cursor_fini(&zc);
1964 if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
1965 kmem_free(outbuf, bufsize);
1966
1967 if (error == ENOENT)
1968 error = 0;
1969
1970 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
1971
1972 zfs_uio_setoffset(uio, offset);
1973 ZFS_EXIT(zfsvfs);
1974 if (error != 0 && cookies != NULL) {
1975 free(*cookies, M_TEMP);
1976 *cookies = NULL;
1977 *ncookies = 0;
1978 }
1979 return (error);
1980 }
1981
1982 /*
1983 * Get the requested file attributes and place them in the provided
1984 * vattr structure.
1985 *
1986 * IN: vp - vnode of file.
1987 * vap - va_mask identifies requested attributes.
1988 * If AT_XVATTR set, then optional attrs are requested
1989 * flags - ATTR_NOACLCHECK (CIFS server context)
1990 * cr - credentials of caller.
1991 *
1992 * OUT: vap - attribute values.
1993 *
1994 * RETURN: 0 (always succeeds).
1995 */
1996 /* ARGSUSED */
1997 static int
zfs_getattr(vnode_t * vp,vattr_t * vap,int flags,cred_t * cr)1998 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
1999 {
2000 znode_t *zp = VTOZ(vp);
2001 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2002 int error = 0;
2003 uint32_t blksize;
2004 u_longlong_t nblocks;
2005 uint64_t mtime[2], ctime[2], crtime[2], rdev;
2006 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
2007 xoptattr_t *xoap = NULL;
2008 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2009 sa_bulk_attr_t bulk[4];
2010 int count = 0;
2011
2012 ZFS_ENTER(zfsvfs);
2013 ZFS_VERIFY_ZP(zp);
2014
2015 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
2016
2017 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
2018 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
2019 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
2020 if (vp->v_type == VBLK || vp->v_type == VCHR)
2021 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
2022 &rdev, 8);
2023
2024 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
2025 ZFS_EXIT(zfsvfs);
2026 return (error);
2027 }
2028
2029 /*
2030 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
2031 * Also, if we are the owner don't bother, since owner should
2032 * always be allowed to read basic attributes of file.
2033 */
2034 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
2035 (vap->va_uid != crgetuid(cr))) {
2036 if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
2037 skipaclchk, cr))) {
2038 ZFS_EXIT(zfsvfs);
2039 return (error);
2040 }
2041 }
2042
2043 /*
2044 * Return all attributes. It's cheaper to provide the answer
2045 * than to determine whether we were asked the question.
2046 */
2047
2048 vap->va_type = IFTOVT(zp->z_mode);
2049 vap->va_mode = zp->z_mode & ~S_IFMT;
2050 vn_fsid(vp, vap);
2051 vap->va_nodeid = zp->z_id;
2052 vap->va_nlink = zp->z_links;
2053 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
2054 zp->z_links < ZFS_LINK_MAX)
2055 vap->va_nlink++;
2056 vap->va_size = zp->z_size;
2057 if (vp->v_type == VBLK || vp->v_type == VCHR)
2058 vap->va_rdev = zfs_cmpldev(rdev);
2059 vap->va_gen = zp->z_gen;
2060 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */
2061 vap->va_filerev = zp->z_seq;
2062
2063 /*
2064 * Add in any requested optional attributes and the create time.
2065 * Also set the corresponding bits in the returned attribute bitmap.
2066 */
2067 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
2068 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
2069 xoap->xoa_archive =
2070 ((zp->z_pflags & ZFS_ARCHIVE) != 0);
2071 XVA_SET_RTN(xvap, XAT_ARCHIVE);
2072 }
2073
2074 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
2075 xoap->xoa_readonly =
2076 ((zp->z_pflags & ZFS_READONLY) != 0);
2077 XVA_SET_RTN(xvap, XAT_READONLY);
2078 }
2079
2080 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
2081 xoap->xoa_system =
2082 ((zp->z_pflags & ZFS_SYSTEM) != 0);
2083 XVA_SET_RTN(xvap, XAT_SYSTEM);
2084 }
2085
2086 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
2087 xoap->xoa_hidden =
2088 ((zp->z_pflags & ZFS_HIDDEN) != 0);
2089 XVA_SET_RTN(xvap, XAT_HIDDEN);
2090 }
2091
2092 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2093 xoap->xoa_nounlink =
2094 ((zp->z_pflags & ZFS_NOUNLINK) != 0);
2095 XVA_SET_RTN(xvap, XAT_NOUNLINK);
2096 }
2097
2098 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2099 xoap->xoa_immutable =
2100 ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
2101 XVA_SET_RTN(xvap, XAT_IMMUTABLE);
2102 }
2103
2104 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2105 xoap->xoa_appendonly =
2106 ((zp->z_pflags & ZFS_APPENDONLY) != 0);
2107 XVA_SET_RTN(xvap, XAT_APPENDONLY);
2108 }
2109
2110 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2111 xoap->xoa_nodump =
2112 ((zp->z_pflags & ZFS_NODUMP) != 0);
2113 XVA_SET_RTN(xvap, XAT_NODUMP);
2114 }
2115
2116 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
2117 xoap->xoa_opaque =
2118 ((zp->z_pflags & ZFS_OPAQUE) != 0);
2119 XVA_SET_RTN(xvap, XAT_OPAQUE);
2120 }
2121
2122 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2123 xoap->xoa_av_quarantined =
2124 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
2125 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
2126 }
2127
2128 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2129 xoap->xoa_av_modified =
2130 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
2131 XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
2132 }
2133
2134 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
2135 vp->v_type == VREG) {
2136 zfs_sa_get_scanstamp(zp, xvap);
2137 }
2138
2139 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2140 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
2141 XVA_SET_RTN(xvap, XAT_REPARSE);
2142 }
2143 if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
2144 xoap->xoa_generation = zp->z_gen;
2145 XVA_SET_RTN(xvap, XAT_GEN);
2146 }
2147
2148 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
2149 xoap->xoa_offline =
2150 ((zp->z_pflags & ZFS_OFFLINE) != 0);
2151 XVA_SET_RTN(xvap, XAT_OFFLINE);
2152 }
2153
2154 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
2155 xoap->xoa_sparse =
2156 ((zp->z_pflags & ZFS_SPARSE) != 0);
2157 XVA_SET_RTN(xvap, XAT_SPARSE);
2158 }
2159
2160 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2161 xoap->xoa_projinherit =
2162 ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
2163 XVA_SET_RTN(xvap, XAT_PROJINHERIT);
2164 }
2165
2166 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2167 xoap->xoa_projid = zp->z_projid;
2168 XVA_SET_RTN(xvap, XAT_PROJID);
2169 }
2170 }
2171
2172 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
2173 ZFS_TIME_DECODE(&vap->va_mtime, mtime);
2174 ZFS_TIME_DECODE(&vap->va_ctime, ctime);
2175 ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
2176
2177
2178 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
2179 vap->va_blksize = blksize;
2180 vap->va_bytes = nblocks << 9; /* nblocks * 512 */
2181
2182 if (zp->z_blksz == 0) {
2183 /*
2184 * Block size hasn't been set; suggest maximal I/O transfers.
2185 */
2186 vap->va_blksize = zfsvfs->z_max_blksz;
2187 }
2188
2189 ZFS_EXIT(zfsvfs);
2190 return (0);
2191 }
2192
2193 /*
2194 * Set the file attributes to the values contained in the
2195 * vattr structure.
2196 *
2197 * IN: zp - znode of file to be modified.
2198 * vap - new attribute values.
2199 * If AT_XVATTR set, then optional attrs are being set
2200 * flags - ATTR_UTIME set if non-default time values provided.
2201 * - ATTR_NOACLCHECK (CIFS context only).
2202 * cr - credentials of caller.
2203 * ct - caller context
2204 *
2205 * RETURN: 0 on success, error code on failure.
2206 *
2207 * Timestamps:
2208 * vp - ctime updated, mtime updated if size changed.
2209 */
2210 /* ARGSUSED */
2211 int
zfs_setattr(znode_t * zp,vattr_t * vap,int flags,cred_t * cr)2212 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
2213 {
2214 vnode_t *vp = ZTOV(zp);
2215 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2216 objset_t *os;
2217 zilog_t *zilog;
2218 dmu_tx_t *tx;
2219 vattr_t oldva;
2220 xvattr_t tmpxvattr;
2221 uint_t mask = vap->va_mask;
2222 uint_t saved_mask = 0;
2223 uint64_t saved_mode;
2224 int trim_mask = 0;
2225 uint64_t new_mode;
2226 uint64_t new_uid, new_gid;
2227 uint64_t xattr_obj;
2228 uint64_t mtime[2], ctime[2];
2229 uint64_t projid = ZFS_INVALID_PROJID;
2230 znode_t *attrzp;
2231 int need_policy = FALSE;
2232 int err, err2;
2233 zfs_fuid_info_t *fuidp = NULL;
2234 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
2235 xoptattr_t *xoap;
2236 zfs_acl_t *aclp;
2237 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2238 boolean_t fuid_dirtied = B_FALSE;
2239 sa_bulk_attr_t bulk[7], xattr_bulk[7];
2240 int count = 0, xattr_count = 0;
2241
2242 if (mask == 0)
2243 return (0);
2244
2245 if (mask & AT_NOSET)
2246 return (SET_ERROR(EINVAL));
2247
2248 ZFS_ENTER(zfsvfs);
2249 ZFS_VERIFY_ZP(zp);
2250
2251 os = zfsvfs->z_os;
2252 zilog = zfsvfs->z_log;
2253
2254 /*
2255 * Make sure that if we have ephemeral uid/gid or xvattr specified
2256 * that file system is at proper version level
2257 */
2258
2259 if (zfsvfs->z_use_fuids == B_FALSE &&
2260 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
2261 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
2262 (mask & AT_XVATTR))) {
2263 ZFS_EXIT(zfsvfs);
2264 return (SET_ERROR(EINVAL));
2265 }
2266
2267 if (mask & AT_SIZE && vp->v_type == VDIR) {
2268 ZFS_EXIT(zfsvfs);
2269 return (SET_ERROR(EISDIR));
2270 }
2271
2272 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
2273 ZFS_EXIT(zfsvfs);
2274 return (SET_ERROR(EINVAL));
2275 }
2276
2277 /*
2278 * If this is an xvattr_t, then get a pointer to the structure of
2279 * optional attributes. If this is NULL, then we have a vattr_t.
2280 */
2281 xoap = xva_getxoptattr(xvap);
2282
2283 xva_init(&tmpxvattr);
2284
2285 /*
2286 * Immutable files can only alter immutable bit and atime
2287 */
2288 if ((zp->z_pflags & ZFS_IMMUTABLE) &&
2289 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
2290 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
2291 ZFS_EXIT(zfsvfs);
2292 return (SET_ERROR(EPERM));
2293 }
2294
2295 /*
2296 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
2297 */
2298
2299 /*
2300 * Verify timestamps doesn't overflow 32 bits.
2301 * ZFS can handle large timestamps, but 32bit syscalls can't
2302 * handle times greater than 2039. This check should be removed
2303 * once large timestamps are fully supported.
2304 */
2305 if (mask & (AT_ATIME | AT_MTIME)) {
2306 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
2307 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
2308 ZFS_EXIT(zfsvfs);
2309 return (SET_ERROR(EOVERFLOW));
2310 }
2311 }
2312 if (xoap != NULL && (mask & AT_XVATTR)) {
2313 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
2314 TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
2315 ZFS_EXIT(zfsvfs);
2316 return (SET_ERROR(EOVERFLOW));
2317 }
2318
2319 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2320 if (!dmu_objset_projectquota_enabled(os) ||
2321 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) {
2322 ZFS_EXIT(zfsvfs);
2323 return (SET_ERROR(EOPNOTSUPP));
2324 }
2325
2326 projid = xoap->xoa_projid;
2327 if (unlikely(projid == ZFS_INVALID_PROJID)) {
2328 ZFS_EXIT(zfsvfs);
2329 return (SET_ERROR(EINVAL));
2330 }
2331
2332 if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
2333 projid = ZFS_INVALID_PROJID;
2334 else
2335 need_policy = TRUE;
2336 }
2337
2338 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
2339 (xoap->xoa_projinherit !=
2340 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
2341 (!dmu_objset_projectquota_enabled(os) ||
2342 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) {
2343 ZFS_EXIT(zfsvfs);
2344 return (SET_ERROR(EOPNOTSUPP));
2345 }
2346 }
2347
2348 attrzp = NULL;
2349 aclp = NULL;
2350
2351 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
2352 ZFS_EXIT(zfsvfs);
2353 return (SET_ERROR(EROFS));
2354 }
2355
2356 /*
2357 * First validate permissions
2358 */
2359
2360 if (mask & AT_SIZE) {
2361 /*
2362 * XXX - Note, we are not providing any open
2363 * mode flags here (like FNDELAY), so we may
2364 * block if there are locks present... this
2365 * should be addressed in openat().
2366 */
2367 /* XXX - would it be OK to generate a log record here? */
2368 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2369 if (err) {
2370 ZFS_EXIT(zfsvfs);
2371 return (err);
2372 }
2373 }
2374
2375 if (mask & (AT_ATIME|AT_MTIME) ||
2376 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2377 XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2378 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2379 XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2380 XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2381 XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2382 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2383 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2384 skipaclchk, cr);
2385 }
2386
2387 if (mask & (AT_UID|AT_GID)) {
2388 int idmask = (mask & (AT_UID|AT_GID));
2389 int take_owner;
2390 int take_group;
2391
2392 /*
2393 * NOTE: even if a new mode is being set,
2394 * we may clear S_ISUID/S_ISGID bits.
2395 */
2396
2397 if (!(mask & AT_MODE))
2398 vap->va_mode = zp->z_mode;
2399
2400 /*
2401 * Take ownership or chgrp to group we are a member of
2402 */
2403
2404 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
2405 take_group = (mask & AT_GID) &&
2406 zfs_groupmember(zfsvfs, vap->va_gid, cr);
2407
2408 /*
2409 * If both AT_UID and AT_GID are set then take_owner and
2410 * take_group must both be set in order to allow taking
2411 * ownership.
2412 *
2413 * Otherwise, send the check through secpolicy_vnode_setattr()
2414 *
2415 */
2416
2417 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
2418 ((idmask == AT_UID) && take_owner) ||
2419 ((idmask == AT_GID) && take_group)) {
2420 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
2421 skipaclchk, cr) == 0) {
2422 /*
2423 * Remove setuid/setgid for non-privileged users
2424 */
2425 secpolicy_setid_clear(vap, vp, cr);
2426 trim_mask = (mask & (AT_UID|AT_GID));
2427 } else {
2428 need_policy = TRUE;
2429 }
2430 } else {
2431 need_policy = TRUE;
2432 }
2433 }
2434
2435 oldva.va_mode = zp->z_mode;
2436 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
2437 if (mask & AT_XVATTR) {
2438 /*
2439 * Update xvattr mask to include only those attributes
2440 * that are actually changing.
2441 *
2442 * the bits will be restored prior to actually setting
2443 * the attributes so the caller thinks they were set.
2444 */
2445 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2446 if (xoap->xoa_appendonly !=
2447 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
2448 need_policy = TRUE;
2449 } else {
2450 XVA_CLR_REQ(xvap, XAT_APPENDONLY);
2451 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
2452 }
2453 }
2454
2455 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2456 if (xoap->xoa_projinherit !=
2457 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
2458 need_policy = TRUE;
2459 } else {
2460 XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
2461 XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT);
2462 }
2463 }
2464
2465 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2466 if (xoap->xoa_nounlink !=
2467 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
2468 need_policy = TRUE;
2469 } else {
2470 XVA_CLR_REQ(xvap, XAT_NOUNLINK);
2471 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
2472 }
2473 }
2474
2475 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2476 if (xoap->xoa_immutable !=
2477 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
2478 need_policy = TRUE;
2479 } else {
2480 XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
2481 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
2482 }
2483 }
2484
2485 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2486 if (xoap->xoa_nodump !=
2487 ((zp->z_pflags & ZFS_NODUMP) != 0)) {
2488 need_policy = TRUE;
2489 } else {
2490 XVA_CLR_REQ(xvap, XAT_NODUMP);
2491 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
2492 }
2493 }
2494
2495 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2496 if (xoap->xoa_av_modified !=
2497 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
2498 need_policy = TRUE;
2499 } else {
2500 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
2501 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
2502 }
2503 }
2504
2505 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2506 if ((vp->v_type != VREG &&
2507 xoap->xoa_av_quarantined) ||
2508 xoap->xoa_av_quarantined !=
2509 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
2510 need_policy = TRUE;
2511 } else {
2512 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
2513 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
2514 }
2515 }
2516
2517 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2518 ZFS_EXIT(zfsvfs);
2519 return (SET_ERROR(EPERM));
2520 }
2521
2522 if (need_policy == FALSE &&
2523 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
2524 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
2525 need_policy = TRUE;
2526 }
2527 }
2528
2529 if (mask & AT_MODE) {
2530 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
2531 err = secpolicy_setid_setsticky_clear(vp, vap,
2532 &oldva, cr);
2533 if (err) {
2534 ZFS_EXIT(zfsvfs);
2535 return (err);
2536 }
2537 trim_mask |= AT_MODE;
2538 } else {
2539 need_policy = TRUE;
2540 }
2541 }
2542
2543 if (need_policy) {
2544 /*
2545 * If trim_mask is set then take ownership
2546 * has been granted or write_acl is present and user
2547 * has the ability to modify mode. In that case remove
2548 * UID|GID and or MODE from mask so that
2549 * secpolicy_vnode_setattr() doesn't revoke it.
2550 */
2551
2552 if (trim_mask) {
2553 saved_mask = vap->va_mask;
2554 vap->va_mask &= ~trim_mask;
2555 if (trim_mask & AT_MODE) {
2556 /*
2557 * Save the mode, as secpolicy_vnode_setattr()
2558 * will overwrite it with ova.va_mode.
2559 */
2560 saved_mode = vap->va_mode;
2561 }
2562 }
2563 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
2564 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
2565 if (err) {
2566 ZFS_EXIT(zfsvfs);
2567 return (err);
2568 }
2569
2570 if (trim_mask) {
2571 vap->va_mask |= saved_mask;
2572 if (trim_mask & AT_MODE) {
2573 /*
2574 * Recover the mode after
2575 * secpolicy_vnode_setattr().
2576 */
2577 vap->va_mode = saved_mode;
2578 }
2579 }
2580 }
2581
2582 /*
2583 * secpolicy_vnode_setattr, or take ownership may have
2584 * changed va_mask
2585 */
2586 mask = vap->va_mask;
2587
2588 if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) {
2589 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
2590 &xattr_obj, sizeof (xattr_obj));
2591
2592 if (err == 0 && xattr_obj) {
2593 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
2594 if (err == 0) {
2595 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
2596 if (err != 0)
2597 vrele(ZTOV(attrzp));
2598 }
2599 if (err)
2600 goto out2;
2601 }
2602 if (mask & AT_UID) {
2603 new_uid = zfs_fuid_create(zfsvfs,
2604 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
2605 if (new_uid != zp->z_uid &&
2606 zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
2607 new_uid)) {
2608 if (attrzp)
2609 vput(ZTOV(attrzp));
2610 err = SET_ERROR(EDQUOT);
2611 goto out2;
2612 }
2613 }
2614
2615 if (mask & AT_GID) {
2616 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
2617 cr, ZFS_GROUP, &fuidp);
2618 if (new_gid != zp->z_gid &&
2619 zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
2620 new_gid)) {
2621 if (attrzp)
2622 vput(ZTOV(attrzp));
2623 err = SET_ERROR(EDQUOT);
2624 goto out2;
2625 }
2626 }
2627
2628 if (projid != ZFS_INVALID_PROJID &&
2629 zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
2630 if (attrzp)
2631 vput(ZTOV(attrzp));
2632 err = SET_ERROR(EDQUOT);
2633 goto out2;
2634 }
2635 }
2636 tx = dmu_tx_create(os);
2637
2638 if (mask & AT_MODE) {
2639 uint64_t pmode = zp->z_mode;
2640 uint64_t acl_obj;
2641 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
2642
2643 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
2644 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
2645 err = SET_ERROR(EPERM);
2646 goto out;
2647 }
2648
2649 if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
2650 goto out;
2651
2652 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
2653 /*
2654 * Are we upgrading ACL from old V0 format
2655 * to V1 format?
2656 */
2657 if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
2658 zfs_znode_acl_version(zp) ==
2659 ZFS_ACL_VERSION_INITIAL) {
2660 dmu_tx_hold_free(tx, acl_obj, 0,
2661 DMU_OBJECT_END);
2662 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2663 0, aclp->z_acl_bytes);
2664 } else {
2665 dmu_tx_hold_write(tx, acl_obj, 0,
2666 aclp->z_acl_bytes);
2667 }
2668 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
2669 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2670 0, aclp->z_acl_bytes);
2671 }
2672 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2673 } else {
2674 if (((mask & AT_XVATTR) &&
2675 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
2676 (projid != ZFS_INVALID_PROJID &&
2677 !(zp->z_pflags & ZFS_PROJID)))
2678 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2679 else
2680 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2681 }
2682
2683 if (attrzp) {
2684 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
2685 }
2686
2687 fuid_dirtied = zfsvfs->z_fuid_dirty;
2688 if (fuid_dirtied)
2689 zfs_fuid_txhold(zfsvfs, tx);
2690
2691 zfs_sa_upgrade_txholds(tx, zp);
2692
2693 err = dmu_tx_assign(tx, TXG_WAIT);
2694 if (err)
2695 goto out;
2696
2697 count = 0;
2698 /*
2699 * Set each attribute requested.
2700 * We group settings according to the locks they need to acquire.
2701 *
2702 * Note: you cannot set ctime directly, although it will be
2703 * updated as a side-effect of calling this function.
2704 */
2705
2706 if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
2707 /*
2708 * For the existed object that is upgraded from old system,
2709 * its on-disk layout has no slot for the project ID attribute.
2710 * But quota accounting logic needs to access related slots by
2711 * offset directly. So we need to adjust old objects' layout
2712 * to make the project ID to some unified and fixed offset.
2713 */
2714 if (attrzp)
2715 err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
2716 if (err == 0)
2717 err = sa_add_projid(zp->z_sa_hdl, tx, projid);
2718
2719 if (unlikely(err == EEXIST))
2720 err = 0;
2721 else if (err != 0)
2722 goto out;
2723 else
2724 projid = ZFS_INVALID_PROJID;
2725 }
2726
2727 if (mask & (AT_UID|AT_GID|AT_MODE))
2728 mutex_enter(&zp->z_acl_lock);
2729
2730 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
2731 &zp->z_pflags, sizeof (zp->z_pflags));
2732
2733 if (attrzp) {
2734 if (mask & (AT_UID|AT_GID|AT_MODE))
2735 mutex_enter(&attrzp->z_acl_lock);
2736 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2737 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
2738 sizeof (attrzp->z_pflags));
2739 if (projid != ZFS_INVALID_PROJID) {
2740 attrzp->z_projid = projid;
2741 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2742 SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
2743 sizeof (attrzp->z_projid));
2744 }
2745 }
2746
2747 if (mask & (AT_UID|AT_GID)) {
2748
2749 if (mask & AT_UID) {
2750 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
2751 &new_uid, sizeof (new_uid));
2752 zp->z_uid = new_uid;
2753 if (attrzp) {
2754 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2755 SA_ZPL_UID(zfsvfs), NULL, &new_uid,
2756 sizeof (new_uid));
2757 attrzp->z_uid = new_uid;
2758 }
2759 }
2760
2761 if (mask & AT_GID) {
2762 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
2763 NULL, &new_gid, sizeof (new_gid));
2764 zp->z_gid = new_gid;
2765 if (attrzp) {
2766 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2767 SA_ZPL_GID(zfsvfs), NULL, &new_gid,
2768 sizeof (new_gid));
2769 attrzp->z_gid = new_gid;
2770 }
2771 }
2772 if (!(mask & AT_MODE)) {
2773 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
2774 NULL, &new_mode, sizeof (new_mode));
2775 new_mode = zp->z_mode;
2776 }
2777 err = zfs_acl_chown_setattr(zp);
2778 ASSERT0(err);
2779 if (attrzp) {
2780 vn_seqc_write_begin(ZTOV(attrzp));
2781 err = zfs_acl_chown_setattr(attrzp);
2782 vn_seqc_write_end(ZTOV(attrzp));
2783 ASSERT0(err);
2784 }
2785 }
2786
2787 if (mask & AT_MODE) {
2788 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
2789 &new_mode, sizeof (new_mode));
2790 zp->z_mode = new_mode;
2791 ASSERT3P(aclp, !=, NULL);
2792 err = zfs_aclset_common(zp, aclp, cr, tx);
2793 ASSERT0(err);
2794 if (zp->z_acl_cached)
2795 zfs_acl_free(zp->z_acl_cached);
2796 zp->z_acl_cached = aclp;
2797 aclp = NULL;
2798 }
2799
2800
2801 if (mask & AT_ATIME) {
2802 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
2803 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
2804 &zp->z_atime, sizeof (zp->z_atime));
2805 }
2806
2807 if (mask & AT_MTIME) {
2808 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
2809 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
2810 mtime, sizeof (mtime));
2811 }
2812
2813 if (projid != ZFS_INVALID_PROJID) {
2814 zp->z_projid = projid;
2815 SA_ADD_BULK_ATTR(bulk, count,
2816 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
2817 sizeof (zp->z_projid));
2818 }
2819
2820 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
2821 if (mask & AT_SIZE && !(mask & AT_MTIME)) {
2822 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
2823 NULL, mtime, sizeof (mtime));
2824 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2825 &ctime, sizeof (ctime));
2826 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
2827 } else if (mask != 0) {
2828 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2829 &ctime, sizeof (ctime));
2830 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
2831 if (attrzp) {
2832 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2833 SA_ZPL_CTIME(zfsvfs), NULL,
2834 &ctime, sizeof (ctime));
2835 zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
2836 mtime, ctime);
2837 }
2838 }
2839
2840 /*
2841 * Do this after setting timestamps to prevent timestamp
2842 * update from toggling bit
2843 */
2844
2845 if (xoap && (mask & AT_XVATTR)) {
2846
2847 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
2848 xoap->xoa_createtime = vap->va_birthtime;
2849 /*
2850 * restore trimmed off masks
2851 * so that return masks can be set for caller.
2852 */
2853
2854 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
2855 XVA_SET_REQ(xvap, XAT_APPENDONLY);
2856 }
2857 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
2858 XVA_SET_REQ(xvap, XAT_NOUNLINK);
2859 }
2860 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
2861 XVA_SET_REQ(xvap, XAT_IMMUTABLE);
2862 }
2863 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
2864 XVA_SET_REQ(xvap, XAT_NODUMP);
2865 }
2866 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
2867 XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
2868 }
2869 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
2870 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
2871 }
2872 if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) {
2873 XVA_SET_REQ(xvap, XAT_PROJINHERIT);
2874 }
2875
2876 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
2877 ASSERT3S(vp->v_type, ==, VREG);
2878
2879 zfs_xvattr_set(zp, xvap, tx);
2880 }
2881
2882 if (fuid_dirtied)
2883 zfs_fuid_sync(zfsvfs, tx);
2884
2885 if (mask != 0)
2886 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
2887
2888 if (mask & (AT_UID|AT_GID|AT_MODE))
2889 mutex_exit(&zp->z_acl_lock);
2890
2891 if (attrzp) {
2892 if (mask & (AT_UID|AT_GID|AT_MODE))
2893 mutex_exit(&attrzp->z_acl_lock);
2894 }
2895 out:
2896 if (err == 0 && attrzp) {
2897 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
2898 xattr_count, tx);
2899 ASSERT0(err2);
2900 }
2901
2902 if (attrzp)
2903 vput(ZTOV(attrzp));
2904
2905 if (aclp)
2906 zfs_acl_free(aclp);
2907
2908 if (fuidp) {
2909 zfs_fuid_info_free(fuidp);
2910 fuidp = NULL;
2911 }
2912
2913 if (err) {
2914 dmu_tx_abort(tx);
2915 } else {
2916 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
2917 dmu_tx_commit(tx);
2918 }
2919
2920 out2:
2921 if (os->os_sync == ZFS_SYNC_ALWAYS)
2922 zil_commit(zilog, 0);
2923
2924 ZFS_EXIT(zfsvfs);
2925 return (err);
2926 }
2927
2928 /*
2929 * Look up the directory entries corresponding to the source and target
2930 * directory/name pairs.
2931 */
2932 static int
zfs_rename_relock_lookup(znode_t * sdzp,const struct componentname * scnp,znode_t ** szpp,znode_t * tdzp,const struct componentname * tcnp,znode_t ** tzpp)2933 zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
2934 znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
2935 znode_t **tzpp)
2936 {
2937 zfsvfs_t *zfsvfs;
2938 znode_t *szp, *tzp;
2939 int error;
2940
2941 /*
2942 * Before using sdzp and tdzp we must ensure that they are live.
2943 * As a porting legacy from illumos we have two things to worry
2944 * about. One is typical for FreeBSD and it is that the vnode is
2945 * not reclaimed (doomed). The other is that the znode is live.
2946 * The current code can invalidate the znode without acquiring the
2947 * corresponding vnode lock if the object represented by the znode
2948 * and vnode is no longer valid after a rollback or receive operation.
2949 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
2950 * that protects the znodes from the invalidation.
2951 */
2952 zfsvfs = sdzp->z_zfsvfs;
2953 ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
2954 ZFS_ENTER(zfsvfs);
2955 ZFS_VERIFY_ZP(sdzp);
2956 ZFS_VERIFY_ZP(tdzp);
2957
2958 /*
2959 * Re-resolve svp to be certain it still exists and fetch the
2960 * correct vnode.
2961 */
2962 error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
2963 if (error != 0) {
2964 /* Source entry invalid or not there. */
2965 if ((scnp->cn_flags & ISDOTDOT) != 0 ||
2966 (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
2967 error = SET_ERROR(EINVAL);
2968 goto out;
2969 }
2970 *szpp = szp;
2971
2972 /*
2973 * Re-resolve tvp, if it disappeared we just carry on.
2974 */
2975 error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
2976 if (error != 0) {
2977 vrele(ZTOV(szp));
2978 if ((tcnp->cn_flags & ISDOTDOT) != 0)
2979 error = SET_ERROR(EINVAL);
2980 goto out;
2981 }
2982 *tzpp = tzp;
2983 out:
2984 ZFS_EXIT(zfsvfs);
2985 return (error);
2986 }
2987
/*
 * We acquire all but fdvp locks using non-blocking acquisitions.  If we
 * fail to acquire any lock in the path we will drop all held locks,
 * acquire the new lock in a blocking fashion, and then release it and
 * restart the rename.  This acquire/release step ensures that we do not
 * spin on a lock waiting for release.  On error release all vnode locks
 * and decrement references the way tmpfs_rename() would do.
 *
 *	IN:	sdvp	- source directory vnode; locked here.
 *		tdvp	- target directory vnode; unlocked on entry by this
 *			  function and then re-locked.
 *		scnp	- source component name.
 *		tcnp	- target component name.
 *	INOUT:	svpp	- source vnode; the reference is replaced by the
 *			  freshly looked-up vnode.
 *		tvpp	- target vnode or NULL; replaced by the freshly
 *			  looked-up target (NULL if there is none).
 *
 *	RETURN:	0 with sdvp, tdvp, *svpp and (if non-NULL) *tvpp locked
 *		exclusively; otherwise an error code with every lock taken
 *		here released again.
 */
static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
    struct vnode *tdvp, struct vnode **tvpp,
    const struct componentname *scnp, const struct componentname *tcnp)
{
	struct vnode	*nvp, *svp, *tvp;
	znode_t		*sdzp, *tdzp, *szp, *tzp;
	int		error;

	/* Start from a state in which none of the four vnodes is locked. */
	VOP_UNLOCK1(tdvp);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK1(*tvpp);

relock:
	/* The source directory is the only lock taken in blocking mode. */
	error = vn_lock(sdvp, LK_EXCLUSIVE);
	if (error)
		goto out;
	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		if (error != EBUSY)
			goto out;
		/*
		 * Wait for tdvp with nothing else held, then drop it and
		 * start over.
		 */
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto out;
		VOP_UNLOCK1(tdvp);
		goto relock;
	}
	tdzp = VTOZ(tdvp);
	sdzp = VTOZ(sdvp);

	/* With both directories locked, resolve the two names again. */
	error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		goto out;
	}
	svp = ZTOV(szp);
	tvp = tzp != NULL ? ZTOV(tzp) : NULL;

	/*
	 * Now try acquire locks on svp and tvp.
	 */
	nvp = svp;
	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		/* The target reference from the lookup is not needed. */
		if (tvp != NULL)
			vrele(tvp);
		if (error != EBUSY) {
			vrele(nvp);
			goto out;
		}
		error = vn_lock(nvp, LK_EXCLUSIVE);
		if (error != 0) {
			vrele(nvp);
			goto out;
		}
		VOP_UNLOCK1(nvp);
		/*
		 * Concurrent rename race.
		 * XXX ?
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = SET_ERROR(EINVAL);
			goto out;
		}
		/* Hand the new source reference to the caller and retry. */
		vrele(*svpp);
		*svpp = nvp;
		goto relock;
	}
	/* Source locked: swap the caller's reference for the new one. */
	vrele(*svpp);
	*svpp = nvp;

	if (*tvpp != NULL)
		vrele(*tvpp);
	*tvpp = NULL;
	if (tvp != NULL) {
		nvp = tvp;
		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
		if (error != 0) {
			VOP_UNLOCK1(sdvp);
			VOP_UNLOCK1(tdvp);
			VOP_UNLOCK1(*svpp);
			if (error != EBUSY) {
				vrele(nvp);
				goto out;
			}
			error = vn_lock(nvp, LK_EXCLUSIVE);
			if (error != 0) {
				vrele(nvp);
				goto out;
			}
			/* Unlock and release the target, then start over. */
			vput(nvp);
			goto relock;
		}
		*tvpp = nvp;
	}

	return (0);

out:
	return (error);
}
3102
3103 /*
3104 * Note that we must use VRELE_ASYNC in this function as it walks
3105 * up the directory tree and vrele may need to acquire an exclusive
3106 * lock if a last reference to a vnode is dropped.
3107 */
3108 static int
zfs_rename_check(znode_t * szp,znode_t * sdzp,znode_t * tdzp)3109 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
3110 {
3111 zfsvfs_t *zfsvfs;
3112 znode_t *zp, *zp1;
3113 uint64_t parent;
3114 int error;
3115
3116 zfsvfs = tdzp->z_zfsvfs;
3117 if (tdzp == szp)
3118 return (SET_ERROR(EINVAL));
3119 if (tdzp == sdzp)
3120 return (0);
3121 if (tdzp->z_id == zfsvfs->z_root)
3122 return (0);
3123 zp = tdzp;
3124 for (;;) {
3125 ASSERT(!zp->z_unlinked);
3126 if ((error = sa_lookup(zp->z_sa_hdl,
3127 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
3128 break;
3129
3130 if (parent == szp->z_id) {
3131 error = SET_ERROR(EINVAL);
3132 break;
3133 }
3134 if (parent == zfsvfs->z_root)
3135 break;
3136 if (parent == sdzp->z_id)
3137 break;
3138
3139 error = zfs_zget(zfsvfs, parent, &zp1);
3140 if (error != 0)
3141 break;
3142
3143 if (zp != tdzp)
3144 VN_RELE_ASYNC(ZTOV(zp),
3145 dsl_pool_zrele_taskq(
3146 dmu_objset_pool(zfsvfs->z_os)));
3147 zp = zp1;
3148 }
3149
3150 if (error == ENOTDIR)
3151 panic("checkpath: .. not a directory\n");
3152 if (zp != tdzp)
3153 VN_RELE_ASYNC(ZTOV(zp),
3154 dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
3155 return (error);
3156 }
3157
3158 #if __FreeBSD_version < 1300124
/*
 * Local cache_vop_rename(), compiled only under the surrounding
 * __FreeBSD_version conditional.  Purges the name cache entries of the
 * source vnode and of the target vnode (if there is one), and the
 * negative entries of the target directory.  fdvp, fcnp and tcnp are
 * accepted but not used.
 */
static void
cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
    struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp)
{
	cache_purge(fvp);
	if (tvp) {
		cache_purge(tvp);
	}
	cache_purge_negative(tdvp);
}
3169 #endif
3170
3171 static int
3172 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3173 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3174 cred_t *cr);
3175
3176 /*
3177 * Move an entry from the provided source directory to the target
3178 * directory. Change the entry name as indicated.
3179 *
3180 * IN: sdvp - Source directory containing the "old entry".
3181 * scnp - Old entry name.
3182 * tdvp - Target directory to contain the "new entry".
3183 * tcnp - New entry name.
3184 * cr - credentials of caller.
3185 * INOUT: svpp - Source file
3186 * tvpp - Target file, may point to NULL initially
3187 *
3188 * RETURN: 0 on success, error code on failure.
3189 *
3190 * Timestamps:
3191 * sdvp,tdvp - ctime|mtime updated
3192 */
3193 /*ARGSUSED*/
3194 static int
zfs_do_rename(vnode_t * sdvp,vnode_t ** svpp,struct componentname * scnp,vnode_t * tdvp,vnode_t ** tvpp,struct componentname * tcnp,cred_t * cr)3195 zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3196 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3197 cred_t *cr)
3198 {
3199 int error;
3200
3201 ASSERT_VOP_ELOCKED(tdvp, __func__);
3202 if (*tvpp != NULL)
3203 ASSERT_VOP_ELOCKED(*tvpp, __func__);
3204
3205 /* Reject renames across filesystems. */
3206 if ((*svpp)->v_mount != tdvp->v_mount ||
3207 ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
3208 error = SET_ERROR(EXDEV);
3209 goto out;
3210 }
3211
3212 if (zfsctl_is_node(tdvp)) {
3213 error = SET_ERROR(EXDEV);
3214 goto out;
3215 }
3216
3217 /*
3218 * Lock all four vnodes to ensure safety and semantics of renaming.
3219 */
3220 error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
3221 if (error != 0) {
3222 /* no vnodes are locked in the case of error here */
3223 return (error);
3224 }
3225
3226 error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
3227 VOP_UNLOCK1(sdvp);
3228 VOP_UNLOCK1(*svpp);
3229 out:
3230 if (*tvpp != NULL)
3231 VOP_UNLOCK1(*tvpp);
3232 if (tdvp != *tvpp)
3233 VOP_UNLOCK1(tdvp);
3234
3235 return (error);
3236 }
3237
3238 static int
zfs_do_rename_impl(vnode_t * sdvp,vnode_t ** svpp,struct componentname * scnp,vnode_t * tdvp,vnode_t ** tvpp,struct componentname * tcnp,cred_t * cr)3239 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3240 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3241 cred_t *cr)
3242 {
3243 dmu_tx_t *tx;
3244 zfsvfs_t *zfsvfs;
3245 zilog_t *zilog;
3246 znode_t *tdzp, *sdzp, *tzp, *szp;
3247 const char *snm = scnp->cn_nameptr;
3248 const char *tnm = tcnp->cn_nameptr;
3249 int error;
3250
3251 tdzp = VTOZ(tdvp);
3252 sdzp = VTOZ(sdvp);
3253 zfsvfs = tdzp->z_zfsvfs;
3254
3255 ZFS_ENTER(zfsvfs);
3256 ZFS_VERIFY_ZP(tdzp);
3257 ZFS_VERIFY_ZP(sdzp);
3258 zilog = zfsvfs->z_log;
3259
3260 if (zfsvfs->z_utf8 && u8_validate(tnm,
3261 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3262 error = SET_ERROR(EILSEQ);
3263 goto out;
3264 }
3265
3266 /* If source and target are the same file, there is nothing to do. */
3267 if ((*svpp) == (*tvpp)) {
3268 error = 0;
3269 goto out;
3270 }
3271
3272 if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
3273 ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
3274 (*tvpp)->v_mountedhere != NULL)) {
3275 error = SET_ERROR(EXDEV);
3276 goto out;
3277 }
3278
3279 szp = VTOZ(*svpp);
3280 ZFS_VERIFY_ZP(szp);
3281 tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
3282 if (tzp != NULL)
3283 ZFS_VERIFY_ZP(tzp);
3284
3285 /*
3286 * This is to prevent the creation of links into attribute space
3287 * by renaming a linked file into/outof an attribute directory.
3288 * See the comment in zfs_link() for why this is considered bad.
3289 */
3290 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
3291 error = SET_ERROR(EINVAL);
3292 goto out;
3293 }
3294
3295 /*
3296 * If we are using project inheritance, means if the directory has
3297 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3298 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
3299 * such case, we only allow renames into our tree when the project
3300 * IDs are the same.
3301 */
3302 if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3303 tdzp->z_projid != szp->z_projid) {
3304 error = SET_ERROR(EXDEV);
3305 goto out;
3306 }
3307
3308 /*
3309 * Must have write access at the source to remove the old entry
3310 * and write access at the target to create the new entry.
3311 * Note that if target and source are the same, this can be
3312 * done in a single check.
3313 */
3314 if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
3315 goto out;
3316
3317 if ((*svpp)->v_type == VDIR) {
3318 /*
3319 * Avoid ".", "..", and aliases of "." for obvious reasons.
3320 */
3321 if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
3322 sdzp == szp ||
3323 (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
3324 error = EINVAL;
3325 goto out;
3326 }
3327
3328 /*
3329 * Check to make sure rename is valid.
3330 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
3331 */
3332 if ((error = zfs_rename_check(szp, sdzp, tdzp)))
3333 goto out;
3334 }
3335
3336 /*
3337 * Does target exist?
3338 */
3339 if (tzp) {
3340 /*
3341 * Source and target must be the same type.
3342 */
3343 if ((*svpp)->v_type == VDIR) {
3344 if ((*tvpp)->v_type != VDIR) {
3345 error = SET_ERROR(ENOTDIR);
3346 goto out;
3347 } else {
3348 cache_purge(tdvp);
3349 if (sdvp != tdvp)
3350 cache_purge(sdvp);
3351 }
3352 } else {
3353 if ((*tvpp)->v_type == VDIR) {
3354 error = SET_ERROR(EISDIR);
3355 goto out;
3356 }
3357 }
3358 }
3359
3360 vn_seqc_write_begin(*svpp);
3361 vn_seqc_write_begin(sdvp);
3362 if (*tvpp != NULL)
3363 vn_seqc_write_begin(*tvpp);
3364 if (tdvp != *tvpp)
3365 vn_seqc_write_begin(tdvp);
3366
3367 vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
3368 if (tzp)
3369 vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
3370
3371 /*
3372 * notify the target directory if it is not the same
3373 * as source directory.
3374 */
3375 if (tdvp != sdvp) {
3376 vnevent_rename_dest_dir(tdvp, ct);
3377 }
3378
3379 tx = dmu_tx_create(zfsvfs->z_os);
3380 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3381 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3382 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3383 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3384 if (sdzp != tdzp) {
3385 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3386 zfs_sa_upgrade_txholds(tx, tdzp);
3387 }
3388 if (tzp) {
3389 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3390 zfs_sa_upgrade_txholds(tx, tzp);
3391 }
3392
3393 zfs_sa_upgrade_txholds(tx, szp);
3394 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3395 error = dmu_tx_assign(tx, TXG_WAIT);
3396 if (error) {
3397 dmu_tx_abort(tx);
3398 goto out_seq;
3399 }
3400
3401 if (tzp) /* Attempt to remove the existing target */
3402 error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
3403
3404 if (error == 0) {
3405 error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
3406 if (error == 0) {
3407 szp->z_pflags |= ZFS_AV_MODIFIED;
3408
3409 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3410 (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3411 ASSERT0(error);
3412
3413 error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
3414 NULL);
3415 if (error == 0) {
3416 zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
3417 snm, tdzp, tnm, szp);
3418
3419 /*
3420 * Update path information for the target vnode
3421 */
3422 vn_renamepath(tdvp, *svpp, tnm, strlen(tnm));
3423 } else {
3424 /*
3425 * At this point, we have successfully created
3426 * the target name, but have failed to remove
3427 * the source name. Since the create was done
3428 * with the ZRENAMING flag, there are
3429 * complications; for one, the link count is
3430 * wrong. The easiest way to deal with this
3431 * is to remove the newly created target, and
3432 * return the original error. This must
3433 * succeed; fortunately, it is very unlikely to
3434 * fail, since we just created it.
3435 */
3436 VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
3437 ZRENAMING, NULL));
3438 }
3439 }
3440 if (error == 0) {
3441 cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
3442 }
3443 }
3444
3445 dmu_tx_commit(tx);
3446
3447 out_seq:
3448 vn_seqc_write_end(*svpp);
3449 vn_seqc_write_end(sdvp);
3450 if (*tvpp != NULL)
3451 vn_seqc_write_end(*tvpp);
3452 if (tdvp != *tvpp)
3453 vn_seqc_write_end(tdvp);
3454
3455 out:
3456 if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3457 zil_commit(zilog, 0);
3458 ZFS_EXIT(zfsvfs);
3459
3460 return (error);
3461 }
3462
3463 int
zfs_rename(znode_t * sdzp,const char * sname,znode_t * tdzp,const char * tname,cred_t * cr,int flags)3464 zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
3465 cred_t *cr, int flags)
3466 {
3467 struct componentname scn, tcn;
3468 vnode_t *sdvp, *tdvp;
3469 vnode_t *svp, *tvp;
3470 int error;
3471 svp = tvp = NULL;
3472
3473 sdvp = ZTOV(sdzp);
3474 tdvp = ZTOV(tdzp);
3475 error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
3476 if (sdzp->z_zfsvfs->z_replay == B_FALSE)
3477 VOP_UNLOCK1(sdvp);
3478 if (error != 0)
3479 goto fail;
3480 VOP_UNLOCK1(svp);
3481
3482 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
3483 error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
3484 if (error == EJUSTRETURN)
3485 tvp = NULL;
3486 else if (error != 0) {
3487 VOP_UNLOCK1(tdvp);
3488 goto fail;
3489 }
3490
3491 error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
3492 fail:
3493 if (svp != NULL)
3494 vrele(svp);
3495 if (tvp != NULL)
3496 vrele(tvp);
3497
3498 return (error);
3499 }
3500
3501 /*
3502 * Insert the indicated symbolic reference entry into the directory.
3503 *
3504 * IN: dvp - Directory to contain new symbolic link.
3505 * link - Name for new symlink entry.
3506 * vap - Attributes of new entry.
3507 * cr - credentials of caller.
3508 * ct - caller context
3509 * flags - case flags
3510 *
3511 * RETURN: 0 on success, error code on failure.
3512 *
3513 * Timestamps:
3514 * dvp - ctime|mtime updated
3515 */
3516 /*ARGSUSED*/
3517 int
zfs_symlink(znode_t * dzp,const char * name,vattr_t * vap,const char * link,znode_t ** zpp,cred_t * cr,int flags)3518 zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
3519 const char *link, znode_t **zpp, cred_t *cr, int flags)
3520 {
3521 znode_t *zp;
3522 dmu_tx_t *tx;
3523 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
3524 zilog_t *zilog;
3525 uint64_t len = strlen(link);
3526 int error;
3527 zfs_acl_ids_t acl_ids;
3528 boolean_t fuid_dirtied;
3529 uint64_t txtype = TX_SYMLINK;
3530
3531 ASSERT3S(vap->va_type, ==, VLNK);
3532
3533 ZFS_ENTER(zfsvfs);
3534 ZFS_VERIFY_ZP(dzp);
3535 zilog = zfsvfs->z_log;
3536
3537 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
3538 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3539 ZFS_EXIT(zfsvfs);
3540 return (SET_ERROR(EILSEQ));
3541 }
3542
3543 if (len > MAXPATHLEN) {
3544 ZFS_EXIT(zfsvfs);
3545 return (SET_ERROR(ENAMETOOLONG));
3546 }
3547
3548 if ((error = zfs_acl_ids_create(dzp, 0,
3549 vap, cr, NULL, &acl_ids)) != 0) {
3550 ZFS_EXIT(zfsvfs);
3551 return (error);
3552 }
3553
3554 /*
3555 * Attempt to lock directory; fail if entry already exists.
3556 */
3557 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
3558 if (error) {
3559 zfs_acl_ids_free(&acl_ids);
3560 ZFS_EXIT(zfsvfs);
3561 return (error);
3562 }
3563
3564 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
3565 zfs_acl_ids_free(&acl_ids);
3566 ZFS_EXIT(zfsvfs);
3567 return (error);
3568 }
3569
3570 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
3571 0 /* projid */)) {
3572 zfs_acl_ids_free(&acl_ids);
3573 ZFS_EXIT(zfsvfs);
3574 return (SET_ERROR(EDQUOT));
3575 }
3576
3577 getnewvnode_reserve_();
3578 tx = dmu_tx_create(zfsvfs->z_os);
3579 fuid_dirtied = zfsvfs->z_fuid_dirty;
3580 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
3581 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
3582 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
3583 ZFS_SA_BASE_ATTR_SIZE + len);
3584 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
3585 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3586 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
3587 acl_ids.z_aclp->z_acl_bytes);
3588 }
3589 if (fuid_dirtied)
3590 zfs_fuid_txhold(zfsvfs, tx);
3591 error = dmu_tx_assign(tx, TXG_WAIT);
3592 if (error) {
3593 zfs_acl_ids_free(&acl_ids);
3594 dmu_tx_abort(tx);
3595 getnewvnode_drop_reserve();
3596 ZFS_EXIT(zfsvfs);
3597 return (error);
3598 }
3599
3600 /*
3601 * Create a new object for the symlink.
3602 * for version 4 ZPL datasets the symlink will be an SA attribute
3603 */
3604 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
3605
3606 if (fuid_dirtied)
3607 zfs_fuid_sync(zfsvfs, tx);
3608
3609 if (zp->z_is_sa)
3610 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
3611 __DECONST(void *, link), len, tx);
3612 else
3613 zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);
3614
3615 zp->z_size = len;
3616 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
3617 &zp->z_size, sizeof (zp->z_size), tx);
3618 /*
3619 * Insert the new object into the directory.
3620 */
3621 (void) zfs_link_create(dzp, name, zp, tx, ZNEW);
3622
3623 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
3624 *zpp = zp;
3625
3626 zfs_acl_ids_free(&acl_ids);
3627
3628 dmu_tx_commit(tx);
3629
3630 getnewvnode_drop_reserve();
3631
3632 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3633 zil_commit(zilog, 0);
3634
3635 ZFS_EXIT(zfsvfs);
3636 return (error);
3637 }
3638
3639 /*
3640 * Return, in the buffer contained in the provided uio structure,
3641 * the symbolic path referred to by vp.
3642 *
3643 * IN: vp - vnode of symbolic link.
3644 * uio - structure to contain the link path.
3645 * cr - credentials of caller.
3646 * ct - caller context
3647 *
3648 * OUT: uio - structure containing the link path.
3649 *
3650 * RETURN: 0 on success, error code on failure.
3651 *
3652 * Timestamps:
3653 * vp - atime updated
3654 */
3655 /* ARGSUSED */
3656 static int
zfs_readlink(vnode_t * vp,zfs_uio_t * uio,cred_t * cr,caller_context_t * ct)3657 zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
3658 {
3659 znode_t *zp = VTOZ(vp);
3660 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3661 int error;
3662
3663 ZFS_ENTER(zfsvfs);
3664 ZFS_VERIFY_ZP(zp);
3665
3666 if (zp->z_is_sa)
3667 error = sa_lookup_uio(zp->z_sa_hdl,
3668 SA_ZPL_SYMLINK(zfsvfs), uio);
3669 else
3670 error = zfs_sa_readlink(zp, uio);
3671
3672 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
3673
3674 ZFS_EXIT(zfsvfs);
3675 return (error);
3676 }
3677
3678 /*
3679 * Insert a new entry into directory tdvp referencing svp.
3680 *
3681 * IN: tdvp - Directory to contain new entry.
3682 * svp - vnode of new entry.
3683 * name - name of new entry.
3684 * cr - credentials of caller.
3685 *
3686 * RETURN: 0 on success, error code on failure.
3687 *
3688 * Timestamps:
3689 * tdvp - ctime|mtime updated
3690 * svp - ctime updated
3691 */
3692 /* ARGSUSED */
3693 int
zfs_link(znode_t * tdzp,znode_t * szp,const char * name,cred_t * cr,int flags)3694 zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
3695 int flags)
3696 {
3697 znode_t *tzp;
3698 zfsvfs_t *zfsvfs = tdzp->z_zfsvfs;
3699 zilog_t *zilog;
3700 dmu_tx_t *tx;
3701 int error;
3702 uint64_t parent;
3703 uid_t owner;
3704
3705 ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);
3706
3707 ZFS_ENTER(zfsvfs);
3708 ZFS_VERIFY_ZP(tdzp);
3709 zilog = zfsvfs->z_log;
3710
3711 /*
3712 * POSIX dictates that we return EPERM here.
3713 * Better choices include ENOTSUP or EISDIR.
3714 */
3715 if (ZTOV(szp)->v_type == VDIR) {
3716 ZFS_EXIT(zfsvfs);
3717 return (SET_ERROR(EPERM));
3718 }
3719
3720 ZFS_VERIFY_ZP(szp);
3721
3722 /*
3723 * If we are using project inheritance, means if the directory has
3724 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3725 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
3726 * such case, we only allow hard link creation in our tree when the
3727 * project IDs are the same.
3728 */
3729 if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3730 tdzp->z_projid != szp->z_projid) {
3731 ZFS_EXIT(zfsvfs);
3732 return (SET_ERROR(EXDEV));
3733 }
3734
3735 if (szp->z_pflags & (ZFS_APPENDONLY |
3736 ZFS_IMMUTABLE | ZFS_READONLY)) {
3737 ZFS_EXIT(zfsvfs);
3738 return (SET_ERROR(EPERM));
3739 }
3740
3741 /* Prevent links to .zfs/shares files */
3742
3743 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
3744 &parent, sizeof (uint64_t))) != 0) {
3745 ZFS_EXIT(zfsvfs);
3746 return (error);
3747 }
3748 if (parent == zfsvfs->z_shares_dir) {
3749 ZFS_EXIT(zfsvfs);
3750 return (SET_ERROR(EPERM));
3751 }
3752
3753 if (zfsvfs->z_utf8 && u8_validate(name,
3754 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3755 ZFS_EXIT(zfsvfs);
3756 return (SET_ERROR(EILSEQ));
3757 }
3758
3759 /*
3760 * We do not support links between attributes and non-attributes
3761 * because of the potential security risk of creating links
3762 * into "normal" file space in order to circumvent restrictions
3763 * imposed in attribute space.
3764 */
3765 if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
3766 ZFS_EXIT(zfsvfs);
3767 return (SET_ERROR(EINVAL));
3768 }
3769
3770
3771 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
3772 if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
3773 ZFS_EXIT(zfsvfs);
3774 return (SET_ERROR(EPERM));
3775 }
3776
3777 if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
3778 ZFS_EXIT(zfsvfs);
3779 return (error);
3780 }
3781
3782 /*
3783 * Attempt to lock directory; fail if entry already exists.
3784 */
3785 error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
3786 if (error) {
3787 ZFS_EXIT(zfsvfs);
3788 return (error);
3789 }
3790
3791 tx = dmu_tx_create(zfsvfs->z_os);
3792 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3793 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
3794 zfs_sa_upgrade_txholds(tx, szp);
3795 zfs_sa_upgrade_txholds(tx, tdzp);
3796 error = dmu_tx_assign(tx, TXG_WAIT);
3797 if (error) {
3798 dmu_tx_abort(tx);
3799 ZFS_EXIT(zfsvfs);
3800 return (error);
3801 }
3802
3803 error = zfs_link_create(tdzp, name, szp, tx, 0);
3804
3805 if (error == 0) {
3806 uint64_t txtype = TX_LINK;
3807 zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
3808 }
3809
3810 dmu_tx_commit(tx);
3811
3812 if (error == 0) {
3813 vnevent_link(ZTOV(szp), ct);
3814 }
3815
3816 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3817 zil_commit(zilog, 0);
3818
3819 ZFS_EXIT(zfsvfs);
3820 return (error);
3821 }
3822
3823 /*
3824 * Free or allocate space in a file. Currently, this function only
3825 * supports the `F_FREESP' command. However, this command is somewhat
3826 * misnamed, as its functionality includes the ability to allocate as
3827 * well as free space.
3828 *
3829 * IN: ip - inode of file to free data in.
3830 * cmd - action to take (only F_FREESP supported).
3831 * bfp - section of file to free/alloc.
3832 * flag - current file open mode flags.
3833 * offset - current file offset.
3834 * cr - credentials of caller.
3835 *
3836 * RETURN: 0 on success, error code on failure.
3837 *
3838 * Timestamps:
3839 * ip - ctime|mtime updated
3840 */
3841 /* ARGSUSED */
3842 int
zfs_space(znode_t * zp,int cmd,flock64_t * bfp,int flag,offset_t offset,cred_t * cr)3843 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
3844 offset_t offset, cred_t *cr)
3845 {
3846 zfsvfs_t *zfsvfs = ZTOZSB(zp);
3847 uint64_t off, len;
3848 int error;
3849
3850 ZFS_ENTER(zfsvfs);
3851 ZFS_VERIFY_ZP(zp);
3852
3853 if (cmd != F_FREESP) {
3854 ZFS_EXIT(zfsvfs);
3855 return (SET_ERROR(EINVAL));
3856 }
3857
3858 /*
3859 * Callers might not be able to detect properly that we are read-only,
3860 * so check it explicitly here.
3861 */
3862 if (zfs_is_readonly(zfsvfs)) {
3863 ZFS_EXIT(zfsvfs);
3864 return (SET_ERROR(EROFS));
3865 }
3866
3867 if (bfp->l_len < 0) {
3868 ZFS_EXIT(zfsvfs);
3869 return (SET_ERROR(EINVAL));
3870 }
3871
3872 /*
3873 * Permissions aren't checked on Solaris because on this OS
3874 * zfs_space() can only be called with an opened file handle.
3875 * On Linux we can get here through truncate_range() which
3876 * operates directly on inodes, so we need to check access rights.
3877 */
3878 if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
3879 ZFS_EXIT(zfsvfs);
3880 return (error);
3881 }
3882
3883 off = bfp->l_start;
3884 len = bfp->l_len; /* 0 means from off to end of file */
3885
3886 error = zfs_freesp(zp, off, len, flag, TRUE);
3887
3888 ZFS_EXIT(zfsvfs);
3889 return (error);
3890 }
3891
3892 /*ARGSUSED*/
3893 static void
zfs_inactive(vnode_t * vp,cred_t * cr,caller_context_t * ct)3894 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
3895 {
3896 znode_t *zp = VTOZ(vp);
3897 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3898 int error;
3899
3900 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
3901 if (zp->z_sa_hdl == NULL) {
3902 /*
3903 * The fs has been unmounted, or we did a
3904 * suspend/resume and this file no longer exists.
3905 */
3906 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3907 vrecycle(vp);
3908 return;
3909 }
3910
3911 if (zp->z_unlinked) {
3912 /*
3913 * Fast path to recycle a vnode of a removed file.
3914 */
3915 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3916 vrecycle(vp);
3917 return;
3918 }
3919
3920 if (zp->z_atime_dirty && zp->z_unlinked == 0) {
3921 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
3922
3923 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
3924 zfs_sa_upgrade_txholds(tx, zp);
3925 error = dmu_tx_assign(tx, TXG_WAIT);
3926 if (error) {
3927 dmu_tx_abort(tx);
3928 } else {
3929 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
3930 (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
3931 zp->z_atime_dirty = 0;
3932 dmu_tx_commit(tx);
3933 }
3934 }
3935 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3936 }
3937
3938
3939 CTASSERT(sizeof (struct zfid_short) <= sizeof (struct fid));
3940 CTASSERT(sizeof (struct zfid_long) <= sizeof (struct fid));
3941
3942 /*ARGSUSED*/
3943 static int
zfs_fid(vnode_t * vp,fid_t * fidp,caller_context_t * ct)3944 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
3945 {
3946 znode_t *zp = VTOZ(vp);
3947 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3948 uint32_t gen;
3949 uint64_t gen64;
3950 uint64_t object = zp->z_id;
3951 zfid_short_t *zfid;
3952 int size, i, error;
3953
3954 ZFS_ENTER(zfsvfs);
3955 ZFS_VERIFY_ZP(zp);
3956
3957 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
3958 &gen64, sizeof (uint64_t))) != 0) {
3959 ZFS_EXIT(zfsvfs);
3960 return (error);
3961 }
3962
3963 gen = (uint32_t)gen64;
3964
3965 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
3966 fidp->fid_len = size;
3967
3968 zfid = (zfid_short_t *)fidp;
3969
3970 zfid->zf_len = size;
3971
3972 for (i = 0; i < sizeof (zfid->zf_object); i++)
3973 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
3974
3975 /* Must have a non-zero generation number to distinguish from .zfs */
3976 if (gen == 0)
3977 gen = 1;
3978 for (i = 0; i < sizeof (zfid->zf_gen); i++)
3979 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
3980
3981 if (size == LONG_FID_LEN) {
3982 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
3983 zfid_long_t *zlfid;
3984
3985 zlfid = (zfid_long_t *)fidp;
3986
3987 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
3988 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
3989
3990 /* XXX - this should be the generation number for the objset */
3991 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
3992 zlfid->zf_setgen[i] = 0;
3993 }
3994
3995 ZFS_EXIT(zfsvfs);
3996 return (0);
3997 }
3998
3999 static int
zfs_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)4000 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
4001 caller_context_t *ct)
4002 {
4003 znode_t *zp;
4004 zfsvfs_t *zfsvfs;
4005
4006 switch (cmd) {
4007 case _PC_LINK_MAX:
4008 *valp = MIN(LONG_MAX, ZFS_LINK_MAX);
4009 return (0);
4010
4011 case _PC_FILESIZEBITS:
4012 *valp = 64;
4013 return (0);
4014 case _PC_MIN_HOLE_SIZE:
4015 *valp = (int)SPA_MINBLOCKSIZE;
4016 return (0);
4017 case _PC_ACL_EXTENDED:
4018 #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
4019 zp = VTOZ(vp);
4020 zfsvfs = zp->z_zfsvfs;
4021 ZFS_ENTER(zfsvfs);
4022 ZFS_VERIFY_ZP(zp);
4023 *valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0;
4024 ZFS_EXIT(zfsvfs);
4025 #else
4026 *valp = 0;
4027 #endif
4028 return (0);
4029
4030 case _PC_ACL_NFS4:
4031 zp = VTOZ(vp);
4032 zfsvfs = zp->z_zfsvfs;
4033 ZFS_ENTER(zfsvfs);
4034 ZFS_VERIFY_ZP(zp);
4035 *valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 1 : 0;
4036 ZFS_EXIT(zfsvfs);
4037 return (0);
4038
4039 case _PC_ACL_PATH_MAX:
4040 *valp = ACL_MAX_ENTRIES;
4041 return (0);
4042
4043 default:
4044 return (EOPNOTSUPP);
4045 }
4046 }
4047
4048 static int
zfs_getpages(struct vnode * vp,vm_page_t * ma,int count,int * rbehind,int * rahead)4049 zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
4050 int *rahead)
4051 {
4052 znode_t *zp = VTOZ(vp);
4053 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4054 zfs_locked_range_t *lr;
4055 vm_object_t object;
4056 off_t start, end, obj_size;
4057 uint_t blksz;
4058 int pgsin_b, pgsin_a;
4059 int error;
4060
4061 ZFS_ENTER(zfsvfs);
4062 ZFS_VERIFY_ZP(zp);
4063
4064 start = IDX_TO_OFF(ma[0]->pindex);
4065 end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
4066
4067 /*
4068 * Lock a range covering all required and optional pages.
4069 * Note that we need to handle the case of the block size growing.
4070 */
4071 for (;;) {
4072 blksz = zp->z_blksz;
4073 lr = zfs_rangelock_tryenter(&zp->z_rangelock,
4074 rounddown(start, blksz),
4075 roundup(end, blksz) - rounddown(start, blksz), RL_READER);
4076 if (lr == NULL) {
4077 if (rahead != NULL) {
4078 *rahead = 0;
4079 rahead = NULL;
4080 }
4081 if (rbehind != NULL) {
4082 *rbehind = 0;
4083 rbehind = NULL;
4084 }
4085 break;
4086 }
4087 if (blksz == zp->z_blksz)
4088 break;
4089 zfs_rangelock_exit(lr);
4090 }
4091
4092 object = ma[0]->object;
4093 zfs_vmobject_wlock(object);
4094 obj_size = object->un_pager.vnp.vnp_size;
4095 zfs_vmobject_wunlock(object);
4096 if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
4097 if (lr != NULL)
4098 zfs_rangelock_exit(lr);
4099 ZFS_EXIT(zfsvfs);
4100 return (zfs_vm_pagerret_bad);
4101 }
4102
4103 pgsin_b = 0;
4104 if (rbehind != NULL) {
4105 pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
4106 pgsin_b = MIN(*rbehind, pgsin_b);
4107 }
4108
4109 pgsin_a = 0;
4110 if (rahead != NULL) {
4111 pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
4112 if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
4113 pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
4114 pgsin_a = MIN(*rahead, pgsin_a);
4115 }
4116
4117 /*
4118 * NB: we need to pass the exact byte size of the data that we expect
4119 * to read after accounting for the file size. This is required because
4120 * ZFS will panic if we request DMU to read beyond the end of the last
4121 * allocated block.
4122 */
4123 error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b,
4124 &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE));
4125
4126 if (lr != NULL)
4127 zfs_rangelock_exit(lr);
4128 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
4129
4130 dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);
4131
4132 ZFS_EXIT(zfsvfs);
4133
4134 if (error != 0)
4135 return (zfs_vm_pagerret_error);
4136
4137 VM_CNT_INC(v_vnodein);
4138 VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
4139 if (rbehind != NULL)
4140 *rbehind = pgsin_b;
4141 if (rahead != NULL)
4142 *rahead = pgsin_a;
4143 return (zfs_vm_pagerret_ok);
4144 }
4145
4146 #ifndef _SYS_SYSPROTO_H_
4147 struct vop_getpages_args {
4148 struct vnode *a_vp;
4149 vm_page_t *a_m;
4150 int a_count;
4151 int *a_rbehind;
4152 int *a_rahead;
4153 };
4154 #endif
4155
4156 static int
zfs_freebsd_getpages(struct vop_getpages_args * ap)4157 zfs_freebsd_getpages(struct vop_getpages_args *ap)
4158 {
4159
4160 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
4161 ap->a_rahead));
4162 }
4163
4164 static int
zfs_putpages(struct vnode * vp,vm_page_t * ma,size_t len,int flags,int * rtvals)4165 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
4166 int *rtvals)
4167 {
4168 znode_t *zp = VTOZ(vp);
4169 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4170 zfs_locked_range_t *lr;
4171 dmu_tx_t *tx;
4172 struct sf_buf *sf;
4173 vm_object_t object;
4174 vm_page_t m;
4175 caddr_t va;
4176 size_t tocopy;
4177 size_t lo_len;
4178 vm_ooffset_t lo_off;
4179 vm_ooffset_t off;
4180 uint_t blksz;
4181 int ncount;
4182 int pcount;
4183 int err;
4184 int i;
4185
4186 ZFS_ENTER(zfsvfs);
4187 ZFS_VERIFY_ZP(zp);
4188
4189 object = vp->v_object;
4190 pcount = btoc(len);
4191 ncount = pcount;
4192
4193 KASSERT(ma[0]->object == object, ("mismatching object"));
4194 KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
4195
4196 for (i = 0; i < pcount; i++)
4197 rtvals[i] = zfs_vm_pagerret_error;
4198
4199 off = IDX_TO_OFF(ma[0]->pindex);
4200 blksz = zp->z_blksz;
4201 lo_off = rounddown(off, blksz);
4202 lo_len = roundup(len + (off - lo_off), blksz);
4203 lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);
4204
4205 zfs_vmobject_wlock(object);
4206 if (len + off > object->un_pager.vnp.vnp_size) {
4207 if (object->un_pager.vnp.vnp_size > off) {
4208 int pgoff;
4209
4210 len = object->un_pager.vnp.vnp_size - off;
4211 ncount = btoc(len);
4212 if ((pgoff = (int)len & PAGE_MASK) != 0) {
4213 /*
4214 * If the object is locked and the following
4215 * conditions hold, then the page's dirty
4216 * field cannot be concurrently changed by a
4217 * pmap operation.
4218 */
4219 m = ma[ncount - 1];
4220 vm_page_assert_sbusied(m);
4221 KASSERT(!pmap_page_is_write_mapped(m),
4222 ("zfs_putpages: page %p is not read-only",
4223 m));
4224 vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
4225 pgoff);
4226 }
4227 } else {
4228 len = 0;
4229 ncount = 0;
4230 }
4231 if (ncount < pcount) {
4232 for (i = ncount; i < pcount; i++) {
4233 rtvals[i] = zfs_vm_pagerret_bad;
4234 }
4235 }
4236 }
4237 zfs_vmobject_wunlock(object);
4238
4239 if (ncount == 0)
4240 goto out;
4241
4242 if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
4243 zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
4244 (zp->z_projid != ZFS_DEFAULT_PROJID &&
4245 zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
4246 zp->z_projid))) {
4247 goto out;
4248 }
4249
4250 tx = dmu_tx_create(zfsvfs->z_os);
4251 dmu_tx_hold_write(tx, zp->z_id, off, len);
4252
4253 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4254 zfs_sa_upgrade_txholds(tx, zp);
4255 err = dmu_tx_assign(tx, TXG_WAIT);
4256 if (err != 0) {
4257 dmu_tx_abort(tx);
4258 goto out;
4259 }
4260
4261 if (zp->z_blksz < PAGE_SIZE) {
4262 for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4263 tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4264 va = zfs_map_page(ma[i], &sf);
4265 dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4266 zfs_unmap_page(sf);
4267 }
4268 } else {
4269 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
4270 }
4271
4272 if (err == 0) {
4273 uint64_t mtime[2], ctime[2];
4274 sa_bulk_attr_t bulk[3];
4275 int count = 0;
4276
4277 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4278 &mtime, 16);
4279 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4280 &ctime, 16);
4281 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4282 &zp->z_pflags, 8);
4283 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
4284 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
4285 ASSERT0(err);
4286 /*
4287 * XXX we should be passing a callback to undirty
4288 * but that would make the locking messier
4289 */
4290 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
4291 len, 0, NULL, NULL);
4292
4293 zfs_vmobject_wlock(object);
4294 for (i = 0; i < ncount; i++) {
4295 rtvals[i] = zfs_vm_pagerret_ok;
4296 vm_page_undirty(ma[i]);
4297 }
4298 zfs_vmobject_wunlock(object);
4299 VM_CNT_INC(v_vnodeout);
4300 VM_CNT_ADD(v_vnodepgsout, ncount);
4301 }
4302 dmu_tx_commit(tx);
4303
4304 out:
4305 zfs_rangelock_exit(lr);
4306 if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
4307 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4308 zil_commit(zfsvfs->z_log, zp->z_id);
4309
4310 dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);
4311
4312 ZFS_EXIT(zfsvfs);
4313 return (rtvals[0]);
4314 }
4315
4316 #ifndef _SYS_SYSPROTO_H_
4317 struct vop_putpages_args {
4318 struct vnode *a_vp;
4319 vm_page_t *a_m;
4320 int a_count;
4321 int a_sync;
4322 int *a_rtvals;
4323 };
4324 #endif
4325
4326 static int
zfs_freebsd_putpages(struct vop_putpages_args * ap)4327 zfs_freebsd_putpages(struct vop_putpages_args *ap)
4328 {
4329
4330 return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
4331 ap->a_rtvals));
4332 }
4333
4334 #ifndef _SYS_SYSPROTO_H_
4335 struct vop_bmap_args {
4336 struct vnode *a_vp;
4337 daddr_t a_bn;
4338 struct bufobj **a_bop;
4339 daddr_t *a_bnp;
4340 int *a_runp;
4341 int *a_runb;
4342 };
4343 #endif
4344
4345 static int
zfs_freebsd_bmap(struct vop_bmap_args * ap)4346 zfs_freebsd_bmap(struct vop_bmap_args *ap)
4347 {
4348
4349 if (ap->a_bop != NULL)
4350 *ap->a_bop = &ap->a_vp->v_bufobj;
4351 if (ap->a_bnp != NULL)
4352 *ap->a_bnp = ap->a_bn;
4353 if (ap->a_runp != NULL)
4354 *ap->a_runp = 0;
4355 if (ap->a_runb != NULL)
4356 *ap->a_runb = 0;
4357
4358 return (0);
4359 }
4360
4361 #ifndef _SYS_SYSPROTO_H_
4362 struct vop_open_args {
4363 struct vnode *a_vp;
4364 int a_mode;
4365 struct ucred *a_cred;
4366 struct thread *a_td;
4367 };
4368 #endif
4369
4370 static int
zfs_freebsd_open(struct vop_open_args * ap)4371 zfs_freebsd_open(struct vop_open_args *ap)
4372 {
4373 vnode_t *vp = ap->a_vp;
4374 znode_t *zp = VTOZ(vp);
4375 int error;
4376
4377 error = zfs_open(&vp, ap->a_mode, ap->a_cred);
4378 if (error == 0)
4379 vnode_create_vobject(vp, zp->z_size, ap->a_td);
4380 return (error);
4381 }
4382
#ifndef _SYS_SYSPROTO_H_
/* Argument layout consumed by zfs_freebsd_close(). */
struct vop_close_args {
	struct vnode *a_vp;
	int a_fflag;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/*
 * VOP close entry point: forward to zfs_close() with fixed values of 1
 * and 0 for its third and fourth arguments.
 */
static int
zfs_freebsd_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int fflag = ap->a_fflag;

	return (zfs_close(vp, fflag, 1, 0, ap->a_cred));
}
4398
4399 #ifndef _SYS_SYSPROTO_H_
4400 struct vop_ioctl_args {
4401 struct vnode *a_vp;
4402 ulong_t a_command;
4403 caddr_t a_data;
4404 int a_fflag;
4405 struct ucred *cred;
4406 struct thread *td;
4407 };
4408 #endif
4409
4410 static int
zfs_freebsd_ioctl(struct vop_ioctl_args * ap)4411 zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
4412 {
4413
4414 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
4415 ap->a_fflag, ap->a_cred, NULL));
4416 }
4417
4418 static int
ioflags(int ioflags)4419 ioflags(int ioflags)
4420 {
4421 int flags = 0;
4422
4423 if (ioflags & IO_APPEND)
4424 flags |= FAPPEND;
4425 if (ioflags & IO_NDELAY)
4426 flags |= FNONBLOCK;
4427 if (ioflags & IO_SYNC)
4428 flags |= (FSYNC | FDSYNC | FRSYNC);
4429
4430 return (flags);
4431 }
4432
4433 #ifndef _SYS_SYSPROTO_H_
4434 struct vop_read_args {
4435 struct vnode *a_vp;
4436 struct uio *a_uio;
4437 int a_ioflag;
4438 struct ucred *a_cred;
4439 };
4440 #endif
4441
4442 static int
zfs_freebsd_read(struct vop_read_args * ap)4443 zfs_freebsd_read(struct vop_read_args *ap)
4444 {
4445 zfs_uio_t uio;
4446 zfs_uio_init(&uio, ap->a_uio);
4447 return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4448 ap->a_cred));
4449 }
4450
4451 #ifndef _SYS_SYSPROTO_H_
4452 struct vop_write_args {
4453 struct vnode *a_vp;
4454 struct uio *a_uio;
4455 int a_ioflag;
4456 struct ucred *a_cred;
4457 };
4458 #endif
4459
4460 static int
zfs_freebsd_write(struct vop_write_args * ap)4461 zfs_freebsd_write(struct vop_write_args *ap)
4462 {
4463 zfs_uio_t uio;
4464 zfs_uio_init(&uio, ap->a_uio);
4465 return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4466 ap->a_cred));
4467 }
4468
#if __FreeBSD_version >= 1300102
/*
 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
 * the comment above cache_fplookup for details.
 *
 * Returns 0 only when the znode is present, is neither AV-quarantined
 * nor an extended attribute, and has ZFS_NO_EXECS_DENIED set.  Every
 * other case returns EAGAIN.
 */
static int
zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
{
	znode_t *zp = VTOZ_SMR(v->a_vp);
	uint64_t pflags;

	if (__predict_false(zp == NULL))
		return (EAGAIN);

	/* Take one snapshot of the flags and test it as a whole. */
	pflags = atomic_load_64(&zp->z_pflags);
	if ((pflags & (ZFS_AV_QUARANTINED | ZFS_XATTR)) != 0 ||
	    (pflags & ZFS_NO_EXECS_DENIED) == 0)
		return (EAGAIN);

	return (0);
}
#endif
4495
#if __FreeBSD_version >= 1300139
/*
 * Resolve a symlink from the target string cached on the znode
 * (z_cached_symlink).  Returns EAGAIN when there is no znode or no
 * cached target; otherwise returns whatever cache_symlink_resolve()
 * returns.
 */
static int
zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
{
	znode_t *zp = VTOZ_SMR(v->a_vp);
	char *cached;

	if (__predict_false(zp == NULL))
		return (EAGAIN);

	cached = atomic_load_consume_ptr(&zp->z_cached_symlink);
	if (cached == NULL)
		return (EAGAIN);

	return (cache_symlink_resolve(v->a_fpl, cached, strlen(cached)));
}
#endif
4517
4518 #ifndef _SYS_SYSPROTO_H_
4519 struct vop_access_args {
4520 struct vnode *a_vp;
4521 accmode_t a_accmode;
4522 struct ucred *a_cred;
4523 struct thread *a_td;
4524 };
4525 #endif
4526
4527 static int
zfs_freebsd_access(struct vop_access_args * ap)4528 zfs_freebsd_access(struct vop_access_args *ap)
4529 {
4530 vnode_t *vp = ap->a_vp;
4531 znode_t *zp = VTOZ(vp);
4532 accmode_t accmode;
4533 int error = 0;
4534
4535
4536 if (ap->a_accmode == VEXEC) {
4537 if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
4538 return (0);
4539 }
4540
4541 /*
4542 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
4543 */
4544 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
4545 if (accmode != 0)
4546 error = zfs_access(zp, accmode, 0, ap->a_cred);
4547
4548 /*
4549 * VADMIN has to be handled by vaccess().
4550 */
4551 if (error == 0) {
4552 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
4553 if (accmode != 0) {
4554 #if __FreeBSD_version >= 1300105
4555 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4556 zp->z_gid, accmode, ap->a_cred);
4557 #else
4558 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4559 zp->z_gid, accmode, ap->a_cred, NULL);
4560 #endif
4561 }
4562 }
4563
4564 /*
4565 * For VEXEC, ensure that at least one execute bit is set for
4566 * non-directories.
4567 */
4568 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
4569 (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
4570 error = EACCES;
4571 }
4572
4573 return (error);
4574 }
4575
4576 #ifndef _SYS_SYSPROTO_H_
4577 struct vop_lookup_args {
4578 struct vnode *a_dvp;
4579 struct vnode **a_vpp;
4580 struct componentname *a_cnp;
4581 };
4582 #endif
4583
4584 static int
zfs_freebsd_lookup(struct vop_lookup_args * ap,boolean_t cached)4585 zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
4586 {
4587 struct componentname *cnp = ap->a_cnp;
4588 char nm[NAME_MAX + 1];
4589
4590 ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
4591 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));
4592
4593 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
4594 cnp->cn_cred, 0, cached));
4595 }
4596
4597 static int
zfs_freebsd_cachedlookup(struct vop_cachedlookup_args * ap)4598 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
4599 {
4600
4601 return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
4602 }
4603
4604 #ifndef _SYS_SYSPROTO_H_
4605 struct vop_lookup_args {
4606 struct vnode *a_dvp;
4607 struct vnode **a_vpp;
4608 struct componentname *a_cnp;
4609 };
4610 #endif
4611
4612 static int
zfs_cache_lookup(struct vop_lookup_args * ap)4613 zfs_cache_lookup(struct vop_lookup_args *ap)
4614 {
4615 zfsvfs_t *zfsvfs;
4616
4617 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4618 if (zfsvfs->z_use_namecache)
4619 return (vfs_cache_lookup(ap));
4620 else
4621 return (zfs_freebsd_lookup(ap, B_FALSE));
4622 }
4623
4624 #ifndef _SYS_SYSPROTO_H_
4625 struct vop_create_args {
4626 struct vnode *a_dvp;
4627 struct vnode **a_vpp;
4628 struct componentname *a_cnp;
4629 struct vattr *a_vap;
4630 };
4631 #endif
4632
4633 static int
zfs_freebsd_create(struct vop_create_args * ap)4634 zfs_freebsd_create(struct vop_create_args *ap)
4635 {
4636 zfsvfs_t *zfsvfs;
4637 struct componentname *cnp = ap->a_cnp;
4638 vattr_t *vap = ap->a_vap;
4639 znode_t *zp = NULL;
4640 int rc, mode;
4641
4642 ASSERT(cnp->cn_flags & SAVENAME);
4643
4644 vattr_init_mask(vap);
4645 mode = vap->va_mode & ALLPERMS;
4646 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4647 *ap->a_vpp = NULL;
4648
4649 rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, !EXCL, mode,
4650 &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */);
4651 if (rc == 0)
4652 *ap->a_vpp = ZTOV(zp);
4653 if (zfsvfs->z_use_namecache &&
4654 rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
4655 cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
4656
4657 return (rc);
4658 }
4659
4660 #ifndef _SYS_SYSPROTO_H_
4661 struct vop_remove_args {
4662 struct vnode *a_dvp;
4663 struct vnode *a_vp;
4664 struct componentname *a_cnp;
4665 };
4666 #endif
4667
4668 static int
zfs_freebsd_remove(struct vop_remove_args * ap)4669 zfs_freebsd_remove(struct vop_remove_args *ap)
4670 {
4671
4672 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4673
4674 return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
4675 ap->a_cnp->cn_cred));
4676 }
4677
4678 #ifndef _SYS_SYSPROTO_H_
4679 struct vop_mkdir_args {
4680 struct vnode *a_dvp;
4681 struct vnode **a_vpp;
4682 struct componentname *a_cnp;
4683 struct vattr *a_vap;
4684 };
4685 #endif
4686
4687 static int
zfs_freebsd_mkdir(struct vop_mkdir_args * ap)4688 zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
4689 {
4690 vattr_t *vap = ap->a_vap;
4691 znode_t *zp = NULL;
4692 int rc;
4693
4694 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4695
4696 vattr_init_mask(vap);
4697 *ap->a_vpp = NULL;
4698
4699 rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
4700 ap->a_cnp->cn_cred, 0, NULL);
4701
4702 if (rc == 0)
4703 *ap->a_vpp = ZTOV(zp);
4704 return (rc);
4705 }
4706
4707 #ifndef _SYS_SYSPROTO_H_
4708 struct vop_rmdir_args {
4709 struct vnode *a_dvp;
4710 struct vnode *a_vp;
4711 struct componentname *a_cnp;
4712 };
4713 #endif
4714
4715 static int
zfs_freebsd_rmdir(struct vop_rmdir_args * ap)4716 zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
4717 {
4718 struct componentname *cnp = ap->a_cnp;
4719
4720 ASSERT(cnp->cn_flags & SAVENAME);
4721
4722 return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
4723 }
4724
4725 #ifndef _SYS_SYSPROTO_H_
4726 struct vop_readdir_args {
4727 struct vnode *a_vp;
4728 struct uio *a_uio;
4729 struct ucred *a_cred;
4730 int *a_eofflag;
4731 int *a_ncookies;
4732 cookie_t **a_cookies;
4733 };
4734 #endif
4735
4736 static int
zfs_freebsd_readdir(struct vop_readdir_args * ap)4737 zfs_freebsd_readdir(struct vop_readdir_args *ap)
4738 {
4739 zfs_uio_t uio;
4740 zfs_uio_init(&uio, ap->a_uio);
4741 return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
4742 ap->a_ncookies, ap->a_cookies));
4743 }
4744
4745 #ifndef _SYS_SYSPROTO_H_
4746 struct vop_fsync_args {
4747 struct vnode *a_vp;
4748 int a_waitfor;
4749 struct thread *a_td;
4750 };
4751 #endif
4752
4753 static int
zfs_freebsd_fsync(struct vop_fsync_args * ap)4754 zfs_freebsd_fsync(struct vop_fsync_args *ap)
4755 {
4756
4757 vop_stdfsync(ap);
4758 return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
4759 }
4760
4761 #ifndef _SYS_SYSPROTO_H_
4762 struct vop_getattr_args {
4763 struct vnode *a_vp;
4764 struct vattr *a_vap;
4765 struct ucred *a_cred;
4766 };
4767 #endif
4768
4769 static int
zfs_freebsd_getattr(struct vop_getattr_args * ap)4770 zfs_freebsd_getattr(struct vop_getattr_args *ap)
4771 {
4772 vattr_t *vap = ap->a_vap;
4773 xvattr_t xvap;
4774 ulong_t fflags = 0;
4775 int error;
4776
4777 xva_init(&xvap);
4778 xvap.xva_vattr = *vap;
4779 xvap.xva_vattr.va_mask |= AT_XVATTR;
4780
4781 /* Convert chflags into ZFS-type flags. */
4782 /* XXX: what about SF_SETTABLE?. */
4783 XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
4784 XVA_SET_REQ(&xvap, XAT_APPENDONLY);
4785 XVA_SET_REQ(&xvap, XAT_NOUNLINK);
4786 XVA_SET_REQ(&xvap, XAT_NODUMP);
4787 XVA_SET_REQ(&xvap, XAT_READONLY);
4788 XVA_SET_REQ(&xvap, XAT_ARCHIVE);
4789 XVA_SET_REQ(&xvap, XAT_SYSTEM);
4790 XVA_SET_REQ(&xvap, XAT_HIDDEN);
4791 XVA_SET_REQ(&xvap, XAT_REPARSE);
4792 XVA_SET_REQ(&xvap, XAT_OFFLINE);
4793 XVA_SET_REQ(&xvap, XAT_SPARSE);
4794
4795 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
4796 if (error != 0)
4797 return (error);
4798
4799 /* Convert ZFS xattr into chflags. */
4800 #define FLAG_CHECK(fflag, xflag, xfield) do { \
4801 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \
4802 fflags |= (fflag); \
4803 } while (0)
4804 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
4805 xvap.xva_xoptattrs.xoa_immutable);
4806 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
4807 xvap.xva_xoptattrs.xoa_appendonly);
4808 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
4809 xvap.xva_xoptattrs.xoa_nounlink);
4810 FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
4811 xvap.xva_xoptattrs.xoa_archive);
4812 FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
4813 xvap.xva_xoptattrs.xoa_nodump);
4814 FLAG_CHECK(UF_READONLY, XAT_READONLY,
4815 xvap.xva_xoptattrs.xoa_readonly);
4816 FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
4817 xvap.xva_xoptattrs.xoa_system);
4818 FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
4819 xvap.xva_xoptattrs.xoa_hidden);
4820 FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
4821 xvap.xva_xoptattrs.xoa_reparse);
4822 FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
4823 xvap.xva_xoptattrs.xoa_offline);
4824 FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
4825 xvap.xva_xoptattrs.xoa_sparse);
4826
4827 #undef FLAG_CHECK
4828 *vap = xvap.xva_vattr;
4829 vap->va_flags = fflags;
4830 return (0);
4831 }
4832
4833 #ifndef _SYS_SYSPROTO_H_
4834 struct vop_setattr_args {
4835 struct vnode *a_vp;
4836 struct vattr *a_vap;
4837 struct ucred *a_cred;
4838 };
4839 #endif
4840
4841 static int
zfs_freebsd_setattr(struct vop_setattr_args * ap)4842 zfs_freebsd_setattr(struct vop_setattr_args *ap)
4843 {
4844 vnode_t *vp = ap->a_vp;
4845 vattr_t *vap = ap->a_vap;
4846 cred_t *cred = ap->a_cred;
4847 xvattr_t xvap;
4848 ulong_t fflags;
4849 uint64_t zflags;
4850
4851 vattr_init_mask(vap);
4852 vap->va_mask &= ~AT_NOSET;
4853
4854 xva_init(&xvap);
4855 xvap.xva_vattr = *vap;
4856
4857 zflags = VTOZ(vp)->z_pflags;
4858
4859 if (vap->va_flags != VNOVAL) {
4860 zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
4861 int error;
4862
4863 if (zfsvfs->z_use_fuids == B_FALSE)
4864 return (EOPNOTSUPP);
4865
4866 fflags = vap->va_flags;
4867 /*
4868 * XXX KDM
4869 * We need to figure out whether it makes sense to allow
4870 * UF_REPARSE through, since we don't really have other
4871 * facilities to handle reparse points and zfs_setattr()
4872 * doesn't currently allow setting that attribute anyway.
4873 */
4874 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
4875 UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
4876 UF_OFFLINE|UF_SPARSE)) != 0)
4877 return (EOPNOTSUPP);
4878 /*
4879 * Unprivileged processes are not permitted to unset system
4880 * flags, or modify flags if any system flags are set.
4881 * Privileged non-jail processes may not modify system flags
4882 * if securelevel > 0 and any existing system flags are set.
4883 * Privileged jail processes behave like privileged non-jail
4884 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
4885 * otherwise, they behave like unprivileged processes.
4886 */
4887 if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
4888 spl_priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
4889 if (zflags &
4890 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
4891 error = securelevel_gt(cred, 0);
4892 if (error != 0)
4893 return (error);
4894 }
4895 } else {
4896 /*
4897 * Callers may only modify the file flags on
4898 * objects they have VADMIN rights for.
4899 */
4900 if ((error = VOP_ACCESS(vp, VADMIN, cred,
4901 curthread)) != 0)
4902 return (error);
4903 if (zflags &
4904 (ZFS_IMMUTABLE | ZFS_APPENDONLY |
4905 ZFS_NOUNLINK)) {
4906 return (EPERM);
4907 }
4908 if (fflags &
4909 (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
4910 return (EPERM);
4911 }
4912 }
4913
4914 #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \
4915 if (((fflags & (fflag)) && !(zflags & (zflag))) || \
4916 ((zflags & (zflag)) && !(fflags & (fflag)))) { \
4917 XVA_SET_REQ(&xvap, (xflag)); \
4918 (xfield) = ((fflags & (fflag)) != 0); \
4919 } \
4920 } while (0)
4921 /* Convert chflags into ZFS-type flags. */
4922 /* XXX: what about SF_SETTABLE?. */
4923 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
4924 xvap.xva_xoptattrs.xoa_immutable);
4925 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
4926 xvap.xva_xoptattrs.xoa_appendonly);
4927 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
4928 xvap.xva_xoptattrs.xoa_nounlink);
4929 FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
4930 xvap.xva_xoptattrs.xoa_archive);
4931 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
4932 xvap.xva_xoptattrs.xoa_nodump);
4933 FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
4934 xvap.xva_xoptattrs.xoa_readonly);
4935 FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
4936 xvap.xva_xoptattrs.xoa_system);
4937 FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
4938 xvap.xva_xoptattrs.xoa_hidden);
4939 FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
4940 xvap.xva_xoptattrs.xoa_reparse);
4941 FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
4942 xvap.xva_xoptattrs.xoa_offline);
4943 FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
4944 xvap.xva_xoptattrs.xoa_sparse);
4945 #undef FLAG_CHANGE
4946 }
4947 if (vap->va_birthtime.tv_sec != VNOVAL) {
4948 xvap.xva_vattr.va_mask |= AT_XVATTR;
4949 XVA_SET_REQ(&xvap, XAT_CREATETIME);
4950 }
4951 return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred));
4952 }
4953
4954 #ifndef _SYS_SYSPROTO_H_
4955 struct vop_rename_args {
4956 struct vnode *a_fdvp;
4957 struct vnode *a_fvp;
4958 struct componentname *a_fcnp;
4959 struct vnode *a_tdvp;
4960 struct vnode *a_tvp;
4961 struct componentname *a_tcnp;
4962 };
4963 #endif
4964
4965 static int
zfs_freebsd_rename(struct vop_rename_args * ap)4966 zfs_freebsd_rename(struct vop_rename_args *ap)
4967 {
4968 vnode_t *fdvp = ap->a_fdvp;
4969 vnode_t *fvp = ap->a_fvp;
4970 vnode_t *tdvp = ap->a_tdvp;
4971 vnode_t *tvp = ap->a_tvp;
4972 int error;
4973
4974 ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
4975 ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
4976
4977 error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
4978 ap->a_tcnp, ap->a_fcnp->cn_cred);
4979
4980 vrele(fdvp);
4981 vrele(fvp);
4982 vrele(tdvp);
4983 if (tvp != NULL)
4984 vrele(tvp);
4985
4986 return (error);
4987 }
4988
4989 #ifndef _SYS_SYSPROTO_H_
4990 struct vop_symlink_args {
4991 struct vnode *a_dvp;
4992 struct vnode **a_vpp;
4993 struct componentname *a_cnp;
4994 struct vattr *a_vap;
4995 char *a_target;
4996 };
4997 #endif
4998
4999 static int
zfs_freebsd_symlink(struct vop_symlink_args * ap)5000 zfs_freebsd_symlink(struct vop_symlink_args *ap)
5001 {
5002 struct componentname *cnp = ap->a_cnp;
5003 vattr_t *vap = ap->a_vap;
5004 znode_t *zp = NULL;
5005 #if __FreeBSD_version >= 1300139
5006 char *symlink;
5007 size_t symlink_len;
5008 #endif
5009 int rc;
5010
5011 ASSERT(cnp->cn_flags & SAVENAME);
5012
5013 vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */
5014 vattr_init_mask(vap);
5015 *ap->a_vpp = NULL;
5016
5017 rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
5018 ap->a_target, &zp, cnp->cn_cred, 0 /* flags */);
5019 if (rc == 0) {
5020 *ap->a_vpp = ZTOV(zp);
5021 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
5022 #if __FreeBSD_version >= 1300139
5023 MPASS(zp->z_cached_symlink == NULL);
5024 symlink_len = strlen(ap->a_target);
5025 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
5026 if (symlink != NULL) {
5027 memcpy(symlink, ap->a_target, symlink_len);
5028 symlink[symlink_len] = '\0';
5029 atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
5030 (uintptr_t)symlink);
5031 }
5032 #endif
5033 }
5034 return (rc);
5035 }
5036
5037 #ifndef _SYS_SYSPROTO_H_
5038 struct vop_readlink_args {
5039 struct vnode *a_vp;
5040 struct uio *a_uio;
5041 struct ucred *a_cred;
5042 };
5043 #endif
5044
5045 static int
zfs_freebsd_readlink(struct vop_readlink_args * ap)5046 zfs_freebsd_readlink(struct vop_readlink_args *ap)
5047 {
5048 zfs_uio_t uio;
5049 int error;
5050 #if __FreeBSD_version >= 1300139
5051 znode_t *zp = VTOZ(ap->a_vp);
5052 char *symlink, *base;
5053 size_t symlink_len;
5054 bool trycache;
5055 #endif
5056
5057 zfs_uio_init(&uio, ap->a_uio);
5058 #if __FreeBSD_version >= 1300139
5059 trycache = false;
5060 if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
5061 zfs_uio_iovcnt(&uio) == 1) {
5062 base = zfs_uio_iovbase(&uio, 0);
5063 symlink_len = zfs_uio_iovlen(&uio, 0);
5064 trycache = true;
5065 }
5066 #endif
5067 error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
5068 #if __FreeBSD_version >= 1300139
5069 if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
5070 error != 0 || !trycache) {
5071 return (error);
5072 }
5073 symlink_len -= zfs_uio_resid(&uio);
5074 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
5075 if (symlink != NULL) {
5076 memcpy(symlink, base, symlink_len);
5077 symlink[symlink_len] = '\0';
5078 if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
5079 (uintptr_t)NULL, (uintptr_t)symlink)) {
5080 cache_symlink_free(symlink, symlink_len + 1);
5081 }
5082 }
5083 #endif
5084 return (error);
5085 }
5086
5087 #ifndef _SYS_SYSPROTO_H_
5088 struct vop_link_args {
5089 struct vnode *a_tdvp;
5090 struct vnode *a_vp;
5091 struct componentname *a_cnp;
5092 };
5093 #endif
5094
5095 static int
zfs_freebsd_link(struct vop_link_args * ap)5096 zfs_freebsd_link(struct vop_link_args *ap)
5097 {
5098 struct componentname *cnp = ap->a_cnp;
5099 vnode_t *vp = ap->a_vp;
5100 vnode_t *tdvp = ap->a_tdvp;
5101
5102 if (tdvp->v_mount != vp->v_mount)
5103 return (EXDEV);
5104
5105 ASSERT(cnp->cn_flags & SAVENAME);
5106
5107 return (zfs_link(VTOZ(tdvp), VTOZ(vp),
5108 cnp->cn_nameptr, cnp->cn_cred, 0));
5109 }
5110
5111 #ifndef _SYS_SYSPROTO_H_
5112 struct vop_inactive_args {
5113 struct vnode *a_vp;
5114 struct thread *a_td;
5115 };
5116 #endif
5117
5118 static int
zfs_freebsd_inactive(struct vop_inactive_args * ap)5119 zfs_freebsd_inactive(struct vop_inactive_args *ap)
5120 {
5121 vnode_t *vp = ap->a_vp;
5122
5123 #if __FreeBSD_version >= 1300123
5124 zfs_inactive(vp, curthread->td_ucred, NULL);
5125 #else
5126 zfs_inactive(vp, ap->a_td->td_ucred, NULL);
5127 #endif
5128 return (0);
5129 }
5130
#if __FreeBSD_version >= 1300042
#ifndef _SYS_SYSPROTO_H_
struct vop_need_inactive_args {
	struct vnode *a_vp;
	struct thread *a_td;
};
#endif

/*
 * VOP_NEED_INACTIVE: return non-zero when the vnode needs inactive
 * processing.  That is the case when vn_need_pageq_flush() says so, when
 * the teardown-inactive lock cannot be taken for reading without
 * blocking, or when the znode has no SA handle, is unlinked, or has a
 * dirty atime.
 */
static int
zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
{
	vnode_t *vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int need = 1;

	if (vn_need_pageq_flush(vp))
		return (need);

	/* If the lock is unavailable, keep the "needed" default. */
	if (ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs)) {
		need = (zp->z_sa_hdl == NULL || zp->z_unlinked ||
		    zp->z_atime_dirty);
		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
	}

	return (need);
}
#endif
5158
5159 #ifndef _SYS_SYSPROTO_H_
5160 struct vop_reclaim_args {
5161 struct vnode *a_vp;
5162 struct thread *a_td;
5163 };
5164 #endif
5165
5166 static int
zfs_freebsd_reclaim(struct vop_reclaim_args * ap)5167 zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
5168 {
5169 vnode_t *vp = ap->a_vp;
5170 znode_t *zp = VTOZ(vp);
5171 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5172
5173 ASSERT3P(zp, !=, NULL);
5174
5175 #if __FreeBSD_version < 1300042
5176 /* Destroy the vm object and flush associated pages. */
5177 vnode_destroy_vobject(vp);
5178 #endif
5179 /*
5180 * z_teardown_inactive_lock protects from a race with
5181 * zfs_znode_dmu_fini in zfsvfs_teardown during
5182 * force unmount.
5183 */
5184 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
5185 if (zp->z_sa_hdl == NULL)
5186 zfs_znode_free(zp);
5187 else
5188 zfs_zinactive(zp);
5189 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
5190
5191 vp->v_data = NULL;
5192 return (0);
5193 }
5194
5195 #ifndef _SYS_SYSPROTO_H_
5196 struct vop_fid_args {
5197 struct vnode *a_vp;
5198 struct fid *a_fid;
5199 };
5200 #endif
5201
5202 static int
zfs_freebsd_fid(struct vop_fid_args * ap)5203 zfs_freebsd_fid(struct vop_fid_args *ap)
5204 {
5205
5206 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
5207 }
5208
5209
5210 #ifndef _SYS_SYSPROTO_H_
5211 struct vop_pathconf_args {
5212 struct vnode *a_vp;
5213 int a_name;
5214 register_t *a_retval;
5215 } *ap;
5216 #endif
5217
5218 static int
zfs_freebsd_pathconf(struct vop_pathconf_args * ap)5219 zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
5220 {
5221 ulong_t val;
5222 int error;
5223
5224 error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
5225 curthread->td_ucred, NULL);
5226 if (error == 0) {
5227 *ap->a_retval = val;
5228 return (error);
5229 }
5230 if (error != EOPNOTSUPP)
5231 return (error);
5232
5233 switch (ap->a_name) {
5234 case _PC_NAME_MAX:
5235 *ap->a_retval = NAME_MAX;
5236 return (0);
5237 case _PC_PIPE_BUF:
5238 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
5239 *ap->a_retval = PIPE_BUF;
5240 return (0);
5241 }
5242 return (EINVAL);
5243 default:
5244 return (vop_stdpathconf(ap));
5245 }
5246 }
5247
5248 /*
5249 * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
5250 * extended attribute name:
5251 *
5252 * NAMESPACE PREFIX
5253 * system freebsd:system:
5254 * user (none, can be used to access ZFS fsattr(5) attributes
5255 * created on Solaris)
5256 */
5257 static int
zfs_create_attrname(int attrnamespace,const char * name,char * attrname,size_t size)5258 zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
5259 size_t size)
5260 {
5261 const char *namespace, *prefix, *suffix;
5262
5263 /* We don't allow '/' character in attribute name. */
5264 if (strchr(name, '/') != NULL)
5265 return (SET_ERROR(EINVAL));
5266 /* We don't allow attribute names that start with "freebsd:" string. */
5267 if (strncmp(name, "freebsd:", 8) == 0)
5268 return (SET_ERROR(EINVAL));
5269
5270 bzero(attrname, size);
5271
5272 switch (attrnamespace) {
5273 case EXTATTR_NAMESPACE_USER:
5274 #if 0
5275 prefix = "freebsd:";
5276 namespace = EXTATTR_NAMESPACE_USER_STRING;
5277 suffix = ":";
5278 #else
5279 /*
5280 * This is the default namespace by which we can access all
5281 * attributes created on Solaris.
5282 */
5283 prefix = namespace = suffix = "";
5284 #endif
5285 break;
5286 case EXTATTR_NAMESPACE_SYSTEM:
5287 prefix = "freebsd:";
5288 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
5289 suffix = ":";
5290 break;
5291 case EXTATTR_NAMESPACE_EMPTY:
5292 default:
5293 return (SET_ERROR(EINVAL));
5294 }
5295 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
5296 name) >= size) {
5297 return (SET_ERROR(ENAMETOOLONG));
5298 }
5299 return (0);
5300 }
5301
5302 static int
zfs_ensure_xattr_cached(znode_t * zp)5303 zfs_ensure_xattr_cached(znode_t *zp)
5304 {
5305 int error = 0;
5306
5307 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5308
5309 if (zp->z_xattr_cached != NULL)
5310 return (0);
5311
5312 if (rw_write_held(&zp->z_xattr_lock))
5313 return (zfs_sa_get_xattr(zp));
5314
5315 if (!rw_tryupgrade(&zp->z_xattr_lock)) {
5316 rw_exit(&zp->z_xattr_lock);
5317 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5318 }
5319 if (zp->z_xattr_cached == NULL)
5320 error = zfs_sa_get_xattr(zp);
5321 rw_downgrade(&zp->z_xattr_lock);
5322 return (error);
5323 }
5324
5325 #ifndef _SYS_SYSPROTO_H_
5326 struct vop_getextattr {
5327 IN struct vnode *a_vp;
5328 IN int a_attrnamespace;
5329 IN const char *a_name;
5330 INOUT struct uio *a_uio;
5331 OUT size_t *a_size;
5332 IN struct ucred *a_cred;
5333 IN struct thread *a_td;
5334 };
5335 #endif
5336
5337 static int
zfs_getextattr_dir(struct vop_getextattr_args * ap,const char * attrname)5338 zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
5339 {
5340 struct thread *td = ap->a_td;
5341 struct nameidata nd;
5342 struct vattr va;
5343 vnode_t *xvp = NULL, *vp;
5344 int error, flags;
5345
5346 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5347 LOOKUP_XATTR, B_FALSE);
5348 if (error != 0)
5349 return (error);
5350
5351 flags = FREAD;
5352 #if __FreeBSD_version < 1400043
5353 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
5354 xvp, td);
5355 #else
5356 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5357 #endif
5358 error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
5359 vp = nd.ni_vp;
5360 NDFREE(&nd, NDF_ONLY_PNBUF);
5361 if (error != 0)
5362 return (error);
5363
5364 if (ap->a_size != NULL) {
5365 error = VOP_GETATTR(vp, &va, ap->a_cred);
5366 if (error == 0)
5367 *ap->a_size = (size_t)va.va_size;
5368 } else if (ap->a_uio != NULL)
5369 error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5370
5371 VOP_UNLOCK1(vp);
5372 vn_close(vp, flags, ap->a_cred, td);
5373 return (error);
5374 }
5375
5376 static int
zfs_getextattr_sa(struct vop_getextattr_args * ap,const char * attrname)5377 zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
5378 {
5379 znode_t *zp = VTOZ(ap->a_vp);
5380 uchar_t *nv_value;
5381 uint_t nv_size;
5382 int error;
5383
5384 error = zfs_ensure_xattr_cached(zp);
5385 if (error != 0)
5386 return (error);
5387
5388 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5389 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5390
5391 error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
5392 &nv_value, &nv_size);
5393 if (error)
5394 return (error);
5395
5396 if (ap->a_size != NULL)
5397 *ap->a_size = nv_size;
5398 else if (ap->a_uio != NULL)
5399 error = uiomove(nv_value, nv_size, ap->a_uio);
5400
5401 return (error);
5402 }
5403
5404 /*
5405 * Vnode operation to retrieve a named extended attribute.
5406 */
5407 static int
zfs_getextattr(struct vop_getextattr_args * ap)5408 zfs_getextattr(struct vop_getextattr_args *ap)
5409 {
5410 znode_t *zp = VTOZ(ap->a_vp);
5411 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5412 char attrname[EXTATTR_MAXNAMELEN+1];
5413 int error;
5414
5415 /*
5416 * If the xattr property is off, refuse the request.
5417 */
5418 if (!(zfsvfs->z_flags & ZSB_XATTR))
5419 return (SET_ERROR(EOPNOTSUPP));
5420
5421 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5422 ap->a_cred, ap->a_td, VREAD);
5423 if (error != 0)
5424 return (error);
5425
5426 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5427 sizeof (attrname));
5428 if (error != 0)
5429 return (error);
5430
5431 error = ENOENT;
5432 ZFS_ENTER(zfsvfs);
5433 ZFS_VERIFY_ZP(zp)
5434 rw_enter(&zp->z_xattr_lock, RW_READER);
5435 if (zfsvfs->z_use_sa && zp->z_is_sa)
5436 error = zfs_getextattr_sa(ap, attrname);
5437 if (error == ENOENT)
5438 error = zfs_getextattr_dir(ap, attrname);
5439 rw_exit(&zp->z_xattr_lock);
5440 ZFS_EXIT(zfsvfs);
5441 if (error == ENOENT)
5442 error = SET_ERROR(ENOATTR);
5443 return (error);
5444 }
5445
5446 #ifndef _SYS_SYSPROTO_H_
5447 struct vop_deleteextattr {
5448 IN struct vnode *a_vp;
5449 IN int a_attrnamespace;
5450 IN const char *a_name;
5451 IN struct ucred *a_cred;
5452 IN struct thread *a_td;
5453 };
5454 #endif
5455
5456 static int
zfs_deleteextattr_dir(struct vop_deleteextattr_args * ap,const char * attrname)5457 zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
5458 {
5459 struct nameidata nd;
5460 vnode_t *xvp = NULL, *vp;
5461 int error;
5462
5463 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5464 LOOKUP_XATTR, B_FALSE);
5465 if (error != 0)
5466 return (error);
5467
5468 #if __FreeBSD_version < 1400043
5469 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5470 UIO_SYSSPACE, attrname, xvp, ap->a_td);
5471 #else
5472 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5473 UIO_SYSSPACE, attrname, xvp);
5474 #endif
5475 error = namei(&nd);
5476 vp = nd.ni_vp;
5477 if (error != 0) {
5478 NDFREE(&nd, NDF_ONLY_PNBUF);
5479 return (error);
5480 }
5481
5482 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
5483 NDFREE(&nd, NDF_ONLY_PNBUF);
5484
5485 vput(nd.ni_dvp);
5486 if (vp == nd.ni_dvp)
5487 vrele(vp);
5488 else
5489 vput(vp);
5490
5491 return (error);
5492 }
5493
5494 static int
zfs_deleteextattr_sa(struct vop_deleteextattr_args * ap,const char * attrname)5495 zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
5496 {
5497 znode_t *zp = VTOZ(ap->a_vp);
5498 nvlist_t *nvl;
5499 int error;
5500
5501 error = zfs_ensure_xattr_cached(zp);
5502 if (error != 0)
5503 return (error);
5504
5505 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5506 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5507
5508 nvl = zp->z_xattr_cached;
5509 error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
5510 if (error == 0)
5511 error = zfs_sa_set_xattr(zp);
5512 if (error != 0) {
5513 zp->z_xattr_cached = NULL;
5514 nvlist_free(nvl);
5515 }
5516 return (error);
5517 }
5518
5519 /*
5520 * Vnode operation to remove a named attribute.
5521 */
5522 static int
zfs_deleteextattr(struct vop_deleteextattr_args * ap)5523 zfs_deleteextattr(struct vop_deleteextattr_args *ap)
5524 {
5525 znode_t *zp = VTOZ(ap->a_vp);
5526 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5527 char attrname[EXTATTR_MAXNAMELEN+1];
5528 int error;
5529
5530 /*
5531 * If the xattr property is off, refuse the request.
5532 */
5533 if (!(zfsvfs->z_flags & ZSB_XATTR))
5534 return (SET_ERROR(EOPNOTSUPP));
5535
5536 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5537 ap->a_cred, ap->a_td, VWRITE);
5538 if (error != 0)
5539 return (error);
5540
5541 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5542 sizeof (attrname));
5543 if (error != 0)
5544 return (error);
5545
5546 size_t size = 0;
5547 struct vop_getextattr_args vga = {
5548 .a_vp = ap->a_vp,
5549 .a_size = &size,
5550 .a_cred = ap->a_cred,
5551 .a_td = ap->a_td,
5552 };
5553 error = ENOENT;
5554 ZFS_ENTER(zfsvfs);
5555 ZFS_VERIFY_ZP(zp);
5556 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5557 if (zfsvfs->z_use_sa && zp->z_is_sa) {
5558 error = zfs_getextattr_sa(&vga, attrname);
5559 if (error == 0)
5560 error = zfs_deleteextattr_sa(ap, attrname);
5561 }
5562 if (error == ENOENT) {
5563 error = zfs_getextattr_dir(&vga, attrname);
5564 if (error == 0)
5565 error = zfs_deleteextattr_dir(ap, attrname);
5566 }
5567 rw_exit(&zp->z_xattr_lock);
5568 ZFS_EXIT(zfsvfs);
5569 if (error == ENOENT)
5570 error = SET_ERROR(ENOATTR);
5571 return (error);
5572 }
5573
5574 #ifndef _SYS_SYSPROTO_H_
5575 struct vop_setextattr {
5576 IN struct vnode *a_vp;
5577 IN int a_attrnamespace;
5578 IN const char *a_name;
5579 INOUT struct uio *a_uio;
5580 IN struct ucred *a_cred;
5581 IN struct thread *a_td;
5582 };
5583 #endif
5584
5585 static int
zfs_setextattr_dir(struct vop_setextattr_args * ap,const char * attrname)5586 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
5587 {
5588 struct thread *td = ap->a_td;
5589 struct nameidata nd;
5590 struct vattr va;
5591 vnode_t *xvp = NULL, *vp;
5592 int error, flags;
5593
5594 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5595 LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE);
5596 if (error != 0)
5597 return (error);
5598
5599 flags = FFLAGS(O_WRONLY | O_CREAT);
5600 #if __FreeBSD_version < 1400043
5601 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td);
5602 #else
5603 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5604 #endif
5605 error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
5606 NULL);
5607 vp = nd.ni_vp;
5608 NDFREE(&nd, NDF_ONLY_PNBUF);
5609 if (error != 0)
5610 return (error);
5611
5612 VATTR_NULL(&va);
5613 va.va_size = 0;
5614 error = VOP_SETATTR(vp, &va, ap->a_cred);
5615 if (error == 0)
5616 VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5617
5618 VOP_UNLOCK1(vp);
5619 vn_close(vp, flags, ap->a_cred, td);
5620 return (error);
5621 }
5622
5623 static int
zfs_setextattr_sa(struct vop_setextattr_args * ap,const char * attrname)5624 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
5625 {
5626 znode_t *zp = VTOZ(ap->a_vp);
5627 nvlist_t *nvl;
5628 size_t sa_size;
5629 int error;
5630
5631 error = zfs_ensure_xattr_cached(zp);
5632 if (error != 0)
5633 return (error);
5634
5635 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5636 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5637
5638 nvl = zp->z_xattr_cached;
5639 size_t entry_size = ap->a_uio->uio_resid;
5640 if (entry_size > DXATTR_MAX_ENTRY_SIZE)
5641 return (SET_ERROR(EFBIG));
5642 error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
5643 if (error != 0)
5644 return (error);
5645 if (sa_size > DXATTR_MAX_SA_SIZE)
5646 return (SET_ERROR(EFBIG));
5647 uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP);
5648 error = uiomove(buf, entry_size, ap->a_uio);
5649 if (error == 0)
5650 error = nvlist_add_byte_array(nvl, attrname, buf, entry_size);
5651 kmem_free(buf, entry_size);
5652 if (error == 0)
5653 error = zfs_sa_set_xattr(zp);
5654 if (error != 0) {
5655 zp->z_xattr_cached = NULL;
5656 nvlist_free(nvl);
5657 }
5658 return (error);
5659 }
5660
5661 /*
5662 * Vnode operation to set a named attribute.
5663 */
5664 static int
zfs_setextattr(struct vop_setextattr_args * ap)5665 zfs_setextattr(struct vop_setextattr_args *ap)
5666 {
5667 znode_t *zp = VTOZ(ap->a_vp);
5668 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5669 char attrname[EXTATTR_MAXNAMELEN+1];
5670 int error;
5671
5672 /*
5673 * If the xattr property is off, refuse the request.
5674 */
5675 if (!(zfsvfs->z_flags & ZSB_XATTR))
5676 return (SET_ERROR(EOPNOTSUPP));
5677
5678 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5679 ap->a_cred, ap->a_td, VWRITE);
5680 if (error != 0)
5681 return (error);
5682
5683 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5684 sizeof (attrname));
5685 if (error != 0)
5686 return (error);
5687
5688 struct vop_deleteextattr_args vda = {
5689 .a_vp = ap->a_vp,
5690 .a_cred = ap->a_cred,
5691 .a_td = ap->a_td,
5692 };
5693 error = ENOENT;
5694 ZFS_ENTER(zfsvfs);
5695 ZFS_VERIFY_ZP(zp);
5696 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5697 if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) {
5698 error = zfs_setextattr_sa(ap, attrname);
5699 if (error == 0)
5700 /*
5701 * Successfully put into SA, we need to clear the one
5702 * in dir if present.
5703 */
5704 zfs_deleteextattr_dir(&vda, attrname);
5705 }
5706 if (error) {
5707 error = zfs_setextattr_dir(ap, attrname);
5708 if (error == 0 && zp->z_is_sa)
5709 /*
5710 * Successfully put into dir, we need to clear the one
5711 * in SA if present.
5712 */
5713 zfs_deleteextattr_sa(&vda, attrname);
5714 }
5715 rw_exit(&zp->z_xattr_lock);
5716 ZFS_EXIT(zfsvfs);
5717 return (error);
5718 }
5719
/*
 * Argument layout of the listextattr vnode operation.  Only compiled when
 * _SYS_SYSPROTO_H_ is not defined.  IN, OUT and INOUT mark the direction of
 * each argument.  Note that the handlers below take a
 * struct vop_listextattr_args pointer.
 */
#ifndef _SYS_SYSPROTO_H_
struct vop_listextattr {
	IN struct vnode *a_vp;		/* vnode whose attributes are listed */
	IN int a_attrnamespace;		/* namespace to list */
	INOUT struct uio *a_uio;	/* receives the name entries */
	OUT size_t *a_size;		/* if not NULL, receives total size */
	IN struct ucred *a_cred;	/* caller's credentials */
	IN struct thread *a_td;		/* calling thread */
};
#endif
5730
5731 static int
zfs_listextattr_dir(struct vop_listextattr_args * ap,const char * attrprefix)5732 zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
5733 {
5734 struct thread *td = ap->a_td;
5735 struct nameidata nd;
5736 uint8_t dirbuf[sizeof (struct dirent)];
5737 struct iovec aiov;
5738 struct uio auio;
5739 vnode_t *xvp = NULL, *vp;
5740 int error, eof;
5741
5742 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5743 LOOKUP_XATTR, B_FALSE);
5744 if (error != 0) {
5745 /*
5746 * ENOATTR means that the EA directory does not yet exist,
5747 * i.e. there are no extended attributes there.
5748 */
5749 if (error == ENOATTR)
5750 error = 0;
5751 return (error);
5752 }
5753
5754 #if __FreeBSD_version < 1400043
5755 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5756 UIO_SYSSPACE, ".", xvp, td);
5757 #else
5758 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5759 UIO_SYSSPACE, ".", xvp);
5760 #endif
5761 error = namei(&nd);
5762 vp = nd.ni_vp;
5763 NDFREE(&nd, NDF_ONLY_PNBUF);
5764 if (error != 0)
5765 return (error);
5766
5767 auio.uio_iov = &aiov;
5768 auio.uio_iovcnt = 1;
5769 auio.uio_segflg = UIO_SYSSPACE;
5770 auio.uio_td = td;
5771 auio.uio_rw = UIO_READ;
5772 auio.uio_offset = 0;
5773
5774 size_t plen = strlen(attrprefix);
5775
5776 do {
5777 aiov.iov_base = (void *)dirbuf;
5778 aiov.iov_len = sizeof (dirbuf);
5779 auio.uio_resid = sizeof (dirbuf);
5780 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
5781 if (error != 0)
5782 break;
5783 int done = sizeof (dirbuf) - auio.uio_resid;
5784 for (int pos = 0; pos < done; ) {
5785 struct dirent *dp = (struct dirent *)(dirbuf + pos);
5786 pos += dp->d_reclen;
5787 /*
5788 * XXX: Temporarily we also accept DT_UNKNOWN, as this
5789 * is what we get when attribute was created on Solaris.
5790 */
5791 if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
5792 continue;
5793 else if (plen == 0 &&
5794 strncmp(dp->d_name, "freebsd:", 8) == 0)
5795 continue;
5796 else if (strncmp(dp->d_name, attrprefix, plen) != 0)
5797 continue;
5798 uint8_t nlen = dp->d_namlen - plen;
5799 if (ap->a_size != NULL) {
5800 *ap->a_size += 1 + nlen;
5801 } else if (ap->a_uio != NULL) {
5802 /*
5803 * Format of extattr name entry is one byte for
5804 * length and the rest for name.
5805 */
5806 error = uiomove(&nlen, 1, ap->a_uio);
5807 if (error == 0) {
5808 char *namep = dp->d_name + plen;
5809 error = uiomove(namep, nlen, ap->a_uio);
5810 }
5811 if (error != 0)
5812 break;
5813 }
5814 }
5815 } while (!eof && error == 0);
5816
5817 vput(vp);
5818 return (error);
5819 }
5820
5821 static int
zfs_listextattr_sa(struct vop_listextattr_args * ap,const char * attrprefix)5822 zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
5823 {
5824 znode_t *zp = VTOZ(ap->a_vp);
5825 int error;
5826
5827 error = zfs_ensure_xattr_cached(zp);
5828 if (error != 0)
5829 return (error);
5830
5831 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5832 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5833
5834 size_t plen = strlen(attrprefix);
5835 nvpair_t *nvp = NULL;
5836 while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
5837 ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
5838
5839 const char *name = nvpair_name(nvp);
5840 if (plen == 0 && strncmp(name, "freebsd:", 8) == 0)
5841 continue;
5842 else if (strncmp(name, attrprefix, plen) != 0)
5843 continue;
5844 uint8_t nlen = strlen(name) - plen;
5845 if (ap->a_size != NULL) {
5846 *ap->a_size += 1 + nlen;
5847 } else if (ap->a_uio != NULL) {
5848 /*
5849 * Format of extattr name entry is one byte for
5850 * length and the rest for name.
5851 */
5852 error = uiomove(&nlen, 1, ap->a_uio);
5853 if (error == 0) {
5854 char *namep = __DECONST(char *, name) + plen;
5855 error = uiomove(namep, nlen, ap->a_uio);
5856 }
5857 if (error != 0)
5858 break;
5859 }
5860 }
5861
5862 return (error);
5863 }
5864
5865 /*
5866 * Vnode operation to retrieve extended attributes on a vnode.
5867 */
5868 static int
zfs_listextattr(struct vop_listextattr_args * ap)5869 zfs_listextattr(struct vop_listextattr_args *ap)
5870 {
5871 znode_t *zp = VTOZ(ap->a_vp);
5872 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5873 char attrprefix[16];
5874 int error;
5875
5876 if (ap->a_size != NULL)
5877 *ap->a_size = 0;
5878
5879 /*
5880 * If the xattr property is off, refuse the request.
5881 */
5882 if (!(zfsvfs->z_flags & ZSB_XATTR))
5883 return (SET_ERROR(EOPNOTSUPP));
5884
5885 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5886 ap->a_cred, ap->a_td, VREAD);
5887 if (error != 0)
5888 return (error);
5889
5890 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
5891 sizeof (attrprefix));
5892 if (error != 0)
5893 return (error);
5894
5895 ZFS_ENTER(zfsvfs);
5896 ZFS_VERIFY_ZP(zp);
5897 rw_enter(&zp->z_xattr_lock, RW_READER);
5898 if (zfsvfs->z_use_sa && zp->z_is_sa)
5899 error = zfs_listextattr_sa(ap, attrprefix);
5900 if (error == 0)
5901 error = zfs_listextattr_dir(ap, attrprefix);
5902 rw_exit(&zp->z_xattr_lock);
5903 ZFS_EXIT(zfsvfs);
5904 return (error);
5905 }
5906
5907 #ifndef _SYS_SYSPROTO_H_
5908 struct vop_getacl_args {
5909 struct vnode *vp;
5910 acl_type_t type;
5911 struct acl *aclp;
5912 struct ucred *cred;
5913 struct thread *td;
5914 };
5915 #endif
5916
5917 static int
zfs_freebsd_getacl(struct vop_getacl_args * ap)5918 zfs_freebsd_getacl(struct vop_getacl_args *ap)
5919 {
5920 int error;
5921 vsecattr_t vsecattr;
5922
5923 if (ap->a_type != ACL_TYPE_NFS4)
5924 return (EINVAL);
5925
5926 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
5927 if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
5928 &vsecattr, 0, ap->a_cred)))
5929 return (error);
5930
5931 error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
5932 vsecattr.vsa_aclcnt);
5933 if (vsecattr.vsa_aclentp != NULL)
5934 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);
5935
5936 return (error);
5937 }
5938
5939 #ifndef _SYS_SYSPROTO_H_
5940 struct vop_setacl_args {
5941 struct vnode *vp;
5942 acl_type_t type;
5943 struct acl *aclp;
5944 struct ucred *cred;
5945 struct thread *td;
5946 };
5947 #endif
5948
5949 static int
zfs_freebsd_setacl(struct vop_setacl_args * ap)5950 zfs_freebsd_setacl(struct vop_setacl_args *ap)
5951 {
5952 int error;
5953 vsecattr_t vsecattr;
5954 int aclbsize; /* size of acl list in bytes */
5955 aclent_t *aaclp;
5956
5957 if (ap->a_type != ACL_TYPE_NFS4)
5958 return (EINVAL);
5959
5960 if (ap->a_aclp == NULL)
5961 return (EINVAL);
5962
5963 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
5964 return (EINVAL);
5965
5966 /*
5967 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
5968 * splitting every entry into two and appending "canonical six"
5969 * entries at the end. Don't allow for setting an ACL that would
5970 * cause chmod(2) to run out of ACL entries.
5971 */
5972 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
5973 return (ENOSPC);
5974
5975 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
5976 if (error != 0)
5977 return (error);
5978
5979 vsecattr.vsa_mask = VSA_ACE;
5980 aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t);
5981 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
5982 aaclp = vsecattr.vsa_aclentp;
5983 vsecattr.vsa_aclentsz = aclbsize;
5984
5985 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
5986 error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred);
5987 kmem_free(aaclp, aclbsize);
5988
5989 return (error);
5990 }
5991
5992 #ifndef _SYS_SYSPROTO_H_
5993 struct vop_aclcheck_args {
5994 struct vnode *vp;
5995 acl_type_t type;
5996 struct acl *aclp;
5997 struct ucred *cred;
5998 struct thread *td;
5999 };
6000 #endif
6001
/*
 * Vnode operation to check an ACL.  Not supported: the arguments are never
 * examined and EOPNOTSUPP is returned unconditionally.
 */
static int
zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
{
	(void) ap;

	return (EOPNOTSUPP);
}
6008
6009 static int
zfs_vptocnp(struct vop_vptocnp_args * ap)6010 zfs_vptocnp(struct vop_vptocnp_args *ap)
6011 {
6012 vnode_t *covered_vp;
6013 vnode_t *vp = ap->a_vp;
6014 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
6015 znode_t *zp = VTOZ(vp);
6016 int ltype;
6017 int error;
6018
6019 ZFS_ENTER(zfsvfs);
6020 ZFS_VERIFY_ZP(zp);
6021
6022 /*
6023 * If we are a snapshot mounted under .zfs, run the operation
6024 * on the covered vnode.
6025 */
6026 if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
6027 char name[MAXNAMLEN + 1];
6028 znode_t *dzp;
6029 size_t len;
6030
6031 error = zfs_znode_parent_and_name(zp, &dzp, name);
6032 if (error == 0) {
6033 len = strlen(name);
6034 if (*ap->a_buflen < len)
6035 error = SET_ERROR(ENOMEM);
6036 }
6037 if (error == 0) {
6038 *ap->a_buflen -= len;
6039 bcopy(name, ap->a_buf + *ap->a_buflen, len);
6040 *ap->a_vpp = ZTOV(dzp);
6041 }
6042 ZFS_EXIT(zfsvfs);
6043 return (error);
6044 }
6045 ZFS_EXIT(zfsvfs);
6046
6047 covered_vp = vp->v_mount->mnt_vnodecovered;
6048 #if __FreeBSD_version >= 1300045
6049 enum vgetstate vs = vget_prep(covered_vp);
6050 #else
6051 vhold(covered_vp);
6052 #endif
6053 ltype = VOP_ISLOCKED(vp);
6054 VOP_UNLOCK1(vp);
6055 #if __FreeBSD_version >= 1300045
6056 error = vget_finish(covered_vp, LK_SHARED, vs);
6057 #else
6058 error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread);
6059 #endif
6060 if (error == 0) {
6061 #if __FreeBSD_version >= 1300123
6062 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf,
6063 ap->a_buflen);
6064 #else
6065 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred,
6066 ap->a_buf, ap->a_buflen);
6067 #endif
6068 vput(covered_vp);
6069 }
6070 vn_lock(vp, ltype | LK_RETRY);
6071 if (VN_IS_DOOMED(vp))
6072 error = SET_ERROR(ENOENT);
6073 return (error);
6074 }
6075
/*
 * Tentative definitions of the three vnode operation vectors.  The
 * initialized definitions follow below.
 */
struct vop_vector zfs_vnodeops;
struct vop_vector zfs_fifoops;
struct vop_vector zfs_shareops;
6079
6080 struct vop_vector zfs_vnodeops = {
6081 .vop_default = &default_vnodeops,
6082 .vop_inactive = zfs_freebsd_inactive,
6083 #if __FreeBSD_version >= 1300042
6084 .vop_need_inactive = zfs_freebsd_need_inactive,
6085 #endif
6086 .vop_reclaim = zfs_freebsd_reclaim,
6087 #if __FreeBSD_version >= 1300102
6088 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
6089 #endif
6090 #if __FreeBSD_version >= 1300139
6091 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6092 #endif
6093 .vop_access = zfs_freebsd_access,
6094 .vop_allocate = VOP_EINVAL,
6095 .vop_lookup = zfs_cache_lookup,
6096 .vop_cachedlookup = zfs_freebsd_cachedlookup,
6097 .vop_getattr = zfs_freebsd_getattr,
6098 .vop_setattr = zfs_freebsd_setattr,
6099 .vop_create = zfs_freebsd_create,
6100 .vop_mknod = (vop_mknod_t *)zfs_freebsd_create,
6101 .vop_mkdir = zfs_freebsd_mkdir,
6102 .vop_readdir = zfs_freebsd_readdir,
6103 .vop_fsync = zfs_freebsd_fsync,
6104 .vop_open = zfs_freebsd_open,
6105 .vop_close = zfs_freebsd_close,
6106 .vop_rmdir = zfs_freebsd_rmdir,
6107 .vop_ioctl = zfs_freebsd_ioctl,
6108 .vop_link = zfs_freebsd_link,
6109 .vop_symlink = zfs_freebsd_symlink,
6110 .vop_readlink = zfs_freebsd_readlink,
6111 .vop_read = zfs_freebsd_read,
6112 .vop_write = zfs_freebsd_write,
6113 .vop_remove = zfs_freebsd_remove,
6114 .vop_rename = zfs_freebsd_rename,
6115 .vop_pathconf = zfs_freebsd_pathconf,
6116 .vop_bmap = zfs_freebsd_bmap,
6117 .vop_fid = zfs_freebsd_fid,
6118 .vop_getextattr = zfs_getextattr,
6119 .vop_deleteextattr = zfs_deleteextattr,
6120 .vop_setextattr = zfs_setextattr,
6121 .vop_listextattr = zfs_listextattr,
6122 .vop_getacl = zfs_freebsd_getacl,
6123 .vop_setacl = zfs_freebsd_setacl,
6124 .vop_aclcheck = zfs_freebsd_aclcheck,
6125 .vop_getpages = zfs_freebsd_getpages,
6126 .vop_putpages = zfs_freebsd_putpages,
6127 .vop_vptocnp = zfs_vptocnp,
6128 #if __FreeBSD_version >= 1300064
6129 .vop_lock1 = vop_lock,
6130 .vop_unlock = vop_unlock,
6131 .vop_islocked = vop_islocked,
6132 #endif
6133 #if __FreeBSD_version >= 1400043
6134 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6135 #endif
6136 };
6137 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
6138
6139 struct vop_vector zfs_fifoops = {
6140 .vop_default = &fifo_specops,
6141 .vop_fsync = zfs_freebsd_fsync,
6142 #if __FreeBSD_version >= 1300102
6143 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
6144 #endif
6145 #if __FreeBSD_version >= 1300139
6146 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6147 #endif
6148 .vop_access = zfs_freebsd_access,
6149 .vop_getattr = zfs_freebsd_getattr,
6150 .vop_inactive = zfs_freebsd_inactive,
6151 .vop_read = VOP_PANIC,
6152 .vop_reclaim = zfs_freebsd_reclaim,
6153 .vop_setattr = zfs_freebsd_setattr,
6154 .vop_write = VOP_PANIC,
6155 .vop_pathconf = zfs_freebsd_pathconf,
6156 .vop_fid = zfs_freebsd_fid,
6157 .vop_getacl = zfs_freebsd_getacl,
6158 .vop_setacl = zfs_freebsd_setacl,
6159 .vop_aclcheck = zfs_freebsd_aclcheck,
6160 #if __FreeBSD_version >= 1400043
6161 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6162 #endif
6163 };
6164 VFS_VOP_VECTOR_REGISTER(zfs_fifoops);
6165
6166 /*
6167 * special share hidden files vnode operations template
6168 */
6169 struct vop_vector zfs_shareops = {
6170 .vop_default = &default_vnodeops,
6171 #if __FreeBSD_version >= 1300121
6172 .vop_fplookup_vexec = VOP_EAGAIN,
6173 #endif
6174 #if __FreeBSD_version >= 1300139
6175 .vop_fplookup_symlink = VOP_EAGAIN,
6176 #endif
6177 .vop_access = zfs_freebsd_access,
6178 .vop_inactive = zfs_freebsd_inactive,
6179 .vop_reclaim = zfs_freebsd_reclaim,
6180 .vop_fid = zfs_freebsd_fid,
6181 .vop_pathconf = zfs_freebsd_pathconf,
6182 #if __FreeBSD_version >= 1400043
6183 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6184 #endif
6185 };
6186 VFS_VOP_VECTOR_REGISTER(zfs_shareops);
6187