1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2014 Integros [integros.com]
26 * Copyright 2017 Nexenta Systems, Inc.
27 */
28
29 /* Portions Copyright 2007 Jeremy Teo */
30 /* Portions Copyright 2010 Robert Milkowski */
31
32 #include <sys/param.h>
33 #include <sys/time.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
36 #include <sys/resource.h>
37 #include <security/mac/mac_framework.h>
38 #include <sys/vfs.h>
39 #include <sys/endian.h>
40 #include <sys/vm.h>
41 #include <sys/vnode.h>
42 #if __FreeBSD_version >= 1300102
43 #include <sys/smr.h>
44 #endif
45 #include <sys/dirent.h>
46 #include <sys/file.h>
47 #include <sys/stat.h>
48 #include <sys/kmem.h>
49 #include <sys/taskq.h>
50 #include <sys/uio.h>
51 #include <sys/atomic.h>
52 #include <sys/namei.h>
53 #include <sys/mman.h>
54 #include <sys/cmn_err.h>
55 #include <sys/kdb.h>
56 #include <sys/sysproto.h>
57 #include <sys/errno.h>
58 #include <sys/unistd.h>
59 #include <sys/zfs_dir.h>
60 #include <sys/zfs_ioctl.h>
61 #include <sys/fs/zfs.h>
62 #include <sys/dmu.h>
63 #include <sys/dmu_objset.h>
64 #include <sys/spa.h>
65 #include <sys/txg.h>
66 #include <sys/dbuf.h>
67 #include <sys/zap.h>
68 #include <sys/sa.h>
69 #include <sys/policy.h>
70 #include <sys/sunddi.h>
71 #include <sys/filio.h>
72 #include <sys/sid.h>
73 #include <sys/zfs_ctldir.h>
74 #include <sys/zfs_fuid.h>
75 #include <sys/zfs_quota.h>
76 #include <sys/zfs_sa.h>
77 #include <sys/zfs_rlock.h>
78 #include <sys/bio.h>
79 #include <sys/buf.h>
80 #include <sys/sched.h>
81 #include <sys/acl.h>
82 #include <sys/vmmeter.h>
83 #include <vm/vm_param.h>
84 #include <sys/zil.h>
85 #include <sys/zfs_vnops.h>
86 #include <sys/module.h>
87 #include <sys/sysent.h>
88 #include <sys/dmu_impl.h>
89 #include <sys/brt.h>
90 #include <sys/zfeature.h>
91
92 #include <vm/vm_object.h>
93
94 #include <sys/extattr.h>
95 #include <sys/priv.h>
96
/*
 * Provide a zero-valued VN_OPEN_INVFS when the system headers do not
 * define it.
 */
#ifndef VN_OPEN_INVFS
#define	VN_OPEN_INVFS	0x0
#endif

VFS_SMR_DECLARE;

/*
 * Before __FreeBSD_version 1300103, supply NDFREE_PNBUF() in terms of
 * NDFREE() with NDF_ONLY_PNBUF.
 */
#if __FreeBSD_version < 1300103
#define	NDFREE_PNBUF(ndp)	NDFREE((ndp), NDF_ONLY_PNBUF)
#endif

/*
 * vm_page_wire_lock()/vm_page_wire_unlock() bracket page hold/unhold calls
 * in this file.  From __FreeBSD_version 1300047 on they expand to nothing;
 * on older kernels they take and release the page lock.
 */
#if __FreeBSD_version >= 1300047
#define	vm_page_wire_lock(pp)
#define	vm_page_wire_unlock(pp)
#else
#define	vm_page_wire_lock(pp) vm_page_lock(pp)
#define	vm_page_wire_unlock(pp) vm_page_unlock(pp)
#endif

/*
 * With DEBUG_VFS_LOCKS, VNCHECKREF() asserts that the vnode has both a
 * positive hold count and a positive use count; otherwise it is a no-op.
 */
#ifdef DEBUG_VFS_LOCKS
#define	VNCHECKREF(vp)				  \
	VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp,	\
	    ("%s: wrong ref counts", __func__));
#else
#define	VNCHECKREF(vp)
#endif

/*
 * cookie_t is uint64_t from __FreeBSD_version 1400045 on and ulong_t before.
 * NOTE(review): presumably this tracks the kernel's directory cookie type --
 * confirm against the readdir code that uses it.
 */
#if __FreeBSD_version >= 1400045
typedef uint64_t cookie_t;
#else
typedef ulong_t cookie_t;
#endif
128
129 /*
130 * Programming rules.
131 *
132 * Each vnode op performs some logical unit of work. To do this, the ZPL must
133 * properly lock its in-core state, create a DMU transaction, do the work,
134 * record this work in the intent log (ZIL), commit the DMU transaction,
135 * and wait for the intent log to commit if it is a synchronous operation.
136 * Moreover, the vnode ops must work in both normal and log replay context.
137 * The ordering of events is important to avoid deadlocks and references
138 * to freed memory. The example below illustrates the following Big Rules:
139 *
140 * (1) A check must be made in each zfs thread for a mounted file system.
141 * This is done avoiding races using zfs_enter(zfsvfs).
142 * A zfs_exit(zfsvfs) is needed before all returns. Any znodes
143 * must be checked with zfs_verify_zp(zp). Both of these macros
144 * can return EIO from the calling function.
145 *
146 * (2) VN_RELE() should always be the last thing except for zil_commit()
147 * (if necessary) and zfs_exit(). This is for 3 reasons:
148 * First, if it's the last reference, the vnode/znode
149 * can be freed, so the zp may point to freed memory. Second, the last
150 * reference will call zfs_zinactive(), which may induce a lot of work --
151 * pushing cached pages (which acquires range locks) and syncing out
152 * cached atime changes. Third, zfs_zinactive() may require a new tx,
153 * which could deadlock the system if you were already holding one.
154 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
155 *
156 * (3) All range locks must be grabbed before calling dmu_tx_assign(),
157 * as they can span dmu_tx_assign() calls.
158 *
159 * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
160 * dmu_tx_assign(). This is critical because we don't want to block
161 * while holding locks.
162 *
163 * If no ZPL locks are held (aside from zfs_enter()), use TXG_WAIT. This
164 * reduces lock contention and CPU usage when we must wait (note that if
165 * throughput is constrained by the storage, nearly every transaction
166 * must wait).
167 *
168 * Note, in particular, that if a lock is sometimes acquired before
169 * the tx assigns, and sometimes after (e.g. z_lock), then failing
170 * to use a non-blocking assign can deadlock the system. The scenario:
171 *
172 * Thread A has grabbed a lock before calling dmu_tx_assign().
173 * Thread B is in an already-assigned tx, and blocks for this lock.
174 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
175 * forever, because the previous txg can't quiesce until B's tx commits.
176 *
177 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
178 * then drop all locks, call dmu_tx_wait(), and try again. On subsequent
179 * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
180 * to indicate that this operation has already called dmu_tx_wait().
181 * This will ensure that we don't retry forever, waiting a short bit
182 * each time.
183 *
184 * (5) If the operation succeeded, generate the intent log entry for it
185 * before dropping locks. This ensures that the ordering of events
186 * in the intent log matches the order in which they actually occurred.
187 * During ZIL replay the zfs_log_* functions will update the sequence
188 * number to indicate the zil transaction has replayed.
189 *
190 * (6) At the end of each vnode op, the DMU tx must always commit,
191 * regardless of whether there were any errors.
192 *
193 * (7) After dropping all locks, invoke zil_commit(zilog, foid)
194 * to ensure that synchronous semantics are provided when necessary.
195 *
196 * In general, this is how things should be ordered in each vnode op:
197 *
198 * zfs_enter(zfsvfs); // exit if unmounted
199 * top:
200 * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD())
201 * rw_enter(...); // grab any other locks you need
202 * tx = dmu_tx_create(...); // get DMU tx
203 * dmu_tx_hold_*(); // hold each object you might modify
204 * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
205 * if (error) {
206 * rw_exit(...); // drop locks
207 * zfs_dirent_unlock(dl); // unlock directory entry
208 * VN_RELE(...); // release held vnodes
209 * if (error == ERESTART) {
210 * waited = B_TRUE;
211 * dmu_tx_wait(tx);
212 * dmu_tx_abort(tx);
213 * goto top;
214 * }
215 * dmu_tx_abort(tx); // abort DMU tx
216 * zfs_exit(zfsvfs); // finished in zfs
217 * return (error); // really out of space
218 * }
219 * error = do_real_work(); // do whatever this VOP does
220 * if (error == 0)
221 * zfs_log_*(...); // on success, make ZIL entry
222 * dmu_tx_commit(tx); // commit DMU tx -- error or not
223 * rw_exit(...); // drop locks
224 * zfs_dirent_unlock(dl); // unlock directory entry
225 * VN_RELE(...); // release held vnodes
226 * zil_commit(zilog, foid); // synchronous when necessary
227 * zfs_exit(zfsvfs); // finished in zfs
228 * return (error); // done, report error
229 */
230 static int
zfs_open(vnode_t ** vpp,int flag,cred_t * cr)231 zfs_open(vnode_t **vpp, int flag, cred_t *cr)
232 {
233 (void) cr;
234 znode_t *zp = VTOZ(*vpp);
235 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
236 int error;
237
238 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
239 return (error);
240
241 if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
242 ((flag & FAPPEND) == 0)) {
243 zfs_exit(zfsvfs, FTAG);
244 return (SET_ERROR(EPERM));
245 }
246
247 /* Keep a count of the synchronous opens in the znode */
248 if (flag & O_SYNC)
249 atomic_inc_32(&zp->z_sync_cnt);
250
251 zfs_exit(zfsvfs, FTAG);
252 return (0);
253 }
254
255 static int
zfs_close(vnode_t * vp,int flag,int count,offset_t offset,cred_t * cr)256 zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
257 {
258 (void) offset, (void) cr;
259 znode_t *zp = VTOZ(vp);
260 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
261 int error;
262
263 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
264 return (error);
265
266 /* Decrement the synchronous opens in the znode */
267 if ((flag & O_SYNC) && (count == 1))
268 atomic_dec_32(&zp->z_sync_cnt);
269
270 zfs_exit(zfsvfs, FTAG);
271 return (0);
272 }
273
274 static int
zfs_ioctl(vnode_t * vp,ulong_t com,intptr_t data,int flag,cred_t * cred,int * rvalp)275 zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
276 int *rvalp)
277 {
278 (void) flag, (void) cred, (void) rvalp;
279 loff_t off;
280 int error;
281
282 switch (com) {
283 case _FIOFFS:
284 {
285 return (0);
286
287 /*
288 * The following two ioctls are used by bfu. Faking out,
289 * necessary to avoid bfu errors.
290 */
291 }
292 case _FIOGDIO:
293 case _FIOSDIO:
294 {
295 return (0);
296 }
297
298 case F_SEEK_DATA:
299 case F_SEEK_HOLE:
300 {
301 off = *(offset_t *)data;
302 /* offset parameter is in/out */
303 error = zfs_holey(VTOZ(vp), com, &off);
304 if (error)
305 return (error);
306 *(offset_t *)data = off;
307 return (0);
308 }
309 }
310 return (SET_ERROR(ENOTTY));
311 }
312
313 static vm_page_t
page_busy(vnode_t * vp,int64_t start,int64_t off,int64_t nbytes)314 page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
315 {
316 vm_object_t obj;
317 vm_page_t pp;
318 int64_t end;
319
320 /*
321 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
322 * aligned boundaries, if the range is not aligned. As a result a
323 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
324 * It may happen that all DEV_BSIZE subranges are marked clean and thus
325 * the whole page would be considered clean despite have some
326 * dirty data.
327 * For this reason we should shrink the range to DEV_BSIZE aligned
328 * boundaries before calling vm_page_clear_dirty.
329 */
330 end = rounddown2(off + nbytes, DEV_BSIZE);
331 off = roundup2(off, DEV_BSIZE);
332 nbytes = end - off;
333
334 obj = vp->v_object;
335 zfs_vmobject_assert_wlocked_12(obj);
336 #if __FreeBSD_version < 1300050
337 for (;;) {
338 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
339 pp->valid) {
340 if (vm_page_xbusied(pp)) {
341 /*
342 * Reference the page before unlocking and
343 * sleeping so that the page daemon is less
344 * likely to reclaim it.
345 */
346 vm_page_reference(pp);
347 vm_page_lock(pp);
348 zfs_vmobject_wunlock(obj);
349 vm_page_busy_sleep(pp, "zfsmwb", true);
350 zfs_vmobject_wlock(obj);
351 continue;
352 }
353 vm_page_sbusy(pp);
354 } else if (pp != NULL) {
355 ASSERT(!pp->valid);
356 pp = NULL;
357 }
358 if (pp != NULL) {
359 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
360 vm_object_pip_add(obj, 1);
361 pmap_remove_write(pp);
362 if (nbytes != 0)
363 vm_page_clear_dirty(pp, off, nbytes);
364 }
365 break;
366 }
367 #else
368 vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
369 VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
370 VM_ALLOC_IGN_SBUSY);
371 if (pp != NULL) {
372 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
373 vm_object_pip_add(obj, 1);
374 pmap_remove_write(pp);
375 if (nbytes != 0)
376 vm_page_clear_dirty(pp, off, nbytes);
377 }
378 #endif
379 return (pp);
380 }
381
382 static void
page_unbusy(vm_page_t pp)383 page_unbusy(vm_page_t pp)
384 {
385
386 vm_page_sunbusy(pp);
387 #if __FreeBSD_version >= 1300041
388 vm_object_pip_wakeup(pp->object);
389 #else
390 vm_object_pip_subtract(pp->object, 1);
391 #endif
392 }
393
394 #if __FreeBSD_version > 1300051
395 static vm_page_t
page_hold(vnode_t * vp,int64_t start)396 page_hold(vnode_t *vp, int64_t start)
397 {
398 vm_object_t obj;
399 vm_page_t m;
400
401 obj = vp->v_object;
402 vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
403 VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
404 VM_ALLOC_NOBUSY);
405 return (m);
406 }
407 #else
408 static vm_page_t
page_hold(vnode_t * vp,int64_t start)409 page_hold(vnode_t *vp, int64_t start)
410 {
411 vm_object_t obj;
412 vm_page_t pp;
413
414 obj = vp->v_object;
415 zfs_vmobject_assert_wlocked(obj);
416
417 for (;;) {
418 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
419 pp->valid) {
420 if (vm_page_xbusied(pp)) {
421 /*
422 * Reference the page before unlocking and
423 * sleeping so that the page daemon is less
424 * likely to reclaim it.
425 */
426 vm_page_reference(pp);
427 vm_page_lock(pp);
428 zfs_vmobject_wunlock(obj);
429 vm_page_busy_sleep(pp, "zfsmwb", true);
430 zfs_vmobject_wlock(obj);
431 continue;
432 }
433
434 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
435 vm_page_wire_lock(pp);
436 vm_page_hold(pp);
437 vm_page_wire_unlock(pp);
438
439 } else
440 pp = NULL;
441 break;
442 }
443 return (pp);
444 }
445 #endif
446
447 static void
page_unhold(vm_page_t pp)448 page_unhold(vm_page_t pp)
449 {
450
451 vm_page_wire_lock(pp);
452 #if __FreeBSD_version >= 1300035
453 vm_page_unwire(pp, PQ_ACTIVE);
454 #else
455 vm_page_unhold(pp);
456 #endif
457 vm_page_wire_unlock(pp);
458 }
459
460 /*
461 * When a file is memory mapped, we must keep the IO data synchronized
462 * between the DMU cache and the memory mapped pages. What this means:
463 *
464 * On Write: If we find a memory mapped page, we write to *both*
465 * the page and the dmu buffer.
466 */
467 void
update_pages(znode_t * zp,int64_t start,int len,objset_t * os)468 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
469 {
470 vm_object_t obj;
471 struct sf_buf *sf;
472 vnode_t *vp = ZTOV(zp);
473 caddr_t va;
474 int off;
475
476 ASSERT3P(vp->v_mount, !=, NULL);
477 obj = vp->v_object;
478 ASSERT3P(obj, !=, NULL);
479
480 off = start & PAGEOFFSET;
481 zfs_vmobject_wlock_12(obj);
482 #if __FreeBSD_version >= 1300041
483 vm_object_pip_add(obj, 1);
484 #endif
485 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
486 vm_page_t pp;
487 int nbytes = imin(PAGESIZE - off, len);
488
489 if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
490 zfs_vmobject_wunlock_12(obj);
491
492 va = zfs_map_page(pp, &sf);
493 (void) dmu_read(os, zp->z_id, start + off, nbytes,
494 va + off, DMU_READ_PREFETCH);
495 zfs_unmap_page(sf);
496
497 zfs_vmobject_wlock_12(obj);
498 page_unbusy(pp);
499 }
500 len -= nbytes;
501 off = 0;
502 }
503 #if __FreeBSD_version >= 1300041
504 vm_object_pip_wakeup(obj);
505 #else
506 vm_object_pip_wakeupn(obj, 0);
507 #endif
508 zfs_vmobject_wunlock_12(obj);
509 }
510
/*
 * Read with UIO_NOCOPY flag means that sendfile(2) requests
 * ZFS to populate a range of page cache pages with data.
 *
 * Starting at the (page-aligned) uio offset, one page at a time is grabbed
 * from the vnode's VM object.  A page with no valid contents is filled via
 * dmu_read() and marked valid; any other page is asserted to be fully valid
 * and left alone.  The uio is advanced past each page processed.
 *
 *	IN:	zp	- znode of the file being read.
 *		nbytes	- number of bytes to populate.
 *		uio	- uio with segflg UIO_NOCOPY giving the start offset.
 *
 *	RETURN:	0 on success, or the first dmu_read() error (processing
 *		stops at that page).
 *
 * NOTE: this function could be optimized to pre-allocate
 * all pages in advance, drain exclusive busy on all of them,
 * map them into contiguous KVA region and populate them
 * in one single dmu_read() call.
 */
int
mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	objset_t *os = zp->z_zfsvfs->z_os;
	struct sf_buf *sf;
	vm_object_t obj;
	vm_page_t pp;
	int64_t start;
	caddr_t va;
	int len = nbytes;
	int error = 0;

	ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);
	ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);

	zfs_vmobject_wlock_12(obj);
	for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
		/* The last page of the request may be shorter than a page. */
		int bytes = MIN(PAGESIZE, len);

		pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
		    VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
		if (vm_page_none_valid(pp)) {
			/*
			 * Nothing valid in the page yet: fill it from the
			 * DMU.  The object lock is dropped around the
			 * mapping and the read.
			 */
			zfs_vmobject_wunlock_12(obj);
			va = zfs_map_page(pp, &sf);
			error = dmu_read(os, zp->z_id, start, bytes, va,
			    DMU_READ_PREFETCH);
			/* Zero the part of a short page the read skipped. */
			if (bytes != PAGESIZE && error == 0)
				memset(va + bytes, 0, PAGESIZE - bytes);
			zfs_unmap_page(sf);
			zfs_vmobject_wlock_12(obj);
#if __FreeBSD_version >= 1300081
			if (error == 0) {
				vm_page_valid(pp);
				vm_page_activate(pp);
				vm_page_do_sunbusy(pp);
			} else {
				/*
				 * The read failed.  Free the page if it is
				 * unwired, still has no valid data and the
				 * busy state can be upgraded; otherwise just
				 * drop the shared busy.
				 */
				zfs_vmobject_wlock(obj);
				if (!vm_page_wired(pp) && pp->valid == 0 &&
				    vm_page_busy_tryupgrade(pp))
					vm_page_free(pp);
				else
					vm_page_sunbusy(pp);
				zfs_vmobject_wunlock(obj);
			}
#else
			vm_page_do_sunbusy(pp);
			vm_page_lock(pp);
			if (error) {
				/*
				 * The read failed: free the page only if
				 * nobody has it wired or busied and it still
				 * has no valid data.
				 */
				if (pp->wire_count == 0 && pp->valid == 0 &&
				    !vm_page_busied(pp))
					vm_page_free(pp);
			} else {
				pp->valid = VM_PAGE_BITS_ALL;
				vm_page_activate(pp);
			}
			vm_page_unlock(pp);
#endif
		} else {
			/* Already populated: only drop the busy we took. */
			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_do_sunbusy(pp);
		}
		/* On error neither the uio nor len is advanced. */
		if (error)
			break;
		zfs_uio_advance(uio, bytes);
		len -= bytes;
	}
	zfs_vmobject_wunlock_12(obj);
	return (error);
}
593
594 /*
595 * When a file is memory mapped, we must keep the IO data synchronized
596 * between the DMU cache and the memory mapped pages. What this means:
597 *
598 * On Read: We "read" preferentially from memory mapped pages,
599 * else we default from the dmu buffer.
600 *
601 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
602 * the file is memory mapped.
603 */
604 int
mappedread(znode_t * zp,int nbytes,zfs_uio_t * uio)605 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
606 {
607 vnode_t *vp = ZTOV(zp);
608 vm_object_t obj;
609 int64_t start;
610 int len = nbytes;
611 int off;
612 int error = 0;
613
614 ASSERT3P(vp->v_mount, !=, NULL);
615 obj = vp->v_object;
616 ASSERT3P(obj, !=, NULL);
617
618 start = zfs_uio_offset(uio);
619 off = start & PAGEOFFSET;
620 zfs_vmobject_wlock_12(obj);
621 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
622 vm_page_t pp;
623 uint64_t bytes = MIN(PAGESIZE - off, len);
624
625 if ((pp = page_hold(vp, start))) {
626 struct sf_buf *sf;
627 caddr_t va;
628
629 zfs_vmobject_wunlock_12(obj);
630 va = zfs_map_page(pp, &sf);
631 error = vn_io_fault_uiomove(va + off, bytes,
632 GET_UIO_STRUCT(uio));
633 zfs_unmap_page(sf);
634 zfs_vmobject_wlock_12(obj);
635 page_unhold(pp);
636 } else {
637 zfs_vmobject_wunlock_12(obj);
638 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
639 uio, bytes);
640 zfs_vmobject_wlock_12(obj);
641 }
642 len -= bytes;
643 off = 0;
644 if (error)
645 break;
646 }
647 zfs_vmobject_wunlock_12(obj);
648 return (error);
649 }
650
651 int
zfs_write_simple(znode_t * zp,const void * data,size_t len,loff_t pos,size_t * presid)652 zfs_write_simple(znode_t *zp, const void *data, size_t len,
653 loff_t pos, size_t *presid)
654 {
655 int error = 0;
656 ssize_t resid;
657
658 error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
659 UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);
660
661 if (error) {
662 return (SET_ERROR(error));
663 } else if (presid == NULL) {
664 if (resid != 0) {
665 error = SET_ERROR(EIO);
666 }
667 } else {
668 *presid = resid;
669 }
670 return (error);
671 }
672
673 void
zfs_zrele_async(znode_t * zp)674 zfs_zrele_async(znode_t *zp)
675 {
676 vnode_t *vp = ZTOV(zp);
677 objset_t *os = ITOZSB(vp)->z_os;
678
679 VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
680 }
681
/*
 * Callback handed to vn_vget_ino_gen() by zfs_lookup_lock() for "..":
 * returns the already referenced vnode passed in arg and locks it.
 *
 *	IN:	mp	- mount point [UNUSED].
 *		arg	- the vnode to return.
 *		lkflags	- lock flags for vn_lock().
 *
 *	OUT:	vpp	- set to the vnode passed in arg.
 *
 *	RETURN:	0 on success; on a vn_lock() failure the vnode reference
 *		is released and the error is returned.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	struct vnode *vp = arg;
	int error;

	*vpp = vp;
	error = vn_lock(vp, lkflags);
	if (error != 0)
		vrele(vp);
	return (error);
}
693
694 static int
zfs_lookup_lock(vnode_t * dvp,vnode_t * vp,const char * name,int lkflags)695 zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
696 {
697 znode_t *zdp = VTOZ(dvp);
698 zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
699 int error;
700 int ltype;
701
702 if (zfsvfs->z_replay == B_FALSE)
703 ASSERT_VOP_LOCKED(dvp, __func__);
704
705 if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
706 ASSERT3P(dvp, ==, vp);
707 vref(dvp);
708 ltype = lkflags & LK_TYPE_MASK;
709 if (ltype != VOP_ISLOCKED(dvp)) {
710 if (ltype == LK_EXCLUSIVE)
711 vn_lock(dvp, LK_UPGRADE | LK_RETRY);
712 else /* if (ltype == LK_SHARED) */
713 vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
714
715 /*
716 * Relock for the "." case could leave us with
717 * reclaimed vnode.
718 */
719 if (VN_IS_DOOMED(dvp)) {
720 vrele(dvp);
721 return (SET_ERROR(ENOENT));
722 }
723 }
724 return (0);
725 } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
726 /*
727 * Note that in this case, dvp is the child vnode, and we
728 * are looking up the parent vnode - exactly reverse from
729 * normal operation. Unlocking dvp requires some rather
730 * tricky unlock/relock dance to prevent mp from being freed;
731 * use vn_vget_ino_gen() which takes care of all that.
732 *
733 * XXX Note that there is a time window when both vnodes are
734 * unlocked. It is possible, although highly unlikely, that
735 * during that window the parent-child relationship between
736 * the vnodes may change, for example, get reversed.
737 * In that case we would have a wrong lock order for the vnodes.
738 * All other filesystems seem to ignore this problem, so we
739 * do the same here.
740 * A potential solution could be implemented as follows:
741 * - using LK_NOWAIT when locking the second vnode and retrying
742 * if necessary
743 * - checking that the parent-child relationship still holds
744 * after locking both vnodes and retrying if it doesn't
745 */
746 error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
747 return (error);
748 } else {
749 error = vn_lock(vp, lkflags);
750 if (error != 0)
751 vrele(vp);
752 return (error);
753 }
754 }
755
756 /*
757 * Lookup an entry in a directory, or an extended attribute directory.
758 * If it exists, return a held vnode reference for it.
759 *
760 * IN: dvp - vnode of directory to search.
761 * nm - name of entry to lookup.
762 * pnp - full pathname to lookup [UNUSED].
763 * flags - LOOKUP_XATTR set if looking for an attribute.
764 * rdir - root directory vnode [UNUSED].
765 * cr - credentials of caller.
766 * ct - caller context
767 *
768 * OUT: vpp - vnode of located entry, NULL if not found.
769 *
770 * RETURN: 0 on success, error code on failure.
771 *
772 * Timestamps:
773 * NA
774 */
775 static int
zfs_lookup(vnode_t * dvp,const char * nm,vnode_t ** vpp,struct componentname * cnp,int nameiop,cred_t * cr,int flags,boolean_t cached)776 zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
777 struct componentname *cnp, int nameiop, cred_t *cr, int flags,
778 boolean_t cached)
779 {
780 znode_t *zdp = VTOZ(dvp);
781 znode_t *zp;
782 zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
783 #if __FreeBSD_version > 1300124
784 seqc_t dvp_seqc;
785 #endif
786 int error = 0;
787
788 /*
789 * Fast path lookup, however we must skip DNLC lookup
790 * for case folding or normalizing lookups because the
791 * DNLC code only stores the passed in name. This means
792 * creating 'a' and removing 'A' on a case insensitive
793 * file system would work, but DNLC still thinks 'a'
794 * exists and won't let you create it again on the next
795 * pass through fast path.
796 */
797 if (!(flags & LOOKUP_XATTR)) {
798 if (dvp->v_type != VDIR) {
799 return (SET_ERROR(ENOTDIR));
800 } else if (zdp->z_sa_hdl == NULL) {
801 return (SET_ERROR(EIO));
802 }
803 }
804
805 DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
806 const char *, nm);
807
808 if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
809 return (error);
810
811 #if __FreeBSD_version > 1300124
812 dvp_seqc = vn_seqc_read_notmodify(dvp);
813 #endif
814
815 *vpp = NULL;
816
817 if (flags & LOOKUP_XATTR) {
818 /*
819 * If the xattr property is off, refuse the lookup request.
820 */
821 if (!(zfsvfs->z_flags & ZSB_XATTR)) {
822 zfs_exit(zfsvfs, FTAG);
823 return (SET_ERROR(EOPNOTSUPP));
824 }
825
826 /*
827 * We don't allow recursive attributes..
828 * Maybe someday we will.
829 */
830 if (zdp->z_pflags & ZFS_XATTR) {
831 zfs_exit(zfsvfs, FTAG);
832 return (SET_ERROR(EINVAL));
833 }
834
835 if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
836 zfs_exit(zfsvfs, FTAG);
837 return (error);
838 }
839 *vpp = ZTOV(zp);
840
841 /*
842 * Do we have permission to get into attribute directory?
843 */
844 error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr, NULL);
845 if (error) {
846 vrele(ZTOV(zp));
847 }
848
849 zfs_exit(zfsvfs, FTAG);
850 return (error);
851 }
852
853 /*
854 * Check accessibility of directory if we're not coming in via
855 * VOP_CACHEDLOOKUP.
856 */
857 if (!cached) {
858 #ifdef NOEXECCHECK
859 if ((cnp->cn_flags & NOEXECCHECK) != 0) {
860 cnp->cn_flags &= ~NOEXECCHECK;
861 } else
862 #endif
863 if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
864 NULL))) {
865 zfs_exit(zfsvfs, FTAG);
866 return (error);
867 }
868 }
869
870 if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
871 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
872 zfs_exit(zfsvfs, FTAG);
873 return (SET_ERROR(EILSEQ));
874 }
875
876
877 /*
878 * First handle the special cases.
879 */
880 if ((cnp->cn_flags & ISDOTDOT) != 0) {
881 /*
882 * If we are a snapshot mounted under .zfs, return
883 * the vp for the snapshot directory.
884 */
885 if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
886 struct componentname cn;
887 vnode_t *zfsctl_vp;
888 int ltype;
889
890 zfs_exit(zfsvfs, FTAG);
891 ltype = VOP_ISLOCKED(dvp);
892 VOP_UNLOCK1(dvp);
893 error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
894 &zfsctl_vp);
895 if (error == 0) {
896 cn.cn_nameptr = "snapshot";
897 cn.cn_namelen = strlen(cn.cn_nameptr);
898 cn.cn_nameiop = cnp->cn_nameiop;
899 cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
900 cn.cn_lkflags = cnp->cn_lkflags;
901 error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
902 vput(zfsctl_vp);
903 }
904 vn_lock(dvp, ltype | LK_RETRY);
905 return (error);
906 }
907 }
908 if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
909 zfs_exit(zfsvfs, FTAG);
910 if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
911 return (SET_ERROR(ENOTSUP));
912 error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
913 return (error);
914 }
915
916 /*
917 * The loop is retry the lookup if the parent-child relationship
918 * changes during the dot-dot locking complexities.
919 */
920 for (;;) {
921 uint64_t parent;
922
923 error = zfs_dirlook(zdp, nm, &zp);
924 if (error == 0)
925 *vpp = ZTOV(zp);
926
927 zfs_exit(zfsvfs, FTAG);
928 if (error != 0)
929 break;
930
931 error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
932 if (error != 0) {
933 /*
934 * If we've got a locking error, then the vnode
935 * got reclaimed because of a force unmount.
936 * We never enter doomed vnodes into the name cache.
937 */
938 *vpp = NULL;
939 return (error);
940 }
941
942 if ((cnp->cn_flags & ISDOTDOT) == 0)
943 break;
944
945 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) {
946 vput(ZTOV(zp));
947 *vpp = NULL;
948 return (error);
949 }
950 if (zdp->z_sa_hdl == NULL) {
951 error = SET_ERROR(EIO);
952 } else {
953 error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
954 &parent, sizeof (parent));
955 }
956 if (error != 0) {
957 zfs_exit(zfsvfs, FTAG);
958 vput(ZTOV(zp));
959 break;
960 }
961 if (zp->z_id == parent) {
962 zfs_exit(zfsvfs, FTAG);
963 break;
964 }
965 vput(ZTOV(zp));
966 }
967
968 if (error != 0)
969 *vpp = NULL;
970
971 /* Translate errors and add SAVENAME when needed. */
972 if (cnp->cn_flags & ISLASTCN) {
973 switch (nameiop) {
974 case CREATE:
975 case RENAME:
976 if (error == ENOENT) {
977 error = EJUSTRETURN;
978 #if __FreeBSD_version < 1400068
979 cnp->cn_flags |= SAVENAME;
980 #endif
981 break;
982 }
983 zfs_fallthrough;
984 case DELETE:
985 #if __FreeBSD_version < 1400068
986 if (error == 0)
987 cnp->cn_flags |= SAVENAME;
988 #endif
989 break;
990 }
991 }
992
993 #if __FreeBSD_version > 1300124
994 if ((cnp->cn_flags & ISDOTDOT) != 0) {
995 /*
996 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
997 * handle races. In particular different callers may end up
998 * with different vnodes and will try to add conflicting
999 * entries to the namecache.
1000 *
1001 * While finding different result may be acceptable in face
1002 * of concurrent modification, adding conflicting entries
1003 * trips over an assert in the namecache.
1004 *
1005 * Ultimately let an entry through once everything settles.
1006 */
1007 if (!vn_seqc_consistent(dvp, dvp_seqc)) {
1008 cnp->cn_flags &= ~MAKEENTRY;
1009 }
1010 }
1011 #endif
1012
1013 /* Insert name into cache (as non-existent) if appropriate. */
1014 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
1015 error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
1016 cache_enter(dvp, NULL, cnp);
1017
1018 /* Insert name into cache if appropriate. */
1019 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
1020 error == 0 && (cnp->cn_flags & MAKEENTRY)) {
1021 if (!(cnp->cn_flags & ISLASTCN) ||
1022 (nameiop != DELETE && nameiop != RENAME)) {
1023 cache_enter(dvp, *vpp, cnp);
1024 }
1025 }
1026
1027 return (error);
1028 }
1029
/*
 * Attempt to create a new entry in a directory.  If the entry
 * already exists, truncate the file if permissible, else return
 * an error.  Return the znode of the created or trunc'd file.
 *
 *	IN:	dzp	- znode of directory to put new file entry in.
 *		name	- name of new file entry.
 *		vap	- attributes of new file.
 *		excl	- flag indicating exclusive or non-exclusive mode
 *			  [UNUSED].
 *		mode	- mode to open file with [UNUSED].
 *		cr	- credentials of caller.
 *		flag	- large file flag [UNUSED].
 *		vsecp	- ACL to be set
 *		mnt_ns	- Unused on FreeBSD
 *
 *	OUT:	zpp	- znode of created or trunc'd entry.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dzp - ctime|mtime updated if new entry created
 *	 zp - ctime|mtime always, atime if new
 */
1054 int
zfs_create(znode_t * dzp,const char * name,vattr_t * vap,int excl,int mode,znode_t ** zpp,cred_t * cr,int flag,vsecattr_t * vsecp,zidmap_t * mnt_ns)1055 zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
1056 znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp, zidmap_t *mnt_ns)
1057 {
1058 (void) excl, (void) mode, (void) flag;
1059 znode_t *zp;
1060 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1061 zilog_t *zilog;
1062 objset_t *os;
1063 dmu_tx_t *tx;
1064 int error;
1065 uid_t uid = crgetuid(cr);
1066 gid_t gid = crgetgid(cr);
1067 uint64_t projid = ZFS_DEFAULT_PROJID;
1068 zfs_acl_ids_t acl_ids;
1069 boolean_t fuid_dirtied;
1070 uint64_t txtype;
1071 #ifdef DEBUG_VFS_LOCKS
1072 vnode_t *dvp = ZTOV(dzp);
1073 #endif
1074
1075 /*
1076 * If we have an ephemeral id, ACL, or XVATTR then
1077 * make sure file system is at proper version
1078 */
1079 if (zfsvfs->z_use_fuids == B_FALSE &&
1080 (vsecp || (vap->va_mask & AT_XVATTR) ||
1081 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1082 return (SET_ERROR(EINVAL));
1083
1084 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1085 return (error);
1086 os = zfsvfs->z_os;
1087 zilog = zfsvfs->z_log;
1088
1089 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
1090 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1091 zfs_exit(zfsvfs, FTAG);
1092 return (SET_ERROR(EILSEQ));
1093 }
1094
1095 if (vap->va_mask & AT_XVATTR) {
1096 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
1097 crgetuid(cr), cr, vap->va_type)) != 0) {
1098 zfs_exit(zfsvfs, FTAG);
1099 return (error);
1100 }
1101 }
1102
1103 *zpp = NULL;
1104
1105 if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
1106 vap->va_mode &= ~S_ISVTX;
1107
1108 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
1109 if (error) {
1110 zfs_exit(zfsvfs, FTAG);
1111 return (error);
1112 }
1113 ASSERT3P(zp, ==, NULL);
1114
1115 /*
1116 * Create a new file object and update the directory
1117 * to reference it.
1118 */
1119 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
1120 goto out;
1121 }
1122
1123 /*
1124 * We only support the creation of regular files in
1125 * extended attribute directories.
1126 */
1127
1128 if ((dzp->z_pflags & ZFS_XATTR) &&
1129 (vap->va_type != VREG)) {
1130 error = SET_ERROR(EINVAL);
1131 goto out;
1132 }
1133
1134 if ((error = zfs_acl_ids_create(dzp, 0, vap,
1135 cr, vsecp, &acl_ids, NULL)) != 0)
1136 goto out;
1137
1138 if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
1139 projid = zfs_inherit_projid(dzp);
1140 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
1141 zfs_acl_ids_free(&acl_ids);
1142 error = SET_ERROR(EDQUOT);
1143 goto out;
1144 }
1145
1146 getnewvnode_reserve_();
1147
1148 tx = dmu_tx_create(os);
1149
1150 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1151 ZFS_SA_BASE_ATTR_SIZE);
1152
1153 fuid_dirtied = zfsvfs->z_fuid_dirty;
1154 if (fuid_dirtied)
1155 zfs_fuid_txhold(zfsvfs, tx);
1156 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1157 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1158 if (!zfsvfs->z_use_sa &&
1159 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1160 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1161 0, acl_ids.z_aclp->z_acl_bytes);
1162 }
1163 error = dmu_tx_assign(tx, TXG_WAIT);
1164 if (error) {
1165 zfs_acl_ids_free(&acl_ids);
1166 dmu_tx_abort(tx);
1167 getnewvnode_drop_reserve();
1168 zfs_exit(zfsvfs, FTAG);
1169 return (error);
1170 }
1171 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1172
1173 error = zfs_link_create(dzp, name, zp, tx, ZNEW);
1174 if (error != 0) {
1175 /*
1176 * Since, we failed to add the directory entry for it,
1177 * delete the newly created dnode.
1178 */
1179 zfs_znode_delete(zp, tx);
1180 VOP_UNLOCK1(ZTOV(zp));
1181 zrele(zp);
1182 zfs_acl_ids_free(&acl_ids);
1183 dmu_tx_commit(tx);
1184 getnewvnode_drop_reserve();
1185 goto out;
1186 }
1187
1188 if (fuid_dirtied)
1189 zfs_fuid_sync(zfsvfs, tx);
1190
1191 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1192 zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1193 vsecp, acl_ids.z_fuidp, vap);
1194 zfs_acl_ids_free(&acl_ids);
1195 dmu_tx_commit(tx);
1196
1197 getnewvnode_drop_reserve();
1198
1199 out:
1200 VNCHECKREF(dvp);
1201 if (error == 0) {
1202 *zpp = zp;
1203 }
1204
1205 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1206 zil_commit(zilog, 0);
1207
1208 zfs_exit(zfsvfs, FTAG);
1209 return (error);
1210 }
1211
1212 /*
1213 * Remove an entry from a directory.
1214 *
1215 * IN: dvp - vnode of directory to remove entry from.
1216 * name - name of entry to remove.
1217 * cr - credentials of caller.
1218 * ct - caller context
1219 * flags - case flags
1220 *
1221 * RETURN: 0 on success, error code on failure.
1222 *
1223 * Timestamps:
1224 * dvp - ctime|mtime
1225 * vp - ctime (if nlink > 0)
1226 */
1227 static int
zfs_remove_(vnode_t * dvp,vnode_t * vp,const char * name,cred_t * cr)1228 zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1229 {
1230 znode_t *dzp = VTOZ(dvp);
1231 znode_t *zp;
1232 znode_t *xzp;
1233 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1234 zilog_t *zilog;
1235 uint64_t xattr_obj;
1236 uint64_t obj = 0;
1237 dmu_tx_t *tx;
1238 boolean_t unlinked;
1239 uint64_t txtype;
1240 int error;
1241
1242
1243 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1244 return (error);
1245 zp = VTOZ(vp);
1246 if ((error = zfs_verify_zp(zp)) != 0) {
1247 zfs_exit(zfsvfs, FTAG);
1248 return (error);
1249 }
1250 zilog = zfsvfs->z_log;
1251
1252 xattr_obj = 0;
1253 xzp = NULL;
1254
1255 if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
1256 goto out;
1257 }
1258
1259 /*
1260 * Need to use rmdir for removing directories.
1261 */
1262 if (vp->v_type == VDIR) {
1263 error = SET_ERROR(EPERM);
1264 goto out;
1265 }
1266
1267 vnevent_remove(vp, dvp, name, ct);
1268
1269 obj = zp->z_id;
1270
1271 /* are there any extended attributes? */
1272 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1273 &xattr_obj, sizeof (xattr_obj));
1274 if (error == 0 && xattr_obj) {
1275 error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1276 ASSERT0(error);
1277 }
1278
1279 /*
1280 * We may delete the znode now, or we may put it in the unlinked set;
1281 * it depends on whether we're the last link, and on whether there are
1282 * other holds on the vnode. So we dmu_tx_hold() the right things to
1283 * allow for either case.
1284 */
1285 tx = dmu_tx_create(zfsvfs->z_os);
1286 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1287 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1288 zfs_sa_upgrade_txholds(tx, zp);
1289 zfs_sa_upgrade_txholds(tx, dzp);
1290
1291 if (xzp) {
1292 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1293 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1294 }
1295
1296 /* charge as an update -- would be nice not to charge at all */
1297 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1298
1299 /*
1300 * Mark this transaction as typically resulting in a net free of space
1301 */
1302 dmu_tx_mark_netfree(tx);
1303
1304 error = dmu_tx_assign(tx, TXG_WAIT);
1305 if (error) {
1306 dmu_tx_abort(tx);
1307 zfs_exit(zfsvfs, FTAG);
1308 return (error);
1309 }
1310
1311 /*
1312 * Remove the directory entry.
1313 */
1314 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
1315
1316 if (error) {
1317 dmu_tx_commit(tx);
1318 goto out;
1319 }
1320
1321 if (unlinked) {
1322 zfs_unlinked_add(zp, tx);
1323 vp->v_vflag |= VV_NOSYNC;
1324 }
1325 /* XXX check changes to linux vnops */
1326 txtype = TX_REMOVE;
1327 zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
1328
1329 dmu_tx_commit(tx);
1330 out:
1331
1332 if (xzp)
1333 vrele(ZTOV(xzp));
1334
1335 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1336 zil_commit(zilog, 0);
1337
1338
1339 zfs_exit(zfsvfs, FTAG);
1340 return (error);
1341 }
1342
1343
1344 static int
zfs_lookup_internal(znode_t * dzp,const char * name,vnode_t ** vpp,struct componentname * cnp,int nameiop)1345 zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
1346 struct componentname *cnp, int nameiop)
1347 {
1348 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1349 int error;
1350
1351 cnp->cn_nameptr = __DECONST(char *, name);
1352 cnp->cn_namelen = strlen(name);
1353 cnp->cn_nameiop = nameiop;
1354 cnp->cn_flags = ISLASTCN;
1355 #if __FreeBSD_version < 1400068
1356 cnp->cn_flags |= SAVENAME;
1357 #endif
1358 cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
1359 cnp->cn_cred = kcred;
1360 #if __FreeBSD_version < 1400037
1361 cnp->cn_thread = curthread;
1362 #endif
1363
1364 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
1365 struct vop_lookup_args a;
1366
1367 a.a_gen.a_desc = &vop_lookup_desc;
1368 a.a_dvp = ZTOV(dzp);
1369 a.a_vpp = vpp;
1370 a.a_cnp = cnp;
1371 error = vfs_cache_lookup(&a);
1372 } else {
1373 error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0,
1374 B_FALSE);
1375 }
1376 #ifdef ZFS_DEBUG
1377 if (error) {
1378 printf("got error %d on name %s on op %d\n", error, name,
1379 nameiop);
1380 kdb_backtrace();
1381 }
1382 #endif
1383 return (error);
1384 }
1385
1386 int
zfs_remove(znode_t * dzp,const char * name,cred_t * cr,int flags)1387 zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
1388 {
1389 vnode_t *vp;
1390 int error;
1391 struct componentname cn;
1392
1393 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1394 return (error);
1395
1396 error = zfs_remove_(ZTOV(dzp), vp, name, cr);
1397 vput(vp);
1398 return (error);
1399 }
1400 /*
1401 * Create a new directory and insert it into dvp using the name
1402 * provided. Return a pointer to the inserted directory.
1403 *
1404 * IN: dvp - vnode of directory to add subdir to.
1405 * dirname - name of new directory.
1406 * vap - attributes of new directory.
1407 * cr - credentials of caller.
1408 * ct - caller context
1409 * flags - case flags
1410 * vsecp - ACL to be set
1411 * mnt_ns - Unused on FreeBSD
1412 *
1413 * OUT: vpp - vnode of created directory.
1414 *
1415 * RETURN: 0 on success, error code on failure.
1416 *
1417 * Timestamps:
1418 * dvp - ctime|mtime updated
1419 * vp - ctime|mtime|atime updated
1420 */
1421 int
zfs_mkdir(znode_t * dzp,const char * dirname,vattr_t * vap,znode_t ** zpp,cred_t * cr,int flags,vsecattr_t * vsecp,zidmap_t * mnt_ns)1422 zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
1423 cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns)
1424 {
1425 (void) flags, (void) vsecp;
1426 znode_t *zp;
1427 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1428 zilog_t *zilog;
1429 uint64_t txtype;
1430 dmu_tx_t *tx;
1431 int error;
1432 uid_t uid = crgetuid(cr);
1433 gid_t gid = crgetgid(cr);
1434 zfs_acl_ids_t acl_ids;
1435 boolean_t fuid_dirtied;
1436
1437 ASSERT3U(vap->va_type, ==, VDIR);
1438
1439 /*
1440 * If we have an ephemeral id, ACL, or XVATTR then
1441 * make sure file system is at proper version
1442 */
1443 if (zfsvfs->z_use_fuids == B_FALSE &&
1444 ((vap->va_mask & AT_XVATTR) ||
1445 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1446 return (SET_ERROR(EINVAL));
1447
1448 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1449 return (error);
1450 zilog = zfsvfs->z_log;
1451
1452 if (dzp->z_pflags & ZFS_XATTR) {
1453 zfs_exit(zfsvfs, FTAG);
1454 return (SET_ERROR(EINVAL));
1455 }
1456
1457 if (zfsvfs->z_utf8 && u8_validate(dirname,
1458 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1459 zfs_exit(zfsvfs, FTAG);
1460 return (SET_ERROR(EILSEQ));
1461 }
1462
1463 if (vap->va_mask & AT_XVATTR) {
1464 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
1465 crgetuid(cr), cr, vap->va_type)) != 0) {
1466 zfs_exit(zfsvfs, FTAG);
1467 return (error);
1468 }
1469 }
1470
1471 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
1472 NULL, &acl_ids, NULL)) != 0) {
1473 zfs_exit(zfsvfs, FTAG);
1474 return (error);
1475 }
1476
1477 /*
1478 * First make sure the new directory doesn't exist.
1479 *
1480 * Existence is checked first to make sure we don't return
1481 * EACCES instead of EEXIST which can cause some applications
1482 * to fail.
1483 */
1484 *zpp = NULL;
1485
1486 if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
1487 zfs_acl_ids_free(&acl_ids);
1488 zfs_exit(zfsvfs, FTAG);
1489 return (error);
1490 }
1491 ASSERT3P(zp, ==, NULL);
1492
1493 if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
1494 mnt_ns))) {
1495 zfs_acl_ids_free(&acl_ids);
1496 zfs_exit(zfsvfs, FTAG);
1497 return (error);
1498 }
1499
1500 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
1501 zfs_acl_ids_free(&acl_ids);
1502 zfs_exit(zfsvfs, FTAG);
1503 return (SET_ERROR(EDQUOT));
1504 }
1505
1506 /*
1507 * Add a new entry to the directory.
1508 */
1509 getnewvnode_reserve_();
1510 tx = dmu_tx_create(zfsvfs->z_os);
1511 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
1512 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1513 fuid_dirtied = zfsvfs->z_fuid_dirty;
1514 if (fuid_dirtied)
1515 zfs_fuid_txhold(zfsvfs, tx);
1516 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1517 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1518 acl_ids.z_aclp->z_acl_bytes);
1519 }
1520
1521 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1522 ZFS_SA_BASE_ATTR_SIZE);
1523
1524 error = dmu_tx_assign(tx, TXG_WAIT);
1525 if (error) {
1526 zfs_acl_ids_free(&acl_ids);
1527 dmu_tx_abort(tx);
1528 getnewvnode_drop_reserve();
1529 zfs_exit(zfsvfs, FTAG);
1530 return (error);
1531 }
1532
1533 /*
1534 * Create new node.
1535 */
1536 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1537
1538 /*
1539 * Now put new name in parent dir.
1540 */
1541 error = zfs_link_create(dzp, dirname, zp, tx, ZNEW);
1542 if (error != 0) {
1543 zfs_znode_delete(zp, tx);
1544 VOP_UNLOCK1(ZTOV(zp));
1545 zrele(zp);
1546 goto out;
1547 }
1548
1549 if (fuid_dirtied)
1550 zfs_fuid_sync(zfsvfs, tx);
1551
1552 *zpp = zp;
1553
1554 txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
1555 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
1556 acl_ids.z_fuidp, vap);
1557
1558 out:
1559 zfs_acl_ids_free(&acl_ids);
1560
1561 dmu_tx_commit(tx);
1562
1563 getnewvnode_drop_reserve();
1564
1565 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1566 zil_commit(zilog, 0);
1567
1568 zfs_exit(zfsvfs, FTAG);
1569 return (error);
1570 }
1571
#if __FreeBSD_version < 1300124
/*
 * Local definition of cache_vop_rmdir(), compiled only for
 * __FreeBSD_version < 1300124: purge the name cache entries of the parent
 * directory and of the directory that was removed.
 */
static void
cache_vop_rmdir(struct vnode *dvp, struct vnode *vp)
{
	cache_purge(dvp);
	cache_purge(vp);
}
#endif
1581
1582 /*
1583 * Remove a directory subdir entry. If the current working
1584 * directory is the same as the subdir to be removed, the
1585 * remove will fail.
1586 *
1587 * IN: dvp - vnode of directory to remove from.
1588 * name - name of directory to be removed.
1589 * cwd - vnode of current working directory.
1590 * cr - credentials of caller.
1591 * ct - caller context
1592 * flags - case flags
1593 *
1594 * RETURN: 0 on success, error code on failure.
1595 *
1596 * Timestamps:
1597 * dvp - ctime|mtime updated
1598 */
1599 static int
zfs_rmdir_(vnode_t * dvp,vnode_t * vp,const char * name,cred_t * cr)1600 zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1601 {
1602 znode_t *dzp = VTOZ(dvp);
1603 znode_t *zp = VTOZ(vp);
1604 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1605 zilog_t *zilog;
1606 dmu_tx_t *tx;
1607 int error;
1608
1609 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1610 return (error);
1611 if ((error = zfs_verify_zp(zp)) != 0) {
1612 zfs_exit(zfsvfs, FTAG);
1613 return (error);
1614 }
1615 zilog = zfsvfs->z_log;
1616
1617
1618 if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
1619 goto out;
1620 }
1621
1622 if (vp->v_type != VDIR) {
1623 error = SET_ERROR(ENOTDIR);
1624 goto out;
1625 }
1626
1627 vnevent_rmdir(vp, dvp, name, ct);
1628
1629 tx = dmu_tx_create(zfsvfs->z_os);
1630 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1631 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1632 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1633 zfs_sa_upgrade_txholds(tx, zp);
1634 zfs_sa_upgrade_txholds(tx, dzp);
1635 dmu_tx_mark_netfree(tx);
1636 error = dmu_tx_assign(tx, TXG_WAIT);
1637 if (error) {
1638 dmu_tx_abort(tx);
1639 zfs_exit(zfsvfs, FTAG);
1640 return (error);
1641 }
1642
1643 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
1644
1645 if (error == 0) {
1646 uint64_t txtype = TX_RMDIR;
1647 zfs_log_remove(zilog, tx, txtype, dzp, name,
1648 ZFS_NO_OBJECT, B_FALSE);
1649 }
1650
1651 dmu_tx_commit(tx);
1652
1653 if (zfsvfs->z_use_namecache)
1654 cache_vop_rmdir(dvp, vp);
1655 out:
1656 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1657 zil_commit(zilog, 0);
1658
1659 zfs_exit(zfsvfs, FTAG);
1660 return (error);
1661 }
1662
1663 int
zfs_rmdir(znode_t * dzp,const char * name,znode_t * cwd,cred_t * cr,int flags)1664 zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
1665 {
1666 struct componentname cn;
1667 vnode_t *vp;
1668 int error;
1669
1670 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1671 return (error);
1672
1673 error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
1674 vput(vp);
1675 return (error);
1676 }
1677
1678 /*
1679 * Read as many directory entries as will fit into the provided
1680 * buffer from the given directory cursor position (specified in
1681 * the uio structure).
1682 *
1683 * IN: vp - vnode of directory to read.
1684 * uio - structure supplying read location, range info,
1685 * and return buffer.
1686 * cr - credentials of caller.
1687 * ct - caller context
1688 *
1689 * OUT: uio - updated offset and range, buffer filled.
1690 * eofp - set to true if end-of-file detected.
1691 * ncookies- number of entries in cookies
1692 * cookies - offsets to directory entries
1693 *
1694 * RETURN: 0 on success, error code on failure.
1695 *
1696 * Timestamps:
1697 * vp - atime updated
1698 *
1699 * Note that the low 4 bits of the cookie returned by zap is always zero.
1700 * This allows us to use the low range for "special" directory entries:
1701 * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem,
1702 * we use the offset 2 for the '.zfs' directory.
1703 */
1704 static int
zfs_readdir(vnode_t * vp,zfs_uio_t * uio,cred_t * cr,int * eofp,int * ncookies,cookie_t ** cookies)1705 zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
1706 int *ncookies, cookie_t **cookies)
1707 {
1708 znode_t *zp = VTOZ(vp);
1709 iovec_t *iovp;
1710 dirent64_t *odp;
1711 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1712 objset_t *os;
1713 caddr_t outbuf;
1714 size_t bufsize;
1715 zap_cursor_t zc;
1716 zap_attribute_t zap;
1717 uint_t bytes_wanted;
1718 uint64_t offset; /* must be unsigned; checks for < 1 */
1719 uint64_t parent;
1720 int local_eof;
1721 int outcount;
1722 int error;
1723 uint8_t prefetch;
1724 uint8_t type;
1725 int ncooks;
1726 cookie_t *cooks = NULL;
1727
1728 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
1729 return (error);
1730
1731 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
1732 &parent, sizeof (parent))) != 0) {
1733 zfs_exit(zfsvfs, FTAG);
1734 return (error);
1735 }
1736
1737 /*
1738 * If we are not given an eof variable,
1739 * use a local one.
1740 */
1741 if (eofp == NULL)
1742 eofp = &local_eof;
1743
1744 /*
1745 * Check for valid iov_len.
1746 */
1747 if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
1748 zfs_exit(zfsvfs, FTAG);
1749 return (SET_ERROR(EINVAL));
1750 }
1751
1752 /*
1753 * Quit if directory has been removed (posix)
1754 */
1755 if ((*eofp = zp->z_unlinked) != 0) {
1756 zfs_exit(zfsvfs, FTAG);
1757 return (0);
1758 }
1759
1760 error = 0;
1761 os = zfsvfs->z_os;
1762 offset = zfs_uio_offset(uio);
1763 prefetch = zp->z_zn_prefetch;
1764
1765 /*
1766 * Initialize the iterator cursor.
1767 */
1768 if (offset <= 3) {
1769 /*
1770 * Start iteration from the beginning of the directory.
1771 */
1772 zap_cursor_init(&zc, os, zp->z_id);
1773 } else {
1774 /*
1775 * The offset is a serialized cursor.
1776 */
1777 zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
1778 }
1779
1780 /*
1781 * Get space to change directory entries into fs independent format.
1782 */
1783 iovp = GET_UIO_STRUCT(uio)->uio_iov;
1784 bytes_wanted = iovp->iov_len;
1785 if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
1786 bufsize = bytes_wanted;
1787 outbuf = kmem_alloc(bufsize, KM_SLEEP);
1788 odp = (struct dirent64 *)outbuf;
1789 } else {
1790 bufsize = bytes_wanted;
1791 outbuf = NULL;
1792 odp = (struct dirent64 *)iovp->iov_base;
1793 }
1794
1795 if (ncookies != NULL) {
1796 /*
1797 * Minimum entry size is dirent size and 1 byte for a file name.
1798 */
1799 ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
1800 sizeof (((struct dirent *)NULL)->d_name) + 1);
1801 cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
1802 *cookies = cooks;
1803 *ncookies = ncooks;
1804 }
1805
1806 /*
1807 * Transform to file-system independent format
1808 */
1809 outcount = 0;
1810 while (outcount < bytes_wanted) {
1811 ino64_t objnum;
1812 ushort_t reclen;
1813 off64_t *next = NULL;
1814
1815 /*
1816 * Special case `.', `..', and `.zfs'.
1817 */
1818 if (offset == 0) {
1819 (void) strcpy(zap.za_name, ".");
1820 zap.za_normalization_conflict = 0;
1821 objnum = zp->z_id;
1822 type = DT_DIR;
1823 } else if (offset == 1) {
1824 (void) strcpy(zap.za_name, "..");
1825 zap.za_normalization_conflict = 0;
1826 objnum = parent;
1827 type = DT_DIR;
1828 } else if (offset == 2 && zfs_show_ctldir(zp)) {
1829 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
1830 zap.za_normalization_conflict = 0;
1831 objnum = ZFSCTL_INO_ROOT;
1832 type = DT_DIR;
1833 } else {
1834 /*
1835 * Grab next entry.
1836 */
1837 if ((error = zap_cursor_retrieve(&zc, &zap))) {
1838 if ((*eofp = (error == ENOENT)) != 0)
1839 break;
1840 else
1841 goto update;
1842 }
1843
1844 if (zap.za_integer_length != 8 ||
1845 zap.za_num_integers != 1) {
1846 cmn_err(CE_WARN, "zap_readdir: bad directory "
1847 "entry, obj = %lld, offset = %lld\n",
1848 (u_longlong_t)zp->z_id,
1849 (u_longlong_t)offset);
1850 error = SET_ERROR(ENXIO);
1851 goto update;
1852 }
1853
1854 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
1855 /*
1856 * MacOS X can extract the object type here such as:
1857 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1858 */
1859 type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1860 }
1861
1862 reclen = DIRENT64_RECLEN(strlen(zap.za_name));
1863
1864 /*
1865 * Will this entry fit in the buffer?
1866 */
1867 if (outcount + reclen > bufsize) {
1868 /*
1869 * Did we manage to fit anything in the buffer?
1870 */
1871 if (!outcount) {
1872 error = SET_ERROR(EINVAL);
1873 goto update;
1874 }
1875 break;
1876 }
1877 /*
1878 * Add normal entry:
1879 */
1880 odp->d_ino = objnum;
1881 odp->d_reclen = reclen;
1882 odp->d_namlen = strlen(zap.za_name);
1883 /* NOTE: d_off is the offset for the *next* entry. */
1884 next = &odp->d_off;
1885 strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
1886 odp->d_type = type;
1887 dirent_terminate(odp);
1888 odp = (dirent64_t *)((intptr_t)odp + reclen);
1889
1890 outcount += reclen;
1891
1892 ASSERT3S(outcount, <=, bufsize);
1893
1894 if (prefetch)
1895 dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);
1896
1897 /*
1898 * Move to the next entry, fill in the previous offset.
1899 */
1900 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
1901 zap_cursor_advance(&zc);
1902 offset = zap_cursor_serialize(&zc);
1903 } else {
1904 offset += 1;
1905 }
1906
1907 /* Fill the offset right after advancing the cursor. */
1908 if (next != NULL)
1909 *next = offset;
1910 if (cooks != NULL) {
1911 *cooks++ = offset;
1912 ncooks--;
1913 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
1914 }
1915 }
1916 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
1917
1918 /* Subtract unused cookies */
1919 if (ncookies != NULL)
1920 *ncookies -= ncooks;
1921
1922 if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
1923 iovp->iov_base += outcount;
1924 iovp->iov_len -= outcount;
1925 zfs_uio_resid(uio) -= outcount;
1926 } else if ((error =
1927 zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
1928 /*
1929 * Reset the pointer.
1930 */
1931 offset = zfs_uio_offset(uio);
1932 }
1933
1934 update:
1935 zap_cursor_fini(&zc);
1936 if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
1937 kmem_free(outbuf, bufsize);
1938
1939 if (error == ENOENT)
1940 error = 0;
1941
1942 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
1943
1944 zfs_uio_setoffset(uio, offset);
1945 zfs_exit(zfsvfs, FTAG);
1946 if (error != 0 && cookies != NULL) {
1947 free(*cookies, M_TEMP);
1948 *cookies = NULL;
1949 *ncookies = 0;
1950 }
1951 return (error);
1952 }
1953
1954 /*
1955 * Get the requested file attributes and place them in the provided
1956 * vattr structure.
1957 *
1958 * IN: vp - vnode of file.
1959 * vap - va_mask identifies requested attributes.
1960 * If AT_XVATTR set, then optional attrs are requested
1961 * flags - ATTR_NOACLCHECK (CIFS server context)
1962 * cr - credentials of caller.
1963 *
1964 * OUT: vap - attribute values.
1965 *
1966 * RETURN: 0 (always succeeds).
1967 */
1968 static int
zfs_getattr(vnode_t * vp,vattr_t * vap,int flags,cred_t * cr)1969 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
1970 {
1971 znode_t *zp = VTOZ(vp);
1972 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1973 int error = 0;
1974 uint32_t blksize;
1975 u_longlong_t nblocks;
1976 uint64_t mtime[2], ctime[2], crtime[2], rdev;
1977 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
1978 xoptattr_t *xoap = NULL;
1979 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
1980 sa_bulk_attr_t bulk[4];
1981 int count = 0;
1982
1983 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
1984 return (error);
1985
1986 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
1987
1988 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
1989 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
1990 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
1991 if (vp->v_type == VBLK || vp->v_type == VCHR)
1992 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
1993 &rdev, 8);
1994
1995 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
1996 zfs_exit(zfsvfs, FTAG);
1997 return (error);
1998 }
1999
2000 /*
2001 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
2002 * Also, if we are the owner don't bother, since owner should
2003 * always be allowed to read basic attributes of file.
2004 */
2005 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
2006 (vap->va_uid != crgetuid(cr))) {
2007 if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
2008 skipaclchk, cr, NULL))) {
2009 zfs_exit(zfsvfs, FTAG);
2010 return (error);
2011 }
2012 }
2013
2014 /*
2015 * Return all attributes. It's cheaper to provide the answer
2016 * than to determine whether we were asked the question.
2017 */
2018
2019 vap->va_type = IFTOVT(zp->z_mode);
2020 vap->va_mode = zp->z_mode & ~S_IFMT;
2021 vn_fsid(vp, vap);
2022 vap->va_nodeid = zp->z_id;
2023 vap->va_nlink = zp->z_links;
2024 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
2025 zp->z_links < ZFS_LINK_MAX)
2026 vap->va_nlink++;
2027 vap->va_size = zp->z_size;
2028 if (vp->v_type == VBLK || vp->v_type == VCHR)
2029 vap->va_rdev = zfs_cmpldev(rdev);
2030 else
2031 vap->va_rdev = 0;
2032 vap->va_gen = zp->z_gen;
2033 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */
2034 vap->va_filerev = zp->z_seq;
2035
2036 /*
2037 * Add in any requested optional attributes and the create time.
2038 * Also set the corresponding bits in the returned attribute bitmap.
2039 */
2040 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
2041 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
2042 xoap->xoa_archive =
2043 ((zp->z_pflags & ZFS_ARCHIVE) != 0);
2044 XVA_SET_RTN(xvap, XAT_ARCHIVE);
2045 }
2046
2047 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
2048 xoap->xoa_readonly =
2049 ((zp->z_pflags & ZFS_READONLY) != 0);
2050 XVA_SET_RTN(xvap, XAT_READONLY);
2051 }
2052
2053 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
2054 xoap->xoa_system =
2055 ((zp->z_pflags & ZFS_SYSTEM) != 0);
2056 XVA_SET_RTN(xvap, XAT_SYSTEM);
2057 }
2058
2059 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
2060 xoap->xoa_hidden =
2061 ((zp->z_pflags & ZFS_HIDDEN) != 0);
2062 XVA_SET_RTN(xvap, XAT_HIDDEN);
2063 }
2064
2065 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2066 xoap->xoa_nounlink =
2067 ((zp->z_pflags & ZFS_NOUNLINK) != 0);
2068 XVA_SET_RTN(xvap, XAT_NOUNLINK);
2069 }
2070
2071 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2072 xoap->xoa_immutable =
2073 ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
2074 XVA_SET_RTN(xvap, XAT_IMMUTABLE);
2075 }
2076
2077 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2078 xoap->xoa_appendonly =
2079 ((zp->z_pflags & ZFS_APPENDONLY) != 0);
2080 XVA_SET_RTN(xvap, XAT_APPENDONLY);
2081 }
2082
2083 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2084 xoap->xoa_nodump =
2085 ((zp->z_pflags & ZFS_NODUMP) != 0);
2086 XVA_SET_RTN(xvap, XAT_NODUMP);
2087 }
2088
2089 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
2090 xoap->xoa_opaque =
2091 ((zp->z_pflags & ZFS_OPAQUE) != 0);
2092 XVA_SET_RTN(xvap, XAT_OPAQUE);
2093 }
2094
2095 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2096 xoap->xoa_av_quarantined =
2097 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
2098 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
2099 }
2100
2101 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2102 xoap->xoa_av_modified =
2103 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
2104 XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
2105 }
2106
2107 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
2108 vp->v_type == VREG) {
2109 zfs_sa_get_scanstamp(zp, xvap);
2110 }
2111
2112 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2113 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
2114 XVA_SET_RTN(xvap, XAT_REPARSE);
2115 }
2116 if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
2117 xoap->xoa_generation = zp->z_gen;
2118 XVA_SET_RTN(xvap, XAT_GEN);
2119 }
2120
2121 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
2122 xoap->xoa_offline =
2123 ((zp->z_pflags & ZFS_OFFLINE) != 0);
2124 XVA_SET_RTN(xvap, XAT_OFFLINE);
2125 }
2126
2127 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
2128 xoap->xoa_sparse =
2129 ((zp->z_pflags & ZFS_SPARSE) != 0);
2130 XVA_SET_RTN(xvap, XAT_SPARSE);
2131 }
2132
2133 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2134 xoap->xoa_projinherit =
2135 ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
2136 XVA_SET_RTN(xvap, XAT_PROJINHERIT);
2137 }
2138
2139 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2140 xoap->xoa_projid = zp->z_projid;
2141 XVA_SET_RTN(xvap, XAT_PROJID);
2142 }
2143 }
2144
2145 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
2146 ZFS_TIME_DECODE(&vap->va_mtime, mtime);
2147 ZFS_TIME_DECODE(&vap->va_ctime, ctime);
2148 ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
2149
2150
2151 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
2152 vap->va_blksize = blksize;
2153 vap->va_bytes = nblocks << 9; /* nblocks * 512 */
2154
2155 if (zp->z_blksz == 0) {
2156 /*
2157 * Block size hasn't been set; suggest maximal I/O transfers.
2158 */
2159 vap->va_blksize = zfsvfs->z_max_blksz;
2160 }
2161
2162 zfs_exit(zfsvfs, FTAG);
2163 return (0);
2164 }
2165
2166 /*
2167 * Set the file attributes to the values contained in the
2168 * vattr structure.
2169 *
2170 * IN: zp - znode of file to be modified.
2171 * vap - new attribute values.
2172 * If AT_XVATTR set, then optional attrs are being set
2173 * flags - ATTR_UTIME set if non-default time values provided.
2174 * - ATTR_NOACLCHECK (CIFS context only).
2175 * cr - credentials of caller.
2176 * mnt_ns - Unused on FreeBSD
2177 *
2178 * RETURN: 0 on success, error code on failure.
2179 *
2180 * Timestamps:
2181 * vp - ctime updated, mtime updated if size changed.
2182 */
2183 int
zfs_setattr(znode_t * zp,vattr_t * vap,int flags,cred_t * cr,zidmap_t * mnt_ns)2184 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
2185 {
2186 vnode_t *vp = ZTOV(zp);
2187 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2188 objset_t *os;
2189 zilog_t *zilog;
2190 dmu_tx_t *tx;
2191 vattr_t oldva;
2192 xvattr_t tmpxvattr;
2193 uint_t mask = vap->va_mask;
2194 uint_t saved_mask = 0;
2195 uint64_t saved_mode;
2196 int trim_mask = 0;
2197 uint64_t new_mode;
2198 uint64_t new_uid, new_gid;
2199 uint64_t xattr_obj;
2200 uint64_t mtime[2], ctime[2];
2201 uint64_t projid = ZFS_INVALID_PROJID;
2202 znode_t *attrzp;
2203 int need_policy = FALSE;
2204 int err, err2;
2205 zfs_fuid_info_t *fuidp = NULL;
2206 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
2207 xoptattr_t *xoap;
2208 zfs_acl_t *aclp;
2209 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2210 boolean_t fuid_dirtied = B_FALSE;
2211 sa_bulk_attr_t bulk[7], xattr_bulk[7];
2212 int count = 0, xattr_count = 0;
2213
2214 if (mask == 0)
2215 return (0);
2216
2217 if (mask & AT_NOSET)
2218 return (SET_ERROR(EINVAL));
2219
2220 if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
2221 return (err);
2222
2223 os = zfsvfs->z_os;
2224 zilog = zfsvfs->z_log;
2225
2226 /*
2227 * Make sure that if we have ephemeral uid/gid or xvattr specified
2228 * that file system is at proper version level
2229 */
2230
2231 if (zfsvfs->z_use_fuids == B_FALSE &&
2232 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
2233 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
2234 (mask & AT_XVATTR))) {
2235 zfs_exit(zfsvfs, FTAG);
2236 return (SET_ERROR(EINVAL));
2237 }
2238
2239 if (mask & AT_SIZE && vp->v_type == VDIR) {
2240 zfs_exit(zfsvfs, FTAG);
2241 return (SET_ERROR(EISDIR));
2242 }
2243
2244 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
2245 zfs_exit(zfsvfs, FTAG);
2246 return (SET_ERROR(EINVAL));
2247 }
2248
2249 /*
2250 * If this is an xvattr_t, then get a pointer to the structure of
2251 * optional attributes. If this is NULL, then we have a vattr_t.
2252 */
2253 xoap = xva_getxoptattr(xvap);
2254
2255 xva_init(&tmpxvattr);
2256
2257 /*
2258 * Immutable files can only alter immutable bit and atime
2259 */
2260 if ((zp->z_pflags & ZFS_IMMUTABLE) &&
2261 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
2262 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
2263 zfs_exit(zfsvfs, FTAG);
2264 return (SET_ERROR(EPERM));
2265 }
2266
2267 /*
2268 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
2269 */
2270
2271 /*
2272 * Verify timestamps doesn't overflow 32 bits.
2273 * ZFS can handle large timestamps, but 32bit syscalls can't
2274 * handle times greater than 2039. This check should be removed
2275 * once large timestamps are fully supported.
2276 */
2277 if (mask & (AT_ATIME | AT_MTIME)) {
2278 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
2279 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
2280 zfs_exit(zfsvfs, FTAG);
2281 return (SET_ERROR(EOVERFLOW));
2282 }
2283 }
2284 if (xoap != NULL && (mask & AT_XVATTR)) {
2285 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
2286 TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
2287 zfs_exit(zfsvfs, FTAG);
2288 return (SET_ERROR(EOVERFLOW));
2289 }
2290
2291 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2292 if (!dmu_objset_projectquota_enabled(os) ||
2293 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) {
2294 zfs_exit(zfsvfs, FTAG);
2295 return (SET_ERROR(EOPNOTSUPP));
2296 }
2297
2298 projid = xoap->xoa_projid;
2299 if (unlikely(projid == ZFS_INVALID_PROJID)) {
2300 zfs_exit(zfsvfs, FTAG);
2301 return (SET_ERROR(EINVAL));
2302 }
2303
2304 if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
2305 projid = ZFS_INVALID_PROJID;
2306 else
2307 need_policy = TRUE;
2308 }
2309
2310 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
2311 (xoap->xoa_projinherit !=
2312 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
2313 (!dmu_objset_projectquota_enabled(os) ||
2314 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) {
2315 zfs_exit(zfsvfs, FTAG);
2316 return (SET_ERROR(EOPNOTSUPP));
2317 }
2318 }
2319
2320 attrzp = NULL;
2321 aclp = NULL;
2322
2323 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
2324 zfs_exit(zfsvfs, FTAG);
2325 return (SET_ERROR(EROFS));
2326 }
2327
2328 /*
2329 * First validate permissions
2330 */
2331
2332 if (mask & AT_SIZE) {
2333 /*
2334 * XXX - Note, we are not providing any open
2335 * mode flags here (like FNDELAY), so we may
2336 * block if there are locks present... this
2337 * should be addressed in openat().
2338 */
2339 /* XXX - would it be OK to generate a log record here? */
2340 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2341 if (err) {
2342 zfs_exit(zfsvfs, FTAG);
2343 return (err);
2344 }
2345 }
2346
2347 if (mask & (AT_ATIME|AT_MTIME) ||
2348 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2349 XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2350 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2351 XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2352 XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2353 XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2354 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2355 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2356 skipaclchk, cr, mnt_ns);
2357 }
2358
2359 if (mask & (AT_UID|AT_GID)) {
2360 int idmask = (mask & (AT_UID|AT_GID));
2361 int take_owner;
2362 int take_group;
2363
2364 /*
2365 * NOTE: even if a new mode is being set,
2366 * we may clear S_ISUID/S_ISGID bits.
2367 */
2368
2369 if (!(mask & AT_MODE))
2370 vap->va_mode = zp->z_mode;
2371
2372 /*
2373 * Take ownership or chgrp to group we are a member of
2374 */
2375
2376 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
2377 take_group = (mask & AT_GID) &&
2378 zfs_groupmember(zfsvfs, vap->va_gid, cr);
2379
2380 /*
2381 * If both AT_UID and AT_GID are set then take_owner and
2382 * take_group must both be set in order to allow taking
2383 * ownership.
2384 *
2385 * Otherwise, send the check through secpolicy_vnode_setattr()
2386 *
2387 */
2388
2389 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
2390 ((idmask == AT_UID) && take_owner) ||
2391 ((idmask == AT_GID) && take_group)) {
2392 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
2393 skipaclchk, cr, mnt_ns) == 0) {
2394 /*
2395 * Remove setuid/setgid for non-privileged users
2396 */
2397 secpolicy_setid_clear(vap, vp, cr);
2398 trim_mask = (mask & (AT_UID|AT_GID));
2399 } else {
2400 need_policy = TRUE;
2401 }
2402 } else {
2403 need_policy = TRUE;
2404 }
2405 }
2406
2407 oldva.va_mode = zp->z_mode;
2408 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
2409 if (mask & AT_XVATTR) {
2410 /*
2411 * Update xvattr mask to include only those attributes
2412 * that are actually changing.
2413 *
2414 * the bits will be restored prior to actually setting
2415 * the attributes so the caller thinks they were set.
2416 */
2417 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2418 if (xoap->xoa_appendonly !=
2419 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
2420 need_policy = TRUE;
2421 } else {
2422 XVA_CLR_REQ(xvap, XAT_APPENDONLY);
2423 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
2424 }
2425 }
2426
2427 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2428 if (xoap->xoa_projinherit !=
2429 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
2430 need_policy = TRUE;
2431 } else {
2432 XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
2433 XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT);
2434 }
2435 }
2436
2437 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2438 if (xoap->xoa_nounlink !=
2439 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
2440 need_policy = TRUE;
2441 } else {
2442 XVA_CLR_REQ(xvap, XAT_NOUNLINK);
2443 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
2444 }
2445 }
2446
2447 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2448 if (xoap->xoa_immutable !=
2449 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
2450 need_policy = TRUE;
2451 } else {
2452 XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
2453 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
2454 }
2455 }
2456
2457 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2458 if (xoap->xoa_nodump !=
2459 ((zp->z_pflags & ZFS_NODUMP) != 0)) {
2460 need_policy = TRUE;
2461 } else {
2462 XVA_CLR_REQ(xvap, XAT_NODUMP);
2463 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
2464 }
2465 }
2466
2467 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2468 if (xoap->xoa_av_modified !=
2469 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
2470 need_policy = TRUE;
2471 } else {
2472 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
2473 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
2474 }
2475 }
2476
2477 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2478 if ((vp->v_type != VREG &&
2479 xoap->xoa_av_quarantined) ||
2480 xoap->xoa_av_quarantined !=
2481 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
2482 need_policy = TRUE;
2483 } else {
2484 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
2485 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
2486 }
2487 }
2488
2489 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2490 zfs_exit(zfsvfs, FTAG);
2491 return (SET_ERROR(EPERM));
2492 }
2493
2494 if (need_policy == FALSE &&
2495 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
2496 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
2497 need_policy = TRUE;
2498 }
2499 }
2500
2501 if (mask & AT_MODE) {
2502 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
2503 mnt_ns) == 0) {
2504 err = secpolicy_setid_setsticky_clear(vp, vap,
2505 &oldva, cr);
2506 if (err) {
2507 zfs_exit(zfsvfs, FTAG);
2508 return (err);
2509 }
2510 trim_mask |= AT_MODE;
2511 } else {
2512 need_policy = TRUE;
2513 }
2514 }
2515
2516 if (need_policy) {
2517 /*
2518 * If trim_mask is set then take ownership
2519 * has been granted or write_acl is present and user
2520 * has the ability to modify mode. In that case remove
2521 * UID|GID and or MODE from mask so that
2522 * secpolicy_vnode_setattr() doesn't revoke it.
2523 */
2524
2525 if (trim_mask) {
2526 saved_mask = vap->va_mask;
2527 vap->va_mask &= ~trim_mask;
2528 if (trim_mask & AT_MODE) {
2529 /*
2530 * Save the mode, as secpolicy_vnode_setattr()
2531 * will overwrite it with ova.va_mode.
2532 */
2533 saved_mode = vap->va_mode;
2534 }
2535 }
2536 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
2537 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
2538 if (err) {
2539 zfs_exit(zfsvfs, FTAG);
2540 return (err);
2541 }
2542
2543 if (trim_mask) {
2544 vap->va_mask |= saved_mask;
2545 if (trim_mask & AT_MODE) {
2546 /*
2547 * Recover the mode after
2548 * secpolicy_vnode_setattr().
2549 */
2550 vap->va_mode = saved_mode;
2551 }
2552 }
2553 }
2554
2555 /*
2556 * secpolicy_vnode_setattr, or take ownership may have
2557 * changed va_mask
2558 */
2559 mask = vap->va_mask;
2560
2561 if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) {
2562 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
2563 &xattr_obj, sizeof (xattr_obj));
2564
2565 if (err == 0 && xattr_obj) {
2566 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
2567 if (err == 0) {
2568 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
2569 if (err != 0)
2570 vrele(ZTOV(attrzp));
2571 }
2572 if (err)
2573 goto out2;
2574 }
2575 if (mask & AT_UID) {
2576 new_uid = zfs_fuid_create(zfsvfs,
2577 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
2578 if (new_uid != zp->z_uid &&
2579 zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
2580 new_uid)) {
2581 if (attrzp)
2582 vput(ZTOV(attrzp));
2583 err = SET_ERROR(EDQUOT);
2584 goto out2;
2585 }
2586 }
2587
2588 if (mask & AT_GID) {
2589 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
2590 cr, ZFS_GROUP, &fuidp);
2591 if (new_gid != zp->z_gid &&
2592 zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
2593 new_gid)) {
2594 if (attrzp)
2595 vput(ZTOV(attrzp));
2596 err = SET_ERROR(EDQUOT);
2597 goto out2;
2598 }
2599 }
2600
2601 if (projid != ZFS_INVALID_PROJID &&
2602 zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
2603 if (attrzp)
2604 vput(ZTOV(attrzp));
2605 err = SET_ERROR(EDQUOT);
2606 goto out2;
2607 }
2608 }
2609 tx = dmu_tx_create(os);
2610
2611 if (mask & AT_MODE) {
2612 uint64_t pmode = zp->z_mode;
2613 uint64_t acl_obj;
2614 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
2615
2616 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
2617 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
2618 err = SET_ERROR(EPERM);
2619 goto out;
2620 }
2621
2622 if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
2623 goto out;
2624
2625 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
2626 /*
2627 * Are we upgrading ACL from old V0 format
2628 * to V1 format?
2629 */
2630 if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
2631 zfs_znode_acl_version(zp) ==
2632 ZFS_ACL_VERSION_INITIAL) {
2633 dmu_tx_hold_free(tx, acl_obj, 0,
2634 DMU_OBJECT_END);
2635 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2636 0, aclp->z_acl_bytes);
2637 } else {
2638 dmu_tx_hold_write(tx, acl_obj, 0,
2639 aclp->z_acl_bytes);
2640 }
2641 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
2642 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2643 0, aclp->z_acl_bytes);
2644 }
2645 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2646 } else {
2647 if (((mask & AT_XVATTR) &&
2648 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
2649 (projid != ZFS_INVALID_PROJID &&
2650 !(zp->z_pflags & ZFS_PROJID)))
2651 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2652 else
2653 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2654 }
2655
2656 if (attrzp) {
2657 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
2658 }
2659
2660 fuid_dirtied = zfsvfs->z_fuid_dirty;
2661 if (fuid_dirtied)
2662 zfs_fuid_txhold(zfsvfs, tx);
2663
2664 zfs_sa_upgrade_txholds(tx, zp);
2665
2666 err = dmu_tx_assign(tx, TXG_WAIT);
2667 if (err)
2668 goto out;
2669
2670 count = 0;
2671 /*
2672 * Set each attribute requested.
2673 * We group settings according to the locks they need to acquire.
2674 *
2675 * Note: you cannot set ctime directly, although it will be
2676 * updated as a side-effect of calling this function.
2677 */
2678
2679 if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
2680 /*
2681 * For the existed object that is upgraded from old system,
2682 * its on-disk layout has no slot for the project ID attribute.
2683 * But quota accounting logic needs to access related slots by
2684 * offset directly. So we need to adjust old objects' layout
2685 * to make the project ID to some unified and fixed offset.
2686 */
2687 if (attrzp)
2688 err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
2689 if (err == 0)
2690 err = sa_add_projid(zp->z_sa_hdl, tx, projid);
2691
2692 if (unlikely(err == EEXIST))
2693 err = 0;
2694 else if (err != 0)
2695 goto out;
2696 else
2697 projid = ZFS_INVALID_PROJID;
2698 }
2699
2700 if (mask & (AT_UID|AT_GID|AT_MODE))
2701 mutex_enter(&zp->z_acl_lock);
2702
2703 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
2704 &zp->z_pflags, sizeof (zp->z_pflags));
2705
2706 if (attrzp) {
2707 if (mask & (AT_UID|AT_GID|AT_MODE))
2708 mutex_enter(&attrzp->z_acl_lock);
2709 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2710 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
2711 sizeof (attrzp->z_pflags));
2712 if (projid != ZFS_INVALID_PROJID) {
2713 attrzp->z_projid = projid;
2714 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2715 SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
2716 sizeof (attrzp->z_projid));
2717 }
2718 }
2719
2720 if (mask & (AT_UID|AT_GID)) {
2721
2722 if (mask & AT_UID) {
2723 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
2724 &new_uid, sizeof (new_uid));
2725 zp->z_uid = new_uid;
2726 if (attrzp) {
2727 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2728 SA_ZPL_UID(zfsvfs), NULL, &new_uid,
2729 sizeof (new_uid));
2730 attrzp->z_uid = new_uid;
2731 }
2732 }
2733
2734 if (mask & AT_GID) {
2735 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
2736 NULL, &new_gid, sizeof (new_gid));
2737 zp->z_gid = new_gid;
2738 if (attrzp) {
2739 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2740 SA_ZPL_GID(zfsvfs), NULL, &new_gid,
2741 sizeof (new_gid));
2742 attrzp->z_gid = new_gid;
2743 }
2744 }
2745 if (!(mask & AT_MODE)) {
2746 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
2747 NULL, &new_mode, sizeof (new_mode));
2748 new_mode = zp->z_mode;
2749 }
2750 err = zfs_acl_chown_setattr(zp);
2751 ASSERT0(err);
2752 if (attrzp) {
2753 vn_seqc_write_begin(ZTOV(attrzp));
2754 err = zfs_acl_chown_setattr(attrzp);
2755 vn_seqc_write_end(ZTOV(attrzp));
2756 ASSERT0(err);
2757 }
2758 }
2759
2760 if (mask & AT_MODE) {
2761 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
2762 &new_mode, sizeof (new_mode));
2763 zp->z_mode = new_mode;
2764 ASSERT3P(aclp, !=, NULL);
2765 err = zfs_aclset_common(zp, aclp, cr, tx);
2766 ASSERT0(err);
2767 if (zp->z_acl_cached)
2768 zfs_acl_free(zp->z_acl_cached);
2769 zp->z_acl_cached = aclp;
2770 aclp = NULL;
2771 }
2772
2773
2774 if (mask & AT_ATIME) {
2775 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
2776 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
2777 &zp->z_atime, sizeof (zp->z_atime));
2778 }
2779
2780 if (mask & AT_MTIME) {
2781 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
2782 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
2783 mtime, sizeof (mtime));
2784 }
2785
2786 if (projid != ZFS_INVALID_PROJID) {
2787 zp->z_projid = projid;
2788 SA_ADD_BULK_ATTR(bulk, count,
2789 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
2790 sizeof (zp->z_projid));
2791 }
2792
2793 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
2794 if (mask & AT_SIZE && !(mask & AT_MTIME)) {
2795 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
2796 NULL, mtime, sizeof (mtime));
2797 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2798 &ctime, sizeof (ctime));
2799 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
2800 } else if (mask != 0) {
2801 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2802 &ctime, sizeof (ctime));
2803 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
2804 if (attrzp) {
2805 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2806 SA_ZPL_CTIME(zfsvfs), NULL,
2807 &ctime, sizeof (ctime));
2808 zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
2809 mtime, ctime);
2810 }
2811 }
2812
2813 /*
2814 * Do this after setting timestamps to prevent timestamp
2815 * update from toggling bit
2816 */
2817
2818 if (xoap && (mask & AT_XVATTR)) {
2819
2820 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
2821 xoap->xoa_createtime = vap->va_birthtime;
2822 /*
2823 * restore trimmed off masks
2824 * so that return masks can be set for caller.
2825 */
2826
2827 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
2828 XVA_SET_REQ(xvap, XAT_APPENDONLY);
2829 }
2830 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
2831 XVA_SET_REQ(xvap, XAT_NOUNLINK);
2832 }
2833 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
2834 XVA_SET_REQ(xvap, XAT_IMMUTABLE);
2835 }
2836 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
2837 XVA_SET_REQ(xvap, XAT_NODUMP);
2838 }
2839 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
2840 XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
2841 }
2842 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
2843 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
2844 }
2845 if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) {
2846 XVA_SET_REQ(xvap, XAT_PROJINHERIT);
2847 }
2848
2849 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
2850 ASSERT3S(vp->v_type, ==, VREG);
2851
2852 zfs_xvattr_set(zp, xvap, tx);
2853 }
2854
2855 if (fuid_dirtied)
2856 zfs_fuid_sync(zfsvfs, tx);
2857
2858 if (mask != 0)
2859 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
2860
2861 if (mask & (AT_UID|AT_GID|AT_MODE))
2862 mutex_exit(&zp->z_acl_lock);
2863
2864 if (attrzp) {
2865 if (mask & (AT_UID|AT_GID|AT_MODE))
2866 mutex_exit(&attrzp->z_acl_lock);
2867 }
2868 out:
2869 if (err == 0 && attrzp) {
2870 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
2871 xattr_count, tx);
2872 ASSERT0(err2);
2873 }
2874
2875 if (attrzp)
2876 vput(ZTOV(attrzp));
2877
2878 if (aclp)
2879 zfs_acl_free(aclp);
2880
2881 if (fuidp) {
2882 zfs_fuid_info_free(fuidp);
2883 fuidp = NULL;
2884 }
2885
2886 if (err) {
2887 dmu_tx_abort(tx);
2888 } else {
2889 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
2890 dmu_tx_commit(tx);
2891 }
2892
2893 out2:
2894 if (os->os_sync == ZFS_SYNC_ALWAYS)
2895 zil_commit(zilog, 0);
2896
2897 zfs_exit(zfsvfs, FTAG);
2898 return (err);
2899 }
2900
2901 /*
2902 * Look up the directory entries corresponding to the source and target
2903 * directory/name pairs.
2904 */
2905 static int
zfs_rename_relock_lookup(znode_t * sdzp,const struct componentname * scnp,znode_t ** szpp,znode_t * tdzp,const struct componentname * tcnp,znode_t ** tzpp)2906 zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
2907 znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
2908 znode_t **tzpp)
2909 {
2910 zfsvfs_t *zfsvfs;
2911 znode_t *szp, *tzp;
2912 int error;
2913
2914 /*
2915 * Before using sdzp and tdzp we must ensure that they are live.
2916 * As a porting legacy from illumos we have two things to worry
2917 * about. One is typical for FreeBSD and it is that the vnode is
2918 * not reclaimed (doomed). The other is that the znode is live.
2919 * The current code can invalidate the znode without acquiring the
2920 * corresponding vnode lock if the object represented by the znode
2921 * and vnode is no longer valid after a rollback or receive operation.
2922 * z_teardown_lock hidden behind zfs_enter and zfs_exit is the lock
2923 * that protects the znodes from the invalidation.
2924 */
2925 zfsvfs = sdzp->z_zfsvfs;
2926 ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
2927 if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
2928 return (error);
2929 if ((error = zfs_verify_zp(tdzp)) != 0) {
2930 zfs_exit(zfsvfs, FTAG);
2931 return (error);
2932 }
2933
2934 /*
2935 * Re-resolve svp to be certain it still exists and fetch the
2936 * correct vnode.
2937 */
2938 error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
2939 if (error != 0) {
2940 /* Source entry invalid or not there. */
2941 if ((scnp->cn_flags & ISDOTDOT) != 0 ||
2942 (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
2943 error = SET_ERROR(EINVAL);
2944 goto out;
2945 }
2946 *szpp = szp;
2947
2948 /*
2949 * Re-resolve tvp, if it disappeared we just carry on.
2950 */
2951 error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
2952 if (error != 0) {
2953 vrele(ZTOV(szp));
2954 if ((tcnp->cn_flags & ISDOTDOT) != 0)
2955 error = SET_ERROR(EINVAL);
2956 goto out;
2957 }
2958 *tzpp = tzp;
2959 out:
2960 zfs_exit(zfsvfs, FTAG);
2961 return (error);
2962 }
2963
2964 /*
2965 * We acquire all but fdvp locks using non-blocking acquisitions. If we
2966 * fail to acquire any lock in the path we will drop all held locks,
2967 * acquire the new lock in a blocking fashion, and then release it and
2968 * restart the rename. This acquire/release step ensures that we do not
2969 * spin on a lock waiting for release. On error release all vnode locks
2970 * and decrement references the way tmpfs_rename() would do.
2971 */
2972 static int
zfs_rename_relock(struct vnode * sdvp,struct vnode ** svpp,struct vnode * tdvp,struct vnode ** tvpp,const struct componentname * scnp,const struct componentname * tcnp)2973 zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
2974 struct vnode *tdvp, struct vnode **tvpp,
2975 const struct componentname *scnp, const struct componentname *tcnp)
2976 {
2977 struct vnode *nvp, *svp, *tvp;
2978 znode_t *sdzp, *tdzp, *szp, *tzp;
2979 int error;
2980
2981 VOP_UNLOCK1(tdvp);
2982 if (*tvpp != NULL && *tvpp != tdvp)
2983 VOP_UNLOCK1(*tvpp);
2984
2985 relock:
2986 error = vn_lock(sdvp, LK_EXCLUSIVE);
2987 if (error)
2988 goto out;
2989 error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
2990 if (error != 0) {
2991 VOP_UNLOCK1(sdvp);
2992 if (error != EBUSY)
2993 goto out;
2994 error = vn_lock(tdvp, LK_EXCLUSIVE);
2995 if (error)
2996 goto out;
2997 VOP_UNLOCK1(tdvp);
2998 goto relock;
2999 }
3000 tdzp = VTOZ(tdvp);
3001 sdzp = VTOZ(sdvp);
3002
3003 error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
3004 if (error != 0) {
3005 VOP_UNLOCK1(sdvp);
3006 VOP_UNLOCK1(tdvp);
3007 goto out;
3008 }
3009 svp = ZTOV(szp);
3010 tvp = tzp != NULL ? ZTOV(tzp) : NULL;
3011
3012 /*
3013 * Now try acquire locks on svp and tvp.
3014 */
3015 nvp = svp;
3016 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
3017 if (error != 0) {
3018 VOP_UNLOCK1(sdvp);
3019 VOP_UNLOCK1(tdvp);
3020 if (tvp != NULL)
3021 vrele(tvp);
3022 if (error != EBUSY) {
3023 vrele(nvp);
3024 goto out;
3025 }
3026 error = vn_lock(nvp, LK_EXCLUSIVE);
3027 if (error != 0) {
3028 vrele(nvp);
3029 goto out;
3030 }
3031 VOP_UNLOCK1(nvp);
3032 /*
3033 * Concurrent rename race.
3034 * XXX ?
3035 */
3036 if (nvp == tdvp) {
3037 vrele(nvp);
3038 error = SET_ERROR(EINVAL);
3039 goto out;
3040 }
3041 vrele(*svpp);
3042 *svpp = nvp;
3043 goto relock;
3044 }
3045 vrele(*svpp);
3046 *svpp = nvp;
3047
3048 if (*tvpp != NULL)
3049 vrele(*tvpp);
3050 *tvpp = NULL;
3051 if (tvp != NULL) {
3052 nvp = tvp;
3053 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
3054 if (error != 0) {
3055 VOP_UNLOCK1(sdvp);
3056 VOP_UNLOCK1(tdvp);
3057 VOP_UNLOCK1(*svpp);
3058 if (error != EBUSY) {
3059 vrele(nvp);
3060 goto out;
3061 }
3062 error = vn_lock(nvp, LK_EXCLUSIVE);
3063 if (error != 0) {
3064 vrele(nvp);
3065 goto out;
3066 }
3067 vput(nvp);
3068 goto relock;
3069 }
3070 *tvpp = nvp;
3071 }
3072
3073 return (0);
3074
3075 out:
3076 return (error);
3077 }
3078
3079 /*
3080 * Note that we must use VRELE_ASYNC in this function as it walks
3081 * up the directory tree and vrele may need to acquire an exclusive
3082 * lock if a last reference to a vnode is dropped.
3083 */
3084 static int
zfs_rename_check(znode_t * szp,znode_t * sdzp,znode_t * tdzp)3085 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
3086 {
3087 zfsvfs_t *zfsvfs;
3088 znode_t *zp, *zp1;
3089 uint64_t parent;
3090 int error;
3091
3092 zfsvfs = tdzp->z_zfsvfs;
3093 if (tdzp == szp)
3094 return (SET_ERROR(EINVAL));
3095 if (tdzp == sdzp)
3096 return (0);
3097 if (tdzp->z_id == zfsvfs->z_root)
3098 return (0);
3099 zp = tdzp;
3100 for (;;) {
3101 ASSERT(!zp->z_unlinked);
3102 if ((error = sa_lookup(zp->z_sa_hdl,
3103 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
3104 break;
3105
3106 if (parent == szp->z_id) {
3107 error = SET_ERROR(EINVAL);
3108 break;
3109 }
3110 if (parent == zfsvfs->z_root)
3111 break;
3112 if (parent == sdzp->z_id)
3113 break;
3114
3115 error = zfs_zget(zfsvfs, parent, &zp1);
3116 if (error != 0)
3117 break;
3118
3119 if (zp != tdzp)
3120 VN_RELE_ASYNC(ZTOV(zp),
3121 dsl_pool_zrele_taskq(
3122 dmu_objset_pool(zfsvfs->z_os)));
3123 zp = zp1;
3124 }
3125
3126 if (error == ENOTDIR)
3127 panic("checkpath: .. not a directory\n");
3128 if (zp != tdzp)
3129 VN_RELE_ASYNC(ZTOV(zp),
3130 dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
3131 return (error);
3132 }
3133
3134 #if __FreeBSD_version < 1300124
3135 static void
cache_vop_rename(struct vnode * fdvp,struct vnode * fvp,struct vnode * tdvp,struct vnode * tvp,struct componentname * fcnp,struct componentname * tcnp)3136 cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
3137 struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp)
3138 {
3139
3140 cache_purge(fvp);
3141 if (tvp != NULL)
3142 cache_purge(tvp);
3143 cache_purge_negative(tdvp);
3144 }
3145 #endif
3146
3147 static int
3148 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3149 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3150 cred_t *cr);
3151
3152 /*
3153 * Move an entry from the provided source directory to the target
3154 * directory. Change the entry name as indicated.
3155 *
3156 * IN: sdvp - Source directory containing the "old entry".
3157 * scnp - Old entry name.
3158 * tdvp - Target directory to contain the "new entry".
3159 * tcnp - New entry name.
3160 * cr - credentials of caller.
3161 * INOUT: svpp - Source file
3162 * tvpp - Target file, may point to NULL initially
3163 *
3164 * RETURN: 0 on success, error code on failure.
3165 *
3166 * Timestamps:
3167 * sdvp,tdvp - ctime|mtime updated
3168 */
3169 static int
zfs_do_rename(vnode_t * sdvp,vnode_t ** svpp,struct componentname * scnp,vnode_t * tdvp,vnode_t ** tvpp,struct componentname * tcnp,cred_t * cr)3170 zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3171 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3172 cred_t *cr)
3173 {
3174 int error;
3175
3176 ASSERT_VOP_ELOCKED(tdvp, __func__);
3177 if (*tvpp != NULL)
3178 ASSERT_VOP_ELOCKED(*tvpp, __func__);
3179
3180 /* Reject renames across filesystems. */
3181 if ((*svpp)->v_mount != tdvp->v_mount ||
3182 ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
3183 error = SET_ERROR(EXDEV);
3184 goto out;
3185 }
3186
3187 if (zfsctl_is_node(tdvp)) {
3188 error = SET_ERROR(EXDEV);
3189 goto out;
3190 }
3191
3192 /*
3193 * Lock all four vnodes to ensure safety and semantics of renaming.
3194 */
3195 error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
3196 if (error != 0) {
3197 /* no vnodes are locked in the case of error here */
3198 return (error);
3199 }
3200
3201 error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
3202 VOP_UNLOCK1(sdvp);
3203 VOP_UNLOCK1(*svpp);
3204 out:
3205 if (*tvpp != NULL)
3206 VOP_UNLOCK1(*tvpp);
3207 if (tdvp != *tvpp)
3208 VOP_UNLOCK1(tdvp);
3209
3210 return (error);
3211 }
3212
3213 static int
zfs_do_rename_impl(vnode_t * sdvp,vnode_t ** svpp,struct componentname * scnp,vnode_t * tdvp,vnode_t ** tvpp,struct componentname * tcnp,cred_t * cr)3214 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3215 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3216 cred_t *cr)
3217 {
3218 dmu_tx_t *tx;
3219 zfsvfs_t *zfsvfs;
3220 zilog_t *zilog;
3221 znode_t *tdzp, *sdzp, *tzp, *szp;
3222 const char *snm = scnp->cn_nameptr;
3223 const char *tnm = tcnp->cn_nameptr;
3224 int error;
3225
3226 tdzp = VTOZ(tdvp);
3227 sdzp = VTOZ(sdvp);
3228 zfsvfs = tdzp->z_zfsvfs;
3229
3230 if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
3231 return (error);
3232 if ((error = zfs_verify_zp(sdzp)) != 0) {
3233 zfs_exit(zfsvfs, FTAG);
3234 return (error);
3235 }
3236 zilog = zfsvfs->z_log;
3237
3238 if (zfsvfs->z_utf8 && u8_validate(tnm,
3239 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3240 error = SET_ERROR(EILSEQ);
3241 goto out;
3242 }
3243
3244 /* If source and target are the same file, there is nothing to do. */
3245 if ((*svpp) == (*tvpp)) {
3246 error = 0;
3247 goto out;
3248 }
3249
3250 if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
3251 ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
3252 (*tvpp)->v_mountedhere != NULL)) {
3253 error = SET_ERROR(EXDEV);
3254 goto out;
3255 }
3256
3257 szp = VTOZ(*svpp);
3258 if ((error = zfs_verify_zp(szp)) != 0) {
3259 zfs_exit(zfsvfs, FTAG);
3260 return (error);
3261 }
3262 tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
3263 if (tzp != NULL) {
3264 if ((error = zfs_verify_zp(tzp)) != 0) {
3265 zfs_exit(zfsvfs, FTAG);
3266 return (error);
3267 }
3268 }
3269
3270 /*
3271 * This is to prevent the creation of links into attribute space
3272 * by renaming a linked file into/outof an attribute directory.
3273 * See the comment in zfs_link() for why this is considered bad.
3274 */
3275 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
3276 error = SET_ERROR(EINVAL);
3277 goto out;
3278 }
3279
3280 /*
3281 * If we are using project inheritance, means if the directory has
3282 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3283 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
3284 * such case, we only allow renames into our tree when the project
3285 * IDs are the same.
3286 */
3287 if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3288 tdzp->z_projid != szp->z_projid) {
3289 error = SET_ERROR(EXDEV);
3290 goto out;
3291 }
3292
3293 /*
3294 * Must have write access at the source to remove the old entry
3295 * and write access at the target to create the new entry.
3296 * Note that if target and source are the same, this can be
3297 * done in a single check.
3298 */
3299 if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, NULL)))
3300 goto out;
3301
3302 if ((*svpp)->v_type == VDIR) {
3303 /*
3304 * Avoid ".", "..", and aliases of "." for obvious reasons.
3305 */
3306 if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
3307 sdzp == szp ||
3308 (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
3309 error = EINVAL;
3310 goto out;
3311 }
3312
3313 /*
3314 * Check to make sure rename is valid.
3315 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
3316 */
3317 if ((error = zfs_rename_check(szp, sdzp, tdzp)))
3318 goto out;
3319 }
3320
3321 /*
3322 * Does target exist?
3323 */
3324 if (tzp) {
3325 /*
3326 * Source and target must be the same type.
3327 */
3328 if ((*svpp)->v_type == VDIR) {
3329 if ((*tvpp)->v_type != VDIR) {
3330 error = SET_ERROR(ENOTDIR);
3331 goto out;
3332 } else {
3333 cache_purge(tdvp);
3334 if (sdvp != tdvp)
3335 cache_purge(sdvp);
3336 }
3337 } else {
3338 if ((*tvpp)->v_type == VDIR) {
3339 error = SET_ERROR(EISDIR);
3340 goto out;
3341 }
3342 }
3343 }
3344
3345 vn_seqc_write_begin(*svpp);
3346 vn_seqc_write_begin(sdvp);
3347 if (*tvpp != NULL)
3348 vn_seqc_write_begin(*tvpp);
3349 if (tdvp != *tvpp)
3350 vn_seqc_write_begin(tdvp);
3351
3352 vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
3353 if (tzp)
3354 vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
3355
3356 /*
3357 * notify the target directory if it is not the same
3358 * as source directory.
3359 */
3360 if (tdvp != sdvp) {
3361 vnevent_rename_dest_dir(tdvp, ct);
3362 }
3363
3364 tx = dmu_tx_create(zfsvfs->z_os);
3365 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3366 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3367 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3368 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3369 if (sdzp != tdzp) {
3370 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3371 zfs_sa_upgrade_txholds(tx, tdzp);
3372 }
3373 if (tzp) {
3374 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3375 zfs_sa_upgrade_txholds(tx, tzp);
3376 }
3377
3378 zfs_sa_upgrade_txholds(tx, szp);
3379 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3380 error = dmu_tx_assign(tx, TXG_WAIT);
3381 if (error) {
3382 dmu_tx_abort(tx);
3383 goto out_seq;
3384 }
3385
3386 if (tzp) /* Attempt to remove the existing target */
3387 error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
3388
3389 if (error == 0) {
3390 error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
3391 if (error == 0) {
3392 szp->z_pflags |= ZFS_AV_MODIFIED;
3393
3394 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3395 (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3396 ASSERT0(error);
3397
3398 error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
3399 NULL);
3400 if (error == 0) {
3401 zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
3402 snm, tdzp, tnm, szp);
3403 } else {
3404 /*
3405 * At this point, we have successfully created
3406 * the target name, but have failed to remove
3407 * the source name. Since the create was done
3408 * with the ZRENAMING flag, there are
3409 * complications; for one, the link count is
3410 * wrong. The easiest way to deal with this
3411 * is to remove the newly created target, and
3412 * return the original error. This must
3413 * succeed; fortunately, it is very unlikely to
3414 * fail, since we just created it.
3415 */
3416 VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
3417 ZRENAMING, NULL));
3418 }
3419 }
3420 if (error == 0) {
3421 cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
3422 }
3423 }
3424
3425 dmu_tx_commit(tx);
3426
3427 out_seq:
3428 vn_seqc_write_end(*svpp);
3429 vn_seqc_write_end(sdvp);
3430 if (*tvpp != NULL)
3431 vn_seqc_write_end(*tvpp);
3432 if (tdvp != *tvpp)
3433 vn_seqc_write_end(tdvp);
3434
3435 out:
3436 if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3437 zil_commit(zilog, 0);
3438 zfs_exit(zfsvfs, FTAG);
3439
3440 return (error);
3441 }
3442
3443 int
zfs_rename(znode_t * sdzp,const char * sname,znode_t * tdzp,const char * tname,cred_t * cr,int flags,uint64_t rflags,vattr_t * wo_vap,zidmap_t * mnt_ns)3444 zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
3445 cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zidmap_t *mnt_ns)
3446 {
3447 struct componentname scn, tcn;
3448 vnode_t *sdvp, *tdvp;
3449 vnode_t *svp, *tvp;
3450 int error;
3451 svp = tvp = NULL;
3452
3453 if (rflags != 0 || wo_vap != NULL)
3454 return (SET_ERROR(EINVAL));
3455
3456 sdvp = ZTOV(sdzp);
3457 tdvp = ZTOV(tdzp);
3458 error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
3459 if (sdzp->z_zfsvfs->z_replay == B_FALSE)
3460 VOP_UNLOCK1(sdvp);
3461 if (error != 0)
3462 goto fail;
3463 VOP_UNLOCK1(svp);
3464
3465 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
3466 error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
3467 if (error == EJUSTRETURN)
3468 tvp = NULL;
3469 else if (error != 0) {
3470 VOP_UNLOCK1(tdvp);
3471 goto fail;
3472 }
3473
3474 error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
3475 fail:
3476 if (svp != NULL)
3477 vrele(svp);
3478 if (tvp != NULL)
3479 vrele(tvp);
3480
3481 return (error);
3482 }
3483
3484 /*
3485 * Insert the indicated symbolic reference entry into the directory.
3486 *
3487 * IN: dvp - Directory to contain new symbolic link.
3488 * link - Name for new symlink entry.
3489 * vap - Attributes of new entry.
3490 * cr - credentials of caller.
3491 * ct - caller context
3492 * flags - case flags
3493 * mnt_ns - Unused on FreeBSD
3494 *
3495 * RETURN: 0 on success, error code on failure.
3496 *
3497 * Timestamps:
3498 * dvp - ctime|mtime updated
3499 */
3500 int
zfs_symlink(znode_t * dzp,const char * name,vattr_t * vap,const char * link,znode_t ** zpp,cred_t * cr,int flags,zidmap_t * mnt_ns)3501 zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
3502 const char *link, znode_t **zpp, cred_t *cr, int flags, zidmap_t *mnt_ns)
3503 {
3504 (void) flags;
3505 znode_t *zp;
3506 dmu_tx_t *tx;
3507 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
3508 zilog_t *zilog;
3509 uint64_t len = strlen(link);
3510 int error;
3511 zfs_acl_ids_t acl_ids;
3512 boolean_t fuid_dirtied;
3513 uint64_t txtype = TX_SYMLINK;
3514
3515 ASSERT3S(vap->va_type, ==, VLNK);
3516
3517 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
3518 return (error);
3519 zilog = zfsvfs->z_log;
3520
3521 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
3522 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3523 zfs_exit(zfsvfs, FTAG);
3524 return (SET_ERROR(EILSEQ));
3525 }
3526
3527 if (len > MAXPATHLEN) {
3528 zfs_exit(zfsvfs, FTAG);
3529 return (SET_ERROR(ENAMETOOLONG));
3530 }
3531
3532 if ((error = zfs_acl_ids_create(dzp, 0,
3533 vap, cr, NULL, &acl_ids, NULL)) != 0) {
3534 zfs_exit(zfsvfs, FTAG);
3535 return (error);
3536 }
3537
3538 /*
3539 * Attempt to lock directory; fail if entry already exists.
3540 */
3541 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
3542 if (error) {
3543 zfs_acl_ids_free(&acl_ids);
3544 zfs_exit(zfsvfs, FTAG);
3545 return (error);
3546 }
3547
3548 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
3549 zfs_acl_ids_free(&acl_ids);
3550 zfs_exit(zfsvfs, FTAG);
3551 return (error);
3552 }
3553
3554 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
3555 0 /* projid */)) {
3556 zfs_acl_ids_free(&acl_ids);
3557 zfs_exit(zfsvfs, FTAG);
3558 return (SET_ERROR(EDQUOT));
3559 }
3560
3561 getnewvnode_reserve_();
3562 tx = dmu_tx_create(zfsvfs->z_os);
3563 fuid_dirtied = zfsvfs->z_fuid_dirty;
3564 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
3565 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
3566 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
3567 ZFS_SA_BASE_ATTR_SIZE + len);
3568 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
3569 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3570 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
3571 acl_ids.z_aclp->z_acl_bytes);
3572 }
3573 if (fuid_dirtied)
3574 zfs_fuid_txhold(zfsvfs, tx);
3575 error = dmu_tx_assign(tx, TXG_WAIT);
3576 if (error) {
3577 zfs_acl_ids_free(&acl_ids);
3578 dmu_tx_abort(tx);
3579 getnewvnode_drop_reserve();
3580 zfs_exit(zfsvfs, FTAG);
3581 return (error);
3582 }
3583
3584 /*
3585 * Create a new object for the symlink.
3586 * for version 4 ZPL datasets the symlink will be an SA attribute
3587 */
3588 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
3589
3590 if (fuid_dirtied)
3591 zfs_fuid_sync(zfsvfs, tx);
3592
3593 if (zp->z_is_sa)
3594 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
3595 __DECONST(void *, link), len, tx);
3596 else
3597 zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);
3598
3599 zp->z_size = len;
3600 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
3601 &zp->z_size, sizeof (zp->z_size), tx);
3602 /*
3603 * Insert the new object into the directory.
3604 */
3605 error = zfs_link_create(dzp, name, zp, tx, ZNEW);
3606 if (error != 0) {
3607 zfs_znode_delete(zp, tx);
3608 VOP_UNLOCK1(ZTOV(zp));
3609 zrele(zp);
3610 } else {
3611 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
3612 }
3613
3614 zfs_acl_ids_free(&acl_ids);
3615
3616 dmu_tx_commit(tx);
3617
3618 getnewvnode_drop_reserve();
3619
3620 if (error == 0) {
3621 *zpp = zp;
3622
3623 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3624 zil_commit(zilog, 0);
3625 }
3626
3627 zfs_exit(zfsvfs, FTAG);
3628 return (error);
3629 }
3630
3631 /*
3632 * Return, in the buffer contained in the provided uio structure,
3633 * the symbolic path referred to by vp.
3634 *
3635 * IN: vp - vnode of symbolic link.
3636 * uio - structure to contain the link path.
3637 * cr - credentials of caller.
3638 * ct - caller context
3639 *
3640 * OUT: uio - structure containing the link path.
3641 *
3642 * RETURN: 0 on success, error code on failure.
3643 *
3644 * Timestamps:
3645 * vp - atime updated
3646 */
3647 static int
zfs_readlink(vnode_t * vp,zfs_uio_t * uio,cred_t * cr,caller_context_t * ct)3648 zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
3649 {
3650 (void) cr, (void) ct;
3651 znode_t *zp = VTOZ(vp);
3652 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3653 int error;
3654
3655 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3656 return (error);
3657
3658 if (zp->z_is_sa)
3659 error = sa_lookup_uio(zp->z_sa_hdl,
3660 SA_ZPL_SYMLINK(zfsvfs), uio);
3661 else
3662 error = zfs_sa_readlink(zp, uio);
3663
3664 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
3665
3666 zfs_exit(zfsvfs, FTAG);
3667 return (error);
3668 }
3669
3670 /*
3671 * Insert a new entry into directory tdvp referencing svp.
3672 *
3673 * IN: tdvp - Directory to contain new entry.
3674 * svp - vnode of new entry.
3675 * name - name of new entry.
3676 * cr - credentials of caller.
3677 *
3678 * RETURN: 0 on success, error code on failure.
3679 *
3680 * Timestamps:
3681 * tdvp - ctime|mtime updated
3682 * svp - ctime updated
3683 */
3684 int
zfs_link(znode_t * tdzp,znode_t * szp,const char * name,cred_t * cr,int flags)3685 zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
3686 int flags)
3687 {
3688 (void) flags;
3689 znode_t *tzp;
3690 zfsvfs_t *zfsvfs = tdzp->z_zfsvfs;
3691 zilog_t *zilog;
3692 dmu_tx_t *tx;
3693 int error;
3694 uint64_t parent;
3695 uid_t owner;
3696
3697 ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);
3698
3699 if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
3700 return (error);
3701 zilog = zfsvfs->z_log;
3702
3703 /*
3704 * POSIX dictates that we return EPERM here.
3705 * Better choices include ENOTSUP or EISDIR.
3706 */
3707 if (ZTOV(szp)->v_type == VDIR) {
3708 zfs_exit(zfsvfs, FTAG);
3709 return (SET_ERROR(EPERM));
3710 }
3711
3712 if ((error = zfs_verify_zp(szp)) != 0) {
3713 zfs_exit(zfsvfs, FTAG);
3714 return (error);
3715 }
3716
3717 /*
3718 * If we are using project inheritance, means if the directory has
3719 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3720 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
3721 * such case, we only allow hard link creation in our tree when the
3722 * project IDs are the same.
3723 */
3724 if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3725 tdzp->z_projid != szp->z_projid) {
3726 zfs_exit(zfsvfs, FTAG);
3727 return (SET_ERROR(EXDEV));
3728 }
3729
3730 if (szp->z_pflags & (ZFS_APPENDONLY |
3731 ZFS_IMMUTABLE | ZFS_READONLY)) {
3732 zfs_exit(zfsvfs, FTAG);
3733 return (SET_ERROR(EPERM));
3734 }
3735
3736 /* Prevent links to .zfs/shares files */
3737
3738 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
3739 &parent, sizeof (uint64_t))) != 0) {
3740 zfs_exit(zfsvfs, FTAG);
3741 return (error);
3742 }
3743 if (parent == zfsvfs->z_shares_dir) {
3744 zfs_exit(zfsvfs, FTAG);
3745 return (SET_ERROR(EPERM));
3746 }
3747
3748 if (zfsvfs->z_utf8 && u8_validate(name,
3749 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3750 zfs_exit(zfsvfs, FTAG);
3751 return (SET_ERROR(EILSEQ));
3752 }
3753
3754 /*
3755 * We do not support links between attributes and non-attributes
3756 * because of the potential security risk of creating links
3757 * into "normal" file space in order to circumvent restrictions
3758 * imposed in attribute space.
3759 */
3760 if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
3761 zfs_exit(zfsvfs, FTAG);
3762 return (SET_ERROR(EINVAL));
3763 }
3764
3765
3766 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
3767 if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
3768 zfs_exit(zfsvfs, FTAG);
3769 return (SET_ERROR(EPERM));
3770 }
3771
3772 if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr, NULL))) {
3773 zfs_exit(zfsvfs, FTAG);
3774 return (error);
3775 }
3776
3777 /*
3778 * Attempt to lock directory; fail if entry already exists.
3779 */
3780 error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
3781 if (error) {
3782 zfs_exit(zfsvfs, FTAG);
3783 return (error);
3784 }
3785
3786 tx = dmu_tx_create(zfsvfs->z_os);
3787 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3788 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
3789 zfs_sa_upgrade_txholds(tx, szp);
3790 zfs_sa_upgrade_txholds(tx, tdzp);
3791 error = dmu_tx_assign(tx, TXG_WAIT);
3792 if (error) {
3793 dmu_tx_abort(tx);
3794 zfs_exit(zfsvfs, FTAG);
3795 return (error);
3796 }
3797
3798 error = zfs_link_create(tdzp, name, szp, tx, 0);
3799
3800 if (error == 0) {
3801 uint64_t txtype = TX_LINK;
3802 zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
3803 }
3804
3805 dmu_tx_commit(tx);
3806
3807 if (error == 0) {
3808 vnevent_link(ZTOV(szp), ct);
3809 }
3810
3811 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3812 zil_commit(zilog, 0);
3813
3814 zfs_exit(zfsvfs, FTAG);
3815 return (error);
3816 }
3817
3818 /*
3819 * Free or allocate space in a file. Currently, this function only
3820 * supports the `F_FREESP' command. However, this command is somewhat
3821 * misnamed, as its functionality includes the ability to allocate as
3822 * well as free space.
3823 *
3824 * IN: ip - inode of file to free data in.
3825 * cmd - action to take (only F_FREESP supported).
3826 * bfp - section of file to free/alloc.
3827 * flag - current file open mode flags.
3828 * offset - current file offset.
3829 * cr - credentials of caller.
3830 *
3831 * RETURN: 0 on success, error code on failure.
3832 *
3833 * Timestamps:
3834 * ip - ctime|mtime updated
3835 */
3836 int
zfs_space(znode_t * zp,int cmd,flock64_t * bfp,int flag,offset_t offset,cred_t * cr)3837 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
3838 offset_t offset, cred_t *cr)
3839 {
3840 (void) offset;
3841 zfsvfs_t *zfsvfs = ZTOZSB(zp);
3842 uint64_t off, len;
3843 int error;
3844
3845 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3846 return (error);
3847
3848 if (cmd != F_FREESP) {
3849 zfs_exit(zfsvfs, FTAG);
3850 return (SET_ERROR(EINVAL));
3851 }
3852
3853 /*
3854 * Callers might not be able to detect properly that we are read-only,
3855 * so check it explicitly here.
3856 */
3857 if (zfs_is_readonly(zfsvfs)) {
3858 zfs_exit(zfsvfs, FTAG);
3859 return (SET_ERROR(EROFS));
3860 }
3861
3862 if (bfp->l_len < 0) {
3863 zfs_exit(zfsvfs, FTAG);
3864 return (SET_ERROR(EINVAL));
3865 }
3866
3867 /*
3868 * Permissions aren't checked on Solaris because on this OS
3869 * zfs_space() can only be called with an opened file handle.
3870 * On Linux we can get here through truncate_range() which
3871 * operates directly on inodes, so we need to check access rights.
3872 */
3873 if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr, NULL))) {
3874 zfs_exit(zfsvfs, FTAG);
3875 return (error);
3876 }
3877
3878 off = bfp->l_start;
3879 len = bfp->l_len; /* 0 means from off to end of file */
3880
3881 error = zfs_freesp(zp, off, len, flag, TRUE);
3882
3883 zfs_exit(zfsvfs, FTAG);
3884 return (error);
3885 }
3886
3887 static void
zfs_inactive(vnode_t * vp,cred_t * cr,caller_context_t * ct)3888 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
3889 {
3890 (void) cr, (void) ct;
3891 znode_t *zp = VTOZ(vp);
3892 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3893 int error;
3894
3895 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
3896 if (zp->z_sa_hdl == NULL) {
3897 /*
3898 * The fs has been unmounted, or we did a
3899 * suspend/resume and this file no longer exists.
3900 */
3901 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3902 vrecycle(vp);
3903 return;
3904 }
3905
3906 if (zp->z_unlinked) {
3907 /*
3908 * Fast path to recycle a vnode of a removed file.
3909 */
3910 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3911 vrecycle(vp);
3912 return;
3913 }
3914
3915 if (zp->z_atime_dirty && zp->z_unlinked == 0) {
3916 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
3917
3918 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
3919 zfs_sa_upgrade_txholds(tx, zp);
3920 error = dmu_tx_assign(tx, TXG_WAIT);
3921 if (error) {
3922 dmu_tx_abort(tx);
3923 } else {
3924 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
3925 (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
3926 zp->z_atime_dirty = 0;
3927 dmu_tx_commit(tx);
3928 }
3929 }
3930 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3931 }
3932
3933
3934 _Static_assert(sizeof (struct zfid_short) <= sizeof (struct fid),
3935 "struct zfid_short bigger than struct fid");
3936 _Static_assert(sizeof (struct zfid_long) <= sizeof (struct fid),
3937 "struct zfid_long bigger than struct fid");
3938
3939 static int
zfs_fid(vnode_t * vp,fid_t * fidp,caller_context_t * ct)3940 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
3941 {
3942 (void) ct;
3943 znode_t *zp = VTOZ(vp);
3944 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3945 uint32_t gen;
3946 uint64_t gen64;
3947 uint64_t object = zp->z_id;
3948 zfid_short_t *zfid;
3949 int size, i, error;
3950
3951 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3952 return (error);
3953
3954 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
3955 &gen64, sizeof (uint64_t))) != 0) {
3956 zfs_exit(zfsvfs, FTAG);
3957 return (error);
3958 }
3959
3960 gen = (uint32_t)gen64;
3961
3962 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
3963 fidp->fid_len = size;
3964
3965 zfid = (zfid_short_t *)fidp;
3966
3967 zfid->zf_len = size;
3968
3969 for (i = 0; i < sizeof (zfid->zf_object); i++)
3970 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
3971
3972 /* Must have a non-zero generation number to distinguish from .zfs */
3973 if (gen == 0)
3974 gen = 1;
3975 for (i = 0; i < sizeof (zfid->zf_gen); i++)
3976 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
3977
3978 if (size == LONG_FID_LEN) {
3979 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
3980 zfid_long_t *zlfid;
3981
3982 zlfid = (zfid_long_t *)fidp;
3983
3984 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
3985 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
3986
3987 /* XXX - this should be the generation number for the objset */
3988 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
3989 zlfid->zf_setgen[i] = 0;
3990 }
3991
3992 zfs_exit(zfsvfs, FTAG);
3993 return (0);
3994 }
3995
3996 static int
zfs_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)3997 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
3998 caller_context_t *ct)
3999 {
4000 znode_t *zp;
4001 zfsvfs_t *zfsvfs;
4002 int error;
4003
4004 switch (cmd) {
4005 case _PC_LINK_MAX:
4006 *valp = MIN(LONG_MAX, ZFS_LINK_MAX);
4007 return (0);
4008
4009 case _PC_FILESIZEBITS:
4010 *valp = 64;
4011 return (0);
4012 case _PC_MIN_HOLE_SIZE:
4013 *valp = (int)SPA_MINBLOCKSIZE;
4014 return (0);
4015 case _PC_ACL_EXTENDED:
4016 #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
4017 zp = VTOZ(vp);
4018 zfsvfs = zp->z_zfsvfs;
4019 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
4020 return (error);
4021 *valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0;
4022 zfs_exit(zfsvfs, FTAG);
4023 #else
4024 *valp = 0;
4025 #endif
4026 return (0);
4027
4028 case _PC_ACL_NFS4:
4029 zp = VTOZ(vp);
4030 zfsvfs = zp->z_zfsvfs;
4031 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
4032 return (error);
4033 *valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 1 : 0;
4034 zfs_exit(zfsvfs, FTAG);
4035 return (0);
4036
4037 case _PC_ACL_PATH_MAX:
4038 *valp = ACL_MAX_ENTRIES;
4039 return (0);
4040
4041 default:
4042 return (EOPNOTSUPP);
4043 }
4044 }
4045
4046 static int
zfs_getpages(struct vnode * vp,vm_page_t * ma,int count,int * rbehind,int * rahead)4047 zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
4048 int *rahead)
4049 {
4050 znode_t *zp = VTOZ(vp);
4051 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4052 zfs_locked_range_t *lr;
4053 vm_object_t object;
4054 off_t start, end, obj_size;
4055 uint_t blksz;
4056 int pgsin_b, pgsin_a;
4057 int error;
4058
4059 if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
4060 return (zfs_vm_pagerret_error);
4061
4062 start = IDX_TO_OFF(ma[0]->pindex);
4063 end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
4064
4065 /*
4066 * Lock a range covering all required and optional pages.
4067 * Note that we need to handle the case of the block size growing.
4068 */
4069 for (;;) {
4070 blksz = zp->z_blksz;
4071 lr = zfs_rangelock_tryenter(&zp->z_rangelock,
4072 rounddown(start, blksz),
4073 roundup(end, blksz) - rounddown(start, blksz), RL_READER);
4074 if (lr == NULL) {
4075 if (rahead != NULL) {
4076 *rahead = 0;
4077 rahead = NULL;
4078 }
4079 if (rbehind != NULL) {
4080 *rbehind = 0;
4081 rbehind = NULL;
4082 }
4083 break;
4084 }
4085 if (blksz == zp->z_blksz)
4086 break;
4087 zfs_rangelock_exit(lr);
4088 }
4089
4090 object = ma[0]->object;
4091 zfs_vmobject_wlock(object);
4092 obj_size = object->un_pager.vnp.vnp_size;
4093 zfs_vmobject_wunlock(object);
4094 if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
4095 if (lr != NULL)
4096 zfs_rangelock_exit(lr);
4097 zfs_exit(zfsvfs, FTAG);
4098 return (zfs_vm_pagerret_bad);
4099 }
4100
4101 pgsin_b = 0;
4102 if (rbehind != NULL) {
4103 pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
4104 pgsin_b = MIN(*rbehind, pgsin_b);
4105 }
4106
4107 pgsin_a = 0;
4108 if (rahead != NULL) {
4109 pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
4110 if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
4111 pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
4112 pgsin_a = MIN(*rahead, pgsin_a);
4113 }
4114
4115 /*
4116 * NB: we need to pass the exact byte size of the data that we expect
4117 * to read after accounting for the file size. This is required because
4118 * ZFS will panic if we request DMU to read beyond the end of the last
4119 * allocated block.
4120 */
4121 error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b,
4122 &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE));
4123
4124 if (lr != NULL)
4125 zfs_rangelock_exit(lr);
4126 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
4127
4128 dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);
4129
4130 zfs_exit(zfsvfs, FTAG);
4131
4132 if (error != 0)
4133 return (zfs_vm_pagerret_error);
4134
4135 VM_CNT_INC(v_vnodein);
4136 VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
4137 if (rbehind != NULL)
4138 *rbehind = pgsin_b;
4139 if (rahead != NULL)
4140 *rahead = pgsin_a;
4141 return (zfs_vm_pagerret_ok);
4142 }
4143
4144 #ifndef _SYS_SYSPROTO_H_
4145 struct vop_getpages_args {
4146 struct vnode *a_vp;
4147 vm_page_t *a_m;
4148 int a_count;
4149 int *a_rbehind;
4150 int *a_rahead;
4151 };
4152 #endif
4153
4154 static int
zfs_freebsd_getpages(struct vop_getpages_args * ap)4155 zfs_freebsd_getpages(struct vop_getpages_args *ap)
4156 {
4157
4158 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
4159 ap->a_rahead));
4160 }
4161
4162 static int
zfs_putpages(struct vnode * vp,vm_page_t * ma,size_t len,int flags,int * rtvals)4163 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
4164 int *rtvals)
4165 {
4166 znode_t *zp = VTOZ(vp);
4167 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4168 zfs_locked_range_t *lr;
4169 dmu_tx_t *tx;
4170 struct sf_buf *sf;
4171 vm_object_t object;
4172 vm_page_t m;
4173 caddr_t va;
4174 size_t tocopy;
4175 size_t lo_len;
4176 vm_ooffset_t lo_off;
4177 vm_ooffset_t off;
4178 uint_t blksz;
4179 int ncount;
4180 int pcount;
4181 int err;
4182 int i;
4183
4184 object = vp->v_object;
4185 KASSERT(ma[0]->object == object, ("mismatching object"));
4186 KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
4187
4188 pcount = btoc(len);
4189 ncount = pcount;
4190 for (i = 0; i < pcount; i++)
4191 rtvals[i] = zfs_vm_pagerret_error;
4192
4193 if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
4194 return (zfs_vm_pagerret_error);
4195
4196 off = IDX_TO_OFF(ma[0]->pindex);
4197 blksz = zp->z_blksz;
4198 lo_off = rounddown(off, blksz);
4199 lo_len = roundup(len + (off - lo_off), blksz);
4200 lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);
4201
4202 zfs_vmobject_wlock(object);
4203 if (len + off > object->un_pager.vnp.vnp_size) {
4204 if (object->un_pager.vnp.vnp_size > off) {
4205 int pgoff;
4206
4207 len = object->un_pager.vnp.vnp_size - off;
4208 ncount = btoc(len);
4209 if ((pgoff = (int)len & PAGE_MASK) != 0) {
4210 /*
4211 * If the object is locked and the following
4212 * conditions hold, then the page's dirty
4213 * field cannot be concurrently changed by a
4214 * pmap operation.
4215 */
4216 m = ma[ncount - 1];
4217 vm_page_assert_sbusied(m);
4218 KASSERT(!pmap_page_is_write_mapped(m),
4219 ("zfs_putpages: page %p is not read-only",
4220 m));
4221 vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
4222 pgoff);
4223 }
4224 } else {
4225 len = 0;
4226 ncount = 0;
4227 }
4228 if (ncount < pcount) {
4229 for (i = ncount; i < pcount; i++) {
4230 rtvals[i] = zfs_vm_pagerret_bad;
4231 }
4232 }
4233 }
4234 zfs_vmobject_wunlock(object);
4235
4236 if (ncount == 0)
4237 goto out;
4238
4239 if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
4240 zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
4241 (zp->z_projid != ZFS_DEFAULT_PROJID &&
4242 zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
4243 zp->z_projid))) {
4244 goto out;
4245 }
4246
4247 tx = dmu_tx_create(zfsvfs->z_os);
4248 dmu_tx_hold_write(tx, zp->z_id, off, len);
4249
4250 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4251 zfs_sa_upgrade_txholds(tx, zp);
4252 err = dmu_tx_assign(tx, TXG_WAIT);
4253 if (err != 0) {
4254 dmu_tx_abort(tx);
4255 goto out;
4256 }
4257
4258 if (zp->z_blksz < PAGE_SIZE) {
4259 for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4260 tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4261 va = zfs_map_page(ma[i], &sf);
4262 dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4263 zfs_unmap_page(sf);
4264 }
4265 } else {
4266 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
4267 }
4268
4269 if (err == 0) {
4270 uint64_t mtime[2], ctime[2];
4271 sa_bulk_attr_t bulk[3];
4272 int count = 0;
4273
4274 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4275 &mtime, 16);
4276 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4277 &ctime, 16);
4278 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4279 &zp->z_pflags, 8);
4280 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
4281 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
4282 ASSERT0(err);
4283 /*
4284 * XXX we should be passing a callback to undirty
4285 * but that would make the locking messier
4286 */
4287 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
4288 len, 0, NULL, NULL);
4289
4290 zfs_vmobject_wlock(object);
4291 for (i = 0; i < ncount; i++) {
4292 rtvals[i] = zfs_vm_pagerret_ok;
4293 vm_page_undirty(ma[i]);
4294 }
4295 zfs_vmobject_wunlock(object);
4296 VM_CNT_INC(v_vnodeout);
4297 VM_CNT_ADD(v_vnodepgsout, ncount);
4298 }
4299 dmu_tx_commit(tx);
4300
4301 out:
4302 zfs_rangelock_exit(lr);
4303 if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
4304 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4305 zil_commit(zfsvfs->z_log, zp->z_id);
4306
4307 dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);
4308
4309 zfs_exit(zfsvfs, FTAG);
4310 return (rtvals[0]);
4311 }
4312
4313 #ifndef _SYS_SYSPROTO_H_
4314 struct vop_putpages_args {
4315 struct vnode *a_vp;
4316 vm_page_t *a_m;
4317 int a_count;
4318 int a_sync;
4319 int *a_rtvals;
4320 };
4321 #endif
4322
4323 static int
zfs_freebsd_putpages(struct vop_putpages_args * ap)4324 zfs_freebsd_putpages(struct vop_putpages_args *ap)
4325 {
4326
4327 return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
4328 ap->a_rtvals));
4329 }
4330
4331 #ifndef _SYS_SYSPROTO_H_
4332 struct vop_bmap_args {
4333 struct vnode *a_vp;
4334 daddr_t a_bn;
4335 struct bufobj **a_bop;
4336 daddr_t *a_bnp;
4337 int *a_runp;
4338 int *a_runb;
4339 };
4340 #endif
4341
4342 static int
zfs_freebsd_bmap(struct vop_bmap_args * ap)4343 zfs_freebsd_bmap(struct vop_bmap_args *ap)
4344 {
4345
4346 if (ap->a_bop != NULL)
4347 *ap->a_bop = &ap->a_vp->v_bufobj;
4348 if (ap->a_bnp != NULL)
4349 *ap->a_bnp = ap->a_bn;
4350 if (ap->a_runp != NULL)
4351 *ap->a_runp = 0;
4352 if (ap->a_runb != NULL)
4353 *ap->a_runb = 0;
4354
4355 return (0);
4356 }
4357
4358 #ifndef _SYS_SYSPROTO_H_
4359 struct vop_open_args {
4360 struct vnode *a_vp;
4361 int a_mode;
4362 struct ucred *a_cred;
4363 struct thread *a_td;
4364 };
4365 #endif
4366
4367 static int
zfs_freebsd_open(struct vop_open_args * ap)4368 zfs_freebsd_open(struct vop_open_args *ap)
4369 {
4370 vnode_t *vp = ap->a_vp;
4371 znode_t *zp = VTOZ(vp);
4372 int error;
4373
4374 error = zfs_open(&vp, ap->a_mode, ap->a_cred);
4375 if (error == 0)
4376 vnode_create_vobject(vp, zp->z_size, ap->a_td);
4377 return (error);
4378 }
4379
#ifndef _SYS_SYSPROTO_H_
struct vop_close_args {
	struct vnode *a_vp;
	int a_fflag;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/*
 * Adapter: forward the close to zfs_close() with the constant arguments
 * 1 and 0 in the third and fourth positions, returning its result.
 */
static int
zfs_freebsd_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int fflag = ap->a_fflag;

	return (zfs_close(vp, fflag, 1, 0, ap->a_cred));
}
4395
4396 #ifndef _SYS_SYSPROTO_H_
4397 struct vop_ioctl_args {
4398 struct vnode *a_vp;
4399 ulong_t a_command;
4400 caddr_t a_data;
4401 int a_fflag;
4402 struct ucred *cred;
4403 struct thread *td;
4404 };
4405 #endif
4406
4407 static int
zfs_freebsd_ioctl(struct vop_ioctl_args * ap)4408 zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
4409 {
4410
4411 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
4412 ap->a_fflag, ap->a_cred, NULL));
4413 }
4414
4415 static int
ioflags(int ioflags)4416 ioflags(int ioflags)
4417 {
4418 int flags = 0;
4419
4420 if (ioflags & IO_APPEND)
4421 flags |= O_APPEND;
4422 if (ioflags & IO_NDELAY)
4423 flags |= O_NONBLOCK;
4424 if (ioflags & IO_SYNC)
4425 flags |= O_SYNC;
4426
4427 return (flags);
4428 }
4429
4430 #ifndef _SYS_SYSPROTO_H_
4431 struct vop_read_args {
4432 struct vnode *a_vp;
4433 struct uio *a_uio;
4434 int a_ioflag;
4435 struct ucred *a_cred;
4436 };
4437 #endif
4438
4439 static int
zfs_freebsd_read(struct vop_read_args * ap)4440 zfs_freebsd_read(struct vop_read_args *ap)
4441 {
4442 zfs_uio_t uio;
4443 zfs_uio_init(&uio, ap->a_uio);
4444 return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4445 ap->a_cred));
4446 }
4447
4448 #ifndef _SYS_SYSPROTO_H_
4449 struct vop_write_args {
4450 struct vnode *a_vp;
4451 struct uio *a_uio;
4452 int a_ioflag;
4453 struct ucred *a_cred;
4454 };
4455 #endif
4456
4457 static int
zfs_freebsd_write(struct vop_write_args * ap)4458 zfs_freebsd_write(struct vop_write_args *ap)
4459 {
4460 zfs_uio_t uio;
4461 zfs_uio_init(&uio, ap->a_uio);
4462 return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4463 ap->a_cred));
4464 }
4465
#if __FreeBSD_version >= 1300102
/*
 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
 * the comment above cache_fplookup for details.
 *
 * Returns 0 only when the znode is available, is neither quarantined nor
 * an extended attribute, and has ZFS_NO_EXECS_DENIED set in its flags.
 * Every other case returns EAGAIN.
 */
static int
zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
{
	znode_t *zp = VTOZ_SMR(v->a_vp);
	uint64_t zflags;

	if (__predict_false(zp == NULL))
		return (EAGAIN);

	zflags = atomic_load_64(&zp->z_pflags);
	if ((zflags & (ZFS_AV_QUARANTINED | ZFS_XATTR)) != 0)
		return (EAGAIN);
	if ((zflags & ZFS_NO_EXECS_DENIED) == 0)
		return (EAGAIN);

	return (0);
}
#endif
4492
4493 #if __FreeBSD_version >= 1300139
4494 static int
zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args * v)4495 zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
4496 {
4497 vnode_t *vp;
4498 znode_t *zp;
4499 char *target;
4500
4501 vp = v->a_vp;
4502 zp = VTOZ_SMR(vp);
4503 if (__predict_false(zp == NULL)) {
4504 return (EAGAIN);
4505 }
4506
4507 target = atomic_load_consume_ptr(&zp->z_cached_symlink);
4508 if (target == NULL) {
4509 return (EAGAIN);
4510 }
4511 return (cache_symlink_resolve(v->a_fpl, target, strlen(target)));
4512 }
4513 #endif
4514
4515 #ifndef _SYS_SYSPROTO_H_
4516 struct vop_access_args {
4517 struct vnode *a_vp;
4518 accmode_t a_accmode;
4519 struct ucred *a_cred;
4520 struct thread *a_td;
4521 };
4522 #endif
4523
4524 static int
zfs_freebsd_access(struct vop_access_args * ap)4525 zfs_freebsd_access(struct vop_access_args *ap)
4526 {
4527 vnode_t *vp = ap->a_vp;
4528 znode_t *zp = VTOZ(vp);
4529 accmode_t accmode;
4530 int error = 0;
4531
4532
4533 if (ap->a_accmode == VEXEC) {
4534 if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
4535 return (0);
4536 }
4537
4538 /*
4539 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
4540 */
4541 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
4542 if (accmode != 0)
4543 error = zfs_access(zp, accmode, 0, ap->a_cred);
4544
4545 /*
4546 * VADMIN has to be handled by vaccess().
4547 */
4548 if (error == 0) {
4549 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
4550 if (accmode != 0) {
4551 #if __FreeBSD_version >= 1300105
4552 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4553 zp->z_gid, accmode, ap->a_cred);
4554 #else
4555 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4556 zp->z_gid, accmode, ap->a_cred, NULL);
4557 #endif
4558 }
4559 }
4560
4561 /*
4562 * For VEXEC, ensure that at least one execute bit is set for
4563 * non-directories.
4564 */
4565 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
4566 (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
4567 error = EACCES;
4568 }
4569
4570 return (error);
4571 }
4572
4573 #ifndef _SYS_SYSPROTO_H_
4574 struct vop_lookup_args {
4575 struct vnode *a_dvp;
4576 struct vnode **a_vpp;
4577 struct componentname *a_cnp;
4578 };
4579 #endif
4580
4581 static int
zfs_freebsd_lookup(struct vop_lookup_args * ap,boolean_t cached)4582 zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
4583 {
4584 struct componentname *cnp = ap->a_cnp;
4585 char nm[NAME_MAX + 1];
4586
4587 ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
4588 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));
4589
4590 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
4591 cnp->cn_cred, 0, cached));
4592 }
4593
4594 static int
zfs_freebsd_cachedlookup(struct vop_cachedlookup_args * ap)4595 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
4596 {
4597
4598 return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
4599 }
4600
4601 #ifndef _SYS_SYSPROTO_H_
4602 struct vop_lookup_args {
4603 struct vnode *a_dvp;
4604 struct vnode **a_vpp;
4605 struct componentname *a_cnp;
4606 };
4607 #endif
4608
4609 static int
zfs_cache_lookup(struct vop_lookup_args * ap)4610 zfs_cache_lookup(struct vop_lookup_args *ap)
4611 {
4612 zfsvfs_t *zfsvfs;
4613
4614 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4615 if (zfsvfs->z_use_namecache)
4616 return (vfs_cache_lookup(ap));
4617 else
4618 return (zfs_freebsd_lookup(ap, B_FALSE));
4619 }
4620
4621 #ifndef _SYS_SYSPROTO_H_
4622 struct vop_create_args {
4623 struct vnode *a_dvp;
4624 struct vnode **a_vpp;
4625 struct componentname *a_cnp;
4626 struct vattr *a_vap;
4627 };
4628 #endif
4629
4630 static int
zfs_freebsd_create(struct vop_create_args * ap)4631 zfs_freebsd_create(struct vop_create_args *ap)
4632 {
4633 zfsvfs_t *zfsvfs;
4634 struct componentname *cnp = ap->a_cnp;
4635 vattr_t *vap = ap->a_vap;
4636 znode_t *zp = NULL;
4637 int rc, mode;
4638
4639 #if __FreeBSD_version < 1400068
4640 ASSERT(cnp->cn_flags & SAVENAME);
4641 #endif
4642
4643 vattr_init_mask(vap);
4644 mode = vap->va_mode & ALLPERMS;
4645 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4646 *ap->a_vpp = NULL;
4647
4648 rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, 0, mode,
4649 &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */, NULL);
4650 if (rc == 0)
4651 *ap->a_vpp = ZTOV(zp);
4652 if (zfsvfs->z_use_namecache &&
4653 rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
4654 cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
4655
4656 return (rc);
4657 }
4658
4659 #ifndef _SYS_SYSPROTO_H_
4660 struct vop_remove_args {
4661 struct vnode *a_dvp;
4662 struct vnode *a_vp;
4663 struct componentname *a_cnp;
4664 };
4665 #endif
4666
4667 static int
zfs_freebsd_remove(struct vop_remove_args * ap)4668 zfs_freebsd_remove(struct vop_remove_args *ap)
4669 {
4670
4671 #if __FreeBSD_version < 1400068
4672 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4673 #endif
4674
4675 return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
4676 ap->a_cnp->cn_cred));
4677 }
4678
4679 #ifndef _SYS_SYSPROTO_H_
4680 struct vop_mkdir_args {
4681 struct vnode *a_dvp;
4682 struct vnode **a_vpp;
4683 struct componentname *a_cnp;
4684 struct vattr *a_vap;
4685 };
4686 #endif
4687
4688 static int
zfs_freebsd_mkdir(struct vop_mkdir_args * ap)4689 zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
4690 {
4691 vattr_t *vap = ap->a_vap;
4692 znode_t *zp = NULL;
4693 int rc;
4694
4695 #if __FreeBSD_version < 1400068
4696 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4697 #endif
4698
4699 vattr_init_mask(vap);
4700 *ap->a_vpp = NULL;
4701
4702 rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
4703 ap->a_cnp->cn_cred, 0, NULL, NULL);
4704
4705 if (rc == 0)
4706 *ap->a_vpp = ZTOV(zp);
4707 return (rc);
4708 }
4709
4710 #ifndef _SYS_SYSPROTO_H_
4711 struct vop_rmdir_args {
4712 struct vnode *a_dvp;
4713 struct vnode *a_vp;
4714 struct componentname *a_cnp;
4715 };
4716 #endif
4717
4718 static int
zfs_freebsd_rmdir(struct vop_rmdir_args * ap)4719 zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
4720 {
4721 struct componentname *cnp = ap->a_cnp;
4722
4723 #if __FreeBSD_version < 1400068
4724 ASSERT(cnp->cn_flags & SAVENAME);
4725 #endif
4726
4727 return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
4728 }
4729
4730 #ifndef _SYS_SYSPROTO_H_
4731 struct vop_readdir_args {
4732 struct vnode *a_vp;
4733 struct uio *a_uio;
4734 struct ucred *a_cred;
4735 int *a_eofflag;
4736 int *a_ncookies;
4737 cookie_t **a_cookies;
4738 };
4739 #endif
4740
4741 static int
zfs_freebsd_readdir(struct vop_readdir_args * ap)4742 zfs_freebsd_readdir(struct vop_readdir_args *ap)
4743 {
4744 zfs_uio_t uio;
4745 zfs_uio_init(&uio, ap->a_uio);
4746 return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
4747 ap->a_ncookies, ap->a_cookies));
4748 }
4749
4750 #ifndef _SYS_SYSPROTO_H_
4751 struct vop_fsync_args {
4752 struct vnode *a_vp;
4753 int a_waitfor;
4754 struct thread *a_td;
4755 };
4756 #endif
4757
4758 static int
zfs_freebsd_fsync(struct vop_fsync_args * ap)4759 zfs_freebsd_fsync(struct vop_fsync_args *ap)
4760 {
4761
4762 return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
4763 }
4764
4765 #ifndef _SYS_SYSPROTO_H_
4766 struct vop_getattr_args {
4767 struct vnode *a_vp;
4768 struct vattr *a_vap;
4769 struct ucred *a_cred;
4770 };
4771 #endif
4772
/*
 * Getattr handler.
 *
 * The caller's vattr is copied into an xvattr_t, a fixed set of optional
 * attributes (XAT_*) is requested, and zfs_getattr() is called.  The optional
 * attributes that were returned and are non-zero are then translated into
 * chflags-style SF_ and UF_ bits, which are stored in va_flags of the caller's
 * vattr together with the rest of the returned attributes.
 *
 * Returns 0 on success or the error from zfs_getattr(); on error the
 * caller's vattr is not modified.
 */
static int
zfs_freebsd_getattr(struct vop_getattr_args *ap)
{
	vattr_t *vap = ap->a_vap;
	xvattr_t xvap;
	ulong_t fflags = 0;
	int error;

	/* Start from the caller's vattr and mark it as carrying xvattr data. */
	xva_init(&xvap);
	xvap.xva_vattr = *vap;
	xvap.xva_vattr.va_mask |= AT_XVATTR;

	/* Request every optional attribute that maps to a chflags bit. */
	/* XXX: what about SF_SETTABLE?. */
	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
	XVA_SET_REQ(&xvap, XAT_NODUMP);
	XVA_SET_REQ(&xvap, XAT_READONLY);
	XVA_SET_REQ(&xvap, XAT_ARCHIVE);
	XVA_SET_REQ(&xvap, XAT_SYSTEM);
	XVA_SET_REQ(&xvap, XAT_HIDDEN);
	XVA_SET_REQ(&xvap, XAT_REPARSE);
	XVA_SET_REQ(&xvap, XAT_OFFLINE);
	XVA_SET_REQ(&xvap, XAT_SPARSE);

	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
	if (error != 0)
		return (error);

	/*
	 * Convert the returned ZFS optional attributes into chflags: a bit is
	 * set only when the attribute was actually returned and is non-zero.
	 */
#define	FLAG_CHECK(fflag, xflag, xfield) do {			\
	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)	\
		fflags |= (fflag);				\
} while (0)
	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
	    xvap.xva_xoptattrs.xoa_immutable);
	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
	    xvap.xva_xoptattrs.xoa_appendonly);
	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
	    xvap.xva_xoptattrs.xoa_nounlink);
	FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
	    xvap.xva_xoptattrs.xoa_archive);
	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
	    xvap.xva_xoptattrs.xoa_nodump);
	FLAG_CHECK(UF_READONLY, XAT_READONLY,
	    xvap.xva_xoptattrs.xoa_readonly);
	FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
	    xvap.xva_xoptattrs.xoa_system);
	FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
	    xvap.xva_xoptattrs.xoa_hidden);
	FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
	    xvap.xva_xoptattrs.xoa_reparse);
	FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
	    xvap.xva_xoptattrs.xoa_offline);
	FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
	    xvap.xva_xoptattrs.xoa_sparse);

#undef	FLAG_CHECK
	/* Hand the base attributes back and attach the translated flags. */
	*vap = xvap.xva_vattr;
	vap->va_flags = fflags;
	return (0);
}
4836
4837 #ifndef _SYS_SYSPROTO_H_
4838 struct vop_setattr_args {
4839 struct vnode *a_vp;
4840 struct vattr *a_vap;
4841 struct ucred *a_cred;
4842 };
4843 #endif
4844
/*
 * Setattr handler.
 *
 * The caller's vattr is copied into an xvattr_t.  When va_flags is set, the
 * requested chflags are validated, permission-checked, and every flag whose
 * requested state differs from the znode's current z_pflags is converted
 * into the corresponding XAT_ request.  When va_birthtime is set,
 * XAT_CREATETIME is requested as well.  The result is passed to
 * zfs_setattr().
 *
 * Returns EOPNOTSUPP when flags are requested on a file system without
 * z_use_fuids or when an unsupported flag bit is present, EPERM or another
 * error from the permission checks, or whatever zfs_setattr() returns.
 */
static int
zfs_freebsd_setattr(struct vop_setattr_args *ap)
{
	vnode_t *vp = ap->a_vp;
	vattr_t *vap = ap->a_vap;
	cred_t *cred = ap->a_cred;
	xvattr_t xvap;
	ulong_t fflags;
	uint64_t zflags;

	vattr_init_mask(vap);
	vap->va_mask &= ~AT_NOSET;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;

	/* Snapshot of the current ZFS flags, used for the checks below. */
	zflags = VTOZ(vp)->z_pflags;

	if (vap->va_flags != VNOVAL) {
		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
		int error;

		if (zfsvfs->z_use_fuids == B_FALSE)
			return (EOPNOTSUPP);

		fflags = vap->va_flags;
		/*
		 * XXX KDM
		 * We need to figure out whether it makes sense to allow
		 * UF_REPARSE through, since we don't really have other
		 * facilities to handle reparse points and zfs_setattr()
		 * doesn't currently allow setting that attribute anyway.
		 */
		/* Reject any flag bit outside the supported set. */
		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
		    UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
		    UF_OFFLINE|UF_SPARSE)) != 0)
			return (EOPNOTSUPP);
		/*
		 * Unprivileged processes are not permitted to unset system
		 * flags, or modify flags if any system flags are set.
		 * Privileged non-jail processes may not modify system flags
		 * if securelevel > 0 and any existing system flags are set.
		 * Privileged jail processes behave like privileged non-jail
		 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
		 * otherwise, they behave like unprivileged processes.
		 */
		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
		    spl_priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
			/*
			 * Privileged path: only the securelevel check applies,
			 * and only when a system flag is currently set.
			 */
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
				error = securelevel_gt(cred, 0);
				if (error != 0)
					return (error);
			}
		} else {
			/*
			 * Callers may only modify the file flags on
			 * objects they have VADMIN rights for.
			 */
			if ((error = VOP_ACCESS(vp, VADMIN, cred,
			    curthread)) != 0)
				return (error);
			/* No changes at all while a system flag is set ... */
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY |
			    ZFS_NOUNLINK)) {
				return (EPERM);
			}
			/* ... and no request that contains a system flag. */
			if (fflags &
			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
				return (EPERM);
			}
		}

		/*
		 * Request a change for one attribute only when the requested
		 * chflags bit and the current ZFS flag disagree; the new
		 * value is 1 when the chflags bit is set, 0 otherwise.
		 */
#define	FLAG_CHANGE(fflag, zflag, xflag, xfield) do {			\
	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
		XVA_SET_REQ(&xvap, (xflag));				\
		(xfield) = ((fflags & (fflag)) != 0);			\
	}								\
} while (0)
		/* Convert chflags into ZFS-type flags. */
		/* XXX: what about SF_SETTABLE?. */
		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
		    xvap.xva_xoptattrs.xoa_immutable);
		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
		    xvap.xva_xoptattrs.xoa_appendonly);
		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
		    xvap.xva_xoptattrs.xoa_nounlink);
		FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
		    xvap.xva_xoptattrs.xoa_archive);
		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
		    xvap.xva_xoptattrs.xoa_nodump);
		FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
		    xvap.xva_xoptattrs.xoa_readonly);
		FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
		    xvap.xva_xoptattrs.xoa_system);
		FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
		    xvap.xva_xoptattrs.xoa_hidden);
		FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
		    xvap.xva_xoptattrs.xoa_reparse);
		FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
		    xvap.xva_xoptattrs.xoa_offline);
		FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
		    xvap.xva_xoptattrs.xoa_sparse);
#undef	FLAG_CHANGE
	}
	/* A birth time in the request is passed on as XAT_CREATETIME. */
	if (vap->va_birthtime.tv_sec != VNOVAL) {
		xvap.xva_vattr.va_mask |= AT_XVATTR;
		XVA_SET_REQ(&xvap, XAT_CREATETIME);
	}
	return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred, NULL));
}
4957
4958 #ifndef _SYS_SYSPROTO_H_
4959 struct vop_rename_args {
4960 struct vnode *a_fdvp;
4961 struct vnode *a_fvp;
4962 struct componentname *a_fcnp;
4963 struct vnode *a_tdvp;
4964 struct vnode *a_tvp;
4965 struct componentname *a_tcnp;
4966 };
4967 #endif
4968
4969 static int
zfs_freebsd_rename(struct vop_rename_args * ap)4970 zfs_freebsd_rename(struct vop_rename_args *ap)
4971 {
4972 vnode_t *fdvp = ap->a_fdvp;
4973 vnode_t *fvp = ap->a_fvp;
4974 vnode_t *tdvp = ap->a_tdvp;
4975 vnode_t *tvp = ap->a_tvp;
4976 int error;
4977
4978 #if __FreeBSD_version < 1400068
4979 ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
4980 ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
4981 #endif
4982
4983 error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
4984 ap->a_tcnp, ap->a_fcnp->cn_cred);
4985
4986 vrele(fdvp);
4987 vrele(fvp);
4988 vrele(tdvp);
4989 if (tvp != NULL)
4990 vrele(tvp);
4991
4992 return (error);
4993 }
4994
4995 #ifndef _SYS_SYSPROTO_H_
4996 struct vop_symlink_args {
4997 struct vnode *a_dvp;
4998 struct vnode **a_vpp;
4999 struct componentname *a_cnp;
5000 struct vattr *a_vap;
5001 char *a_target;
5002 };
5003 #endif
5004
5005 static int
zfs_freebsd_symlink(struct vop_symlink_args * ap)5006 zfs_freebsd_symlink(struct vop_symlink_args *ap)
5007 {
5008 struct componentname *cnp = ap->a_cnp;
5009 vattr_t *vap = ap->a_vap;
5010 znode_t *zp = NULL;
5011 #if __FreeBSD_version >= 1300139
5012 char *symlink;
5013 size_t symlink_len;
5014 #endif
5015 int rc;
5016
5017 #if __FreeBSD_version < 1400068
5018 ASSERT(cnp->cn_flags & SAVENAME);
5019 #endif
5020
5021 vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */
5022 vattr_init_mask(vap);
5023 *ap->a_vpp = NULL;
5024
5025 rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
5026 ap->a_target, &zp, cnp->cn_cred, 0 /* flags */, NULL);
5027 if (rc == 0) {
5028 *ap->a_vpp = ZTOV(zp);
5029 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
5030 #if __FreeBSD_version >= 1300139
5031 MPASS(zp->z_cached_symlink == NULL);
5032 symlink_len = strlen(ap->a_target);
5033 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
5034 if (symlink != NULL) {
5035 memcpy(symlink, ap->a_target, symlink_len);
5036 symlink[symlink_len] = '\0';
5037 atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
5038 (uintptr_t)symlink);
5039 }
5040 #endif
5041 }
5042 return (rc);
5043 }
5044
5045 #ifndef _SYS_SYSPROTO_H_
5046 struct vop_readlink_args {
5047 struct vnode *a_vp;
5048 struct uio *a_uio;
5049 struct ucred *a_cred;
5050 };
5051 #endif
5052
/*
 * Readlink handler.
 *
 * The link is always read through zfs_readlink().  On kernels with
 * __FreeBSD_version >= 1300139 the result is additionally copied into
 * z_cached_symlink, but only when all of the following hold: the read
 * succeeded, the destination is a single kernel-space (UIO_SYSSPACE) iovec,
 * and no cached target is installed yet.
 *
 * Returns the result of zfs_readlink().
 */
static int
zfs_freebsd_readlink(struct vop_readlink_args *ap)
{
	zfs_uio_t uio;
	int error;
#if __FreeBSD_version >= 1300139
	znode_t *zp = VTOZ(ap->a_vp);
	char *symlink, *base;
	size_t symlink_len;
	bool trycache;
#endif

	zfs_uio_init(&uio, ap->a_uio);
#if __FreeBSD_version >= 1300139
	/*
	 * Remember the destination buffer and its size before the read.
	 * base and symlink_len are only assigned (and only used later)
	 * when trycache is true.
	 */
	trycache = false;
	if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
	    zfs_uio_iovcnt(&uio) == 1) {
		base = zfs_uio_iovbase(&uio, 0);
		symlink_len = zfs_uio_iovlen(&uio, 0);
		trycache = true;
	}
#endif
	error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
#if __FreeBSD_version >= 1300139
	/* Nothing to cache: already cached, read failed, or buffer unusable. */
	if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
	    error != 0 || !trycache) {
		return (error);
	}
	/*
	 * Original buffer size minus the remaining residual; presumably the
	 * number of bytes zfs_readlink() stored at base.
	 */
	symlink_len -= zfs_uio_resid(&uio);
	symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
	if (symlink != NULL) {
		memcpy(symlink, base, symlink_len);
		symlink[symlink_len] = '\0';
		/*
		 * Install the copy only if the slot is still NULL; otherwise
		 * free our copy again.
		 */
		if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
		    (uintptr_t)NULL, (uintptr_t)symlink)) {
			cache_symlink_free(symlink, symlink_len + 1);
		}
	}
#endif
	return (error);
}
5094
5095 #ifndef _SYS_SYSPROTO_H_
5096 struct vop_link_args {
5097 struct vnode *a_tdvp;
5098 struct vnode *a_vp;
5099 struct componentname *a_cnp;
5100 };
5101 #endif
5102
5103 static int
zfs_freebsd_link(struct vop_link_args * ap)5104 zfs_freebsd_link(struct vop_link_args *ap)
5105 {
5106 struct componentname *cnp = ap->a_cnp;
5107 vnode_t *vp = ap->a_vp;
5108 vnode_t *tdvp = ap->a_tdvp;
5109
5110 if (tdvp->v_mount != vp->v_mount)
5111 return (EXDEV);
5112
5113 #if __FreeBSD_version < 1400068
5114 ASSERT(cnp->cn_flags & SAVENAME);
5115 #endif
5116
5117 return (zfs_link(VTOZ(tdvp), VTOZ(vp),
5118 cnp->cn_nameptr, cnp->cn_cred, 0));
5119 }
5120
5121 #ifndef _SYS_SYSPROTO_H_
5122 struct vop_inactive_args {
5123 struct vnode *a_vp;
5124 struct thread *a_td;
5125 };
5126 #endif
5127
5128 static int
zfs_freebsd_inactive(struct vop_inactive_args * ap)5129 zfs_freebsd_inactive(struct vop_inactive_args *ap)
5130 {
5131 vnode_t *vp = ap->a_vp;
5132
5133 #if __FreeBSD_version >= 1300123
5134 zfs_inactive(vp, curthread->td_ucred, NULL);
5135 #else
5136 zfs_inactive(vp, ap->a_td->td_ucred, NULL);
5137 #endif
5138 return (0);
5139 }
5140
5141 #if __FreeBSD_version >= 1300042
5142 #ifndef _SYS_SYSPROTO_H_
5143 struct vop_need_inactive_args {
5144 struct vnode *a_vp;
5145 struct thread *a_td;
5146 };
5147 #endif
5148
5149 static int
zfs_freebsd_need_inactive(struct vop_need_inactive_args * ap)5150 zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
5151 {
5152 vnode_t *vp = ap->a_vp;
5153 znode_t *zp = VTOZ(vp);
5154 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5155 int need;
5156
5157 if (vn_need_pageq_flush(vp))
5158 return (1);
5159
5160 if (!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs))
5161 return (1);
5162 need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
5163 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
5164
5165 return (need);
5166 }
5167 #endif
5168
5169 #ifndef _SYS_SYSPROTO_H_
5170 struct vop_reclaim_args {
5171 struct vnode *a_vp;
5172 struct thread *a_td;
5173 };
5174 #endif
5175
5176 static int
zfs_freebsd_reclaim(struct vop_reclaim_args * ap)5177 zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
5178 {
5179 vnode_t *vp = ap->a_vp;
5180 znode_t *zp = VTOZ(vp);
5181 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5182
5183 ASSERT3P(zp, !=, NULL);
5184
5185 #if __FreeBSD_version < 1300042
5186 /* Destroy the vm object and flush associated pages. */
5187 vnode_destroy_vobject(vp);
5188 #endif
5189 /*
5190 * z_teardown_inactive_lock protects from a race with
5191 * zfs_znode_dmu_fini in zfsvfs_teardown during
5192 * force unmount.
5193 */
5194 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
5195 if (zp->z_sa_hdl == NULL)
5196 zfs_znode_free(zp);
5197 else
5198 zfs_zinactive(zp);
5199 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
5200
5201 vp->v_data = NULL;
5202 return (0);
5203 }
5204
5205 #ifndef _SYS_SYSPROTO_H_
5206 struct vop_fid_args {
5207 struct vnode *a_vp;
5208 struct fid *a_fid;
5209 };
5210 #endif
5211
5212 static int
zfs_freebsd_fid(struct vop_fid_args * ap)5213 zfs_freebsd_fid(struct vop_fid_args *ap)
5214 {
5215
5216 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
5217 }
5218
5219
5220 #ifndef _SYS_SYSPROTO_H_
5221 struct vop_pathconf_args {
5222 struct vnode *a_vp;
5223 int a_name;
5224 register_t *a_retval;
5225 } *ap;
5226 #endif
5227
5228 static int
zfs_freebsd_pathconf(struct vop_pathconf_args * ap)5229 zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
5230 {
5231 ulong_t val;
5232 int error;
5233
5234 error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
5235 curthread->td_ucred, NULL);
5236 if (error == 0) {
5237 *ap->a_retval = val;
5238 return (error);
5239 }
5240 if (error != EOPNOTSUPP)
5241 return (error);
5242
5243 switch (ap->a_name) {
5244 case _PC_NAME_MAX:
5245 *ap->a_retval = NAME_MAX;
5246 return (0);
5247 #if __FreeBSD_version >= 1400032
5248 case _PC_DEALLOC_PRESENT:
5249 *ap->a_retval = 1;
5250 return (0);
5251 #endif
5252 case _PC_PIPE_BUF:
5253 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
5254 *ap->a_retval = PIPE_BUF;
5255 return (0);
5256 }
5257 return (EINVAL);
5258 default:
5259 return (vop_stdpathconf(ap));
5260 }
5261 }
5262
/*
 * Selects which user-namespace attribute name encoding zfs_getextattr() and
 * zfs_deleteextattr() try first (it is passed as the `compat' argument of
 * zfs_create_attrname()); the opposite encoding is tried as a fallback when
 * the attribute is not found.
 */
static int zfs_xattr_compat = 1;
5264
/*
 * Validate a caller-supplied attribute name.  Names containing '/' and names
 * that start with a namespace prefix are rejected with EINVAL; anything else
 * yields 0.
 */
static int
zfs_check_attrname(const char *name)
{
	if (strchr(name, '/') != NULL || ZFS_XA_NS_PREFIX_FORBIDDEN(name))
		return (SET_ERROR(EINVAL));

	return (0);
}
5276
5277 /*
5278 * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
5279 * extended attribute name:
5280 *
5281 * NAMESPACE XATTR_COMPAT PREFIX
5282 * system * freebsd:system:
5283 * user 1 (none, can be used to access ZFS
5284 * fsattr(5) attributes created on Solaris)
5285 * user 0 user.
5286 */
5287 static int
zfs_create_attrname(int attrnamespace,const char * name,char * attrname,size_t size,boolean_t compat)5288 zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
5289 size_t size, boolean_t compat)
5290 {
5291 const char *namespace, *prefix, *suffix;
5292
5293 memset(attrname, 0, size);
5294
5295 switch (attrnamespace) {
5296 case EXTATTR_NAMESPACE_USER:
5297 if (compat) {
5298 /*
5299 * This is the default namespace by which we can access
5300 * all attributes created on Solaris.
5301 */
5302 prefix = namespace = suffix = "";
5303 } else {
5304 /*
5305 * This is compatible with the user namespace encoding
5306 * on Linux prior to xattr_compat, but nothing
5307 * else.
5308 */
5309 prefix = "";
5310 namespace = "user";
5311 suffix = ".";
5312 }
5313 break;
5314 case EXTATTR_NAMESPACE_SYSTEM:
5315 prefix = "freebsd:";
5316 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
5317 suffix = ":";
5318 break;
5319 case EXTATTR_NAMESPACE_EMPTY:
5320 default:
5321 return (SET_ERROR(EINVAL));
5322 }
5323 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
5324 name) >= size) {
5325 return (SET_ERROR(ENAMETOOLONG));
5326 }
5327 return (0);
5328 }
5329
/*
 * Make sure zp->z_xattr_cached is populated.
 *
 * The caller must hold z_xattr_lock (asserted).  If the cache is already
 * present nothing is done.  If the lock is held as writer, the cache is
 * loaded directly and the lock is left as is.  Otherwise the lock is
 * upgraded to writer, the cache is loaded if still missing, and the lock is
 * downgraded again before returning.
 *
 * Returns 0 or the error from zfs_sa_get_xattr().
 */
static int
zfs_ensure_xattr_cached(znode_t *zp)
{
	int error = 0;

	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));

	if (zp->z_xattr_cached != NULL)
		return (0);

	if (rw_write_held(&zp->z_xattr_lock))
		return (zfs_sa_get_xattr(zp));

	/*
	 * If the in-place upgrade fails, the lock is dropped and re-taken as
	 * writer, so the lock is not held continuously across this step.
	 */
	if (!rw_tryupgrade(&zp->z_xattr_lock)) {
		rw_exit(&zp->z_xattr_lock);
		rw_enter(&zp->z_xattr_lock, RW_WRITER);
	}
	/* Re-check: the cache may have been filled while the lock was dropped. */
	if (zp->z_xattr_cached == NULL)
		error = zfs_sa_get_xattr(zp);
	rw_downgrade(&zp->z_xattr_lock);
	return (error);
}
5352
5353 #ifndef _SYS_SYSPROTO_H_
5354 struct vop_getextattr {
5355 IN struct vnode *a_vp;
5356 IN int a_attrnamespace;
5357 IN const char *a_name;
5358 INOUT struct uio *a_uio;
5359 OUT size_t *a_size;
5360 IN struct ucred *a_cred;
5361 IN struct thread *a_td;
5362 };
5363 #endif
5364
/*
 * Read an extended attribute that is stored as a file in the vnode's xattr
 * directory.
 *
 * The xattr directory is looked up with LOOKUP_XATTR and `attrname' is
 * opened read-only relative to it.  When a_size is non-NULL only the
 * attribute's size is reported (via VOP_GETATTR); otherwise, when a_uio is
 * non-NULL, the contents are read into it.
 *
 * Returns 0 or the first error from the lookup, open, getattr or read.
 */
static int
zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
{
	struct thread *td = ap->a_td;
	struct nameidata nd;
	struct vattr va;
	vnode_t *xvp = NULL, *vp;
	int error, flags;

	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
	    LOOKUP_XATTR, B_FALSE);
	if (error != 0)
		return (error);

	/*
	 * NOTE(review): xvp is never released explicitly in this function;
	 * presumably the name lookup started from it consumes the reference.
	 * Confirm against the kernel's namei() behaviour.
	 */
	flags = FREAD;
#if __FreeBSD_version < 1400043
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
	    xvp, td);
#else
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
#endif
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
	if (error != 0)
		return (SET_ERROR(error));
	vp = nd.ni_vp;
	NDFREE_PNBUF(&nd);

	/* A size query takes precedence over reading the data. */
	if (ap->a_size != NULL) {
		error = VOP_GETATTR(vp, &va, ap->a_cred);
		if (error == 0)
			*ap->a_size = (size_t)va.va_size;
	} else if (ap->a_uio != NULL)
		error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);

	/* Unlock and close the attribute file on both success and failure. */
	VOP_UNLOCK1(vp);
	vn_close(vp, flags, ap->a_cred, td);
	return (error);
}
5403
5404 static int
zfs_getextattr_sa(struct vop_getextattr_args * ap,const char * attrname)5405 zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
5406 {
5407 znode_t *zp = VTOZ(ap->a_vp);
5408 uchar_t *nv_value;
5409 uint_t nv_size;
5410 int error;
5411
5412 error = zfs_ensure_xattr_cached(zp);
5413 if (error != 0)
5414 return (error);
5415
5416 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5417 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5418
5419 error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
5420 &nv_value, &nv_size);
5421 if (error != 0)
5422 return (SET_ERROR(error));
5423
5424 if (ap->a_size != NULL)
5425 *ap->a_size = nv_size;
5426 else if (ap->a_uio != NULL)
5427 error = uiomove(nv_value, nv_size, ap->a_uio);
5428 if (error != 0)
5429 return (SET_ERROR(error));
5430
5431 return (0);
5432 }
5433
5434 static int
zfs_getextattr_impl(struct vop_getextattr_args * ap,boolean_t compat)5435 zfs_getextattr_impl(struct vop_getextattr_args *ap, boolean_t compat)
5436 {
5437 znode_t *zp = VTOZ(ap->a_vp);
5438 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5439 char attrname[EXTATTR_MAXNAMELEN+1];
5440 int error;
5441
5442 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5443 sizeof (attrname), compat);
5444 if (error != 0)
5445 return (error);
5446
5447 error = ENOENT;
5448 if (zfsvfs->z_use_sa && zp->z_is_sa)
5449 error = zfs_getextattr_sa(ap, attrname);
5450 if (error == ENOENT)
5451 error = zfs_getextattr_dir(ap, attrname);
5452 return (error);
5453 }
5454
5455 /*
5456 * Vnode operation to retrieve a named extended attribute.
5457 */
5458 static int
zfs_getextattr(struct vop_getextattr_args * ap)5459 zfs_getextattr(struct vop_getextattr_args *ap)
5460 {
5461 znode_t *zp = VTOZ(ap->a_vp);
5462 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5463 int error;
5464
5465 /*
5466 * If the xattr property is off, refuse the request.
5467 */
5468 if (!(zfsvfs->z_flags & ZSB_XATTR))
5469 return (SET_ERROR(EOPNOTSUPP));
5470
5471 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5472 ap->a_cred, ap->a_td, VREAD);
5473 if (error != 0)
5474 return (SET_ERROR(error));
5475
5476 error = zfs_check_attrname(ap->a_name);
5477 if (error != 0)
5478 return (error);
5479
5480 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5481 return (error);
5482 error = ENOENT;
5483 rw_enter(&zp->z_xattr_lock, RW_READER);
5484
5485 error = zfs_getextattr_impl(ap, zfs_xattr_compat);
5486 if ((error == ENOENT || error == ENOATTR) &&
5487 ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5488 /*
5489 * Fall back to the alternate namespace format if we failed to
5490 * find a user xattr.
5491 */
5492 error = zfs_getextattr_impl(ap, !zfs_xattr_compat);
5493 }
5494
5495 rw_exit(&zp->z_xattr_lock);
5496 zfs_exit(zfsvfs, FTAG);
5497 if (error == ENOENT)
5498 error = SET_ERROR(ENOATTR);
5499 return (error);
5500 }
5501
5502 #ifndef _SYS_SYSPROTO_H_
5503 struct vop_deleteextattr {
5504 IN struct vnode *a_vp;
5505 IN int a_attrnamespace;
5506 IN const char *a_name;
5507 IN struct ucred *a_cred;
5508 IN struct thread *a_td;
5509 };
5510 #endif
5511
/*
 * Remove an extended attribute that is stored as a file in the vnode's xattr
 * directory.
 *
 * The xattr directory is looked up with LOOKUP_XATTR, `attrname' is resolved
 * relative to it with a DELETE lookup that locks both parent and leaf, and
 * the entry is removed with VOP_REMOVE().
 *
 * Returns 0 or the first error from the lookup, namei() or VOP_REMOVE().
 */
static int
zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
{
	struct nameidata nd;
	vnode_t *xvp = NULL, *vp;
	int error;

	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
	    LOOKUP_XATTR, B_FALSE);
	if (error != 0)
		return (error);

	/*
	 * NOTE(review): xvp is never released explicitly in this function;
	 * presumably namei() consumes the start-directory reference.
	 * Confirm against the kernel's namei() behaviour.
	 */
#if __FreeBSD_version < 1400043
	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
	    UIO_SYSSPACE, attrname, xvp, ap->a_td);
#else
	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
	    UIO_SYSSPACE, attrname, xvp);
#endif
	error = namei(&nd);
	if (error != 0)
		return (SET_ERROR(error));

	vp = nd.ni_vp;
	error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
	NDFREE_PNBUF(&nd);

	/*
	 * Release parent and leaf.  When both are the same vnode it is
	 * unlocked only once (by the vput of the parent) and the leaf's
	 * reference is dropped with vrele().
	 */
	vput(nd.ni_dvp);
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);

	return (error);
}
5547
5548 static int
zfs_deleteextattr_sa(struct vop_deleteextattr_args * ap,const char * attrname)5549 zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
5550 {
5551 znode_t *zp = VTOZ(ap->a_vp);
5552 nvlist_t *nvl;
5553 int error;
5554
5555 error = zfs_ensure_xattr_cached(zp);
5556 if (error != 0)
5557 return (error);
5558
5559 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5560 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5561
5562 nvl = zp->z_xattr_cached;
5563 error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
5564 if (error != 0)
5565 error = SET_ERROR(error);
5566 else
5567 error = zfs_sa_set_xattr(zp, attrname, NULL, 0);
5568 if (error != 0) {
5569 zp->z_xattr_cached = NULL;
5570 nvlist_free(nvl);
5571 }
5572 return (error);
5573 }
5574
5575 static int
zfs_deleteextattr_impl(struct vop_deleteextattr_args * ap,boolean_t compat)5576 zfs_deleteextattr_impl(struct vop_deleteextattr_args *ap, boolean_t compat)
5577 {
5578 znode_t *zp = VTOZ(ap->a_vp);
5579 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5580 char attrname[EXTATTR_MAXNAMELEN+1];
5581 int error;
5582
5583 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5584 sizeof (attrname), compat);
5585 if (error != 0)
5586 return (error);
5587
5588 error = ENOENT;
5589 if (zfsvfs->z_use_sa && zp->z_is_sa)
5590 error = zfs_deleteextattr_sa(ap, attrname);
5591 if (error == ENOENT)
5592 error = zfs_deleteextattr_dir(ap, attrname);
5593 return (error);
5594 }
5595
5596 /*
5597 * Vnode operation to remove a named attribute.
5598 */
5599 static int
zfs_deleteextattr(struct vop_deleteextattr_args * ap)5600 zfs_deleteextattr(struct vop_deleteextattr_args *ap)
5601 {
5602 znode_t *zp = VTOZ(ap->a_vp);
5603 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5604 int error;
5605
5606 /*
5607 * If the xattr property is off, refuse the request.
5608 */
5609 if (!(zfsvfs->z_flags & ZSB_XATTR))
5610 return (SET_ERROR(EOPNOTSUPP));
5611
5612 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5613 ap->a_cred, ap->a_td, VWRITE);
5614 if (error != 0)
5615 return (SET_ERROR(error));
5616
5617 error = zfs_check_attrname(ap->a_name);
5618 if (error != 0)
5619 return (error);
5620
5621 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5622 return (error);
5623 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5624
5625 error = zfs_deleteextattr_impl(ap, zfs_xattr_compat);
5626 if ((error == ENOENT || error == ENOATTR) &&
5627 ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5628 /*
5629 * Fall back to the alternate namespace format if we failed to
5630 * find a user xattr.
5631 */
5632 error = zfs_deleteextattr_impl(ap, !zfs_xattr_compat);
5633 }
5634
5635 rw_exit(&zp->z_xattr_lock);
5636 zfs_exit(zfsvfs, FTAG);
5637 if (error == ENOENT)
5638 error = SET_ERROR(ENOATTR);
5639 return (error);
5640 }
5641
5642 #ifndef _SYS_SYSPROTO_H_
5643 struct vop_setextattr {
5644 IN struct vnode *a_vp;
5645 IN int a_attrnamespace;
5646 IN const char *a_name;
5647 INOUT struct uio *a_uio;
5648 IN struct ucred *a_cred;
5649 IN struct thread *a_td;
5650 };
5651 #endif
5652
5653 static int
zfs_setextattr_dir(struct vop_setextattr_args * ap,const char * attrname)5654 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
5655 {
5656 struct thread *td = ap->a_td;
5657 struct nameidata nd;
5658 struct vattr va;
5659 vnode_t *xvp = NULL, *vp;
5660 int error, flags;
5661
5662 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5663 LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE);
5664 if (error != 0)
5665 return (error);
5666
5667 flags = FFLAGS(O_WRONLY | O_CREAT);
5668 #if __FreeBSD_version < 1400043
5669 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td);
5670 #else
5671 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5672 #endif
5673 error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
5674 NULL);
5675 if (error != 0)
5676 return (SET_ERROR(error));
5677 vp = nd.ni_vp;
5678 NDFREE_PNBUF(&nd);
5679
5680 VATTR_NULL(&va);
5681 va.va_size = 0;
5682 error = VOP_SETATTR(vp, &va, ap->a_cred);
5683 if (error == 0)
5684 VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5685
5686 VOP_UNLOCK1(vp);
5687 vn_close(vp, flags, ap->a_cred, td);
5688 return (error);
5689 }
5690
5691 static int
zfs_setextattr_sa(struct vop_setextattr_args * ap,const char * attrname)5692 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
5693 {
5694 znode_t *zp = VTOZ(ap->a_vp);
5695 nvlist_t *nvl;
5696 size_t sa_size;
5697 int error;
5698
5699 error = zfs_ensure_xattr_cached(zp);
5700 if (error != 0)
5701 return (error);
5702
5703 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5704 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5705
5706 nvl = zp->z_xattr_cached;
5707 size_t entry_size = ap->a_uio->uio_resid;
5708 if (entry_size > DXATTR_MAX_ENTRY_SIZE)
5709 return (SET_ERROR(EFBIG));
5710 error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
5711 if (error != 0)
5712 return (SET_ERROR(error));
5713 if (sa_size > DXATTR_MAX_SA_SIZE)
5714 return (SET_ERROR(EFBIG));
5715 uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP);
5716 error = uiomove(buf, entry_size, ap->a_uio);
5717 if (error != 0) {
5718 error = SET_ERROR(error);
5719 } else {
5720 error = nvlist_add_byte_array(nvl, attrname, buf, entry_size);
5721 if (error != 0)
5722 error = SET_ERROR(error);
5723 }
5724 if (error == 0)
5725 error = zfs_sa_set_xattr(zp, attrname, buf, entry_size);
5726 kmem_free(buf, entry_size);
5727 if (error != 0) {
5728 zp->z_xattr_cached = NULL;
5729 nvlist_free(nvl);
5730 }
5731 return (error);
5732 }
5733
5734 static int
zfs_setextattr_impl(struct vop_setextattr_args * ap,boolean_t compat)5735 zfs_setextattr_impl(struct vop_setextattr_args *ap, boolean_t compat)
5736 {
5737 znode_t *zp = VTOZ(ap->a_vp);
5738 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5739 char attrname[EXTATTR_MAXNAMELEN+1];
5740 int error;
5741
5742 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5743 sizeof (attrname), compat);
5744 if (error != 0)
5745 return (error);
5746
5747 struct vop_deleteextattr_args vda = {
5748 .a_vp = ap->a_vp,
5749 .a_attrnamespace = ap->a_attrnamespace,
5750 .a_name = ap->a_name,
5751 .a_cred = ap->a_cred,
5752 .a_td = ap->a_td,
5753 };
5754 error = ENOENT;
5755 if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) {
5756 error = zfs_setextattr_sa(ap, attrname);
5757 if (error == 0) {
5758 /*
5759 * Successfully put into SA, we need to clear the one
5760 * in dir if present.
5761 */
5762 zfs_deleteextattr_dir(&vda, attrname);
5763 }
5764 }
5765 if (error != 0) {
5766 error = zfs_setextattr_dir(ap, attrname);
5767 if (error == 0 && zp->z_is_sa) {
5768 /*
5769 * Successfully put into dir, we need to clear the one
5770 * in SA if present.
5771 */
5772 zfs_deleteextattr_sa(&vda, attrname);
5773 }
5774 }
5775 if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5776 /*
5777 * Also clear all versions of the alternate compat name.
5778 */
5779 zfs_deleteextattr_impl(&vda, !compat);
5780 }
5781 return (error);
5782 }
5783
5784 /*
5785 * Vnode operation to set a named attribute.
5786 */
5787 static int
zfs_setextattr(struct vop_setextattr_args * ap)5788 zfs_setextattr(struct vop_setextattr_args *ap)
5789 {
5790 znode_t *zp = VTOZ(ap->a_vp);
5791 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5792 int error;
5793
5794 /*
5795 * If the xattr property is off, refuse the request.
5796 */
5797 if (!(zfsvfs->z_flags & ZSB_XATTR))
5798 return (SET_ERROR(EOPNOTSUPP));
5799
5800 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5801 ap->a_cred, ap->a_td, VWRITE);
5802 if (error != 0)
5803 return (SET_ERROR(error));
5804
5805 error = zfs_check_attrname(ap->a_name);
5806 if (error != 0)
5807 return (error);
5808
5809 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5810 return (error);
5811 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5812
5813 error = zfs_setextattr_impl(ap, zfs_xattr_compat);
5814
5815 rw_exit(&zp->z_xattr_lock);
5816 zfs_exit(zfsvfs, FTAG);
5817 return (error);
5818 }
5819
5820 #ifndef _SYS_SYSPROTO_H_
5821 struct vop_listextattr {
5822 IN struct vnode *a_vp;
5823 IN int a_attrnamespace;
5824 INOUT struct uio *a_uio;
5825 OUT size_t *a_size;
5826 IN struct ucred *a_cred;
5827 IN struct thread *a_td;
5828 };
5829 #endif
5830
5831 static int
zfs_listextattr_dir(struct vop_listextattr_args * ap,const char * attrprefix)5832 zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
5833 {
5834 struct thread *td = ap->a_td;
5835 struct nameidata nd;
5836 uint8_t dirbuf[sizeof (struct dirent)];
5837 struct iovec aiov;
5838 struct uio auio;
5839 vnode_t *xvp = NULL, *vp;
5840 int error, eof;
5841
5842 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5843 LOOKUP_XATTR, B_FALSE);
5844 if (error != 0) {
5845 /*
5846 * ENOATTR means that the EA directory does not yet exist,
5847 * i.e. there are no extended attributes there.
5848 */
5849 if (error == ENOATTR)
5850 error = 0;
5851 return (error);
5852 }
5853
5854 #if __FreeBSD_version < 1400043
5855 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5856 UIO_SYSSPACE, ".", xvp, td);
5857 #else
5858 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5859 UIO_SYSSPACE, ".", xvp);
5860 #endif
5861 error = namei(&nd);
5862 if (error != 0)
5863 return (SET_ERROR(error));
5864 vp = nd.ni_vp;
5865 NDFREE_PNBUF(&nd);
5866
5867 auio.uio_iov = &aiov;
5868 auio.uio_iovcnt = 1;
5869 auio.uio_segflg = UIO_SYSSPACE;
5870 auio.uio_td = td;
5871 auio.uio_rw = UIO_READ;
5872 auio.uio_offset = 0;
5873
5874 size_t plen = strlen(attrprefix);
5875
5876 do {
5877 aiov.iov_base = (void *)dirbuf;
5878 aiov.iov_len = sizeof (dirbuf);
5879 auio.uio_resid = sizeof (dirbuf);
5880 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
5881 if (error != 0)
5882 break;
5883 int done = sizeof (dirbuf) - auio.uio_resid;
5884 for (int pos = 0; pos < done; ) {
5885 struct dirent *dp = (struct dirent *)(dirbuf + pos);
5886 pos += dp->d_reclen;
5887 /*
5888 * XXX: Temporarily we also accept DT_UNKNOWN, as this
5889 * is what we get when attribute was created on Solaris.
5890 */
5891 if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
5892 continue;
5893 else if (plen == 0 &&
5894 ZFS_XA_NS_PREFIX_FORBIDDEN(dp->d_name))
5895 continue;
5896 else if (strncmp(dp->d_name, attrprefix, plen) != 0)
5897 continue;
5898 uint8_t nlen = dp->d_namlen - plen;
5899 if (ap->a_size != NULL) {
5900 *ap->a_size += 1 + nlen;
5901 } else if (ap->a_uio != NULL) {
5902 /*
5903 * Format of extattr name entry is one byte for
5904 * length and the rest for name.
5905 */
5906 error = uiomove(&nlen, 1, ap->a_uio);
5907 if (error == 0) {
5908 char *namep = dp->d_name + plen;
5909 error = uiomove(namep, nlen, ap->a_uio);
5910 }
5911 if (error != 0) {
5912 error = SET_ERROR(error);
5913 break;
5914 }
5915 }
5916 }
5917 } while (!eof && error == 0);
5918
5919 vput(vp);
5920 return (error);
5921 }
5922
5923 static int
zfs_listextattr_sa(struct vop_listextattr_args * ap,const char * attrprefix)5924 zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
5925 {
5926 znode_t *zp = VTOZ(ap->a_vp);
5927 int error;
5928
5929 error = zfs_ensure_xattr_cached(zp);
5930 if (error != 0)
5931 return (error);
5932
5933 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5934 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5935
5936 size_t plen = strlen(attrprefix);
5937 nvpair_t *nvp = NULL;
5938 while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
5939 ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
5940
5941 const char *name = nvpair_name(nvp);
5942 if (plen == 0 && ZFS_XA_NS_PREFIX_FORBIDDEN(name))
5943 continue;
5944 else if (strncmp(name, attrprefix, plen) != 0)
5945 continue;
5946 uint8_t nlen = strlen(name) - plen;
5947 if (ap->a_size != NULL) {
5948 *ap->a_size += 1 + nlen;
5949 } else if (ap->a_uio != NULL) {
5950 /*
5951 * Format of extattr name entry is one byte for
5952 * length and the rest for name.
5953 */
5954 error = uiomove(&nlen, 1, ap->a_uio);
5955 if (error == 0) {
5956 char *namep = __DECONST(char *, name) + plen;
5957 error = uiomove(namep, nlen, ap->a_uio);
5958 }
5959 if (error != 0) {
5960 error = SET_ERROR(error);
5961 break;
5962 }
5963 }
5964 }
5965
5966 return (error);
5967 }
5968
5969 static int
zfs_listextattr_impl(struct vop_listextattr_args * ap,boolean_t compat)5970 zfs_listextattr_impl(struct vop_listextattr_args *ap, boolean_t compat)
5971 {
5972 znode_t *zp = VTOZ(ap->a_vp);
5973 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5974 char attrprefix[16];
5975 int error;
5976
5977 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
5978 sizeof (attrprefix), compat);
5979 if (error != 0)
5980 return (error);
5981
5982 if (zfsvfs->z_use_sa && zp->z_is_sa)
5983 error = zfs_listextattr_sa(ap, attrprefix);
5984 if (error == 0)
5985 error = zfs_listextattr_dir(ap, attrprefix);
5986 return (error);
5987 }
5988
5989 /*
5990 * Vnode operation to retrieve extended attributes on a vnode.
5991 */
5992 static int
zfs_listextattr(struct vop_listextattr_args * ap)5993 zfs_listextattr(struct vop_listextattr_args *ap)
5994 {
5995 znode_t *zp = VTOZ(ap->a_vp);
5996 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5997 int error;
5998
5999 if (ap->a_size != NULL)
6000 *ap->a_size = 0;
6001
6002 /*
6003 * If the xattr property is off, refuse the request.
6004 */
6005 if (!(zfsvfs->z_flags & ZSB_XATTR))
6006 return (SET_ERROR(EOPNOTSUPP));
6007
6008 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
6009 ap->a_cred, ap->a_td, VREAD);
6010 if (error != 0)
6011 return (SET_ERROR(error));
6012
6013 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
6014 return (error);
6015 rw_enter(&zp->z_xattr_lock, RW_READER);
6016
6017 error = zfs_listextattr_impl(ap, zfs_xattr_compat);
6018 if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
6019 /* Also list user xattrs with the alternate format. */
6020 error = zfs_listextattr_impl(ap, !zfs_xattr_compat);
6021 }
6022
6023 rw_exit(&zp->z_xattr_lock);
6024 zfs_exit(zfsvfs, FTAG);
6025 return (error);
6026 }
6027
6028 #ifndef _SYS_SYSPROTO_H_
6029 struct vop_getacl_args {
6030 struct vnode *vp;
6031 acl_type_t type;
6032 struct acl *aclp;
6033 struct ucred *cred;
6034 struct thread *td;
6035 };
6036 #endif
6037
6038 static int
zfs_freebsd_getacl(struct vop_getacl_args * ap)6039 zfs_freebsd_getacl(struct vop_getacl_args *ap)
6040 {
6041 int error;
6042 vsecattr_t vsecattr;
6043
6044 if (ap->a_type != ACL_TYPE_NFS4)
6045 return (EINVAL);
6046
6047 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
6048 if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
6049 &vsecattr, 0, ap->a_cred)))
6050 return (error);
6051
6052 error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
6053 vsecattr.vsa_aclcnt);
6054 if (vsecattr.vsa_aclentp != NULL)
6055 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);
6056
6057 return (error);
6058 }
6059
6060 #ifndef _SYS_SYSPROTO_H_
6061 struct vop_setacl_args {
6062 struct vnode *vp;
6063 acl_type_t type;
6064 struct acl *aclp;
6065 struct ucred *cred;
6066 struct thread *td;
6067 };
6068 #endif
6069
6070 static int
zfs_freebsd_setacl(struct vop_setacl_args * ap)6071 zfs_freebsd_setacl(struct vop_setacl_args *ap)
6072 {
6073 int error;
6074 vsecattr_t vsecattr;
6075 int aclbsize; /* size of acl list in bytes */
6076 aclent_t *aaclp;
6077
6078 if (ap->a_type != ACL_TYPE_NFS4)
6079 return (EINVAL);
6080
6081 if (ap->a_aclp == NULL)
6082 return (EINVAL);
6083
6084 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
6085 return (EINVAL);
6086
6087 /*
6088 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
6089 * splitting every entry into two and appending "canonical six"
6090 * entries at the end. Don't allow for setting an ACL that would
6091 * cause chmod(2) to run out of ACL entries.
6092 */
6093 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
6094 return (ENOSPC);
6095
6096 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
6097 if (error != 0)
6098 return (error);
6099
6100 vsecattr.vsa_mask = VSA_ACE;
6101 aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t);
6102 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
6103 aaclp = vsecattr.vsa_aclentp;
6104 vsecattr.vsa_aclentsz = aclbsize;
6105
6106 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
6107 error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred);
6108 kmem_free(aaclp, aclbsize);
6109
6110 return (error);
6111 }
6112
6113 #ifndef _SYS_SYSPROTO_H_
6114 struct vop_aclcheck_args {
6115 struct vnode *vp;
6116 acl_type_t type;
6117 struct acl *aclp;
6118 struct ucred *cred;
6119 struct thread *td;
6120 };
6121 #endif
6122
/*
 * Vnode operation to check an ACL.  The arguments are not examined; the
 * operation always returns EOPNOTSUPP.
 */
static int
zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
{
	(void) ap;

	return (EOPNOTSUPP);
}
6129
6130 static int
zfs_vptocnp(struct vop_vptocnp_args * ap)6131 zfs_vptocnp(struct vop_vptocnp_args *ap)
6132 {
6133 vnode_t *covered_vp;
6134 vnode_t *vp = ap->a_vp;
6135 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
6136 znode_t *zp = VTOZ(vp);
6137 int ltype;
6138 int error;
6139
6140 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
6141 return (error);
6142
6143 /*
6144 * If we are a snapshot mounted under .zfs, run the operation
6145 * on the covered vnode.
6146 */
6147 if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
6148 char name[MAXNAMLEN + 1];
6149 znode_t *dzp;
6150 size_t len;
6151
6152 error = zfs_znode_parent_and_name(zp, &dzp, name);
6153 if (error == 0) {
6154 len = strlen(name);
6155 if (*ap->a_buflen < len)
6156 error = SET_ERROR(ENOMEM);
6157 }
6158 if (error == 0) {
6159 *ap->a_buflen -= len;
6160 memcpy(ap->a_buf + *ap->a_buflen, name, len);
6161 *ap->a_vpp = ZTOV(dzp);
6162 }
6163 zfs_exit(zfsvfs, FTAG);
6164 return (error);
6165 }
6166 zfs_exit(zfsvfs, FTAG);
6167
6168 covered_vp = vp->v_mount->mnt_vnodecovered;
6169 #if __FreeBSD_version >= 1300045
6170 enum vgetstate vs = vget_prep(covered_vp);
6171 #else
6172 vhold(covered_vp);
6173 #endif
6174 ltype = VOP_ISLOCKED(vp);
6175 VOP_UNLOCK1(vp);
6176 #if __FreeBSD_version >= 1300045
6177 error = vget_finish(covered_vp, LK_SHARED, vs);
6178 #else
6179 error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread);
6180 #endif
6181 if (error == 0) {
6182 #if __FreeBSD_version >= 1300123
6183 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf,
6184 ap->a_buflen);
6185 #else
6186 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred,
6187 ap->a_buf, ap->a_buflen);
6188 #endif
6189 vput(covered_vp);
6190 }
6191 vn_lock(vp, ltype | LK_RETRY);
6192 if (VN_IS_DOOMED(vp))
6193 error = SET_ERROR(ENOENT);
6194 return (error);
6195 }
6196
#if __FreeBSD_version >= 1400032
/*
 * Vnode operation to deallocate (free) a byte range of a file.
 *
 * The requested range (*ap->a_offset, *ap->a_len) is clipped to the
 * current file size.  An empty or out-of-range request succeeds at once
 * with *ap->a_len set to 0.  Otherwise the range is released with
 * zfs_freesp(); on success the ZIL is committed when the dataset is
 * sync=always or IO_SYNC was requested, *ap->a_offset is advanced past
 * the freed range and *ap->a_len is set to 0.
 *
 * Returns 0 on success, EROFS on a read-only filesystem, or another
 * errno value.
 */
static int
zfs_deallocate(struct vop_deallocate_args *ap)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zilog_t *zilog;
	off_t start, span, fsize;
	int rc;

	rc = zfs_enter_verify_zp(zfsvfs, zp, FTAG);
	if (rc != 0)
		return (rc);

	/*
	 * Callers might not be able to detect properly that we are read-only,
	 * so check it explicitly here.
	 */
	if (zfs_is_readonly(zfsvfs)) {
		rc = SET_ERROR(EROFS);
		goto out;
	}

	zilog = zfsvfs->z_log;
	start = *ap->a_offset;
	span = *ap->a_len;
	fsize = zp->z_size;
	if (start + span > fsize)
		span = fsize - start;

	/* Fast path for out-of-range request; rc is still 0 here. */
	if (span <= 0) {
		*ap->a_len = 0;
		goto out;
	}

	rc = zfs_freesp(zp, start, span, O_RDWR, TRUE);
	if (rc != 0)
		goto out;

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS ||
	    (ap->a_ioflag & IO_SYNC) != 0)
		zil_commit(zilog, zp->z_id);
	*ap->a_offset = start + span;
	*ap->a_len = 0;

out:
	zfs_exit(zfsvfs, FTAG);
	return (rc);
}
#endif
6245
#if __FreeBSD_version >= 1300039
#ifndef _SYS_SYSPROTO_H_
/*
 * Documentation of the argument block received by
 * zfs_freebsd_copy_file_range().  Only compiled when _SYS_SYSPROTO_H_ is
 * not defined.
 */
struct vop_copy_file_range_args {
	struct vnode *a_invp;
	off_t *a_inoffp;
	struct vnode *a_outvp;
	off_t *a_outoffp;
	size_t *a_lenp;
	unsigned int a_flags;
	struct ucred *a_incred;
	struct ucred *a_outcred;
	struct thread *a_fsizetd;
};
#endif
/*
 * Vnode operation to copy a byte range between two files by cloning
 * blocks with zfs_clone_range().
 *
 * ENOSYS is returned on every fallback path: block cloning disabled
 * (zfs_bclone_enabled), the block-cloning pool feature not enabled,
 * failure to lock the vnodes, a doomed vnode, or zfs_clone_range()
 * failing with EXDEV, EAGAIN, EINVAL or EOPNOTSUPP.
 * NOTE(review): presumably the caller reacts to ENOSYS by using the
 * generic copy implementation - confirm against the VFS layer.
 *
 * On the non-fallback path *ap->a_lenp is updated with the length reported
 * back by zfs_clone_range() and its error (or the MAC check's error) is
 * returned.
 *
 * TODO: FreeBSD will only call file system-specific copy_file_range() if both
 * files reside under the same mountpoint. In case of ZFS we want to be called
 * even if files are in different datasets (but on the same pools, but we need
 * to check that ourselves).
 */
static int
zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
{
	zfsvfs_t *outzfsvfs;
	struct vnode *invp = ap->a_invp;
	struct vnode *outvp = ap->a_outvp;
	struct mount *mp;
	int error;
	uint64_t len = *ap->a_lenp;

	if (!zfs_bclone_enabled) {
		/* No write was started, so there is nothing to finish. */
		mp = NULL;
		goto bad_write_fallback;
	}

	/*
	 * TODO: If offset/length is not aligned to recordsize, use
	 * vn_generic_copy_file_range() on this fragment.
	 * It would be better to do this after we lock the vnodes, but then we
	 * need something else than vn_generic_copy_file_range().
	 */

	vn_start_write(outvp, &mp, V_WAIT);
	if (__predict_true(mp == outvp->v_mount)) {
		outzfsvfs = (zfsvfs_t *)mp->mnt_data;
		if (!spa_feature_is_enabled(dmu_objset_spa(outzfsvfs->z_os),
		    SPA_FEATURE_BLOCK_CLONING)) {
			goto bad_write_fallback;
		}
	}
	if (invp == outvp) {
		/* Same file: a single exclusive lock covers both sides. */
		if (vn_lock(outvp, LK_EXCLUSIVE) != 0) {
			goto bad_write_fallback;
		}
	} else {
#if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \
	__FreeBSD_version >= 1400086
		vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false,
		    LK_EXCLUSIVE);
#else
		vn_lock_pair(invp, false, outvp, false);
#endif
		if (VN_IS_DOOMED(invp) || VN_IS_DOOMED(outvp)) {
			goto bad_locked_fallback;
		}
	}

#ifdef MAC
	error = mac_vnode_check_write(curthread->td_ucred, ap->a_outcred,
	    outvp);
	if (error != 0)
		goto out_locked;
#endif

	error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp),
	    ap->a_outoffp, &len, ap->a_outcred);
	if (error == EXDEV || error == EAGAIN || error == EINVAL ||
	    error == EOPNOTSUPP)
		goto bad_locked_fallback;
	*ap->a_lenp = (size_t)len;
#ifdef MAC
out_locked:
#endif
	if (invp != outvp)
		VOP_UNLOCK(invp);
	VOP_UNLOCK(outvp);
	if (mp != NULL)
		vn_finished_write(mp);
	return (error);

bad_locked_fallback:
	if (invp != outvp)
		VOP_UNLOCK(invp);
	VOP_UNLOCK(outvp);
bad_write_fallback:
	if (mp != NULL)
		vn_finished_write(mp);
	error = ENOSYS;
	return (error);
}
#endif
6347
6348 struct vop_vector zfs_vnodeops;
6349 struct vop_vector zfs_fifoops;
6350 struct vop_vector zfs_shareops;
6351
6352 struct vop_vector zfs_vnodeops = {
6353 .vop_default = &default_vnodeops,
6354 .vop_inactive = zfs_freebsd_inactive,
6355 #if __FreeBSD_version >= 1300042
6356 .vop_need_inactive = zfs_freebsd_need_inactive,
6357 #endif
6358 .vop_reclaim = zfs_freebsd_reclaim,
6359 #if __FreeBSD_version >= 1300102
6360 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
6361 #endif
6362 #if __FreeBSD_version >= 1300139
6363 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6364 #endif
6365 .vop_access = zfs_freebsd_access,
6366 .vop_allocate = VOP_EINVAL,
6367 #if __FreeBSD_version >= 1400032
6368 .vop_deallocate = zfs_deallocate,
6369 #endif
6370 .vop_lookup = zfs_cache_lookup,
6371 .vop_cachedlookup = zfs_freebsd_cachedlookup,
6372 .vop_getattr = zfs_freebsd_getattr,
6373 .vop_setattr = zfs_freebsd_setattr,
6374 .vop_create = zfs_freebsd_create,
6375 .vop_mknod = (vop_mknod_t *)zfs_freebsd_create,
6376 .vop_mkdir = zfs_freebsd_mkdir,
6377 .vop_readdir = zfs_freebsd_readdir,
6378 .vop_fsync = zfs_freebsd_fsync,
6379 .vop_open = zfs_freebsd_open,
6380 .vop_close = zfs_freebsd_close,
6381 .vop_rmdir = zfs_freebsd_rmdir,
6382 .vop_ioctl = zfs_freebsd_ioctl,
6383 .vop_link = zfs_freebsd_link,
6384 .vop_symlink = zfs_freebsd_symlink,
6385 .vop_readlink = zfs_freebsd_readlink,
6386 .vop_read = zfs_freebsd_read,
6387 .vop_write = zfs_freebsd_write,
6388 .vop_remove = zfs_freebsd_remove,
6389 .vop_rename = zfs_freebsd_rename,
6390 .vop_pathconf = zfs_freebsd_pathconf,
6391 .vop_bmap = zfs_freebsd_bmap,
6392 .vop_fid = zfs_freebsd_fid,
6393 .vop_getextattr = zfs_getextattr,
6394 .vop_deleteextattr = zfs_deleteextattr,
6395 .vop_setextattr = zfs_setextattr,
6396 .vop_listextattr = zfs_listextattr,
6397 .vop_getacl = zfs_freebsd_getacl,
6398 .vop_setacl = zfs_freebsd_setacl,
6399 .vop_aclcheck = zfs_freebsd_aclcheck,
6400 .vop_getpages = zfs_freebsd_getpages,
6401 .vop_putpages = zfs_freebsd_putpages,
6402 .vop_vptocnp = zfs_vptocnp,
6403 #if __FreeBSD_version >= 1300064
6404 .vop_lock1 = vop_lock,
6405 .vop_unlock = vop_unlock,
6406 .vop_islocked = vop_islocked,
6407 #endif
6408 #if __FreeBSD_version >= 1400043
6409 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6410 #endif
6411 #if __FreeBSD_version >= 1300039
6412 .vop_copy_file_range = zfs_freebsd_copy_file_range,
6413 #endif
6414 };
6415 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
6416
6417 struct vop_vector zfs_fifoops = {
6418 .vop_default = &fifo_specops,
6419 .vop_fsync = zfs_freebsd_fsync,
6420 #if __FreeBSD_version >= 1300102
6421 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
6422 #endif
6423 #if __FreeBSD_version >= 1300139
6424 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6425 #endif
6426 .vop_access = zfs_freebsd_access,
6427 .vop_getattr = zfs_freebsd_getattr,
6428 .vop_inactive = zfs_freebsd_inactive,
6429 .vop_read = VOP_PANIC,
6430 .vop_reclaim = zfs_freebsd_reclaim,
6431 .vop_setattr = zfs_freebsd_setattr,
6432 .vop_write = VOP_PANIC,
6433 .vop_pathconf = zfs_freebsd_pathconf,
6434 .vop_fid = zfs_freebsd_fid,
6435 .vop_getacl = zfs_freebsd_getacl,
6436 .vop_setacl = zfs_freebsd_setacl,
6437 .vop_aclcheck = zfs_freebsd_aclcheck,
6438 #if __FreeBSD_version >= 1400043
6439 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6440 #endif
6441 };
6442 VFS_VOP_VECTOR_REGISTER(zfs_fifoops);
6443
6444 /*
6445 * special share hidden files vnode operations template
6446 */
6447 struct vop_vector zfs_shareops = {
6448 .vop_default = &default_vnodeops,
6449 #if __FreeBSD_version >= 1300121
6450 .vop_fplookup_vexec = VOP_EAGAIN,
6451 #endif
6452 #if __FreeBSD_version >= 1300139
6453 .vop_fplookup_symlink = VOP_EAGAIN,
6454 #endif
6455 .vop_access = zfs_freebsd_access,
6456 .vop_inactive = zfs_freebsd_inactive,
6457 .vop_reclaim = zfs_freebsd_reclaim,
6458 .vop_fid = zfs_freebsd_fid,
6459 .vop_pathconf = zfs_freebsd_pathconf,
6460 #if __FreeBSD_version >= 1400043
6461 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6462 #endif
6463 };
6464 VFS_VOP_VECTOR_REGISTER(zfs_shareops);
6465
6466 ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW,
6467 "Use legacy ZFS xattr naming for writing new user namespace xattrs");
6468