1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2014 Integros [integros.com]
26 * Copyright 2017 Nexenta Systems, Inc.
27 */
28
29 /* Portions Copyright 2007 Jeremy Teo */
30 /* Portions Copyright 2010 Robert Milkowski */
31
32
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/time.h>
36 #include <sys/systm.h>
37 #include <sys/sysmacros.h>
38 #include <sys/resource.h>
39 #include <sys/vfs.h>
40 #include <sys/endian.h>
41 #include <sys/vm.h>
42 #include <sys/vnode.h>
43 #if __FreeBSD_version >= 1300102
44 #include <sys/smr.h>
45 #endif
46 #include <sys/dirent.h>
47 #include <sys/file.h>
48 #include <sys/stat.h>
49 #include <sys/kmem.h>
50 #include <sys/taskq.h>
51 #include <sys/uio.h>
52 #include <sys/atomic.h>
53 #include <sys/namei.h>
54 #include <sys/mman.h>
55 #include <sys/cmn_err.h>
56 #include <sys/kdb.h>
57 #include <sys/sysproto.h>
58 #include <sys/errno.h>
59 #include <sys/unistd.h>
60 #include <sys/zfs_dir.h>
61 #include <sys/zfs_ioctl.h>
62 #include <sys/fs/zfs.h>
63 #include <sys/dmu.h>
64 #include <sys/dmu_objset.h>
65 #include <sys/spa.h>
66 #include <sys/txg.h>
67 #include <sys/dbuf.h>
68 #include <sys/zap.h>
69 #include <sys/sa.h>
70 #include <sys/policy.h>
71 #include <sys/sunddi.h>
72 #include <sys/filio.h>
73 #include <sys/sid.h>
74 #include <sys/zfs_ctldir.h>
75 #include <sys/zfs_fuid.h>
76 #include <sys/zfs_quota.h>
77 #include <sys/zfs_sa.h>
78 #include <sys/zfs_rlock.h>
79 #include <sys/extdirent.h>
80 #include <sys/bio.h>
81 #include <sys/buf.h>
82 #include <sys/sched.h>
83 #include <sys/acl.h>
84 #include <sys/vmmeter.h>
85 #include <vm/vm_param.h>
86 #include <sys/zil.h>
87 #include <sys/zfs_vnops.h>
88
89 #include <vm/vm_object.h>
90
91 #include <sys/extattr.h>
92 #include <sys/priv.h>
93
/*
 * Define VN_OPEN_INVFS as an empty flag value when the included headers
 * do not provide it.
 */
#if !defined(VN_OPEN_INVFS)
#define	VN_OPEN_INVFS	0x0
#endif

/*
 * NOTE(review): presumably declares the VFS SMR state that goes with
 * <sys/smr.h> -- confirm against the header that defines this macro.
 */
VFS_SMR_DECLARE;
99
/*
 * vm_page_wire_lock()/vm_page_wire_unlock() map onto the per-page lock on
 * kernels with __FreeBSD_version below 1300047 and expand to nothing on
 * newer kernels.
 */
#if __FreeBSD_version < 1300047
#define	vm_page_wire_lock(pp)	vm_page_lock(pp)
#define	vm_page_wire_unlock(pp)	vm_page_unlock(pp)
#else
#define	vm_page_wire_lock(pp)
#define	vm_page_wire_unlock(pp)
#endif
107
/*
 * VNCHECKREF(vp) asserts, when DEBUG_VFS_LOCKS is defined, that the vnode
 * has a positive hold count and a positive use count.  Without
 * DEBUG_VFS_LOCKS it does nothing.
 *
 * Neither expansion ends in a semicolon: the caller supplies it, as in
 * "VNCHECKREF(vp);".  That way the macro is exactly one statement in both
 * configurations and can be the body of an unbraced if/else.
 */
#ifdef DEBUG_VFS_LOCKS
#define	VNCHECKREF(vp)							\
	VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp,	\
	    ("%s: wrong ref counts", __func__))
#else
#define	VNCHECKREF(vp)	((void)0)
#endif
115
116 /*
117 * Programming rules.
118 *
119 * Each vnode op performs some logical unit of work. To do this, the ZPL must
120 * properly lock its in-core state, create a DMU transaction, do the work,
121 * record this work in the intent log (ZIL), commit the DMU transaction,
122 * and wait for the intent log to commit if it is a synchronous operation.
123 * Moreover, the vnode ops must work in both normal and log replay context.
124 * The ordering of events is important to avoid deadlocks and references
125 * to freed memory. The example below illustrates the following Big Rules:
126 *
127 * (1) A check must be made in each zfs thread for a mounted file system.
128 * This is done avoiding races using ZFS_ENTER(zfsvfs).
129 * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes
130 * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros
131 * can return EIO from the calling function.
132 *
133 * (2) VN_RELE() should always be the last thing except for zil_commit()
134 * (if necessary) and ZFS_EXIT(). This is for 3 reasons:
135 * First, if it's the last reference, the vnode/znode
136 * can be freed, so the zp may point to freed memory. Second, the last
137 * reference will call zfs_zinactive(), which may induce a lot of work --
138 * pushing cached pages (which acquires range locks) and syncing out
139 * cached atime changes. Third, zfs_zinactive() may require a new tx,
140 * which could deadlock the system if you were already holding one.
141 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
142 *
143 * (3) All range locks must be grabbed before calling dmu_tx_assign(),
144 * as they can span dmu_tx_assign() calls.
145 *
146 * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
147 * dmu_tx_assign(). This is critical because we don't want to block
148 * while holding locks.
149 *
150 * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This
151 * reduces lock contention and CPU usage when we must wait (note that if
152 * throughput is constrained by the storage, nearly every transaction
153 * must wait).
154 *
155 * Note, in particular, that if a lock is sometimes acquired before
156 * the tx assigns, and sometimes after (e.g. z_lock), then failing
157 * to use a non-blocking assign can deadlock the system. The scenario:
158 *
159 * Thread A has grabbed a lock before calling dmu_tx_assign().
160 * Thread B is in an already-assigned tx, and blocks for this lock.
161 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
162 * forever, because the previous txg can't quiesce until B's tx commits.
163 *
164 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
165 * then drop all locks, call dmu_tx_wait(), and try again. On subsequent
166 * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
167 * to indicate that this operation has already called dmu_tx_wait().
168 * This will ensure that we don't retry forever, waiting a short bit
169 * each time.
170 *
171 * (5) If the operation succeeded, generate the intent log entry for it
172 * before dropping locks. This ensures that the ordering of events
173 * in the intent log matches the order in which they actually occurred.
174 * During ZIL replay the zfs_log_* functions will update the sequence
175 * number to indicate the zil transaction has replayed.
176 *
177 * (6) At the end of each vnode op, the DMU tx must always commit,
178 * regardless of whether there were any errors.
179 *
180 * (7) After dropping all locks, invoke zil_commit(zilog, foid)
181 * to ensure that synchronous semantics are provided when necessary.
182 *
183 * In general, this is how things should be ordered in each vnode op:
184 *
185 * ZFS_ENTER(zfsvfs); // exit if unmounted
186 * top:
187 * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD())
188 * rw_enter(...); // grab any other locks you need
189 * tx = dmu_tx_create(...); // get DMU tx
190 * dmu_tx_hold_*(); // hold each object you might modify
191 * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
192 * if (error) {
193 * rw_exit(...); // drop locks
194 * zfs_dirent_unlock(dl); // unlock directory entry
195 * VN_RELE(...); // release held vnodes
196 * if (error == ERESTART) {
197 * waited = B_TRUE;
198 * dmu_tx_wait(tx);
199 * dmu_tx_abort(tx);
200 * goto top;
201 * }
202 * dmu_tx_abort(tx); // abort DMU tx
203 * ZFS_EXIT(zfsvfs); // finished in zfs
204 * return (error); // really out of space
205 * }
206 * error = do_real_work(); // do whatever this VOP does
207 * if (error == 0)
208 * zfs_log_*(...); // on success, make ZIL entry
209 * dmu_tx_commit(tx); // commit DMU tx -- error or not
210 * rw_exit(...); // drop locks
211 * zfs_dirent_unlock(dl); // unlock directory entry
212 * VN_RELE(...); // release held vnodes
213 * zil_commit(zilog, foid); // synchronous when necessary
214 * ZFS_EXIT(zfsvfs); // finished in zfs
215 * return (error); // done, report error
216 */
217
218 /* ARGSUSED */
219 static int
zfs_open(vnode_t ** vpp,int flag,cred_t * cr)220 zfs_open(vnode_t **vpp, int flag, cred_t *cr)
221 {
222 znode_t *zp = VTOZ(*vpp);
223 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
224
225 ZFS_ENTER(zfsvfs);
226 ZFS_VERIFY_ZP(zp);
227
228 if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
229 ((flag & FAPPEND) == 0)) {
230 ZFS_EXIT(zfsvfs);
231 return (SET_ERROR(EPERM));
232 }
233
234 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
235 ZTOV(zp)->v_type == VREG &&
236 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
237 if (fs_vscan(*vpp, cr, 0) != 0) {
238 ZFS_EXIT(zfsvfs);
239 return (SET_ERROR(EACCES));
240 }
241 }
242
243 /* Keep a count of the synchronous opens in the znode */
244 if (flag & (FSYNC | FDSYNC))
245 atomic_inc_32(&zp->z_sync_cnt);
246
247 ZFS_EXIT(zfsvfs);
248 return (0);
249 }
250
251 /* ARGSUSED */
252 static int
zfs_close(vnode_t * vp,int flag,int count,offset_t offset,cred_t * cr)253 zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
254 {
255 znode_t *zp = VTOZ(vp);
256 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
257
258 ZFS_ENTER(zfsvfs);
259 ZFS_VERIFY_ZP(zp);
260
261 /* Decrement the synchronous opens in the znode */
262 if ((flag & (FSYNC | FDSYNC)) && (count == 1))
263 atomic_dec_32(&zp->z_sync_cnt);
264
265 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
266 ZTOV(zp)->v_type == VREG &&
267 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
268 VERIFY(fs_vscan(vp, cr, 1) == 0);
269
270 ZFS_EXIT(zfsvfs);
271 return (0);
272 }
273
274 /* ARGSUSED */
275 static int
zfs_ioctl(vnode_t * vp,ulong_t com,intptr_t data,int flag,cred_t * cred,int * rvalp)276 zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
277 int *rvalp)
278 {
279 loff_t off;
280 int error;
281
282 switch (com) {
283 case _FIOFFS:
284 {
285 return (0);
286
287 /*
288 * The following two ioctls are used by bfu. Faking out,
289 * necessary to avoid bfu errors.
290 */
291 }
292 case _FIOGDIO:
293 case _FIOSDIO:
294 {
295 return (0);
296 }
297
298 case F_SEEK_DATA:
299 case F_SEEK_HOLE:
300 {
301 off = *(offset_t *)data;
302 /* offset parameter is in/out */
303 error = zfs_holey(VTOZ(vp), com, &off);
304 if (error)
305 return (error);
306 *(offset_t *)data = off;
307 return (0);
308 }
309 }
310 return (SET_ERROR(ENOTTY));
311 }
312
313 static vm_page_t
page_busy(vnode_t * vp,int64_t start,int64_t off,int64_t nbytes)314 page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
315 {
316 vm_object_t obj;
317 vm_page_t pp;
318 int64_t end;
319
320 /*
321 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
322 * aligned boundaries, if the range is not aligned. As a result a
323 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
324 * It may happen that all DEV_BSIZE subranges are marked clean and thus
325 * the whole page would be considered clean despite have some
326 * dirty data.
327 * For this reason we should shrink the range to DEV_BSIZE aligned
328 * boundaries before calling vm_page_clear_dirty.
329 */
330 end = rounddown2(off + nbytes, DEV_BSIZE);
331 off = roundup2(off, DEV_BSIZE);
332 nbytes = end - off;
333
334 obj = vp->v_object;
335 zfs_vmobject_assert_wlocked_12(obj);
336 #if __FreeBSD_version < 1300050
337 for (;;) {
338 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
339 pp->valid) {
340 if (vm_page_xbusied(pp)) {
341 /*
342 * Reference the page before unlocking and
343 * sleeping so that the page daemon is less
344 * likely to reclaim it.
345 */
346 vm_page_reference(pp);
347 vm_page_lock(pp);
348 zfs_vmobject_wunlock(obj);
349 vm_page_busy_sleep(pp, "zfsmwb", true);
350 zfs_vmobject_wlock(obj);
351 continue;
352 }
353 vm_page_sbusy(pp);
354 } else if (pp != NULL) {
355 ASSERT(!pp->valid);
356 pp = NULL;
357 }
358 if (pp != NULL) {
359 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
360 vm_object_pip_add(obj, 1);
361 pmap_remove_write(pp);
362 if (nbytes != 0)
363 vm_page_clear_dirty(pp, off, nbytes);
364 }
365 break;
366 }
367 #else
368 vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
369 VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
370 VM_ALLOC_IGN_SBUSY);
371 if (pp != NULL) {
372 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
373 vm_object_pip_add(obj, 1);
374 pmap_remove_write(pp);
375 if (nbytes != 0)
376 vm_page_clear_dirty(pp, off, nbytes);
377 }
378 #endif
379 return (pp);
380 }
381
382 static void
page_unbusy(vm_page_t pp)383 page_unbusy(vm_page_t pp)
384 {
385
386 vm_page_sunbusy(pp);
387 #if __FreeBSD_version >= 1300041
388 vm_object_pip_wakeup(pp->object);
389 #else
390 vm_object_pip_subtract(pp->object, 1);
391 #endif
392 }
393
394 #if __FreeBSD_version > 1300051
395 static vm_page_t
page_hold(vnode_t * vp,int64_t start)396 page_hold(vnode_t *vp, int64_t start)
397 {
398 vm_object_t obj;
399 vm_page_t m;
400
401 obj = vp->v_object;
402 vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
403 VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
404 VM_ALLOC_NOBUSY);
405 return (m);
406 }
407 #else
408 static vm_page_t
page_hold(vnode_t * vp,int64_t start)409 page_hold(vnode_t *vp, int64_t start)
410 {
411 vm_object_t obj;
412 vm_page_t pp;
413
414 obj = vp->v_object;
415 zfs_vmobject_assert_wlocked(obj);
416
417 for (;;) {
418 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
419 pp->valid) {
420 if (vm_page_xbusied(pp)) {
421 /*
422 * Reference the page before unlocking and
423 * sleeping so that the page daemon is less
424 * likely to reclaim it.
425 */
426 vm_page_reference(pp);
427 vm_page_lock(pp);
428 zfs_vmobject_wunlock(obj);
429 vm_page_busy_sleep(pp, "zfsmwb", true);
430 zfs_vmobject_wlock(obj);
431 continue;
432 }
433
434 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
435 vm_page_wire_lock(pp);
436 vm_page_hold(pp);
437 vm_page_wire_unlock(pp);
438
439 } else
440 pp = NULL;
441 break;
442 }
443 return (pp);
444 }
445 #endif
446
447 static void
page_unhold(vm_page_t pp)448 page_unhold(vm_page_t pp)
449 {
450
451 vm_page_wire_lock(pp);
452 #if __FreeBSD_version >= 1300035
453 vm_page_unwire(pp, PQ_ACTIVE);
454 #else
455 vm_page_unhold(pp);
456 #endif
457 vm_page_wire_unlock(pp);
458 }
459
460 /*
461 * When a file is memory mapped, we must keep the IO data synchronized
462 * between the DMU cache and the memory mapped pages. What this means:
463 *
464 * On Write: If we find a memory mapped page, we write to *both*
465 * the page and the dmu buffer.
466 */
467 void
update_pages(znode_t * zp,int64_t start,int len,objset_t * os)468 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
469 {
470 vm_object_t obj;
471 struct sf_buf *sf;
472 vnode_t *vp = ZTOV(zp);
473 caddr_t va;
474 int off;
475
476 ASSERT(vp->v_mount != NULL);
477 obj = vp->v_object;
478 ASSERT(obj != NULL);
479
480 off = start & PAGEOFFSET;
481 zfs_vmobject_wlock_12(obj);
482 #if __FreeBSD_version >= 1300041
483 vm_object_pip_add(obj, 1);
484 #endif
485 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
486 vm_page_t pp;
487 int nbytes = imin(PAGESIZE - off, len);
488
489 if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
490 zfs_vmobject_wunlock_12(obj);
491
492 va = zfs_map_page(pp, &sf);
493 (void) dmu_read(os, zp->z_id, start + off, nbytes,
494 va + off, DMU_READ_PREFETCH);
495 zfs_unmap_page(sf);
496
497 zfs_vmobject_wlock_12(obj);
498 page_unbusy(pp);
499 }
500 len -= nbytes;
501 off = 0;
502 }
503 #if __FreeBSD_version >= 1300041
504 vm_object_pip_wakeup(obj);
505 #else
506 vm_object_pip_wakeupn(obj, 0);
507 #endif
508 zfs_vmobject_wunlock_12(obj);
509 }
510
511 /*
512 * Read with UIO_NOCOPY flag means that sendfile(2) requests
513 * ZFS to populate a range of page cache pages with data.
514 *
515 * NOTE: this function could be optimized to pre-allocate
516 * all pages in advance, drain exclusive busy on all of them,
517 * map them into contiguous KVA region and populate them
518 * in one single dmu_read() call.
519 */
520 int
mappedread_sf(znode_t * zp,int nbytes,uio_t * uio)521 mappedread_sf(znode_t *zp, int nbytes, uio_t *uio)
522 {
523 vnode_t *vp = ZTOV(zp);
524 objset_t *os = zp->z_zfsvfs->z_os;
525 struct sf_buf *sf;
526 vm_object_t obj;
527 vm_page_t pp;
528 int64_t start;
529 caddr_t va;
530 int len = nbytes;
531 int error = 0;
532
533 ASSERT(uio->uio_segflg == UIO_NOCOPY);
534 ASSERT(vp->v_mount != NULL);
535 obj = vp->v_object;
536 ASSERT(obj != NULL);
537 ASSERT((uio->uio_loffset & PAGEOFFSET) == 0);
538
539 zfs_vmobject_wlock_12(obj);
540 for (start = uio->uio_loffset; len > 0; start += PAGESIZE) {
541 int bytes = MIN(PAGESIZE, len);
542
543 pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
544 VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
545 if (vm_page_none_valid(pp)) {
546 zfs_vmobject_wunlock_12(obj);
547 va = zfs_map_page(pp, &sf);
548 error = dmu_read(os, zp->z_id, start, bytes, va,
549 DMU_READ_PREFETCH);
550 if (bytes != PAGESIZE && error == 0)
551 bzero(va + bytes, PAGESIZE - bytes);
552 zfs_unmap_page(sf);
553 zfs_vmobject_wlock_12(obj);
554 #if __FreeBSD_version >= 1300081
555 if (error == 0) {
556 vm_page_valid(pp);
557 vm_page_activate(pp);
558 vm_page_do_sunbusy(pp);
559 } else {
560 zfs_vmobject_wlock(obj);
561 if (!vm_page_wired(pp) && pp->valid == 0 &&
562 vm_page_busy_tryupgrade(pp))
563 vm_page_free(pp);
564 else
565 vm_page_sunbusy(pp);
566 zfs_vmobject_wunlock(obj);
567 }
568 #else
569 vm_page_do_sunbusy(pp);
570 vm_page_lock(pp);
571 if (error) {
572 if (pp->wire_count == 0 && pp->valid == 0 &&
573 !vm_page_busied(pp))
574 vm_page_free(pp);
575 } else {
576 pp->valid = VM_PAGE_BITS_ALL;
577 vm_page_activate(pp);
578 }
579 vm_page_unlock(pp);
580 #endif
581 } else {
582 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
583 vm_page_do_sunbusy(pp);
584 }
585 if (error)
586 break;
587 uio->uio_resid -= bytes;
588 uio->uio_offset += bytes;
589 len -= bytes;
590 }
591 zfs_vmobject_wunlock_12(obj);
592 return (error);
593 }
594
595 /*
596 * When a file is memory mapped, we must keep the IO data synchronized
597 * between the DMU cache and the memory mapped pages. What this means:
598 *
599 * On Read: We "read" preferentially from memory mapped pages,
600 * else we default from the dmu buffer.
601 *
602 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
603 * the file is memory mapped.
604 */
605 int
mappedread(znode_t * zp,int nbytes,uio_t * uio)606 mappedread(znode_t *zp, int nbytes, uio_t *uio)
607 {
608 vnode_t *vp = ZTOV(zp);
609 vm_object_t obj;
610 int64_t start;
611 int len = nbytes;
612 int off;
613 int error = 0;
614
615 ASSERT(vp->v_mount != NULL);
616 obj = vp->v_object;
617 ASSERT(obj != NULL);
618
619 start = uio->uio_loffset;
620 off = start & PAGEOFFSET;
621 zfs_vmobject_wlock_12(obj);
622 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
623 vm_page_t pp;
624 uint64_t bytes = MIN(PAGESIZE - off, len);
625
626 if ((pp = page_hold(vp, start))) {
627 struct sf_buf *sf;
628 caddr_t va;
629
630 zfs_vmobject_wunlock_12(obj);
631 va = zfs_map_page(pp, &sf);
632 error = vn_io_fault_uiomove(va + off, bytes, uio);
633 zfs_unmap_page(sf);
634 zfs_vmobject_wlock_12(obj);
635 page_unhold(pp);
636 } else {
637 zfs_vmobject_wunlock_12(obj);
638 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
639 uio, bytes);
640 zfs_vmobject_wlock_12(obj);
641 }
642 len -= bytes;
643 off = 0;
644 if (error)
645 break;
646 }
647 zfs_vmobject_wunlock_12(obj);
648 return (error);
649 }
650
651 int
zfs_write_simple(znode_t * zp,const void * data,size_t len,loff_t pos,size_t * presid)652 zfs_write_simple(znode_t *zp, const void *data, size_t len,
653 loff_t pos, size_t *presid)
654 {
655 int error = 0;
656 ssize_t resid;
657
658 error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
659 UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);
660
661 if (error) {
662 return (SET_ERROR(error));
663 } else if (presid == NULL) {
664 if (resid != 0) {
665 error = SET_ERROR(EIO);
666 }
667 } else {
668 *presid = resid;
669 }
670 return (error);
671 }
672
673 void
zfs_zrele_async(znode_t * zp)674 zfs_zrele_async(znode_t *zp)
675 {
676 vnode_t *vp = ZTOV(zp);
677 objset_t *os = ITOZSB(vp)->z_os;
678
679 VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
680 }
681
/*
 * Callback handed to vn_vget_ino_gen() by zfs_lookup_lock() for "..".
 * "arg" is the already-referenced target vnode: it is stored in *vpp and
 * locked with lkflags.  If the lock fails the reference is dropped and
 * the lock error is returned.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	struct vnode *target = arg;
	int error;

	*vpp = target;
	error = vn_lock(target, lkflags);
	if (error != 0)
		vrele(target);
	return (error);
}
693
694 static int
zfs_lookup_lock(vnode_t * dvp,vnode_t * vp,const char * name,int lkflags)695 zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
696 {
697 znode_t *zdp = VTOZ(dvp);
698 zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
699 int error;
700 int ltype;
701
702 if (zfsvfs->z_replay == B_FALSE)
703 ASSERT_VOP_LOCKED(dvp, __func__);
704
705 if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
706 ASSERT3P(dvp, ==, vp);
707 vref(dvp);
708 ltype = lkflags & LK_TYPE_MASK;
709 if (ltype != VOP_ISLOCKED(dvp)) {
710 if (ltype == LK_EXCLUSIVE)
711 vn_lock(dvp, LK_UPGRADE | LK_RETRY);
712 else /* if (ltype == LK_SHARED) */
713 vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
714
715 /*
716 * Relock for the "." case could leave us with
717 * reclaimed vnode.
718 */
719 if (VN_IS_DOOMED(dvp)) {
720 vrele(dvp);
721 return (SET_ERROR(ENOENT));
722 }
723 }
724 return (0);
725 } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
726 /*
727 * Note that in this case, dvp is the child vnode, and we
728 * are looking up the parent vnode - exactly reverse from
729 * normal operation. Unlocking dvp requires some rather
730 * tricky unlock/relock dance to prevent mp from being freed;
731 * use vn_vget_ino_gen() which takes care of all that.
732 *
733 * XXX Note that there is a time window when both vnodes are
734 * unlocked. It is possible, although highly unlikely, that
735 * during that window the parent-child relationship between
736 * the vnodes may change, for example, get reversed.
737 * In that case we would have a wrong lock order for the vnodes.
738 * All other filesystems seem to ignore this problem, so we
739 * do the same here.
740 * A potential solution could be implemented as follows:
741 * - using LK_NOWAIT when locking the second vnode and retrying
742 * if necessary
743 * - checking that the parent-child relationship still holds
744 * after locking both vnodes and retrying if it doesn't
745 */
746 error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
747 return (error);
748 } else {
749 error = vn_lock(vp, lkflags);
750 if (error != 0)
751 vrele(vp);
752 return (error);
753 }
754 }
755
756 /*
757 * Lookup an entry in a directory, or an extended attribute directory.
758 * If it exists, return a held vnode reference for it.
759 *
760 * IN: dvp - vnode of directory to search.
761 * nm - name of entry to lookup.
762 * pnp - full pathname to lookup [UNUSED].
763 * flags - LOOKUP_XATTR set if looking for an attribute.
764 * rdir - root directory vnode [UNUSED].
765 * cr - credentials of caller.
766 * ct - caller context
767 *
768 * OUT: vpp - vnode of located entry, NULL if not found.
769 *
770 * RETURN: 0 on success, error code on failure.
771 *
772 * Timestamps:
773 * NA
774 */
775 /* ARGSUSED */
776 static int
zfs_lookup(vnode_t * dvp,const char * nm,vnode_t ** vpp,struct componentname * cnp,int nameiop,cred_t * cr,kthread_t * td,int flags,boolean_t cached)777 zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
778 struct componentname *cnp, int nameiop, cred_t *cr, kthread_t *td,
779 int flags, boolean_t cached)
780 {
781 znode_t *zdp = VTOZ(dvp);
782 znode_t *zp;
783 zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
784 int error = 0;
785
786 /*
787 * Fast path lookup, however we must skip DNLC lookup
788 * for case folding or normalizing lookups because the
789 * DNLC code only stores the passed in name. This means
790 * creating 'a' and removing 'A' on a case insensitive
791 * file system would work, but DNLC still thinks 'a'
792 * exists and won't let you create it again on the next
793 * pass through fast path.
794 */
795 if (!(flags & LOOKUP_XATTR)) {
796 if (dvp->v_type != VDIR) {
797 return (SET_ERROR(ENOTDIR));
798 } else if (zdp->z_sa_hdl == NULL) {
799 return (SET_ERROR(EIO));
800 }
801 }
802
803 DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
804 const char *, nm);
805
806 ZFS_ENTER(zfsvfs);
807 ZFS_VERIFY_ZP(zdp);
808
809 *vpp = NULL;
810
811 if (flags & LOOKUP_XATTR) {
812 /*
813 * If the xattr property is off, refuse the lookup request.
814 */
815 if (!(zfsvfs->z_flags & ZSB_XATTR)) {
816 ZFS_EXIT(zfsvfs);
817 return (SET_ERROR(EOPNOTSUPP));
818 }
819
820 /*
821 * We don't allow recursive attributes..
822 * Maybe someday we will.
823 */
824 if (zdp->z_pflags & ZFS_XATTR) {
825 ZFS_EXIT(zfsvfs);
826 return (SET_ERROR(EINVAL));
827 }
828
829 if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
830 ZFS_EXIT(zfsvfs);
831 return (error);
832 }
833 *vpp = ZTOV(zp);
834
835 /*
836 * Do we have permission to get into attribute directory?
837 */
838 error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr);
839 if (error) {
840 vrele(ZTOV(zp));
841 }
842
843 ZFS_EXIT(zfsvfs);
844 return (error);
845 }
846
847 /*
848 * Check accessibility of directory if we're not coming in via
849 * VOP_CACHEDLOOKUP.
850 */
851 if (!cached) {
852 #ifdef NOEXECCHECK
853 if ((cnp->cn_flags & NOEXECCHECK) != 0) {
854 cnp->cn_flags &= ~NOEXECCHECK;
855 } else
856 #endif
857 if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
858 ZFS_EXIT(zfsvfs);
859 return (error);
860 }
861 }
862
863 if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
864 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
865 ZFS_EXIT(zfsvfs);
866 return (SET_ERROR(EILSEQ));
867 }
868
869
870 /*
871 * First handle the special cases.
872 */
873 if ((cnp->cn_flags & ISDOTDOT) != 0) {
874 /*
875 * If we are a snapshot mounted under .zfs, return
876 * the vp for the snapshot directory.
877 */
878 if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
879 struct componentname cn;
880 vnode_t *zfsctl_vp;
881 int ltype;
882
883 ZFS_EXIT(zfsvfs);
884 ltype = VOP_ISLOCKED(dvp);
885 VOP_UNLOCK1(dvp);
886 error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
887 &zfsctl_vp);
888 if (error == 0) {
889 cn.cn_nameptr = "snapshot";
890 cn.cn_namelen = strlen(cn.cn_nameptr);
891 cn.cn_nameiop = cnp->cn_nameiop;
892 cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
893 cn.cn_lkflags = cnp->cn_lkflags;
894 error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
895 vput(zfsctl_vp);
896 }
897 vn_lock(dvp, ltype | LK_RETRY);
898 return (error);
899 }
900 }
901 if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
902 ZFS_EXIT(zfsvfs);
903 if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
904 return (SET_ERROR(ENOTSUP));
905 error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
906 return (error);
907 }
908
909 /*
910 * The loop is retry the lookup if the parent-child relationship
911 * changes during the dot-dot locking complexities.
912 */
913 for (;;) {
914 uint64_t parent;
915
916 error = zfs_dirlook(zdp, nm, &zp);
917 if (error == 0)
918 *vpp = ZTOV(zp);
919
920 ZFS_EXIT(zfsvfs);
921 if (error != 0)
922 break;
923
924 error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
925 if (error != 0) {
926 /*
927 * If we've got a locking error, then the vnode
928 * got reclaimed because of a force unmount.
929 * We never enter doomed vnodes into the name cache.
930 */
931 *vpp = NULL;
932 return (error);
933 }
934
935 if ((cnp->cn_flags & ISDOTDOT) == 0)
936 break;
937
938 ZFS_ENTER(zfsvfs);
939 if (zdp->z_sa_hdl == NULL) {
940 error = SET_ERROR(EIO);
941 } else {
942 error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
943 &parent, sizeof (parent));
944 }
945 if (error != 0) {
946 ZFS_EXIT(zfsvfs);
947 vput(ZTOV(zp));
948 break;
949 }
950 if (zp->z_id == parent) {
951 ZFS_EXIT(zfsvfs);
952 break;
953 }
954 vput(ZTOV(zp));
955 }
956
957 if (error != 0)
958 *vpp = NULL;
959
960 /* Translate errors and add SAVENAME when needed. */
961 if (cnp->cn_flags & ISLASTCN) {
962 switch (nameiop) {
963 case CREATE:
964 case RENAME:
965 if (error == ENOENT) {
966 error = EJUSTRETURN;
967 cnp->cn_flags |= SAVENAME;
968 break;
969 }
970 /* FALLTHROUGH */
971 case DELETE:
972 if (error == 0)
973 cnp->cn_flags |= SAVENAME;
974 break;
975 }
976 }
977
978 /* Insert name into cache (as non-existent) if appropriate. */
979 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
980 error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
981 cache_enter(dvp, NULL, cnp);
982
983 /* Insert name into cache if appropriate. */
984 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
985 error == 0 && (cnp->cn_flags & MAKEENTRY)) {
986 if (!(cnp->cn_flags & ISLASTCN) ||
987 (nameiop != DELETE && nameiop != RENAME)) {
988 cache_enter(dvp, *vpp, cnp);
989 }
990 }
991
992 return (error);
993 }
994
995 /*
996 * Attempt to create a new entry in a directory. If the entry
997 * already exists, truncate the file if permissible, else return
998 * an error. Return the vp of the created or trunc'd file.
999 *
1000 * IN: dvp - vnode of directory to put new file entry in.
1001 * name - name of new file entry.
1002 * vap - attributes of new file.
1003 * excl - flag indicating exclusive or non-exclusive mode.
1004 * mode - mode to open file with.
1005 * cr - credentials of caller.
1006 * flag - large file flag [UNUSED].
1007 * ct - caller context
1008 * vsecp - ACL to be set
1009 *
1010 * OUT: vpp - vnode of created or trunc'd entry.
1011 *
1012 * RETURN: 0 on success, error code on failure.
1013 *
1014 * Timestamps:
1015 * dvp - ctime|mtime updated if new entry created
1016 * vp - ctime|mtime always, atime if new
1017 */
1018
1019 /* ARGSUSED */
1020 int
zfs_create(znode_t * dzp,const char * name,vattr_t * vap,int excl,int mode,znode_t ** zpp,cred_t * cr,int flag,vsecattr_t * vsecp)1021 zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
1022 znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
1023 {
1024 znode_t *zp;
1025 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1026 zilog_t *zilog;
1027 objset_t *os;
1028 dmu_tx_t *tx;
1029 int error;
1030 ksid_t *ksid;
1031 uid_t uid;
1032 gid_t gid = crgetgid(cr);
1033 uint64_t projid = ZFS_DEFAULT_PROJID;
1034 zfs_acl_ids_t acl_ids;
1035 boolean_t fuid_dirtied;
1036 uint64_t txtype;
1037 #ifdef DEBUG_VFS_LOCKS
1038 vnode_t *dvp = ZTOV(dzp);
1039 #endif
1040
1041 /*
1042 * If we have an ephemeral id, ACL, or XVATTR then
1043 * make sure file system is at proper version
1044 */
1045
1046 ksid = crgetsid(cr, KSID_OWNER);
1047 if (ksid)
1048 uid = ksid_getid(ksid);
1049 else
1050 uid = crgetuid(cr);
1051
1052 if (zfsvfs->z_use_fuids == B_FALSE &&
1053 (vsecp || (vap->va_mask & AT_XVATTR) ||
1054 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1055 return (SET_ERROR(EINVAL));
1056
1057 ZFS_ENTER(zfsvfs);
1058 ZFS_VERIFY_ZP(dzp);
1059 os = zfsvfs->z_os;
1060 zilog = zfsvfs->z_log;
1061
1062 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
1063 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1064 ZFS_EXIT(zfsvfs);
1065 return (SET_ERROR(EILSEQ));
1066 }
1067
1068 if (vap->va_mask & AT_XVATTR) {
1069 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
1070 crgetuid(cr), cr, vap->va_type)) != 0) {
1071 ZFS_EXIT(zfsvfs);
1072 return (error);
1073 }
1074 }
1075
1076 *zpp = NULL;
1077
1078 if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
1079 vap->va_mode &= ~S_ISVTX;
1080
1081 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
1082 if (error) {
1083 ZFS_EXIT(zfsvfs);
1084 return (error);
1085 }
1086 ASSERT3P(zp, ==, NULL);
1087
1088 /*
1089 * Create a new file object and update the directory
1090 * to reference it.
1091 */
1092 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
1093 goto out;
1094 }
1095
1096 /*
1097 * We only support the creation of regular files in
1098 * extended attribute directories.
1099 */
1100
1101 if ((dzp->z_pflags & ZFS_XATTR) &&
1102 (vap->va_type != VREG)) {
1103 error = SET_ERROR(EINVAL);
1104 goto out;
1105 }
1106
1107 if ((error = zfs_acl_ids_create(dzp, 0, vap,
1108 cr, vsecp, &acl_ids)) != 0)
1109 goto out;
1110
1111 if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
1112 projid = zfs_inherit_projid(dzp);
1113 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
1114 zfs_acl_ids_free(&acl_ids);
1115 error = SET_ERROR(EDQUOT);
1116 goto out;
1117 }
1118
1119 getnewvnode_reserve_();
1120
1121 tx = dmu_tx_create(os);
1122
1123 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1124 ZFS_SA_BASE_ATTR_SIZE);
1125
1126 fuid_dirtied = zfsvfs->z_fuid_dirty;
1127 if (fuid_dirtied)
1128 zfs_fuid_txhold(zfsvfs, tx);
1129 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1130 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1131 if (!zfsvfs->z_use_sa &&
1132 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1133 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1134 0, acl_ids.z_aclp->z_acl_bytes);
1135 }
1136 error = dmu_tx_assign(tx, TXG_WAIT);
1137 if (error) {
1138 zfs_acl_ids_free(&acl_ids);
1139 dmu_tx_abort(tx);
1140 getnewvnode_drop_reserve();
1141 ZFS_EXIT(zfsvfs);
1142 return (error);
1143 }
1144 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1145 if (fuid_dirtied)
1146 zfs_fuid_sync(zfsvfs, tx);
1147
1148 (void) zfs_link_create(dzp, name, zp, tx, ZNEW);
1149 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1150 zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1151 vsecp, acl_ids.z_fuidp, vap);
1152 zfs_acl_ids_free(&acl_ids);
1153 dmu_tx_commit(tx);
1154
1155 getnewvnode_drop_reserve();
1156
1157 out:
1158 VNCHECKREF(dvp);
1159 if (error == 0) {
1160 *zpp = zp;
1161 }
1162
1163 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1164 zil_commit(zilog, 0);
1165
1166 ZFS_EXIT(zfsvfs);
1167 return (error);
1168 }
1169
1170 /*
1171 * Remove an entry from a directory.
1172 *
1173 * IN: dvp - vnode of directory to remove entry from.
1174 * name - name of entry to remove.
1175 * cr - credentials of caller.
1176 * ct - caller context
1177 * flags - case flags
1178 *
1179 * RETURN: 0 on success, error code on failure.
1180 *
1181 * Timestamps:
1182 * dvp - ctime|mtime
1183 * vp - ctime (if nlink > 0)
1184 */
1185
1186 /*ARGSUSED*/
1187 static int
zfs_remove_(vnode_t * dvp,vnode_t * vp,const char * name,cred_t * cr)1188 zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1189 {
1190 znode_t *dzp = VTOZ(dvp);
1191 znode_t *zp;
1192 znode_t *xzp;
1193 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1194 zilog_t *zilog;
1195 uint64_t xattr_obj;
1196 uint64_t obj = 0;
1197 dmu_tx_t *tx;
1198 boolean_t unlinked;
1199 uint64_t txtype;
1200 int error;
1201
1202
1203 ZFS_ENTER(zfsvfs);
1204 ZFS_VERIFY_ZP(dzp);
1205 zp = VTOZ(vp);
1206 ZFS_VERIFY_ZP(zp);
1207 zilog = zfsvfs->z_log;
1208
1209 xattr_obj = 0;
1210 xzp = NULL;
1211
1212 if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
1213 goto out;
1214 }
1215
1216 /*
1217 * Need to use rmdir for removing directories.
1218 */
1219 if (vp->v_type == VDIR) {
1220 error = SET_ERROR(EPERM);
1221 goto out;
1222 }
1223
1224 vnevent_remove(vp, dvp, name, ct);
1225
1226 obj = zp->z_id;
1227
1228 /* are there any extended attributes? */
1229 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1230 &xattr_obj, sizeof (xattr_obj));
1231 if (error == 0 && xattr_obj) {
1232 error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1233 ASSERT0(error);
1234 }
1235
1236 /*
1237 * We may delete the znode now, or we may put it in the unlinked set;
1238 * it depends on whether we're the last link, and on whether there are
1239 * other holds on the vnode. So we dmu_tx_hold() the right things to
1240 * allow for either case.
1241 */
1242 tx = dmu_tx_create(zfsvfs->z_os);
1243 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1244 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1245 zfs_sa_upgrade_txholds(tx, zp);
1246 zfs_sa_upgrade_txholds(tx, dzp);
1247
1248 if (xzp) {
1249 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1250 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1251 }
1252
1253 /* charge as an update -- would be nice not to charge at all */
1254 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1255
1256 /*
1257 * Mark this transaction as typically resulting in a net free of space
1258 */
1259 dmu_tx_mark_netfree(tx);
1260
1261 error = dmu_tx_assign(tx, TXG_WAIT);
1262 if (error) {
1263 dmu_tx_abort(tx);
1264 ZFS_EXIT(zfsvfs);
1265 return (error);
1266 }
1267
1268 /*
1269 * Remove the directory entry.
1270 */
1271 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
1272
1273 if (error) {
1274 dmu_tx_commit(tx);
1275 goto out;
1276 }
1277
1278 if (unlinked) {
1279 zfs_unlinked_add(zp, tx);
1280 vp->v_vflag |= VV_NOSYNC;
1281 }
1282 /* XXX check changes to linux vnops */
1283 txtype = TX_REMOVE;
1284 zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
1285
1286 dmu_tx_commit(tx);
1287 out:
1288
1289 if (xzp)
1290 vrele(ZTOV(xzp));
1291
1292 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1293 zil_commit(zilog, 0);
1294
1295
1296 ZFS_EXIT(zfsvfs);
1297 return (error);
1298 }
1299
1300
1301 static int
zfs_lookup_internal(znode_t * dzp,const char * name,vnode_t ** vpp,struct componentname * cnp,int nameiop)1302 zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
1303 struct componentname *cnp, int nameiop)
1304 {
1305 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1306 int error;
1307
1308 cnp->cn_nameptr = __DECONST(char *, name);
1309 cnp->cn_namelen = strlen(name);
1310 cnp->cn_nameiop = nameiop;
1311 cnp->cn_flags = ISLASTCN | SAVENAME;
1312 cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
1313 cnp->cn_cred = kcred;
1314 cnp->cn_thread = curthread;
1315
1316 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
1317 struct vop_lookup_args a;
1318
1319 a.a_gen.a_desc = &vop_lookup_desc;
1320 a.a_dvp = ZTOV(dzp);
1321 a.a_vpp = vpp;
1322 a.a_cnp = cnp;
1323 error = vfs_cache_lookup(&a);
1324 } else {
1325 error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred,
1326 curthread, 0, B_FALSE);
1327 }
1328 #ifdef ZFS_DEBUG
1329 if (error) {
1330 printf("got error %d on name %s on op %d\n", error, name,
1331 nameiop);
1332 kdb_backtrace();
1333 }
1334 #endif
1335 return (error);
1336 }
1337
1338 int
zfs_remove(znode_t * dzp,const char * name,cred_t * cr,int flags)1339 zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
1340 {
1341 vnode_t *vp;
1342 int error;
1343 struct componentname cn;
1344
1345 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1346 return (error);
1347
1348 error = zfs_remove_(ZTOV(dzp), vp, name, cr);
1349 vput(vp);
1350 return (error);
1351 }
1352 /*
1353 * Create a new directory and insert it into dvp using the name
1354 * provided. Return a pointer to the inserted directory.
1355 *
1356 * IN: dvp - vnode of directory to add subdir to.
1357 * dirname - name of new directory.
1358 * vap - attributes of new directory.
1359 * cr - credentials of caller.
1360 * ct - caller context
1361 * flags - case flags
1362 * vsecp - ACL to be set
1363 *
1364 * OUT: vpp - vnode of created directory.
1365 *
1366 * RETURN: 0 on success, error code on failure.
1367 *
1368 * Timestamps:
1369 * dvp - ctime|mtime updated
1370 * vp - ctime|mtime|atime updated
1371 */
1372 /*ARGSUSED*/
1373 int
zfs_mkdir(znode_t * dzp,const char * dirname,vattr_t * vap,znode_t ** zpp,cred_t * cr,int flags,vsecattr_t * vsecp)1374 zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
1375 cred_t *cr, int flags, vsecattr_t *vsecp)
1376 {
1377 znode_t *zp;
1378 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1379 zilog_t *zilog;
1380 uint64_t txtype;
1381 dmu_tx_t *tx;
1382 int error;
1383 ksid_t *ksid;
1384 uid_t uid;
1385 gid_t gid = crgetgid(cr);
1386 zfs_acl_ids_t acl_ids;
1387 boolean_t fuid_dirtied;
1388
1389 ASSERT(vap->va_type == VDIR);
1390
1391 /*
1392 * If we have an ephemeral id, ACL, or XVATTR then
1393 * make sure file system is at proper version
1394 */
1395
1396 ksid = crgetsid(cr, KSID_OWNER);
1397 if (ksid)
1398 uid = ksid_getid(ksid);
1399 else
1400 uid = crgetuid(cr);
1401 if (zfsvfs->z_use_fuids == B_FALSE &&
1402 ((vap->va_mask & AT_XVATTR) ||
1403 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1404 return (SET_ERROR(EINVAL));
1405
1406 ZFS_ENTER(zfsvfs);
1407 ZFS_VERIFY_ZP(dzp);
1408 zilog = zfsvfs->z_log;
1409
1410 if (dzp->z_pflags & ZFS_XATTR) {
1411 ZFS_EXIT(zfsvfs);
1412 return (SET_ERROR(EINVAL));
1413 }
1414
1415 if (zfsvfs->z_utf8 && u8_validate(dirname,
1416 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1417 ZFS_EXIT(zfsvfs);
1418 return (SET_ERROR(EILSEQ));
1419 }
1420
1421 if (vap->va_mask & AT_XVATTR) {
1422 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
1423 crgetuid(cr), cr, vap->va_type)) != 0) {
1424 ZFS_EXIT(zfsvfs);
1425 return (error);
1426 }
1427 }
1428
1429 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
1430 NULL, &acl_ids)) != 0) {
1431 ZFS_EXIT(zfsvfs);
1432 return (error);
1433 }
1434
1435 /*
1436 * First make sure the new directory doesn't exist.
1437 *
1438 * Existence is checked first to make sure we don't return
1439 * EACCES instead of EEXIST which can cause some applications
1440 * to fail.
1441 */
1442 *zpp = NULL;
1443
1444 if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
1445 zfs_acl_ids_free(&acl_ids);
1446 ZFS_EXIT(zfsvfs);
1447 return (error);
1448 }
1449 ASSERT3P(zp, ==, NULL);
1450
1451 if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
1452 zfs_acl_ids_free(&acl_ids);
1453 ZFS_EXIT(zfsvfs);
1454 return (error);
1455 }
1456
1457 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
1458 zfs_acl_ids_free(&acl_ids);
1459 ZFS_EXIT(zfsvfs);
1460 return (SET_ERROR(EDQUOT));
1461 }
1462
1463 /*
1464 * Add a new entry to the directory.
1465 */
1466 getnewvnode_reserve_();
1467 tx = dmu_tx_create(zfsvfs->z_os);
1468 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
1469 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1470 fuid_dirtied = zfsvfs->z_fuid_dirty;
1471 if (fuid_dirtied)
1472 zfs_fuid_txhold(zfsvfs, tx);
1473 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1474 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1475 acl_ids.z_aclp->z_acl_bytes);
1476 }
1477
1478 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1479 ZFS_SA_BASE_ATTR_SIZE);
1480
1481 error = dmu_tx_assign(tx, TXG_WAIT);
1482 if (error) {
1483 zfs_acl_ids_free(&acl_ids);
1484 dmu_tx_abort(tx);
1485 getnewvnode_drop_reserve();
1486 ZFS_EXIT(zfsvfs);
1487 return (error);
1488 }
1489
1490 /*
1491 * Create new node.
1492 */
1493 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1494
1495 if (fuid_dirtied)
1496 zfs_fuid_sync(zfsvfs, tx);
1497
1498 /*
1499 * Now put new name in parent dir.
1500 */
1501 (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW);
1502
1503 *zpp = zp;
1504
1505 txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
1506 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
1507 acl_ids.z_fuidp, vap);
1508
1509 zfs_acl_ids_free(&acl_ids);
1510
1511 dmu_tx_commit(tx);
1512
1513 getnewvnode_drop_reserve();
1514
1515 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1516 zil_commit(zilog, 0);
1517
1518 ZFS_EXIT(zfsvfs);
1519 return (0);
1520 }
1521
#if __FreeBSD_version < 1300124
/*
 * Local definition used when __FreeBSD_version is below 1300124: purge
 * the name cache entries of both the parent directory and the directory
 * being removed.
 */
static void
cache_vop_rmdir(struct vnode *dvp, struct vnode *vp)
{
	cache_purge(dvp);
	cache_purge(vp);
}
#endif
1531
1532 /*
1533 * Remove a directory subdir entry. If the current working
1534 * directory is the same as the subdir to be removed, the
1535 * remove will fail.
1536 *
1537 * IN: dvp - vnode of directory to remove from.
1538 * name - name of directory to be removed.
1539 * cwd - vnode of current working directory.
1540 * cr - credentials of caller.
1541 * ct - caller context
1542 * flags - case flags
1543 *
1544 * RETURN: 0 on success, error code on failure.
1545 *
1546 * Timestamps:
1547 * dvp - ctime|mtime updated
1548 */
1549 /*ARGSUSED*/
1550 static int
zfs_rmdir_(vnode_t * dvp,vnode_t * vp,const char * name,cred_t * cr)1551 zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1552 {
1553 znode_t *dzp = VTOZ(dvp);
1554 znode_t *zp = VTOZ(vp);
1555 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1556 zilog_t *zilog;
1557 dmu_tx_t *tx;
1558 int error;
1559
1560 ZFS_ENTER(zfsvfs);
1561 ZFS_VERIFY_ZP(dzp);
1562 ZFS_VERIFY_ZP(zp);
1563 zilog = zfsvfs->z_log;
1564
1565
1566 if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
1567 goto out;
1568 }
1569
1570 if (vp->v_type != VDIR) {
1571 error = SET_ERROR(ENOTDIR);
1572 goto out;
1573 }
1574
1575 vnevent_rmdir(vp, dvp, name, ct);
1576
1577 tx = dmu_tx_create(zfsvfs->z_os);
1578 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1579 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1580 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1581 zfs_sa_upgrade_txholds(tx, zp);
1582 zfs_sa_upgrade_txholds(tx, dzp);
1583 dmu_tx_mark_netfree(tx);
1584 error = dmu_tx_assign(tx, TXG_WAIT);
1585 if (error) {
1586 dmu_tx_abort(tx);
1587 ZFS_EXIT(zfsvfs);
1588 return (error);
1589 }
1590
1591 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
1592
1593 if (error == 0) {
1594 uint64_t txtype = TX_RMDIR;
1595 zfs_log_remove(zilog, tx, txtype, dzp, name,
1596 ZFS_NO_OBJECT, B_FALSE);
1597 }
1598
1599 dmu_tx_commit(tx);
1600
1601 cache_vop_rmdir(dvp, vp);
1602 out:
1603 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1604 zil_commit(zilog, 0);
1605
1606 ZFS_EXIT(zfsvfs);
1607 return (error);
1608 }
1609
1610 int
zfs_rmdir(znode_t * dzp,const char * name,znode_t * cwd,cred_t * cr,int flags)1611 zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
1612 {
1613 struct componentname cn;
1614 vnode_t *vp;
1615 int error;
1616
1617 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1618 return (error);
1619
1620 error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
1621 vput(vp);
1622 return (error);
1623 }
1624
1625 /*
1626 * Read as many directory entries as will fit into the provided
1627 * buffer from the given directory cursor position (specified in
1628 * the uio structure).
1629 *
1630 * IN: vp - vnode of directory to read.
1631 * uio - structure supplying read location, range info,
1632 * and return buffer.
1633 * cr - credentials of caller.
1634 * ct - caller context
1635 * flags - case flags
1636 *
1637 * OUT: uio - updated offset and range, buffer filled.
1638 * eofp - set to true if end-of-file detected.
1639 *
1640 * RETURN: 0 on success, error code on failure.
1641 *
1642 * Timestamps:
1643 * vp - atime updated
1644 *
1645 * Note that the low 4 bits of the cookie returned by zap is always zero.
1646 * This allows us to use the low range for "special" directory entries:
1647 * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem,
1648 * we use the offset 2 for the '.zfs' directory.
1649 */
1650 /* ARGSUSED */
1651 static int
zfs_readdir(vnode_t * vp,uio_t * uio,cred_t * cr,int * eofp,int * ncookies,ulong_t ** cookies)1652 zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
1653 int *ncookies, ulong_t **cookies)
1654 {
1655 znode_t *zp = VTOZ(vp);
1656 iovec_t *iovp;
1657 edirent_t *eodp;
1658 dirent64_t *odp;
1659 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1660 objset_t *os;
1661 caddr_t outbuf;
1662 size_t bufsize;
1663 zap_cursor_t zc;
1664 zap_attribute_t zap;
1665 uint_t bytes_wanted;
1666 uint64_t offset; /* must be unsigned; checks for < 1 */
1667 uint64_t parent;
1668 int local_eof;
1669 int outcount;
1670 int error;
1671 uint8_t prefetch;
1672 boolean_t check_sysattrs;
1673 uint8_t type;
1674 int ncooks;
1675 ulong_t *cooks = NULL;
1676 int flags = 0;
1677
1678 ZFS_ENTER(zfsvfs);
1679 ZFS_VERIFY_ZP(zp);
1680
1681 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
1682 &parent, sizeof (parent))) != 0) {
1683 ZFS_EXIT(zfsvfs);
1684 return (error);
1685 }
1686
1687 /*
1688 * If we are not given an eof variable,
1689 * use a local one.
1690 */
1691 if (eofp == NULL)
1692 eofp = &local_eof;
1693
1694 /*
1695 * Check for valid iov_len.
1696 */
1697 if (uio->uio_iov->iov_len <= 0) {
1698 ZFS_EXIT(zfsvfs);
1699 return (SET_ERROR(EINVAL));
1700 }
1701
1702 /*
1703 * Quit if directory has been removed (posix)
1704 */
1705 if ((*eofp = zp->z_unlinked) != 0) {
1706 ZFS_EXIT(zfsvfs);
1707 return (0);
1708 }
1709
1710 error = 0;
1711 os = zfsvfs->z_os;
1712 offset = uio->uio_loffset;
1713 prefetch = zp->z_zn_prefetch;
1714
1715 /*
1716 * Initialize the iterator cursor.
1717 */
1718 if (offset <= 3) {
1719 /*
1720 * Start iteration from the beginning of the directory.
1721 */
1722 zap_cursor_init(&zc, os, zp->z_id);
1723 } else {
1724 /*
1725 * The offset is a serialized cursor.
1726 */
1727 zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
1728 }
1729
1730 /*
1731 * Get space to change directory entries into fs independent format.
1732 */
1733 iovp = uio->uio_iov;
1734 bytes_wanted = iovp->iov_len;
1735 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) {
1736 bufsize = bytes_wanted;
1737 outbuf = kmem_alloc(bufsize, KM_SLEEP);
1738 odp = (struct dirent64 *)outbuf;
1739 } else {
1740 bufsize = bytes_wanted;
1741 outbuf = NULL;
1742 odp = (struct dirent64 *)iovp->iov_base;
1743 }
1744 eodp = (struct edirent *)odp;
1745
1746 if (ncookies != NULL) {
1747 /*
1748 * Minimum entry size is dirent size and 1 byte for a file name.
1749 */
1750 ncooks = uio->uio_resid / (sizeof (struct dirent) -
1751 sizeof (((struct dirent *)NULL)->d_name) + 1);
1752 cooks = malloc(ncooks * sizeof (ulong_t), M_TEMP, M_WAITOK);
1753 *cookies = cooks;
1754 *ncookies = ncooks;
1755 }
1756 /*
1757 * If this VFS supports the system attribute view interface; and
1758 * we're looking at an extended attribute directory; and we care
1759 * about normalization conflicts on this vfs; then we must check
1760 * for normalization conflicts with the sysattr name space.
1761 */
1762 #ifdef TODO
1763 check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
1764 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
1765 (flags & V_RDDIR_ENTFLAGS);
1766 #else
1767 check_sysattrs = 0;
1768 #endif
1769
1770 /*
1771 * Transform to file-system independent format
1772 */
1773 outcount = 0;
1774 while (outcount < bytes_wanted) {
1775 ino64_t objnum;
1776 ushort_t reclen;
1777 off64_t *next = NULL;
1778
1779 /*
1780 * Special case `.', `..', and `.zfs'.
1781 */
1782 if (offset == 0) {
1783 (void) strcpy(zap.za_name, ".");
1784 zap.za_normalization_conflict = 0;
1785 objnum = zp->z_id;
1786 type = DT_DIR;
1787 } else if (offset == 1) {
1788 (void) strcpy(zap.za_name, "..");
1789 zap.za_normalization_conflict = 0;
1790 objnum = parent;
1791 type = DT_DIR;
1792 } else if (offset == 2 && zfs_show_ctldir(zp)) {
1793 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
1794 zap.za_normalization_conflict = 0;
1795 objnum = ZFSCTL_INO_ROOT;
1796 type = DT_DIR;
1797 } else {
1798 /*
1799 * Grab next entry.
1800 */
1801 if ((error = zap_cursor_retrieve(&zc, &zap))) {
1802 if ((*eofp = (error == ENOENT)) != 0)
1803 break;
1804 else
1805 goto update;
1806 }
1807
1808 if (zap.za_integer_length != 8 ||
1809 zap.za_num_integers != 1) {
1810 cmn_err(CE_WARN, "zap_readdir: bad directory "
1811 "entry, obj = %lld, offset = %lld\n",
1812 (u_longlong_t)zp->z_id,
1813 (u_longlong_t)offset);
1814 error = SET_ERROR(ENXIO);
1815 goto update;
1816 }
1817
1818 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
1819 /*
1820 * MacOS X can extract the object type here such as:
1821 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1822 */
1823 type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1824
1825 if (check_sysattrs && !zap.za_normalization_conflict) {
1826 #ifdef TODO
1827 zap.za_normalization_conflict =
1828 xattr_sysattr_casechk(zap.za_name);
1829 #else
1830 panic("%s:%u: TODO", __func__, __LINE__);
1831 #endif
1832 }
1833 }
1834
1835 if (flags & V_RDDIR_ACCFILTER) {
1836 /*
1837 * If we have no access at all, don't include
1838 * this entry in the returned information
1839 */
1840 znode_t *ezp;
1841 if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
1842 goto skip_entry;
1843 if (!zfs_has_access(ezp, cr)) {
1844 vrele(ZTOV(ezp));
1845 goto skip_entry;
1846 }
1847 vrele(ZTOV(ezp));
1848 }
1849
1850 if (flags & V_RDDIR_ENTFLAGS)
1851 reclen = EDIRENT_RECLEN(strlen(zap.za_name));
1852 else
1853 reclen = DIRENT64_RECLEN(strlen(zap.za_name));
1854
1855 /*
1856 * Will this entry fit in the buffer?
1857 */
1858 if (outcount + reclen > bufsize) {
1859 /*
1860 * Did we manage to fit anything in the buffer?
1861 */
1862 if (!outcount) {
1863 error = SET_ERROR(EINVAL);
1864 goto update;
1865 }
1866 break;
1867 }
1868 if (flags & V_RDDIR_ENTFLAGS) {
1869 /*
1870 * Add extended flag entry:
1871 */
1872 eodp->ed_ino = objnum;
1873 eodp->ed_reclen = reclen;
1874 /* NOTE: ed_off is the offset for the *next* entry */
1875 next = &(eodp->ed_off);
1876 eodp->ed_eflags = zap.za_normalization_conflict ?
1877 ED_CASE_CONFLICT : 0;
1878 (void) strncpy(eodp->ed_name, zap.za_name,
1879 EDIRENT_NAMELEN(reclen));
1880 eodp = (edirent_t *)((intptr_t)eodp + reclen);
1881 } else {
1882 /*
1883 * Add normal entry:
1884 */
1885 odp->d_ino = objnum;
1886 odp->d_reclen = reclen;
1887 odp->d_namlen = strlen(zap.za_name);
1888 /* NOTE: d_off is the offset for the *next* entry. */
1889 next = &odp->d_off;
1890 strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
1891 odp->d_type = type;
1892 dirent_terminate(odp);
1893 odp = (dirent64_t *)((intptr_t)odp + reclen);
1894 }
1895 outcount += reclen;
1896
1897 ASSERT(outcount <= bufsize);
1898
1899 /* Prefetch znode */
1900 if (prefetch)
1901 dmu_prefetch(os, objnum, 0, 0, 0,
1902 ZIO_PRIORITY_SYNC_READ);
1903
1904 skip_entry:
1905 /*
1906 * Move to the next entry, fill in the previous offset.
1907 */
1908 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
1909 zap_cursor_advance(&zc);
1910 offset = zap_cursor_serialize(&zc);
1911 } else {
1912 offset += 1;
1913 }
1914
1915 /* Fill the offset right after advancing the cursor. */
1916 if (next != NULL)
1917 *next = offset;
1918 if (cooks != NULL) {
1919 *cooks++ = offset;
1920 ncooks--;
1921 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
1922 }
1923 }
1924 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
1925
1926 /* Subtract unused cookies */
1927 if (ncookies != NULL)
1928 *ncookies -= ncooks;
1929
1930 if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) {
1931 iovp->iov_base += outcount;
1932 iovp->iov_len -= outcount;
1933 uio->uio_resid -= outcount;
1934 } else if ((error = uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
1935 /*
1936 * Reset the pointer.
1937 */
1938 offset = uio->uio_loffset;
1939 }
1940
1941 update:
1942 zap_cursor_fini(&zc);
1943 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
1944 kmem_free(outbuf, bufsize);
1945
1946 if (error == ENOENT)
1947 error = 0;
1948
1949 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
1950
1951 uio->uio_loffset = offset;
1952 ZFS_EXIT(zfsvfs);
1953 if (error != 0 && cookies != NULL) {
1954 free(*cookies, M_TEMP);
1955 *cookies = NULL;
1956 *ncookies = 0;
1957 }
1958 return (error);
1959 }
1960
1961 /*
1962 * Get the requested file attributes and place them in the provided
1963 * vattr structure.
1964 *
1965 * IN: vp - vnode of file.
1966 * vap - va_mask identifies requested attributes.
1967 * If AT_XVATTR set, then optional attrs are requested
1968 * flags - ATTR_NOACLCHECK (CIFS server context)
1969 * cr - credentials of caller.
1970 *
1971 * OUT: vap - attribute values.
1972 *
1973 * RETURN: 0 (always succeeds).
1974 */
1975 /* ARGSUSED */
1976 static int
zfs_getattr(vnode_t * vp,vattr_t * vap,int flags,cred_t * cr)1977 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
1978 {
1979 znode_t *zp = VTOZ(vp);
1980 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1981 int error = 0;
1982 uint32_t blksize;
1983 u_longlong_t nblocks;
1984 uint64_t mtime[2], ctime[2], crtime[2], rdev;
1985 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
1986 xoptattr_t *xoap = NULL;
1987 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
1988 sa_bulk_attr_t bulk[4];
1989 int count = 0;
1990
1991 ZFS_ENTER(zfsvfs);
1992 ZFS_VERIFY_ZP(zp);
1993
1994 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
1995
1996 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
1997 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
1998 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
1999 if (vp->v_type == VBLK || vp->v_type == VCHR)
2000 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
2001 &rdev, 8);
2002
2003 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
2004 ZFS_EXIT(zfsvfs);
2005 return (error);
2006 }
2007
2008 /*
2009 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
2010 * Also, if we are the owner don't bother, since owner should
2011 * always be allowed to read basic attributes of file.
2012 */
2013 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
2014 (vap->va_uid != crgetuid(cr))) {
2015 if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
2016 skipaclchk, cr))) {
2017 ZFS_EXIT(zfsvfs);
2018 return (error);
2019 }
2020 }
2021
2022 /*
2023 * Return all attributes. It's cheaper to provide the answer
2024 * than to determine whether we were asked the question.
2025 */
2026
2027 vap->va_type = IFTOVT(zp->z_mode);
2028 vap->va_mode = zp->z_mode & ~S_IFMT;
2029 vn_fsid(vp, vap);
2030 vap->va_nodeid = zp->z_id;
2031 vap->va_nlink = zp->z_links;
2032 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
2033 zp->z_links < ZFS_LINK_MAX)
2034 vap->va_nlink++;
2035 vap->va_size = zp->z_size;
2036 if (vp->v_type == VBLK || vp->v_type == VCHR)
2037 vap->va_rdev = zfs_cmpldev(rdev);
2038 vap->va_seq = zp->z_seq;
2039 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */
2040 vap->va_filerev = zp->z_seq;
2041
2042 /*
2043 * Add in any requested optional attributes and the create time.
2044 * Also set the corresponding bits in the returned attribute bitmap.
2045 */
2046 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
2047 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
2048 xoap->xoa_archive =
2049 ((zp->z_pflags & ZFS_ARCHIVE) != 0);
2050 XVA_SET_RTN(xvap, XAT_ARCHIVE);
2051 }
2052
2053 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
2054 xoap->xoa_readonly =
2055 ((zp->z_pflags & ZFS_READONLY) != 0);
2056 XVA_SET_RTN(xvap, XAT_READONLY);
2057 }
2058
2059 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
2060 xoap->xoa_system =
2061 ((zp->z_pflags & ZFS_SYSTEM) != 0);
2062 XVA_SET_RTN(xvap, XAT_SYSTEM);
2063 }
2064
2065 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
2066 xoap->xoa_hidden =
2067 ((zp->z_pflags & ZFS_HIDDEN) != 0);
2068 XVA_SET_RTN(xvap, XAT_HIDDEN);
2069 }
2070
2071 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2072 xoap->xoa_nounlink =
2073 ((zp->z_pflags & ZFS_NOUNLINK) != 0);
2074 XVA_SET_RTN(xvap, XAT_NOUNLINK);
2075 }
2076
2077 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2078 xoap->xoa_immutable =
2079 ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
2080 XVA_SET_RTN(xvap, XAT_IMMUTABLE);
2081 }
2082
2083 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2084 xoap->xoa_appendonly =
2085 ((zp->z_pflags & ZFS_APPENDONLY) != 0);
2086 XVA_SET_RTN(xvap, XAT_APPENDONLY);
2087 }
2088
2089 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2090 xoap->xoa_nodump =
2091 ((zp->z_pflags & ZFS_NODUMP) != 0);
2092 XVA_SET_RTN(xvap, XAT_NODUMP);
2093 }
2094
2095 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
2096 xoap->xoa_opaque =
2097 ((zp->z_pflags & ZFS_OPAQUE) != 0);
2098 XVA_SET_RTN(xvap, XAT_OPAQUE);
2099 }
2100
2101 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2102 xoap->xoa_av_quarantined =
2103 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
2104 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
2105 }
2106
2107 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2108 xoap->xoa_av_modified =
2109 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
2110 XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
2111 }
2112
2113 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
2114 vp->v_type == VREG) {
2115 zfs_sa_get_scanstamp(zp, xvap);
2116 }
2117
2118 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2119 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
2120 XVA_SET_RTN(xvap, XAT_REPARSE);
2121 }
2122 if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
2123 xoap->xoa_generation = zp->z_gen;
2124 XVA_SET_RTN(xvap, XAT_GEN);
2125 }
2126
2127 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
2128 xoap->xoa_offline =
2129 ((zp->z_pflags & ZFS_OFFLINE) != 0);
2130 XVA_SET_RTN(xvap, XAT_OFFLINE);
2131 }
2132
2133 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
2134 xoap->xoa_sparse =
2135 ((zp->z_pflags & ZFS_SPARSE) != 0);
2136 XVA_SET_RTN(xvap, XAT_SPARSE);
2137 }
2138
2139 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2140 xoap->xoa_projinherit =
2141 ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
2142 XVA_SET_RTN(xvap, XAT_PROJINHERIT);
2143 }
2144
2145 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2146 xoap->xoa_projid = zp->z_projid;
2147 XVA_SET_RTN(xvap, XAT_PROJID);
2148 }
2149 }
2150
2151 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
2152 ZFS_TIME_DECODE(&vap->va_mtime, mtime);
2153 ZFS_TIME_DECODE(&vap->va_ctime, ctime);
2154 ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
2155
2156
2157 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
2158 vap->va_blksize = blksize;
2159 vap->va_bytes = nblocks << 9; /* nblocks * 512 */
2160
2161 if (zp->z_blksz == 0) {
2162 /*
2163 * Block size hasn't been set; suggest maximal I/O transfers.
2164 */
2165 vap->va_blksize = zfsvfs->z_max_blksz;
2166 }
2167
2168 ZFS_EXIT(zfsvfs);
2169 return (0);
2170 }
2171
2172 /*
2173 * Set the file attributes to the values contained in the
2174 * vattr structure.
2175 *
2176 * IN: zp - znode of file to be modified.
2177 * vap - new attribute values.
2178 * If AT_XVATTR set, then optional attrs are being set
2179 * flags - ATTR_UTIME set if non-default time values provided.
2180 * - ATTR_NOACLCHECK (CIFS context only).
2181 * cr - credentials of caller.
2182 * ct - caller context
2183 *
2184 * RETURN: 0 on success, error code on failure.
2185 *
2186 * Timestamps:
2187 * vp - ctime updated, mtime updated if size changed.
2188 */
2189 /* ARGSUSED */
2190 int
zfs_setattr(znode_t * zp,vattr_t * vap,int flags,cred_t * cr)2191 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
2192 {
2193 vnode_t *vp = ZTOV(zp);
2194 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2195 objset_t *os = zfsvfs->z_os;
2196 zilog_t *zilog;
2197 dmu_tx_t *tx;
2198 vattr_t oldva;
2199 xvattr_t tmpxvattr;
2200 uint_t mask = vap->va_mask;
2201 uint_t saved_mask = 0;
2202 uint64_t saved_mode;
2203 int trim_mask = 0;
2204 uint64_t new_mode;
2205 uint64_t new_uid, new_gid;
2206 uint64_t xattr_obj;
2207 uint64_t mtime[2], ctime[2];
2208 uint64_t projid = ZFS_INVALID_PROJID;
2209 znode_t *attrzp;
2210 int need_policy = FALSE;
2211 int err, err2;
2212 zfs_fuid_info_t *fuidp = NULL;
2213 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
2214 xoptattr_t *xoap;
2215 zfs_acl_t *aclp;
2216 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2217 boolean_t fuid_dirtied = B_FALSE;
2218 sa_bulk_attr_t bulk[7], xattr_bulk[7];
2219 int count = 0, xattr_count = 0;
2220
2221 if (mask == 0)
2222 return (0);
2223
2224 if (mask & AT_NOSET)
2225 return (SET_ERROR(EINVAL));
2226
2227 ZFS_ENTER(zfsvfs);
2228 ZFS_VERIFY_ZP(zp);
2229
2230 zilog = zfsvfs->z_log;
2231
2232 /*
2233 * Make sure that if we have ephemeral uid/gid or xvattr specified
2234 * that file system is at proper version level
2235 */
2236
2237 if (zfsvfs->z_use_fuids == B_FALSE &&
2238 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
2239 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
2240 (mask & AT_XVATTR))) {
2241 ZFS_EXIT(zfsvfs);
2242 return (SET_ERROR(EINVAL));
2243 }
2244
2245 if (mask & AT_SIZE && vp->v_type == VDIR) {
2246 ZFS_EXIT(zfsvfs);
2247 return (SET_ERROR(EISDIR));
2248 }
2249
2250 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
2251 ZFS_EXIT(zfsvfs);
2252 return (SET_ERROR(EINVAL));
2253 }
2254
2255 /*
2256 * If this is an xvattr_t, then get a pointer to the structure of
2257 * optional attributes. If this is NULL, then we have a vattr_t.
2258 */
2259 xoap = xva_getxoptattr(xvap);
2260
2261 xva_init(&tmpxvattr);
2262
2263 /*
2264 * Immutable files can only alter immutable bit and atime
2265 */
2266 if ((zp->z_pflags & ZFS_IMMUTABLE) &&
2267 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
2268 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
2269 ZFS_EXIT(zfsvfs);
2270 return (SET_ERROR(EPERM));
2271 }
2272
2273 /*
2274 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
2275 */
2276
2277 /*
2278 * Verify timestamps doesn't overflow 32 bits.
2279 * ZFS can handle large timestamps, but 32bit syscalls can't
2280 * handle times greater than 2039. This check should be removed
2281 * once large timestamps are fully supported.
2282 */
2283 if (mask & (AT_ATIME | AT_MTIME)) {
2284 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
2285 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
2286 ZFS_EXIT(zfsvfs);
2287 return (SET_ERROR(EOVERFLOW));
2288 }
2289 }
2290 if (xoap != NULL && (mask & AT_XVATTR)) {
2291 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
2292 TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
2293 ZFS_EXIT(zfsvfs);
2294 return (SET_ERROR(EOVERFLOW));
2295 }
2296
2297 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2298 if (!dmu_objset_projectquota_enabled(os) ||
2299 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) {
2300 ZFS_EXIT(zfsvfs);
2301 return (SET_ERROR(EOPNOTSUPP));
2302 }
2303
2304 projid = xoap->xoa_projid;
2305 if (unlikely(projid == ZFS_INVALID_PROJID)) {
2306 ZFS_EXIT(zfsvfs);
2307 return (SET_ERROR(EINVAL));
2308 }
2309
2310 if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
2311 projid = ZFS_INVALID_PROJID;
2312 else
2313 need_policy = TRUE;
2314 }
2315
2316 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
2317 (xoap->xoa_projinherit !=
2318 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
2319 (!dmu_objset_projectquota_enabled(os) ||
2320 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) {
2321 ZFS_EXIT(zfsvfs);
2322 return (SET_ERROR(EOPNOTSUPP));
2323 }
2324 }
2325
2326 attrzp = NULL;
2327 aclp = NULL;
2328
2329 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
2330 ZFS_EXIT(zfsvfs);
2331 return (SET_ERROR(EROFS));
2332 }
2333
2334 /*
2335 * First validate permissions
2336 */
2337
2338 if (mask & AT_SIZE) {
2339 /*
2340 * XXX - Note, we are not providing any open
2341 * mode flags here (like FNDELAY), so we may
2342 * block if there are locks present... this
2343 * should be addressed in openat().
2344 */
2345 /* XXX - would it be OK to generate a log record here? */
2346 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2347 if (err) {
2348 ZFS_EXIT(zfsvfs);
2349 return (err);
2350 }
2351 }
2352
2353 if (mask & (AT_ATIME|AT_MTIME) ||
2354 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2355 XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2356 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2357 XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2358 XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2359 XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2360 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2361 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2362 skipaclchk, cr);
2363 }
2364
2365 if (mask & (AT_UID|AT_GID)) {
2366 int idmask = (mask & (AT_UID|AT_GID));
2367 int take_owner;
2368 int take_group;
2369
2370 /*
2371 * NOTE: even if a new mode is being set,
2372 * we may clear S_ISUID/S_ISGID bits.
2373 */
2374
2375 if (!(mask & AT_MODE))
2376 vap->va_mode = zp->z_mode;
2377
2378 /*
2379 * Take ownership or chgrp to group we are a member of
2380 */
2381
2382 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
2383 take_group = (mask & AT_GID) &&
2384 zfs_groupmember(zfsvfs, vap->va_gid, cr);
2385
2386 /*
2387 * If both AT_UID and AT_GID are set then take_owner and
2388 * take_group must both be set in order to allow taking
2389 * ownership.
2390 *
2391 * Otherwise, send the check through secpolicy_vnode_setattr()
2392 *
2393 */
2394
2395 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
2396 ((idmask == AT_UID) && take_owner) ||
2397 ((idmask == AT_GID) && take_group)) {
2398 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
2399 skipaclchk, cr) == 0) {
2400 /*
2401 * Remove setuid/setgid for non-privileged users
2402 */
2403 secpolicy_setid_clear(vap, vp, cr);
2404 trim_mask = (mask & (AT_UID|AT_GID));
2405 } else {
2406 need_policy = TRUE;
2407 }
2408 } else {
2409 need_policy = TRUE;
2410 }
2411 }
2412
2413 oldva.va_mode = zp->z_mode;
2414 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
2415 if (mask & AT_XVATTR) {
2416 /*
2417 * Update xvattr mask to include only those attributes
2418 * that are actually changing.
2419 *
2420 * the bits will be restored prior to actually setting
2421 * the attributes so the caller thinks they were set.
2422 */
2423 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2424 if (xoap->xoa_appendonly !=
2425 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
2426 need_policy = TRUE;
2427 } else {
2428 XVA_CLR_REQ(xvap, XAT_APPENDONLY);
2429 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
2430 }
2431 }
2432
2433 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2434 if (xoap->xoa_projinherit !=
2435 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
2436 need_policy = TRUE;
2437 } else {
2438 XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
2439 XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT);
2440 }
2441 }
2442
2443 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2444 if (xoap->xoa_nounlink !=
2445 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
2446 need_policy = TRUE;
2447 } else {
2448 XVA_CLR_REQ(xvap, XAT_NOUNLINK);
2449 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
2450 }
2451 }
2452
2453 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2454 if (xoap->xoa_immutable !=
2455 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
2456 need_policy = TRUE;
2457 } else {
2458 XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
2459 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
2460 }
2461 }
2462
2463 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2464 if (xoap->xoa_nodump !=
2465 ((zp->z_pflags & ZFS_NODUMP) != 0)) {
2466 need_policy = TRUE;
2467 } else {
2468 XVA_CLR_REQ(xvap, XAT_NODUMP);
2469 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
2470 }
2471 }
2472
2473 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2474 if (xoap->xoa_av_modified !=
2475 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
2476 need_policy = TRUE;
2477 } else {
2478 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
2479 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
2480 }
2481 }
2482
2483 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2484 if ((vp->v_type != VREG &&
2485 xoap->xoa_av_quarantined) ||
2486 xoap->xoa_av_quarantined !=
2487 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
2488 need_policy = TRUE;
2489 } else {
2490 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
2491 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
2492 }
2493 }
2494
2495 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2496 ZFS_EXIT(zfsvfs);
2497 return (SET_ERROR(EPERM));
2498 }
2499
2500 if (need_policy == FALSE &&
2501 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
2502 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
2503 need_policy = TRUE;
2504 }
2505 }
2506
2507 if (mask & AT_MODE) {
2508 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
2509 err = secpolicy_setid_setsticky_clear(vp, vap,
2510 &oldva, cr);
2511 if (err) {
2512 ZFS_EXIT(zfsvfs);
2513 return (err);
2514 }
2515 trim_mask |= AT_MODE;
2516 } else {
2517 need_policy = TRUE;
2518 }
2519 }
2520
2521 if (need_policy) {
2522 /*
2523 * If trim_mask is set then take ownership
2524 * has been granted or write_acl is present and user
2525 * has the ability to modify mode. In that case remove
2526 * UID|GID and or MODE from mask so that
2527 * secpolicy_vnode_setattr() doesn't revoke it.
2528 */
2529
2530 if (trim_mask) {
2531 saved_mask = vap->va_mask;
2532 vap->va_mask &= ~trim_mask;
2533 if (trim_mask & AT_MODE) {
2534 /*
2535 * Save the mode, as secpolicy_vnode_setattr()
2536 * will overwrite it with ova.va_mode.
2537 */
2538 saved_mode = vap->va_mode;
2539 }
2540 }
2541 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
2542 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
2543 if (err) {
2544 ZFS_EXIT(zfsvfs);
2545 return (err);
2546 }
2547
2548 if (trim_mask) {
2549 vap->va_mask |= saved_mask;
2550 if (trim_mask & AT_MODE) {
2551 /*
2552 * Recover the mode after
2553 * secpolicy_vnode_setattr().
2554 */
2555 vap->va_mode = saved_mode;
2556 }
2557 }
2558 }
2559
2560 /*
2561 * secpolicy_vnode_setattr, or take ownership may have
2562 * changed va_mask
2563 */
2564 mask = vap->va_mask;
2565
2566 if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) {
2567 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
2568 &xattr_obj, sizeof (xattr_obj));
2569
2570 if (err == 0 && xattr_obj) {
2571 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
2572 if (err == 0) {
2573 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
2574 if (err != 0)
2575 vrele(ZTOV(attrzp));
2576 }
2577 if (err)
2578 goto out2;
2579 }
2580 if (mask & AT_UID) {
2581 new_uid = zfs_fuid_create(zfsvfs,
2582 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
2583 if (new_uid != zp->z_uid &&
2584 zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
2585 new_uid)) {
2586 if (attrzp)
2587 vput(ZTOV(attrzp));
2588 err = SET_ERROR(EDQUOT);
2589 goto out2;
2590 }
2591 }
2592
2593 if (mask & AT_GID) {
2594 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
2595 cr, ZFS_GROUP, &fuidp);
2596 if (new_gid != zp->z_gid &&
2597 zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
2598 new_gid)) {
2599 if (attrzp)
2600 vput(ZTOV(attrzp));
2601 err = SET_ERROR(EDQUOT);
2602 goto out2;
2603 }
2604 }
2605
2606 if (projid != ZFS_INVALID_PROJID &&
2607 zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
2608 if (attrzp)
2609 vput(ZTOV(attrzp));
2610 err = SET_ERROR(EDQUOT);
2611 goto out2;
2612 }
2613 }
2614 tx = dmu_tx_create(os);
2615
2616 if (mask & AT_MODE) {
2617 uint64_t pmode = zp->z_mode;
2618 uint64_t acl_obj;
2619 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
2620
2621 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
2622 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
2623 err = SET_ERROR(EPERM);
2624 goto out;
2625 }
2626
2627 if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
2628 goto out;
2629
2630 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
2631 /*
2632 * Are we upgrading ACL from old V0 format
2633 * to V1 format?
2634 */
2635 if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
2636 zfs_znode_acl_version(zp) ==
2637 ZFS_ACL_VERSION_INITIAL) {
2638 dmu_tx_hold_free(tx, acl_obj, 0,
2639 DMU_OBJECT_END);
2640 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2641 0, aclp->z_acl_bytes);
2642 } else {
2643 dmu_tx_hold_write(tx, acl_obj, 0,
2644 aclp->z_acl_bytes);
2645 }
2646 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
2647 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2648 0, aclp->z_acl_bytes);
2649 }
2650 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2651 } else {
2652 if (((mask & AT_XVATTR) &&
2653 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
2654 (projid != ZFS_INVALID_PROJID &&
2655 !(zp->z_pflags & ZFS_PROJID)))
2656 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2657 else
2658 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2659 }
2660
2661 if (attrzp) {
2662 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
2663 }
2664
2665 fuid_dirtied = zfsvfs->z_fuid_dirty;
2666 if (fuid_dirtied)
2667 zfs_fuid_txhold(zfsvfs, tx);
2668
2669 zfs_sa_upgrade_txholds(tx, zp);
2670
2671 err = dmu_tx_assign(tx, TXG_WAIT);
2672 if (err)
2673 goto out;
2674
2675 count = 0;
2676 /*
2677 * Set each attribute requested.
2678 * We group settings according to the locks they need to acquire.
2679 *
2680 * Note: you cannot set ctime directly, although it will be
2681 * updated as a side-effect of calling this function.
2682 */
2683
2684 if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
2685 /*
2686 * For the existed object that is upgraded from old system,
2687 * its on-disk layout has no slot for the project ID attribute.
2688 * But quota accounting logic needs to access related slots by
2689 * offset directly. So we need to adjust old objects' layout
2690 * to make the project ID to some unified and fixed offset.
2691 */
2692 if (attrzp)
2693 err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
2694 if (err == 0)
2695 err = sa_add_projid(zp->z_sa_hdl, tx, projid);
2696
2697 if (unlikely(err == EEXIST))
2698 err = 0;
2699 else if (err != 0)
2700 goto out;
2701 else
2702 projid = ZFS_INVALID_PROJID;
2703 }
2704
2705 if (mask & (AT_UID|AT_GID|AT_MODE))
2706 mutex_enter(&zp->z_acl_lock);
2707
2708 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
2709 &zp->z_pflags, sizeof (zp->z_pflags));
2710
2711 if (attrzp) {
2712 if (mask & (AT_UID|AT_GID|AT_MODE))
2713 mutex_enter(&attrzp->z_acl_lock);
2714 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2715 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
2716 sizeof (attrzp->z_pflags));
2717 if (projid != ZFS_INVALID_PROJID) {
2718 attrzp->z_projid = projid;
2719 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2720 SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
2721 sizeof (attrzp->z_projid));
2722 }
2723 }
2724
2725 if (mask & (AT_UID|AT_GID)) {
2726
2727 if (mask & AT_UID) {
2728 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
2729 &new_uid, sizeof (new_uid));
2730 zp->z_uid = new_uid;
2731 if (attrzp) {
2732 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2733 SA_ZPL_UID(zfsvfs), NULL, &new_uid,
2734 sizeof (new_uid));
2735 attrzp->z_uid = new_uid;
2736 }
2737 }
2738
2739 if (mask & AT_GID) {
2740 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
2741 NULL, &new_gid, sizeof (new_gid));
2742 zp->z_gid = new_gid;
2743 if (attrzp) {
2744 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2745 SA_ZPL_GID(zfsvfs), NULL, &new_gid,
2746 sizeof (new_gid));
2747 attrzp->z_gid = new_gid;
2748 }
2749 }
2750 if (!(mask & AT_MODE)) {
2751 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
2752 NULL, &new_mode, sizeof (new_mode));
2753 new_mode = zp->z_mode;
2754 }
2755 err = zfs_acl_chown_setattr(zp);
2756 ASSERT(err == 0);
2757 if (attrzp) {
2758 err = zfs_acl_chown_setattr(attrzp);
2759 ASSERT(err == 0);
2760 }
2761 }
2762
2763 if (mask & AT_MODE) {
2764 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
2765 &new_mode, sizeof (new_mode));
2766 zp->z_mode = new_mode;
2767 ASSERT3U((uintptr_t)aclp, !=, 0);
2768 err = zfs_aclset_common(zp, aclp, cr, tx);
2769 ASSERT0(err);
2770 if (zp->z_acl_cached)
2771 zfs_acl_free(zp->z_acl_cached);
2772 zp->z_acl_cached = aclp;
2773 aclp = NULL;
2774 }
2775
2776
2777 if (mask & AT_ATIME) {
2778 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
2779 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
2780 &zp->z_atime, sizeof (zp->z_atime));
2781 }
2782
2783 if (mask & AT_MTIME) {
2784 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
2785 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
2786 mtime, sizeof (mtime));
2787 }
2788
2789 if (projid != ZFS_INVALID_PROJID) {
2790 zp->z_projid = projid;
2791 SA_ADD_BULK_ATTR(bulk, count,
2792 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
2793 sizeof (zp->z_projid));
2794 }
2795
2796 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
2797 if (mask & AT_SIZE && !(mask & AT_MTIME)) {
2798 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
2799 NULL, mtime, sizeof (mtime));
2800 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2801 &ctime, sizeof (ctime));
2802 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
2803 } else if (mask != 0) {
2804 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2805 &ctime, sizeof (ctime));
2806 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
2807 if (attrzp) {
2808 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2809 SA_ZPL_CTIME(zfsvfs), NULL,
2810 &ctime, sizeof (ctime));
2811 zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
2812 mtime, ctime);
2813 }
2814 }
2815
2816 /*
2817 * Do this after setting timestamps to prevent timestamp
2818 * update from toggling bit
2819 */
2820
2821 if (xoap && (mask & AT_XVATTR)) {
2822
2823 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
2824 xoap->xoa_createtime = vap->va_birthtime;
2825 /*
2826 * restore trimmed off masks
2827 * so that return masks can be set for caller.
2828 */
2829
2830 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
2831 XVA_SET_REQ(xvap, XAT_APPENDONLY);
2832 }
2833 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
2834 XVA_SET_REQ(xvap, XAT_NOUNLINK);
2835 }
2836 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
2837 XVA_SET_REQ(xvap, XAT_IMMUTABLE);
2838 }
2839 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
2840 XVA_SET_REQ(xvap, XAT_NODUMP);
2841 }
2842 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
2843 XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
2844 }
2845 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
2846 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
2847 }
2848 if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) {
2849 XVA_SET_REQ(xvap, XAT_PROJINHERIT);
2850 }
2851
2852 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
2853 ASSERT(vp->v_type == VREG);
2854
2855 zfs_xvattr_set(zp, xvap, tx);
2856 }
2857
2858 if (fuid_dirtied)
2859 zfs_fuid_sync(zfsvfs, tx);
2860
2861 if (mask != 0)
2862 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
2863
2864 if (mask & (AT_UID|AT_GID|AT_MODE))
2865 mutex_exit(&zp->z_acl_lock);
2866
2867 if (attrzp) {
2868 if (mask & (AT_UID|AT_GID|AT_MODE))
2869 mutex_exit(&attrzp->z_acl_lock);
2870 }
2871 out:
2872 if (err == 0 && attrzp) {
2873 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
2874 xattr_count, tx);
2875 ASSERT(err2 == 0);
2876 }
2877
2878 if (attrzp)
2879 vput(ZTOV(attrzp));
2880
2881 if (aclp)
2882 zfs_acl_free(aclp);
2883
2884 if (fuidp) {
2885 zfs_fuid_info_free(fuidp);
2886 fuidp = NULL;
2887 }
2888
2889 if (err) {
2890 dmu_tx_abort(tx);
2891 } else {
2892 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
2893 dmu_tx_commit(tx);
2894 }
2895
2896 out2:
2897 if (os->os_sync == ZFS_SYNC_ALWAYS)
2898 zil_commit(zilog, 0);
2899
2900 ZFS_EXIT(zfsvfs);
2901 return (err);
2902 }
2903
/*
 * We acquire all but fdvp locks using non-blocking acquisitions.  If we
 * fail to acquire any lock in the path we will drop all held locks,
 * acquire the new lock in a blocking fashion, and then release it and
 * restart the rename.  This acquire/release step ensures that we do not
 * spin on a lock waiting for release.  On error release all vnode locks
 * and decrement references the way tmpfs_rename() would do.
 *
 *	IN:	sdvp	- source directory vnode; locked here (blocking).
 *		tdvp	- target directory vnode; unlocked on entry by this
 *			  function and then re-locked in the proper order.
 *		scnp	- source component name (name pointer and flags).
 *		tcnp	- target component name (name pointer and flags).
 *
 *	IN/OUT:	svpp	- source vnode; replaced by the vnode that the
 *			  source name resolves to after re-lookup.
 *		tvpp	- target vnode or NULL; replaced by the re-looked-up
 *			  target (NULL if the target name does not exist).
 *
 *	RETURN:	0 with sdvp, tdvp, *svpp and (if non-NULL) *tvpp locked
 *		exclusively; otherwise an error code with every lock taken
 *		here released.
 */
static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
    struct vnode *tdvp, struct vnode **tvpp,
    const struct componentname *scnp, const struct componentname *tcnp)
{
	zfsvfs_t *zfsvfs;
	struct vnode *nvp, *svp, *tvp;
	znode_t *sdzp, *tdzp, *szp, *tzp;
	const char *snm = scnp->cn_nameptr;
	const char *tnm = tcnp->cn_nameptr;
	int error;

	/*
	 * Drop the target-side locks first so that everything can be
	 * re-acquired starting from sdvp.  NOTE(review): presumably the
	 * caller enters with tdvp and *tvpp locked -- confirm against caller.
	 */
	VOP_UNLOCK1(tdvp);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK1(*tvpp);

relock:
	/* The source directory is the only lock taken in blocking mode. */
	error = vn_lock(sdvp, LK_EXCLUSIVE);
	if (error)
		goto out;
	sdzp = VTOZ(sdvp);

	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		if (error != EBUSY)
			goto out;
		/*
		 * Contended: wait for tdvp with nothing else held, then
		 * release it again and restart from the top.
		 */
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto out;
		VOP_UNLOCK1(tdvp);
		goto relock;
	}
	tdzp = VTOZ(tdvp);

	/*
	 * Before using sdzp and tdzp we must ensure that they are live.
	 * As a porting legacy from illumos we have two things to worry
	 * about.  One is typical for FreeBSD and it is that the vnode is
	 * not reclaimed (doomed).  The other is that the znode is live.
	 * The current code can invalidate the znode without acquiring the
	 * corresponding vnode lock if the object represented by the znode
	 * and vnode is no longer valid after a rollback or receive operation.
	 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
	 * that protects the znodes from the invalidation.
	 */
	zfsvfs = sdzp->z_zfsvfs;
	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
	ZFS_ENTER(zfsvfs);

	/*
	 * We can not use ZFS_VERIFY_ZP() here because it could directly return
	 * bypassing the cleanup code in the case of an error.
	 */
	if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
		ZFS_EXIT(zfsvfs);
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		error = SET_ERROR(EIO);
		goto out;
	}

	/*
	 * Re-resolve svp to be certain it still exists and fetch the
	 * correct vnode.
	 */
	error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS);
	if (error != 0) {
		/* Source entry invalid or not there. */
		ZFS_EXIT(zfsvfs);
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		/* "." and ".." as a source are reported as EINVAL. */
		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
			error = SET_ERROR(EINVAL);
		goto out;
	}
	svp = ZTOV(szp);

	/*
	 * Re-resolve tvp, if it disappeared we just carry on.
	 */
	error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0);
	if (error != 0) {
		ZFS_EXIT(zfsvfs);
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		/* Drop the reference obtained by the source lookup above. */
		vrele(svp);
		if ((tcnp->cn_flags & ISDOTDOT) != 0)
			error = SET_ERROR(EINVAL);
		goto out;
	}
	if (tzp != NULL)
		tvp = ZTOV(tzp);
	else
		tvp = NULL;

	/*
	 * At present the vnode locks must be acquired before z_teardown_lock,
	 * although it would be more logical to use the opposite order.
	 */
	ZFS_EXIT(zfsvfs);

	/*
	 * Now try acquire locks on svp and tvp.
	 */
	nvp = svp;
	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		/* tvp will be looked up again after the restart. */
		if (tvp != NULL)
			vrele(tvp);
		if (error != EBUSY) {
			vrele(nvp);
			goto out;
		}
		error = vn_lock(nvp, LK_EXCLUSIVE);
		if (error != 0) {
			vrele(nvp);
			goto out;
		}
		VOP_UNLOCK1(nvp);
		/*
		 * Concurrent rename race.
		 * XXX ?
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = SET_ERROR(EINVAL);
			goto out;
		}
		/* Hand the freshly resolved source vnode to the caller. */
		vrele(*svpp);
		*svpp = nvp;
		goto relock;
	}
	/* Source locked: swap the caller's reference for the new one. */
	vrele(*svpp);
	*svpp = nvp;

	/* Discard the caller's old target; it is replaced (or cleared). */
	if (*tvpp != NULL)
		vrele(*tvpp);
	*tvpp = NULL;
	if (tvp != NULL) {
		nvp = tvp;
		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
		if (error != 0) {
			VOP_UNLOCK1(sdvp);
			VOP_UNLOCK1(tdvp);
			VOP_UNLOCK1(*svpp);
			if (error != EBUSY) {
				vrele(nvp);
				goto out;
			}
			error = vn_lock(nvp, LK_EXCLUSIVE);
			if (error != 0) {
				vrele(nvp);
				goto out;
			}
			/* Unlock and release; the restart looks it up again. */
			vput(nvp);
			goto relock;
		}
		*tvpp = nvp;
	}

	return (0);

out:
	return (error);
}
3081
3082 /*
3083 * Note that we must use VRELE_ASYNC in this function as it walks
3084 * up the directory tree and vrele may need to acquire an exclusive
3085 * lock if a last reference to a vnode is dropped.
3086 */
3087 static int
zfs_rename_check(znode_t * szp,znode_t * sdzp,znode_t * tdzp)3088 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
3089 {
3090 zfsvfs_t *zfsvfs;
3091 znode_t *zp, *zp1;
3092 uint64_t parent;
3093 int error;
3094
3095 zfsvfs = tdzp->z_zfsvfs;
3096 if (tdzp == szp)
3097 return (SET_ERROR(EINVAL));
3098 if (tdzp == sdzp)
3099 return (0);
3100 if (tdzp->z_id == zfsvfs->z_root)
3101 return (0);
3102 zp = tdzp;
3103 for (;;) {
3104 ASSERT(!zp->z_unlinked);
3105 if ((error = sa_lookup(zp->z_sa_hdl,
3106 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
3107 break;
3108
3109 if (parent == szp->z_id) {
3110 error = SET_ERROR(EINVAL);
3111 break;
3112 }
3113 if (parent == zfsvfs->z_root)
3114 break;
3115 if (parent == sdzp->z_id)
3116 break;
3117
3118 error = zfs_zget(zfsvfs, parent, &zp1);
3119 if (error != 0)
3120 break;
3121
3122 if (zp != tdzp)
3123 VN_RELE_ASYNC(ZTOV(zp),
3124 dsl_pool_zrele_taskq(
3125 dmu_objset_pool(zfsvfs->z_os)));
3126 zp = zp1;
3127 }
3128
3129 if (error == ENOTDIR)
3130 panic("checkpath: .. not a directory\n");
3131 if (zp != tdzp)
3132 VN_RELE_ASYNC(ZTOV(zp),
3133 dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
3134 return (error);
3135 }
3136
3137 #if __FreeBSD_version < 1300124
3138 static void
cache_vop_rename(struct vnode * fdvp,struct vnode * fvp,struct vnode * tdvp,struct vnode * tvp,struct componentname * fcnp,struct componentname * tcnp)3139 cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
3140 struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp)
3141 {
3142
3143 cache_purge(fvp);
3144 if (tvp != NULL)
3145 cache_purge(tvp);
3146 cache_purge_negative(tdvp);
3147 }
3148 #endif
3149
3150 /*
3151 * Move an entry from the provided source directory to the target
3152 * directory. Change the entry name as indicated.
3153 *
3154 * IN: sdvp - Source directory containing the "old entry".
3155 * snm - Old entry name.
3156 * tdvp - Target directory to contain the "new entry".
3157 * tnm - New entry name.
3158 * cr - credentials of caller.
3159 * ct - caller context
3160 * flags - case flags
3161 *
3162 * RETURN: 0 on success, error code on failure.
3163 *
3164 * Timestamps:
3165 * sdvp,tdvp - ctime|mtime updated
3166 */
3167 /*ARGSUSED*/
3168 static int
zfs_rename_(vnode_t * sdvp,vnode_t ** svpp,struct componentname * scnp,vnode_t * tdvp,vnode_t ** tvpp,struct componentname * tcnp,cred_t * cr,int log)3169 zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3170 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3171 cred_t *cr, int log)
3172 {
3173 zfsvfs_t *zfsvfs;
3174 znode_t *sdzp, *tdzp, *szp, *tzp;
3175 zilog_t *zilog = NULL;
3176 dmu_tx_t *tx;
3177 const char *snm = scnp->cn_nameptr;
3178 const char *tnm = tcnp->cn_nameptr;
3179 int error = 0;
3180 bool want_seqc_end __maybe_unused = false;
3181
3182 /* Reject renames across filesystems. */
3183 if ((*svpp)->v_mount != tdvp->v_mount ||
3184 ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
3185 error = SET_ERROR(EXDEV);
3186 goto out;
3187 }
3188
3189 if (zfsctl_is_node(tdvp)) {
3190 error = SET_ERROR(EXDEV);
3191 goto out;
3192 }
3193
3194 /*
3195 * Lock all four vnodes to ensure safety and semantics of renaming.
3196 */
3197 error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
3198 if (error != 0) {
3199 /* no vnodes are locked in the case of error here */
3200 return (error);
3201 }
3202
3203 tdzp = VTOZ(tdvp);
3204 sdzp = VTOZ(sdvp);
3205 zfsvfs = tdzp->z_zfsvfs;
3206 zilog = zfsvfs->z_log;
3207
3208 /*
3209 * After we re-enter ZFS_ENTER() we will have to revalidate all
3210 * znodes involved.
3211 */
3212 ZFS_ENTER(zfsvfs);
3213
3214 if (zfsvfs->z_utf8 && u8_validate(tnm,
3215 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3216 error = SET_ERROR(EILSEQ);
3217 goto unlockout;
3218 }
3219
3220 /* If source and target are the same file, there is nothing to do. */
3221 if ((*svpp) == (*tvpp)) {
3222 error = 0;
3223 goto unlockout;
3224 }
3225
3226 if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
3227 ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
3228 (*tvpp)->v_mountedhere != NULL)) {
3229 error = SET_ERROR(EXDEV);
3230 goto unlockout;
3231 }
3232
3233 /*
3234 * We can not use ZFS_VERIFY_ZP() here because it could directly return
3235 * bypassing the cleanup code in the case of an error.
3236 */
3237 if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
3238 error = SET_ERROR(EIO);
3239 goto unlockout;
3240 }
3241
3242 szp = VTOZ(*svpp);
3243 tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
3244 if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) {
3245 error = SET_ERROR(EIO);
3246 goto unlockout;
3247 }
3248
3249 /*
3250 * This is to prevent the creation of links into attribute space
3251 * by renaming a linked file into/outof an attribute directory.
3252 * See the comment in zfs_link() for why this is considered bad.
3253 */
3254 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
3255 error = SET_ERROR(EINVAL);
3256 goto unlockout;
3257 }
3258
3259 /*
3260 * If we are using project inheritance, means if the directory has
3261 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3262 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
3263 * such case, we only allow renames into our tree when the project
3264 * IDs are the same.
3265 */
3266 if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3267 tdzp->z_projid != szp->z_projid) {
3268 error = SET_ERROR(EXDEV);
3269 goto unlockout;
3270 }
3271
3272 /*
3273 * Must have write access at the source to remove the old entry
3274 * and write access at the target to create the new entry.
3275 * Note that if target and source are the same, this can be
3276 * done in a single check.
3277 */
3278 if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
3279 goto unlockout;
3280
3281 if ((*svpp)->v_type == VDIR) {
3282 /*
3283 * Avoid ".", "..", and aliases of "." for obvious reasons.
3284 */
3285 if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
3286 sdzp == szp ||
3287 (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
3288 error = EINVAL;
3289 goto unlockout;
3290 }
3291
3292 /*
3293 * Check to make sure rename is valid.
3294 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
3295 */
3296 if ((error = zfs_rename_check(szp, sdzp, tdzp)))
3297 goto unlockout;
3298 }
3299
3300 /*
3301 * Does target exist?
3302 */
3303 if (tzp) {
3304 /*
3305 * Source and target must be the same type.
3306 */
3307 if ((*svpp)->v_type == VDIR) {
3308 if ((*tvpp)->v_type != VDIR) {
3309 error = SET_ERROR(ENOTDIR);
3310 goto unlockout;
3311 } else {
3312 cache_purge(tdvp);
3313 if (sdvp != tdvp)
3314 cache_purge(sdvp);
3315 }
3316 } else {
3317 if ((*tvpp)->v_type == VDIR) {
3318 error = SET_ERROR(EISDIR);
3319 goto unlockout;
3320 }
3321 }
3322 }
3323
3324 vn_seqc_write_begin(*svpp);
3325 vn_seqc_write_begin(sdvp);
3326 if (*tvpp != NULL)
3327 vn_seqc_write_begin(*tvpp);
3328 if (tdvp != *tvpp)
3329 vn_seqc_write_begin(tdvp);
3330 #if __FreeBSD_version >= 1300102
3331 want_seqc_end = true;
3332 #endif
3333 vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
3334 if (tzp)
3335 vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
3336
3337 /*
3338 * notify the target directory if it is not the same
3339 * as source directory.
3340 */
3341 if (tdvp != sdvp) {
3342 vnevent_rename_dest_dir(tdvp, ct);
3343 }
3344
3345 tx = dmu_tx_create(zfsvfs->z_os);
3346 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3347 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3348 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3349 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3350 if (sdzp != tdzp) {
3351 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3352 zfs_sa_upgrade_txholds(tx, tdzp);
3353 }
3354 if (tzp) {
3355 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3356 zfs_sa_upgrade_txholds(tx, tzp);
3357 }
3358
3359 zfs_sa_upgrade_txholds(tx, szp);
3360 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3361 error = dmu_tx_assign(tx, TXG_WAIT);
3362 if (error) {
3363 dmu_tx_abort(tx);
3364 goto unlockout;
3365 }
3366
3367
3368 if (tzp) /* Attempt to remove the existing target */
3369 error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
3370
3371 if (error == 0) {
3372 error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
3373 if (error == 0) {
3374 szp->z_pflags |= ZFS_AV_MODIFIED;
3375
3376 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3377 (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3378 ASSERT0(error);
3379
3380 error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
3381 NULL);
3382 if (error == 0) {
3383 zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
3384 snm, tdzp, tnm, szp);
3385
3386 /*
3387 * Update path information for the target vnode
3388 */
3389 vn_renamepath(tdvp, *svpp, tnm, strlen(tnm));
3390 } else {
3391 /*
3392 * At this point, we have successfully created
3393 * the target name, but have failed to remove
3394 * the source name. Since the create was done
3395 * with the ZRENAMING flag, there are
3396 * complications; for one, the link count is
3397 * wrong. The easiest way to deal with this
3398 * is to remove the newly created target, and
3399 * return the original error. This must
3400 * succeed; fortunately, it is very unlikely to
3401 * fail, since we just created it.
3402 */
3403 VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx,
3404 ZRENAMING, NULL), ==, 0);
3405 }
3406 }
3407 if (error == 0) {
3408 cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
3409 }
3410 }
3411
3412 dmu_tx_commit(tx);
3413
3414 unlockout: /* all 4 vnodes are locked, ZFS_ENTER called */
3415 ZFS_EXIT(zfsvfs);
3416 if (want_seqc_end) {
3417 vn_seqc_write_end(*svpp);
3418 vn_seqc_write_end(sdvp);
3419 if (*tvpp != NULL)
3420 vn_seqc_write_end(*tvpp);
3421 if (tdvp != *tvpp)
3422 vn_seqc_write_end(tdvp);
3423 want_seqc_end = false;
3424 }
3425 VOP_UNLOCK1(*svpp);
3426 VOP_UNLOCK1(sdvp);
3427
3428 out: /* original two vnodes are locked */
3429 MPASS(!want_seqc_end);
3430 if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3431 zil_commit(zilog, 0);
3432
3433 if (*tvpp != NULL)
3434 VOP_UNLOCK1(*tvpp);
3435 if (tdvp != *tvpp)
3436 VOP_UNLOCK1(tdvp);
3437 return (error);
3438 }
3439
3440 int
zfs_rename(znode_t * sdzp,const char * sname,znode_t * tdzp,const char * tname,cred_t * cr,int flags)3441 zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
3442 cred_t *cr, int flags)
3443 {
3444 struct componentname scn, tcn;
3445 vnode_t *sdvp, *tdvp;
3446 vnode_t *svp, *tvp;
3447 int error;
3448 svp = tvp = NULL;
3449
3450 sdvp = ZTOV(sdzp);
3451 tdvp = ZTOV(tdzp);
3452 error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
3453 if (sdzp->z_zfsvfs->z_replay == B_FALSE)
3454 VOP_UNLOCK1(sdvp);
3455 if (error != 0)
3456 goto fail;
3457 VOP_UNLOCK1(svp);
3458
3459 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
3460 error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
3461 if (error == EJUSTRETURN)
3462 tvp = NULL;
3463 else if (error != 0) {
3464 VOP_UNLOCK1(tdvp);
3465 goto fail;
3466 }
3467
3468 error = zfs_rename_(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr, 0);
3469 fail:
3470 if (svp != NULL)
3471 vrele(svp);
3472 if (tvp != NULL)
3473 vrele(tvp);
3474
3475 return (error);
3476 }
3477
3478 /*
3479 * Insert the indicated symbolic reference entry into the directory.
3480 *
3481 * IN: dvp - Directory to contain new symbolic link.
3482 * link - Name for new symlink entry.
3483 * vap - Attributes of new entry.
3484 * cr - credentials of caller.
3485 * ct - caller context
3486 * flags - case flags
3487 *
3488 * RETURN: 0 on success, error code on failure.
3489 *
3490 * Timestamps:
3491 * dvp - ctime|mtime updated
3492 */
3493 /*ARGSUSED*/
3494 int
zfs_symlink(znode_t * dzp,const char * name,vattr_t * vap,const char * link,znode_t ** zpp,cred_t * cr,int flags)3495 zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
3496 const char *link, znode_t **zpp, cred_t *cr, int flags)
3497 {
3498 znode_t *zp;
3499 dmu_tx_t *tx;
3500 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
3501 zilog_t *zilog;
3502 uint64_t len = strlen(link);
3503 int error;
3504 zfs_acl_ids_t acl_ids;
3505 boolean_t fuid_dirtied;
3506 uint64_t txtype = TX_SYMLINK;
3507
3508 ASSERT(vap->va_type == VLNK);
3509
3510 ZFS_ENTER(zfsvfs);
3511 ZFS_VERIFY_ZP(dzp);
3512 zilog = zfsvfs->z_log;
3513
3514 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
3515 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3516 ZFS_EXIT(zfsvfs);
3517 return (SET_ERROR(EILSEQ));
3518 }
3519
3520 if (len > MAXPATHLEN) {
3521 ZFS_EXIT(zfsvfs);
3522 return (SET_ERROR(ENAMETOOLONG));
3523 }
3524
3525 if ((error = zfs_acl_ids_create(dzp, 0,
3526 vap, cr, NULL, &acl_ids)) != 0) {
3527 ZFS_EXIT(zfsvfs);
3528 return (error);
3529 }
3530
3531 /*
3532 * Attempt to lock directory; fail if entry already exists.
3533 */
3534 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
3535 if (error) {
3536 zfs_acl_ids_free(&acl_ids);
3537 ZFS_EXIT(zfsvfs);
3538 return (error);
3539 }
3540
3541 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
3542 zfs_acl_ids_free(&acl_ids);
3543 ZFS_EXIT(zfsvfs);
3544 return (error);
3545 }
3546
3547 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
3548 0 /* projid */)) {
3549 zfs_acl_ids_free(&acl_ids);
3550 ZFS_EXIT(zfsvfs);
3551 return (SET_ERROR(EDQUOT));
3552 }
3553
3554 getnewvnode_reserve_();
3555 tx = dmu_tx_create(zfsvfs->z_os);
3556 fuid_dirtied = zfsvfs->z_fuid_dirty;
3557 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
3558 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
3559 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
3560 ZFS_SA_BASE_ATTR_SIZE + len);
3561 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
3562 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3563 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
3564 acl_ids.z_aclp->z_acl_bytes);
3565 }
3566 if (fuid_dirtied)
3567 zfs_fuid_txhold(zfsvfs, tx);
3568 error = dmu_tx_assign(tx, TXG_WAIT);
3569 if (error) {
3570 zfs_acl_ids_free(&acl_ids);
3571 dmu_tx_abort(tx);
3572 getnewvnode_drop_reserve();
3573 ZFS_EXIT(zfsvfs);
3574 return (error);
3575 }
3576
3577 /*
3578 * Create a new object for the symlink.
3579 * for version 4 ZPL datsets the symlink will be an SA attribute
3580 */
3581 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
3582
3583 if (fuid_dirtied)
3584 zfs_fuid_sync(zfsvfs, tx);
3585
3586 if (zp->z_is_sa)
3587 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
3588 __DECONST(void *, link), len, tx);
3589 else
3590 zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);
3591
3592 zp->z_size = len;
3593 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
3594 &zp->z_size, sizeof (zp->z_size), tx);
3595 /*
3596 * Insert the new object into the directory.
3597 */
3598 (void) zfs_link_create(dzp, name, zp, tx, ZNEW);
3599
3600 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
3601 *zpp = zp;
3602
3603 zfs_acl_ids_free(&acl_ids);
3604
3605 dmu_tx_commit(tx);
3606
3607 getnewvnode_drop_reserve();
3608
3609 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3610 zil_commit(zilog, 0);
3611
3612 ZFS_EXIT(zfsvfs);
3613 return (error);
3614 }
3615
3616 /*
3617 * Return, in the buffer contained in the provided uio structure,
3618 * the symbolic path referred to by vp.
3619 *
3620 * IN: vp - vnode of symbolic link.
3621 * uio - structure to contain the link path.
3622 * cr - credentials of caller.
3623 * ct - caller context
3624 *
3625 * OUT: uio - structure containing the link path.
3626 *
3627 * RETURN: 0 on success, error code on failure.
3628 *
3629 * Timestamps:
3630 * vp - atime updated
3631 */
3632 /* ARGSUSED */
3633 static int
zfs_readlink(vnode_t * vp,uio_t * uio,cred_t * cr,caller_context_t * ct)3634 zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct)
3635 {
3636 znode_t *zp = VTOZ(vp);
3637 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3638 int error;
3639
3640 ZFS_ENTER(zfsvfs);
3641 ZFS_VERIFY_ZP(zp);
3642
3643 if (zp->z_is_sa)
3644 error = sa_lookup_uio(zp->z_sa_hdl,
3645 SA_ZPL_SYMLINK(zfsvfs), uio);
3646 else
3647 error = zfs_sa_readlink(zp, uio);
3648
3649 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
3650
3651 ZFS_EXIT(zfsvfs);
3652 return (error);
3653 }
3654
3655 /*
3656 * Insert a new entry into directory tdvp referencing svp.
3657 *
3658 * IN: tdvp - Directory to contain new entry.
3659 * svp - vnode of new entry.
3660 * name - name of new entry.
3661 * cr - credentials of caller.
3662 *
3663 * RETURN: 0 on success, error code on failure.
3664 *
3665 * Timestamps:
3666 * tdvp - ctime|mtime updated
3667 * svp - ctime updated
3668 */
3669 /* ARGSUSED */
3670 int
zfs_link(znode_t * tdzp,znode_t * szp,const char * name,cred_t * cr,int flags)3671 zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
3672 int flags)
3673 {
3674 znode_t *tzp;
3675 zfsvfs_t *zfsvfs = tdzp->z_zfsvfs;
3676 zilog_t *zilog;
3677 dmu_tx_t *tx;
3678 int error;
3679 uint64_t parent;
3680 uid_t owner;
3681
3682 ASSERT(ZTOV(tdzp)->v_type == VDIR);
3683
3684 ZFS_ENTER(zfsvfs);
3685 ZFS_VERIFY_ZP(tdzp);
3686 zilog = zfsvfs->z_log;
3687
3688 /*
3689 * POSIX dictates that we return EPERM here.
3690 * Better choices include ENOTSUP or EISDIR.
3691 */
3692 if (ZTOV(szp)->v_type == VDIR) {
3693 ZFS_EXIT(zfsvfs);
3694 return (SET_ERROR(EPERM));
3695 }
3696
3697 ZFS_VERIFY_ZP(szp);
3698
3699 /*
3700 * If we are using project inheritance, means if the directory has
3701 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3702 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
3703 * such case, we only allow hard link creation in our tree when the
3704 * project IDs are the same.
3705 */
3706 if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3707 tdzp->z_projid != szp->z_projid) {
3708 ZFS_EXIT(zfsvfs);
3709 return (SET_ERROR(EXDEV));
3710 }
3711
3712 if (szp->z_pflags & (ZFS_APPENDONLY |
3713 ZFS_IMMUTABLE | ZFS_READONLY)) {
3714 ZFS_EXIT(zfsvfs);
3715 return (SET_ERROR(EPERM));
3716 }
3717
3718 /* Prevent links to .zfs/shares files */
3719
3720 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
3721 &parent, sizeof (uint64_t))) != 0) {
3722 ZFS_EXIT(zfsvfs);
3723 return (error);
3724 }
3725 if (parent == zfsvfs->z_shares_dir) {
3726 ZFS_EXIT(zfsvfs);
3727 return (SET_ERROR(EPERM));
3728 }
3729
3730 if (zfsvfs->z_utf8 && u8_validate(name,
3731 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3732 ZFS_EXIT(zfsvfs);
3733 return (SET_ERROR(EILSEQ));
3734 }
3735
3736 /*
3737 * We do not support links between attributes and non-attributes
3738 * because of the potential security risk of creating links
3739 * into "normal" file space in order to circumvent restrictions
3740 * imposed in attribute space.
3741 */
3742 if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
3743 ZFS_EXIT(zfsvfs);
3744 return (SET_ERROR(EINVAL));
3745 }
3746
3747
3748 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
3749 if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
3750 ZFS_EXIT(zfsvfs);
3751 return (SET_ERROR(EPERM));
3752 }
3753
3754 if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
3755 ZFS_EXIT(zfsvfs);
3756 return (error);
3757 }
3758
3759 /*
3760 * Attempt to lock directory; fail if entry already exists.
3761 */
3762 error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
3763 if (error) {
3764 ZFS_EXIT(zfsvfs);
3765 return (error);
3766 }
3767
3768 tx = dmu_tx_create(zfsvfs->z_os);
3769 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3770 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
3771 zfs_sa_upgrade_txholds(tx, szp);
3772 zfs_sa_upgrade_txholds(tx, tdzp);
3773 error = dmu_tx_assign(tx, TXG_WAIT);
3774 if (error) {
3775 dmu_tx_abort(tx);
3776 ZFS_EXIT(zfsvfs);
3777 return (error);
3778 }
3779
3780 error = zfs_link_create(tdzp, name, szp, tx, 0);
3781
3782 if (error == 0) {
3783 uint64_t txtype = TX_LINK;
3784 zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
3785 }
3786
3787 dmu_tx_commit(tx);
3788
3789 if (error == 0) {
3790 vnevent_link(ZTOV(szp), ct);
3791 }
3792
3793 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3794 zil_commit(zilog, 0);
3795
3796 ZFS_EXIT(zfsvfs);
3797 return (error);
3798 }
3799
3800 /*
3801 * Free or allocate space in a file. Currently, this function only
3802 * supports the `F_FREESP' command. However, this command is somewhat
3803 * misnamed, as its functionality includes the ability to allocate as
3804 * well as free space.
3805 *
3806 * IN: ip - inode of file to free data in.
3807 * cmd - action to take (only F_FREESP supported).
3808 * bfp - section of file to free/alloc.
3809 * flag - current file open mode flags.
3810 * offset - current file offset.
3811 * cr - credentials of caller.
3812 *
3813 * RETURN: 0 on success, error code on failure.
3814 *
3815 * Timestamps:
3816 * ip - ctime|mtime updated
3817 */
3818 /* ARGSUSED */
3819 int
zfs_space(znode_t * zp,int cmd,flock64_t * bfp,int flag,offset_t offset,cred_t * cr)3820 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
3821 offset_t offset, cred_t *cr)
3822 {
3823 zfsvfs_t *zfsvfs = ZTOZSB(zp);
3824 uint64_t off, len;
3825 int error;
3826
3827 ZFS_ENTER(zfsvfs);
3828 ZFS_VERIFY_ZP(zp);
3829
3830 if (cmd != F_FREESP) {
3831 ZFS_EXIT(zfsvfs);
3832 return (SET_ERROR(EINVAL));
3833 }
3834
3835 /*
3836 * Callers might not be able to detect properly that we are read-only,
3837 * so check it explicitly here.
3838 */
3839 if (zfs_is_readonly(zfsvfs)) {
3840 ZFS_EXIT(zfsvfs);
3841 return (SET_ERROR(EROFS));
3842 }
3843
3844 if (bfp->l_len < 0) {
3845 ZFS_EXIT(zfsvfs);
3846 return (SET_ERROR(EINVAL));
3847 }
3848
3849 /*
3850 * Permissions aren't checked on Solaris because on this OS
3851 * zfs_space() can only be called with an opened file handle.
3852 * On Linux we can get here through truncate_range() which
3853 * operates directly on inodes, so we need to check access rights.
3854 */
3855 if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
3856 ZFS_EXIT(zfsvfs);
3857 return (error);
3858 }
3859
3860 off = bfp->l_start;
3861 len = bfp->l_len; /* 0 means from off to end of file */
3862
3863 error = zfs_freesp(zp, off, len, flag, TRUE);
3864
3865 ZFS_EXIT(zfsvfs);
3866 return (error);
3867 }
3868
3869 /*ARGSUSED*/
3870 static void
zfs_inactive(vnode_t * vp,cred_t * cr,caller_context_t * ct)3871 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
3872 {
3873 znode_t *zp = VTOZ(vp);
3874 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3875 int error;
3876
3877 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
3878 if (zp->z_sa_hdl == NULL) {
3879 /*
3880 * The fs has been unmounted, or we did a
3881 * suspend/resume and this file no longer exists.
3882 */
3883 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3884 vrecycle(vp);
3885 return;
3886 }
3887
3888 if (zp->z_unlinked) {
3889 /*
3890 * Fast path to recycle a vnode of a removed file.
3891 */
3892 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3893 vrecycle(vp);
3894 return;
3895 }
3896
3897 if (zp->z_atime_dirty && zp->z_unlinked == 0) {
3898 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
3899
3900 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
3901 zfs_sa_upgrade_txholds(tx, zp);
3902 error = dmu_tx_assign(tx, TXG_WAIT);
3903 if (error) {
3904 dmu_tx_abort(tx);
3905 } else {
3906 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
3907 (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
3908 zp->z_atime_dirty = 0;
3909 dmu_tx_commit(tx);
3910 }
3911 }
3912 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3913 }
3914
3915
3916 CTASSERT(sizeof (struct zfid_short) <= sizeof (struct fid));
3917 CTASSERT(sizeof (struct zfid_long) <= sizeof (struct fid));
3918
3919 /*ARGSUSED*/
3920 static int
zfs_fid(vnode_t * vp,fid_t * fidp,caller_context_t * ct)3921 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
3922 {
3923 znode_t *zp = VTOZ(vp);
3924 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3925 uint32_t gen;
3926 uint64_t gen64;
3927 uint64_t object = zp->z_id;
3928 zfid_short_t *zfid;
3929 int size, i, error;
3930
3931 ZFS_ENTER(zfsvfs);
3932 ZFS_VERIFY_ZP(zp);
3933
3934 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
3935 &gen64, sizeof (uint64_t))) != 0) {
3936 ZFS_EXIT(zfsvfs);
3937 return (error);
3938 }
3939
3940 gen = (uint32_t)gen64;
3941
3942 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
3943 fidp->fid_len = size;
3944
3945 zfid = (zfid_short_t *)fidp;
3946
3947 zfid->zf_len = size;
3948
3949 for (i = 0; i < sizeof (zfid->zf_object); i++)
3950 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
3951
3952 /* Must have a non-zero generation number to distinguish from .zfs */
3953 if (gen == 0)
3954 gen = 1;
3955 for (i = 0; i < sizeof (zfid->zf_gen); i++)
3956 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
3957
3958 if (size == LONG_FID_LEN) {
3959 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
3960 zfid_long_t *zlfid;
3961
3962 zlfid = (zfid_long_t *)fidp;
3963
3964 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
3965 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
3966
3967 /* XXX - this should be the generation number for the objset */
3968 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
3969 zlfid->zf_setgen[i] = 0;
3970 }
3971
3972 ZFS_EXIT(zfsvfs);
3973 return (0);
3974 }
3975
3976 static int
zfs_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)3977 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
3978 caller_context_t *ct)
3979 {
3980 znode_t *zp;
3981 zfsvfs_t *zfsvfs;
3982
3983 switch (cmd) {
3984 case _PC_LINK_MAX:
3985 *valp = MIN(LONG_MAX, ZFS_LINK_MAX);
3986 return (0);
3987
3988 case _PC_FILESIZEBITS:
3989 *valp = 64;
3990 return (0);
3991 case _PC_MIN_HOLE_SIZE:
3992 *valp = (int)SPA_MINBLOCKSIZE;
3993 return (0);
3994 case _PC_ACL_EXTENDED:
3995 #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
3996 zp = VTOZ(vp);
3997 zfsvfs = zp->z_zfsvfs;
3998 ZFS_ENTER(zfsvfs);
3999 ZFS_VERIFY_ZP(zp);
4000 *valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0;
4001 ZFS_EXIT(zfsvfs);
4002 #else
4003 *valp = 0;
4004 #endif
4005 return (0);
4006
4007 case _PC_ACL_NFS4:
4008 zp = VTOZ(vp);
4009 zfsvfs = zp->z_zfsvfs;
4010 ZFS_ENTER(zfsvfs);
4011 ZFS_VERIFY_ZP(zp);
4012 *valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 1 : 0;
4013 ZFS_EXIT(zfsvfs);
4014 return (0);
4015
4016 case _PC_ACL_PATH_MAX:
4017 *valp = ACL_MAX_ENTRIES;
4018 return (0);
4019
4020 default:
4021 return (EOPNOTSUPP);
4022 }
4023 }
4024
4025 static int
zfs_getpages(struct vnode * vp,vm_page_t * ma,int count,int * rbehind,int * rahead)4026 zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
4027 int *rahead)
4028 {
4029 znode_t *zp = VTOZ(vp);
4030 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4031 objset_t *os = zp->z_zfsvfs->z_os;
4032 zfs_locked_range_t *lr;
4033 vm_object_t object;
4034 off_t start, end, obj_size;
4035 uint_t blksz;
4036 int pgsin_b, pgsin_a;
4037 int error;
4038
4039 ZFS_ENTER(zfsvfs);
4040 ZFS_VERIFY_ZP(zp);
4041
4042 start = IDX_TO_OFF(ma[0]->pindex);
4043 end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
4044
4045 /*
4046 * Lock a range covering all required and optional pages.
4047 * Note that we need to handle the case of the block size growing.
4048 */
4049 for (;;) {
4050 blksz = zp->z_blksz;
4051 lr = zfs_rangelock_tryenter(&zp->z_rangelock,
4052 rounddown(start, blksz),
4053 roundup(end, blksz) - rounddown(start, blksz), RL_READER);
4054 if (lr == NULL) {
4055 if (rahead != NULL) {
4056 *rahead = 0;
4057 rahead = NULL;
4058 }
4059 if (rbehind != NULL) {
4060 *rbehind = 0;
4061 rbehind = NULL;
4062 }
4063 break;
4064 }
4065 if (blksz == zp->z_blksz)
4066 break;
4067 zfs_rangelock_exit(lr);
4068 }
4069
4070 object = ma[0]->object;
4071 zfs_vmobject_wlock(object);
4072 obj_size = object->un_pager.vnp.vnp_size;
4073 zfs_vmobject_wunlock(object);
4074 if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
4075 if (lr != NULL)
4076 zfs_rangelock_exit(lr);
4077 ZFS_EXIT(zfsvfs);
4078 return (zfs_vm_pagerret_bad);
4079 }
4080
4081 pgsin_b = 0;
4082 if (rbehind != NULL) {
4083 pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
4084 pgsin_b = MIN(*rbehind, pgsin_b);
4085 }
4086
4087 pgsin_a = 0;
4088 if (rahead != NULL) {
4089 pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
4090 if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
4091 pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
4092 pgsin_a = MIN(*rahead, pgsin_a);
4093 }
4094
4095 /*
4096 * NB: we need to pass the exact byte size of the data that we expect
4097 * to read after accounting for the file size. This is required because
4098 * ZFS will panic if we request DMU to read beyond the end of the last
4099 * allocated block.
4100 */
4101 error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a,
4102 MIN(end, obj_size) - (end - PAGE_SIZE));
4103
4104 if (lr != NULL)
4105 zfs_rangelock_exit(lr);
4106 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
4107 ZFS_EXIT(zfsvfs);
4108
4109 if (error != 0)
4110 return (zfs_vm_pagerret_error);
4111
4112 VM_CNT_INC(v_vnodein);
4113 VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
4114 if (rbehind != NULL)
4115 *rbehind = pgsin_b;
4116 if (rahead != NULL)
4117 *rahead = pgsin_a;
4118 return (zfs_vm_pagerret_ok);
4119 }
4120
4121 #ifndef _SYS_SYSPROTO_H_
4122 struct vop_getpages_args {
4123 struct vnode *a_vp;
4124 vm_page_t *a_m;
4125 int a_count;
4126 int *a_rbehind;
4127 int *a_rahead;
4128 };
4129 #endif
4130
4131 static int
zfs_freebsd_getpages(struct vop_getpages_args * ap)4132 zfs_freebsd_getpages(struct vop_getpages_args *ap)
4133 {
4134
4135 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
4136 ap->a_rahead));
4137 }
4138
4139 static int
zfs_putpages(struct vnode * vp,vm_page_t * ma,size_t len,int flags,int * rtvals)4140 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
4141 int *rtvals)
4142 {
4143 znode_t *zp = VTOZ(vp);
4144 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4145 zfs_locked_range_t *lr;
4146 dmu_tx_t *tx;
4147 struct sf_buf *sf;
4148 vm_object_t object;
4149 vm_page_t m;
4150 caddr_t va;
4151 size_t tocopy;
4152 size_t lo_len;
4153 vm_ooffset_t lo_off;
4154 vm_ooffset_t off;
4155 uint_t blksz;
4156 int ncount;
4157 int pcount;
4158 int err;
4159 int i;
4160
4161 ZFS_ENTER(zfsvfs);
4162 ZFS_VERIFY_ZP(zp);
4163
4164 object = vp->v_object;
4165 pcount = btoc(len);
4166 ncount = pcount;
4167
4168 KASSERT(ma[0]->object == object, ("mismatching object"));
4169 KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
4170
4171 for (i = 0; i < pcount; i++)
4172 rtvals[i] = zfs_vm_pagerret_error;
4173
4174 off = IDX_TO_OFF(ma[0]->pindex);
4175 blksz = zp->z_blksz;
4176 lo_off = rounddown(off, blksz);
4177 lo_len = roundup(len + (off - lo_off), blksz);
4178 lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);
4179
4180 zfs_vmobject_wlock(object);
4181 if (len + off > object->un_pager.vnp.vnp_size) {
4182 if (object->un_pager.vnp.vnp_size > off) {
4183 int pgoff;
4184
4185 len = object->un_pager.vnp.vnp_size - off;
4186 ncount = btoc(len);
4187 if ((pgoff = (int)len & PAGE_MASK) != 0) {
4188 /*
4189 * If the object is locked and the following
4190 * conditions hold, then the page's dirty
4191 * field cannot be concurrently changed by a
4192 * pmap operation.
4193 */
4194 m = ma[ncount - 1];
4195 vm_page_assert_sbusied(m);
4196 KASSERT(!pmap_page_is_write_mapped(m),
4197 ("zfs_putpages: page %p is not read-only",
4198 m));
4199 vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
4200 pgoff);
4201 }
4202 } else {
4203 len = 0;
4204 ncount = 0;
4205 }
4206 if (ncount < pcount) {
4207 for (i = ncount; i < pcount; i++) {
4208 rtvals[i] = zfs_vm_pagerret_bad;
4209 }
4210 }
4211 }
4212 zfs_vmobject_wunlock(object);
4213
4214 if (ncount == 0)
4215 goto out;
4216
4217 if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
4218 zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
4219 (zp->z_projid != ZFS_DEFAULT_PROJID &&
4220 zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
4221 zp->z_projid))) {
4222 goto out;
4223 }
4224
4225 tx = dmu_tx_create(zfsvfs->z_os);
4226 dmu_tx_hold_write(tx, zp->z_id, off, len);
4227
4228 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4229 zfs_sa_upgrade_txholds(tx, zp);
4230 err = dmu_tx_assign(tx, TXG_WAIT);
4231 if (err != 0) {
4232 dmu_tx_abort(tx);
4233 goto out;
4234 }
4235
4236 if (zp->z_blksz < PAGE_SIZE) {
4237 for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4238 tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4239 va = zfs_map_page(ma[i], &sf);
4240 dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4241 zfs_unmap_page(sf);
4242 }
4243 } else {
4244 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
4245 }
4246
4247 if (err == 0) {
4248 uint64_t mtime[2], ctime[2];
4249 sa_bulk_attr_t bulk[3];
4250 int count = 0;
4251
4252 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4253 &mtime, 16);
4254 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4255 &ctime, 16);
4256 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4257 &zp->z_pflags, 8);
4258 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
4259 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
4260 ASSERT0(err);
4261 /*
4262 * XXX we should be passing a callback to undirty
4263 * but that would make the locking messier
4264 */
4265 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
4266 len, 0, NULL, NULL);
4267
4268 zfs_vmobject_wlock(object);
4269 for (i = 0; i < ncount; i++) {
4270 rtvals[i] = zfs_vm_pagerret_ok;
4271 vm_page_undirty(ma[i]);
4272 }
4273 zfs_vmobject_wunlock(object);
4274 VM_CNT_INC(v_vnodeout);
4275 VM_CNT_ADD(v_vnodepgsout, ncount);
4276 }
4277 dmu_tx_commit(tx);
4278
4279 out:
4280 zfs_rangelock_exit(lr);
4281 if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
4282 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4283 zil_commit(zfsvfs->z_log, zp->z_id);
4284 ZFS_EXIT(zfsvfs);
4285 return (rtvals[0]);
4286 }
4287
4288 #ifndef _SYS_SYSPROTO_H_
4289 struct vop_putpages_args {
4290 struct vnode *a_vp;
4291 vm_page_t *a_m;
4292 int a_count;
4293 int a_sync;
4294 int *a_rtvals;
4295 };
4296 #endif
4297
4298 static int
zfs_freebsd_putpages(struct vop_putpages_args * ap)4299 zfs_freebsd_putpages(struct vop_putpages_args *ap)
4300 {
4301
4302 return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
4303 ap->a_rtvals));
4304 }
4305
4306 #ifndef _SYS_SYSPROTO_H_
4307 struct vop_bmap_args {
4308 struct vnode *a_vp;
4309 daddr_t a_bn;
4310 struct bufobj **a_bop;
4311 daddr_t *a_bnp;
4312 int *a_runp;
4313 int *a_runb;
4314 };
4315 #endif
4316
4317 static int
zfs_freebsd_bmap(struct vop_bmap_args * ap)4318 zfs_freebsd_bmap(struct vop_bmap_args *ap)
4319 {
4320
4321 if (ap->a_bop != NULL)
4322 *ap->a_bop = &ap->a_vp->v_bufobj;
4323 if (ap->a_bnp != NULL)
4324 *ap->a_bnp = ap->a_bn;
4325 if (ap->a_runp != NULL)
4326 *ap->a_runp = 0;
4327 if (ap->a_runb != NULL)
4328 *ap->a_runb = 0;
4329
4330 return (0);
4331 }
4332
4333 #ifndef _SYS_SYSPROTO_H_
4334 struct vop_open_args {
4335 struct vnode *a_vp;
4336 int a_mode;
4337 struct ucred *a_cred;
4338 struct thread *a_td;
4339 };
4340 #endif
4341
4342 static int
zfs_freebsd_open(struct vop_open_args * ap)4343 zfs_freebsd_open(struct vop_open_args *ap)
4344 {
4345 vnode_t *vp = ap->a_vp;
4346 znode_t *zp = VTOZ(vp);
4347 int error;
4348
4349 error = zfs_open(&vp, ap->a_mode, ap->a_cred);
4350 if (error == 0)
4351 vnode_create_vobject(vp, zp->z_size, ap->a_td);
4352 return (error);
4353 }
4354
#ifndef _SYS_SYSPROTO_H_
struct vop_close_args {
	struct vnode *a_vp;
	int a_fflag;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/*
 * VOP_CLOSE entry point: forward to zfs_close() with the fixed
 * arguments 1 and 0 in the third and fourth positions.
 */
static int
zfs_freebsd_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int fflag = ap->a_fflag;

	return (zfs_close(vp, fflag, 1, 0, ap->a_cred));
}
4370
4371 #ifndef _SYS_SYSPROTO_H_
4372 struct vop_ioctl_args {
4373 struct vnode *a_vp;
4374 ulong_t a_command;
4375 caddr_t a_data;
4376 int a_fflag;
4377 struct ucred *cred;
4378 struct thread *td;
4379 };
4380 #endif
4381
4382 static int
zfs_freebsd_ioctl(struct vop_ioctl_args * ap)4383 zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
4384 {
4385
4386 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
4387 ap->a_fflag, ap->a_cred, NULL));
4388 }
4389
4390 static int
ioflags(int ioflags)4391 ioflags(int ioflags)
4392 {
4393 int flags = 0;
4394
4395 if (ioflags & IO_APPEND)
4396 flags |= FAPPEND;
4397 if (ioflags & IO_NDELAY)
4398 flags |= FNONBLOCK;
4399 if (ioflags & IO_SYNC)
4400 flags |= (FSYNC | FDSYNC | FRSYNC);
4401
4402 return (flags);
4403 }
4404
4405 #ifndef _SYS_SYSPROTO_H_
4406 struct vop_read_args {
4407 struct vnode *a_vp;
4408 struct uio *a_uio;
4409 int a_ioflag;
4410 struct ucred *a_cred;
4411 };
4412 #endif
4413
4414 static int
zfs_freebsd_read(struct vop_read_args * ap)4415 zfs_freebsd_read(struct vop_read_args *ap)
4416 {
4417
4418 return (zfs_read(VTOZ(ap->a_vp), ap->a_uio, ioflags(ap->a_ioflag),
4419 ap->a_cred));
4420 }
4421
4422 #ifndef _SYS_SYSPROTO_H_
4423 struct vop_write_args {
4424 struct vnode *a_vp;
4425 struct uio *a_uio;
4426 int a_ioflag;
4427 struct ucred *a_cred;
4428 };
4429 #endif
4430
4431 static int
zfs_freebsd_write(struct vop_write_args * ap)4432 zfs_freebsd_write(struct vop_write_args *ap)
4433 {
4434
4435 return (zfs_write(VTOZ(ap->a_vp), ap->a_uio, ioflags(ap->a_ioflag),
4436 ap->a_cred));
4437 }
4438
#if __FreeBSD_version >= 1300102
/*
 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
 * the comment above cache_fplookup for details.
 *
 * Returns 0 only when the znode is available, is neither quarantined nor
 * an extended attribute, and has ZFS_NO_EXECS_DENIED set.  Every other
 * case returns EAGAIN.
 */
static int
zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
{
	vnode_t *vp = v->a_vp;
	znode_t *zp = VTOZ_SMR(vp);
	uint64_t pflags;

	if (__predict_false(zp == NULL)) {
		return (EAGAIN);
	}

	pflags = atomic_load_64(&zp->z_pflags);
	if ((pflags & (ZFS_AV_QUARANTINED | ZFS_XATTR)) != 0) {
		return (EAGAIN);
	}
	if ((pflags & ZFS_NO_EXECS_DENIED) == 0) {
		return (EAGAIN);
	}

	return (0);
}
#endif
4465
/*
 * Like zfs_freebsd_fplookup_vexec(), this routine relies on VTOZ_SMR() and
 * on VFS facilities that only newer kernels provide, so it is compiled
 * conditionally as well.
 *
 * NOTE(review): 1300139 is taken to be the __FreeBSD_version that
 * introduced VOP_FPLOOKUP_SYMLINK -- confirm, and make sure every reference
 * to this function (e.g. in the vnode operations vector) carries the same
 * guard.
 */
#if __FreeBSD_version >= 1300139
/*
 * VOP_FPLOOKUP_SYMLINK entry point: resolve the symlink from the target
 * string cached in the znode.  Returns EAGAIN when the znode or the cached
 * string is not available.
 */
static int
zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
{
	vnode_t *vp;
	znode_t *zp;
	char *target;

	vp = v->a_vp;
	zp = VTOZ_SMR(vp);
	if (__predict_false(zp == NULL)) {
		return (EAGAIN);
	}

	target = atomic_load_consume_ptr(&zp->z_cached_symlink);
	if (target == NULL) {
		return (EAGAIN);
	}
	return (cache_symlink_resolve(v->a_fpl, target, strlen(target)));
}
#endif
4485
4486 #ifndef _SYS_SYSPROTO_H_
4487 struct vop_access_args {
4488 struct vnode *a_vp;
4489 accmode_t a_accmode;
4490 struct ucred *a_cred;
4491 struct thread *a_td;
4492 };
4493 #endif
4494
4495 static int
zfs_freebsd_access(struct vop_access_args * ap)4496 zfs_freebsd_access(struct vop_access_args *ap)
4497 {
4498 vnode_t *vp = ap->a_vp;
4499 znode_t *zp = VTOZ(vp);
4500 accmode_t accmode;
4501 int error = 0;
4502
4503
4504 if (ap->a_accmode == VEXEC) {
4505 if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
4506 return (0);
4507 }
4508
4509 /*
4510 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
4511 */
4512 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
4513 if (accmode != 0)
4514 error = zfs_access(zp, accmode, 0, ap->a_cred);
4515
4516 /*
4517 * VADMIN has to be handled by vaccess().
4518 */
4519 if (error == 0) {
4520 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
4521 if (accmode != 0) {
4522 #if __FreeBSD_version >= 1300105
4523 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4524 zp->z_gid, accmode, ap->a_cred);
4525 #else
4526 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4527 zp->z_gid, accmode, ap->a_cred, NULL);
4528 #endif
4529 }
4530 }
4531
4532 /*
4533 * For VEXEC, ensure that at least one execute bit is set for
4534 * non-directories.
4535 */
4536 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
4537 (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
4538 error = EACCES;
4539 }
4540
4541 return (error);
4542 }
4543
4544 #ifndef _SYS_SYSPROTO_H_
4545 struct vop_lookup_args {
4546 struct vnode *a_dvp;
4547 struct vnode **a_vpp;
4548 struct componentname *a_cnp;
4549 };
4550 #endif
4551
4552 static int
zfs_freebsd_lookup(struct vop_lookup_args * ap,boolean_t cached)4553 zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
4554 {
4555 struct componentname *cnp = ap->a_cnp;
4556 char nm[NAME_MAX + 1];
4557
4558 ASSERT(cnp->cn_namelen < sizeof (nm));
4559 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));
4560
4561 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
4562 cnp->cn_cred, cnp->cn_thread, 0, cached));
4563 }
4564
4565 static int
zfs_freebsd_cachedlookup(struct vop_cachedlookup_args * ap)4566 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
4567 {
4568
4569 return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
4570 }
4571
4572 #ifndef _SYS_SYSPROTO_H_
4573 struct vop_lookup_args {
4574 struct vnode *a_dvp;
4575 struct vnode **a_vpp;
4576 struct componentname *a_cnp;
4577 };
4578 #endif
4579
4580 static int
zfs_cache_lookup(struct vop_lookup_args * ap)4581 zfs_cache_lookup(struct vop_lookup_args *ap)
4582 {
4583 zfsvfs_t *zfsvfs;
4584
4585 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4586 if (zfsvfs->z_use_namecache)
4587 return (vfs_cache_lookup(ap));
4588 else
4589 return (zfs_freebsd_lookup(ap, B_FALSE));
4590 }
4591
4592 #ifndef _SYS_SYSPROTO_H_
4593 struct vop_create_args {
4594 struct vnode *a_dvp;
4595 struct vnode **a_vpp;
4596 struct componentname *a_cnp;
4597 struct vattr *a_vap;
4598 };
4599 #endif
4600
4601 static int
zfs_freebsd_create(struct vop_create_args * ap)4602 zfs_freebsd_create(struct vop_create_args *ap)
4603 {
4604 zfsvfs_t *zfsvfs;
4605 struct componentname *cnp = ap->a_cnp;
4606 vattr_t *vap = ap->a_vap;
4607 znode_t *zp = NULL;
4608 int rc, mode;
4609
4610 ASSERT(cnp->cn_flags & SAVENAME);
4611
4612 vattr_init_mask(vap);
4613 mode = vap->va_mode & ALLPERMS;
4614 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4615 *ap->a_vpp = NULL;
4616
4617 rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, !EXCL, mode,
4618 &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */);
4619 if (rc == 0)
4620 *ap->a_vpp = ZTOV(zp);
4621 if (zfsvfs->z_use_namecache &&
4622 rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
4623 cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
4624
4625 return (rc);
4626 }
4627
4628 #ifndef _SYS_SYSPROTO_H_
4629 struct vop_remove_args {
4630 struct vnode *a_dvp;
4631 struct vnode *a_vp;
4632 struct componentname *a_cnp;
4633 };
4634 #endif
4635
4636 static int
zfs_freebsd_remove(struct vop_remove_args * ap)4637 zfs_freebsd_remove(struct vop_remove_args *ap)
4638 {
4639
4640 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4641
4642 return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
4643 ap->a_cnp->cn_cred));
4644 }
4645
4646 #ifndef _SYS_SYSPROTO_H_
4647 struct vop_mkdir_args {
4648 struct vnode *a_dvp;
4649 struct vnode **a_vpp;
4650 struct componentname *a_cnp;
4651 struct vattr *a_vap;
4652 };
4653 #endif
4654
4655 static int
zfs_freebsd_mkdir(struct vop_mkdir_args * ap)4656 zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
4657 {
4658 vattr_t *vap = ap->a_vap;
4659 znode_t *zp = NULL;
4660 int rc;
4661
4662 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4663
4664 vattr_init_mask(vap);
4665 *ap->a_vpp = NULL;
4666
4667 rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
4668 ap->a_cnp->cn_cred, 0, NULL);
4669
4670 if (rc == 0)
4671 *ap->a_vpp = ZTOV(zp);
4672 return (rc);
4673 }
4674
4675 #ifndef _SYS_SYSPROTO_H_
4676 struct vop_rmdir_args {
4677 struct vnode *a_dvp;
4678 struct vnode *a_vp;
4679 struct componentname *a_cnp;
4680 };
4681 #endif
4682
4683 static int
zfs_freebsd_rmdir(struct vop_rmdir_args * ap)4684 zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
4685 {
4686 struct componentname *cnp = ap->a_cnp;
4687
4688 ASSERT(cnp->cn_flags & SAVENAME);
4689
4690 return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
4691 }
4692
4693 #ifndef _SYS_SYSPROTO_H_
4694 struct vop_readdir_args {
4695 struct vnode *a_vp;
4696 struct uio *a_uio;
4697 struct ucred *a_cred;
4698 int *a_eofflag;
4699 int *a_ncookies;
4700 ulong_t **a_cookies;
4701 };
4702 #endif
4703
4704 static int
zfs_freebsd_readdir(struct vop_readdir_args * ap)4705 zfs_freebsd_readdir(struct vop_readdir_args *ap)
4706 {
4707
4708 return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag,
4709 ap->a_ncookies, ap->a_cookies));
4710 }
4711
4712 #ifndef _SYS_SYSPROTO_H_
4713 struct vop_fsync_args {
4714 struct vnode *a_vp;
4715 int a_waitfor;
4716 struct thread *a_td;
4717 };
4718 #endif
4719
4720 static int
zfs_freebsd_fsync(struct vop_fsync_args * ap)4721 zfs_freebsd_fsync(struct vop_fsync_args *ap)
4722 {
4723
4724 vop_stdfsync(ap);
4725 return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
4726 }
4727
4728 #ifndef _SYS_SYSPROTO_H_
4729 struct vop_getattr_args {
4730 struct vnode *a_vp;
4731 struct vattr *a_vap;
4732 struct ucred *a_cred;
4733 };
4734 #endif
4735
4736 static int
zfs_freebsd_getattr(struct vop_getattr_args * ap)4737 zfs_freebsd_getattr(struct vop_getattr_args *ap)
4738 {
4739 vattr_t *vap = ap->a_vap;
4740 xvattr_t xvap;
4741 ulong_t fflags = 0;
4742 int error;
4743
4744 xva_init(&xvap);
4745 xvap.xva_vattr = *vap;
4746 xvap.xva_vattr.va_mask |= AT_XVATTR;
4747
4748 /* Convert chflags into ZFS-type flags. */
4749 /* XXX: what about SF_SETTABLE?. */
4750 XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
4751 XVA_SET_REQ(&xvap, XAT_APPENDONLY);
4752 XVA_SET_REQ(&xvap, XAT_NOUNLINK);
4753 XVA_SET_REQ(&xvap, XAT_NODUMP);
4754 XVA_SET_REQ(&xvap, XAT_READONLY);
4755 XVA_SET_REQ(&xvap, XAT_ARCHIVE);
4756 XVA_SET_REQ(&xvap, XAT_SYSTEM);
4757 XVA_SET_REQ(&xvap, XAT_HIDDEN);
4758 XVA_SET_REQ(&xvap, XAT_REPARSE);
4759 XVA_SET_REQ(&xvap, XAT_OFFLINE);
4760 XVA_SET_REQ(&xvap, XAT_SPARSE);
4761
4762 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
4763 if (error != 0)
4764 return (error);
4765
4766 /* Convert ZFS xattr into chflags. */
4767 #define FLAG_CHECK(fflag, xflag, xfield) do { \
4768 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \
4769 fflags |= (fflag); \
4770 } while (0)
4771 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
4772 xvap.xva_xoptattrs.xoa_immutable);
4773 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
4774 xvap.xva_xoptattrs.xoa_appendonly);
4775 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
4776 xvap.xva_xoptattrs.xoa_nounlink);
4777 FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
4778 xvap.xva_xoptattrs.xoa_archive);
4779 FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
4780 xvap.xva_xoptattrs.xoa_nodump);
4781 FLAG_CHECK(UF_READONLY, XAT_READONLY,
4782 xvap.xva_xoptattrs.xoa_readonly);
4783 FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
4784 xvap.xva_xoptattrs.xoa_system);
4785 FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
4786 xvap.xva_xoptattrs.xoa_hidden);
4787 FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
4788 xvap.xva_xoptattrs.xoa_reparse);
4789 FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
4790 xvap.xva_xoptattrs.xoa_offline);
4791 FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
4792 xvap.xva_xoptattrs.xoa_sparse);
4793
4794 #undef FLAG_CHECK
4795 *vap = xvap.xva_vattr;
4796 vap->va_flags = fflags;
4797 return (0);
4798 }
4799
4800 #ifndef _SYS_SYSPROTO_H_
4801 struct vop_setattr_args {
4802 struct vnode *a_vp;
4803 struct vattr *a_vap;
4804 struct ucred *a_cred;
4805 };
4806 #endif
4807
4808 static int
zfs_freebsd_setattr(struct vop_setattr_args * ap)4809 zfs_freebsd_setattr(struct vop_setattr_args *ap)
4810 {
4811 vnode_t *vp = ap->a_vp;
4812 vattr_t *vap = ap->a_vap;
4813 cred_t *cred = ap->a_cred;
4814 xvattr_t xvap;
4815 ulong_t fflags;
4816 uint64_t zflags;
4817
4818 vattr_init_mask(vap);
4819 vap->va_mask &= ~AT_NOSET;
4820
4821 xva_init(&xvap);
4822 xvap.xva_vattr = *vap;
4823
4824 zflags = VTOZ(vp)->z_pflags;
4825
4826 if (vap->va_flags != VNOVAL) {
4827 zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
4828 int error;
4829
4830 if (zfsvfs->z_use_fuids == B_FALSE)
4831 return (EOPNOTSUPP);
4832
4833 fflags = vap->va_flags;
4834 /*
4835 * XXX KDM
4836 * We need to figure out whether it makes sense to allow
4837 * UF_REPARSE through, since we don't really have other
4838 * facilities to handle reparse points and zfs_setattr()
4839 * doesn't currently allow setting that attribute anyway.
4840 */
4841 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
4842 UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
4843 UF_OFFLINE|UF_SPARSE)) != 0)
4844 return (EOPNOTSUPP);
4845 /*
4846 * Unprivileged processes are not permitted to unset system
4847 * flags, or modify flags if any system flags are set.
4848 * Privileged non-jail processes may not modify system flags
4849 * if securelevel > 0 and any existing system flags are set.
4850 * Privileged jail processes behave like privileged non-jail
4851 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
4852 * otherwise, they behave like unprivileged processes.
4853 */
4854 if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
4855 spl_priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
4856 if (zflags &
4857 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
4858 error = securelevel_gt(cred, 0);
4859 if (error != 0)
4860 return (error);
4861 }
4862 } else {
4863 /*
4864 * Callers may only modify the file flags on
4865 * objects they have VADMIN rights for.
4866 */
4867 if ((error = VOP_ACCESS(vp, VADMIN, cred,
4868 curthread)) != 0)
4869 return (error);
4870 if (zflags &
4871 (ZFS_IMMUTABLE | ZFS_APPENDONLY |
4872 ZFS_NOUNLINK)) {
4873 return (EPERM);
4874 }
4875 if (fflags &
4876 (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
4877 return (EPERM);
4878 }
4879 }
4880
4881 #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \
4882 if (((fflags & (fflag)) && !(zflags & (zflag))) || \
4883 ((zflags & (zflag)) && !(fflags & (fflag)))) { \
4884 XVA_SET_REQ(&xvap, (xflag)); \
4885 (xfield) = ((fflags & (fflag)) != 0); \
4886 } \
4887 } while (0)
4888 /* Convert chflags into ZFS-type flags. */
4889 /* XXX: what about SF_SETTABLE?. */
4890 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
4891 xvap.xva_xoptattrs.xoa_immutable);
4892 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
4893 xvap.xva_xoptattrs.xoa_appendonly);
4894 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
4895 xvap.xva_xoptattrs.xoa_nounlink);
4896 FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
4897 xvap.xva_xoptattrs.xoa_archive);
4898 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
4899 xvap.xva_xoptattrs.xoa_nodump);
4900 FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
4901 xvap.xva_xoptattrs.xoa_readonly);
4902 FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
4903 xvap.xva_xoptattrs.xoa_system);
4904 FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
4905 xvap.xva_xoptattrs.xoa_hidden);
4906 FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
4907 xvap.xva_xoptattrs.xoa_reparse);
4908 FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
4909 xvap.xva_xoptattrs.xoa_offline);
4910 FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
4911 xvap.xva_xoptattrs.xoa_sparse);
4912 #undef FLAG_CHANGE
4913 }
4914 if (vap->va_birthtime.tv_sec != VNOVAL) {
4915 xvap.xva_vattr.va_mask |= AT_XVATTR;
4916 XVA_SET_REQ(&xvap, XAT_CREATETIME);
4917 }
4918 return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred));
4919 }
4920
4921 #ifndef _SYS_SYSPROTO_H_
4922 struct vop_rename_args {
4923 struct vnode *a_fdvp;
4924 struct vnode *a_fvp;
4925 struct componentname *a_fcnp;
4926 struct vnode *a_tdvp;
4927 struct vnode *a_tvp;
4928 struct componentname *a_tcnp;
4929 };
4930 #endif
4931
4932 static int
zfs_freebsd_rename(struct vop_rename_args * ap)4933 zfs_freebsd_rename(struct vop_rename_args *ap)
4934 {
4935 vnode_t *fdvp = ap->a_fdvp;
4936 vnode_t *fvp = ap->a_fvp;
4937 vnode_t *tdvp = ap->a_tdvp;
4938 vnode_t *tvp = ap->a_tvp;
4939 int error;
4940
4941 ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
4942 ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
4943
4944 error = zfs_rename_(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
4945 ap->a_tcnp, ap->a_fcnp->cn_cred, 1);
4946
4947 vrele(fdvp);
4948 vrele(fvp);
4949 vrele(tdvp);
4950 if (tvp != NULL)
4951 vrele(tvp);
4952
4953 return (error);
4954 }
4955
4956 #ifndef _SYS_SYSPROTO_H_
4957 struct vop_symlink_args {
4958 struct vnode *a_dvp;
4959 struct vnode **a_vpp;
4960 struct componentname *a_cnp;
4961 struct vattr *a_vap;
4962 char *a_target;
4963 };
4964 #endif
4965
4966 static int
zfs_freebsd_symlink(struct vop_symlink_args * ap)4967 zfs_freebsd_symlink(struct vop_symlink_args *ap)
4968 {
4969 struct componentname *cnp = ap->a_cnp;
4970 vattr_t *vap = ap->a_vap;
4971 znode_t *zp = NULL;
4972 char *symlink;
4973 size_t symlink_len;
4974 int rc;
4975
4976 ASSERT(cnp->cn_flags & SAVENAME);
4977
4978 vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */
4979 vattr_init_mask(vap);
4980 *ap->a_vpp = NULL;
4981
4982 rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
4983 ap->a_target, &zp, cnp->cn_cred, 0 /* flags */);
4984 if (rc == 0) {
4985 *ap->a_vpp = ZTOV(zp);
4986 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
4987 MPASS(zp->z_cached_symlink == NULL);
4988 symlink_len = strlen(ap->a_target);
4989 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
4990 if (symlink != NULL) {
4991 memcpy(symlink, ap->a_target, symlink_len);
4992 symlink[symlink_len] = '\0';
4993 atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
4994 (uintptr_t)symlink);
4995 }
4996 }
4997 return (rc);
4998 }
4999
5000 #ifndef _SYS_SYSPROTO_H_
5001 struct vop_readlink_args {
5002 struct vnode *a_vp;
5003 struct uio *a_uio;
5004 struct ucred *a_cred;
5005 };
5006 #endif
5007
5008 static int
zfs_freebsd_readlink(struct vop_readlink_args * ap)5009 zfs_freebsd_readlink(struct vop_readlink_args *ap)
5010 {
5011 znode_t *zp = VTOZ(ap->a_vp);
5012 struct uio *auio;
5013 char *symlink, *base;
5014 size_t symlink_len;
5015 int error;
5016 bool trycache;
5017
5018 auio = ap->a_uio;
5019 trycache = false;
5020 if (auio->uio_segflg == UIO_SYSSPACE && auio->uio_iovcnt == 1) {
5021 base = auio->uio_iov->iov_base;
5022 symlink_len = auio->uio_iov->iov_len;
5023 trycache = true;
5024 }
5025 error = zfs_readlink(ap->a_vp, auio, ap->a_cred, NULL);
5026 if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
5027 error != 0 || !trycache) {
5028 return (error);
5029 }
5030 symlink_len -= auio->uio_resid;
5031 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
5032 if (symlink != NULL) {
5033 memcpy(symlink, base, symlink_len);
5034 symlink[symlink_len] = '\0';
5035 if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
5036 (uintptr_t)NULL, (uintptr_t)symlink)) {
5037 cache_symlink_free(symlink, symlink_len + 1);
5038 }
5039 }
5040 return (error);
5041 }
5042
5043 #ifndef _SYS_SYSPROTO_H_
5044 struct vop_link_args {
5045 struct vnode *a_tdvp;
5046 struct vnode *a_vp;
5047 struct componentname *a_cnp;
5048 };
5049 #endif
5050
5051 static int
zfs_freebsd_link(struct vop_link_args * ap)5052 zfs_freebsd_link(struct vop_link_args *ap)
5053 {
5054 struct componentname *cnp = ap->a_cnp;
5055 vnode_t *vp = ap->a_vp;
5056 vnode_t *tdvp = ap->a_tdvp;
5057
5058 if (tdvp->v_mount != vp->v_mount)
5059 return (EXDEV);
5060
5061 ASSERT(cnp->cn_flags & SAVENAME);
5062
5063 return (zfs_link(VTOZ(tdvp), VTOZ(vp),
5064 cnp->cn_nameptr, cnp->cn_cred, 0));
5065 }
5066
5067 #ifndef _SYS_SYSPROTO_H_
5068 struct vop_inactive_args {
5069 struct vnode *a_vp;
5070 struct thread *a_td;
5071 };
5072 #endif
5073
5074 static int
zfs_freebsd_inactive(struct vop_inactive_args * ap)5075 zfs_freebsd_inactive(struct vop_inactive_args *ap)
5076 {
5077 vnode_t *vp = ap->a_vp;
5078
5079 #if __FreeBSD_version >= 1300123
5080 zfs_inactive(vp, curthread->td_ucred, NULL);
5081 #else
5082 zfs_inactive(vp, ap->a_td->td_ucred, NULL);
5083 #endif
5084 return (0);
5085 }
5086
#if __FreeBSD_version >= 1300042
#ifndef _SYS_SYSPROTO_H_
struct vop_need_inactive_args {
	struct vnode *a_vp;
};
#endif

/*
 * VOP_NEED_INACTIVE entry point.
 *
 * Returns non-zero when the vnode needs an inactive call: when
 * vn_need_pageq_flush() says so, when the teardown-inactive lock cannot
 * be taken for reading without blocking, or when the znode has no SA
 * handle, is unlinked, or has a dirty atime.  Returns 0 otherwise.
 */
static int
zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
{
	vnode_t *vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int need;

	/* Either condition alone is enough to answer "yes". */
	if (vn_need_pageq_flush(vp) ||
	    !ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs))
		return (1);

	/* Examine the znode state while holding the read lock. */
	if (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty)
		need = 1;
	else
		need = 0;
	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);

	return (need);
}
#endif
5113
5114 #ifndef _SYS_SYSPROTO_H_
5115 struct vop_reclaim_args {
5116 struct vnode *a_vp;
5117 struct thread *a_td;
5118 };
5119 #endif
5120
5121 static int
zfs_freebsd_reclaim(struct vop_reclaim_args * ap)5122 zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
5123 {
5124 vnode_t *vp = ap->a_vp;
5125 znode_t *zp = VTOZ(vp);
5126 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5127
5128 ASSERT(zp != NULL);
5129
5130 #if __FreeBSD_version < 1300042
5131 /* Destroy the vm object and flush associated pages. */
5132 vnode_destroy_vobject(vp);
5133 #endif
5134 /*
5135 * z_teardown_inactive_lock protects from a race with
5136 * zfs_znode_dmu_fini in zfsvfs_teardown during
5137 * force unmount.
5138 */
5139 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
5140 if (zp->z_sa_hdl == NULL)
5141 zfs_znode_free(zp);
5142 else
5143 zfs_zinactive(zp);
5144 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
5145
5146 vp->v_data = NULL;
5147 return (0);
5148 }
5149
#ifndef _SYS_SYSPROTO_H_
struct vop_fid_args {
	struct vnode *a_vp;
	struct fid *a_fid;
};
#endif

/*
 * VOP_FID entry point: passes the vnode and the caller's fid buffer to
 * zfs_fid() and returns its result.
 */
static int
zfs_freebsd_fid(struct vop_fid_args *ap)
{
	void *fid_buf = (void *)ap->a_fid;

	return (zfs_fid(ap->a_vp, fid_buf, NULL));
}
5163
5164
5165 #ifndef _SYS_SYSPROTO_H_
5166 struct vop_pathconf_args {
5167 struct vnode *a_vp;
5168 int a_name;
5169 register_t *a_retval;
5170 } *ap;
5171 #endif
5172
5173 static int
zfs_freebsd_pathconf(struct vop_pathconf_args * ap)5174 zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
5175 {
5176 ulong_t val;
5177 int error;
5178
5179 error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
5180 curthread->td_ucred, NULL);
5181 if (error == 0) {
5182 *ap->a_retval = val;
5183 return (error);
5184 }
5185 if (error != EOPNOTSUPP)
5186 return (error);
5187
5188 switch (ap->a_name) {
5189 case _PC_NAME_MAX:
5190 *ap->a_retval = NAME_MAX;
5191 return (0);
5192 case _PC_PIPE_BUF:
5193 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
5194 *ap->a_retval = PIPE_BUF;
5195 return (0);
5196 }
5197 return (EINVAL);
5198 default:
5199 return (vop_stdpathconf(ap));
5200 }
5201 }
5202
5203 /*
5204 * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
5205 * extended attribute name:
5206 *
5207 * NAMESPACE PREFIX
5208 * system freebsd:system:
5209 * user (none, can be used to access ZFS fsattr(5) attributes
5210 * created on Solaris)
5211 */
5212 static int
zfs_create_attrname(int attrnamespace,const char * name,char * attrname,size_t size)5213 zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
5214 size_t size)
5215 {
5216 const char *namespace, *prefix, *suffix;
5217
5218 /* We don't allow '/' character in attribute name. */
5219 if (strchr(name, '/') != NULL)
5220 return (EINVAL);
5221 /* We don't allow attribute names that start with "freebsd:" string. */
5222 if (strncmp(name, "freebsd:", 8) == 0)
5223 return (EINVAL);
5224
5225 bzero(attrname, size);
5226
5227 switch (attrnamespace) {
5228 case EXTATTR_NAMESPACE_USER:
5229 #if 0
5230 prefix = "freebsd:";
5231 namespace = EXTATTR_NAMESPACE_USER_STRING;
5232 suffix = ":";
5233 #else
5234 /*
5235 * This is the default namespace by which we can access all
5236 * attributes created on Solaris.
5237 */
5238 prefix = namespace = suffix = "";
5239 #endif
5240 break;
5241 case EXTATTR_NAMESPACE_SYSTEM:
5242 prefix = "freebsd:";
5243 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
5244 suffix = ":";
5245 break;
5246 case EXTATTR_NAMESPACE_EMPTY:
5247 default:
5248 return (EINVAL);
5249 }
5250 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
5251 name) >= size) {
5252 return (ENAMETOOLONG);
5253 }
5254 return (0);
5255 }
5256
5257 #ifndef _SYS_SYSPROTO_H_
5258 struct vop_getextattr {
5259 IN struct vnode *a_vp;
5260 IN int a_attrnamespace;
5261 IN const char *a_name;
5262 INOUT struct uio *a_uio;
5263 OUT size_t *a_size;
5264 IN struct ucred *a_cred;
5265 IN struct thread *a_td;
5266 };
5267 #endif
5268
5269 /*
5270 * Vnode operating to retrieve a named extended attribute.
5271 */
5272 static int
zfs_getextattr(struct vop_getextattr_args * ap)5273 zfs_getextattr(struct vop_getextattr_args *ap)
5274 {
5275 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
5276 struct thread *td = ap->a_td;
5277 struct nameidata nd;
5278 char attrname[255];
5279 struct vattr va;
5280 vnode_t *xvp = NULL, *vp;
5281 int error, flags;
5282
5283 /*
5284 * If the xattr property is off, refuse the request.
5285 */
5286 if (!(zfsvfs->z_flags & ZSB_XATTR)) {
5287 return (SET_ERROR(EOPNOTSUPP));
5288 }
5289
5290 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5291 ap->a_cred, ap->a_td, VREAD);
5292 if (error != 0)
5293 return (error);
5294
5295 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5296 sizeof (attrname));
5297 if (error != 0)
5298 return (error);
5299
5300 ZFS_ENTER(zfsvfs);
5301
5302 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
5303 LOOKUP_XATTR, B_FALSE);
5304 if (error != 0) {
5305 ZFS_EXIT(zfsvfs);
5306 return (error);
5307 }
5308
5309 flags = FREAD;
5310 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
5311 xvp, td);
5312 error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
5313 vp = nd.ni_vp;
5314 NDFREE(&nd, NDF_ONLY_PNBUF);
5315 if (error != 0) {
5316 ZFS_EXIT(zfsvfs);
5317 if (error == ENOENT)
5318 error = ENOATTR;
5319 return (error);
5320 }
5321
5322 if (ap->a_size != NULL) {
5323 error = VOP_GETATTR(vp, &va, ap->a_cred);
5324 if (error == 0)
5325 *ap->a_size = (size_t)va.va_size;
5326 } else if (ap->a_uio != NULL)
5327 error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5328
5329 VOP_UNLOCK1(vp);
5330 vn_close(vp, flags, ap->a_cred, td);
5331 ZFS_EXIT(zfsvfs);
5332 return (error);
5333 }
5334
5335 #ifndef _SYS_SYSPROTO_H_
5336 struct vop_deleteextattr {
5337 IN struct vnode *a_vp;
5338 IN int a_attrnamespace;
5339 IN const char *a_name;
5340 IN struct ucred *a_cred;
5341 IN struct thread *a_td;
5342 };
5343 #endif
5344
5345 /*
5346 * Vnode operation to remove a named attribute.
5347 */
5348 static int
zfs_deleteextattr(struct vop_deleteextattr_args * ap)5349 zfs_deleteextattr(struct vop_deleteextattr_args *ap)
5350 {
5351 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
5352 struct thread *td = ap->a_td;
5353 struct nameidata nd;
5354 char attrname[255];
5355 vnode_t *xvp = NULL, *vp;
5356 int error;
5357
5358 /*
5359 * If the xattr property is off, refuse the request.
5360 */
5361 if (!(zfsvfs->z_flags & ZSB_XATTR)) {
5362 return (SET_ERROR(EOPNOTSUPP));
5363 }
5364
5365 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5366 ap->a_cred, ap->a_td, VWRITE);
5367 if (error != 0)
5368 return (error);
5369
5370 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5371 sizeof (attrname));
5372 if (error != 0)
5373 return (error);
5374
5375 ZFS_ENTER(zfsvfs);
5376
5377 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
5378 LOOKUP_XATTR, B_FALSE);
5379 if (error != 0) {
5380 ZFS_EXIT(zfsvfs);
5381 return (error);
5382 }
5383
5384 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5385 UIO_SYSSPACE, attrname, xvp, td);
5386 error = namei(&nd);
5387 vp = nd.ni_vp;
5388 if (error != 0) {
5389 ZFS_EXIT(zfsvfs);
5390 NDFREE(&nd, NDF_ONLY_PNBUF);
5391 if (error == ENOENT)
5392 error = ENOATTR;
5393 return (error);
5394 }
5395
5396 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
5397 NDFREE(&nd, NDF_ONLY_PNBUF);
5398
5399 vput(nd.ni_dvp);
5400 if (vp == nd.ni_dvp)
5401 vrele(vp);
5402 else
5403 vput(vp);
5404 ZFS_EXIT(zfsvfs);
5405
5406 return (error);
5407 }
5408
5409 #ifndef _SYS_SYSPROTO_H_
5410 struct vop_setextattr {
5411 IN struct vnode *a_vp;
5412 IN int a_attrnamespace;
5413 IN const char *a_name;
5414 INOUT struct uio *a_uio;
5415 IN struct ucred *a_cred;
5416 IN struct thread *a_td;
5417 };
5418 #endif
5419
5420 /*
5421 * Vnode operation to set a named attribute.
5422 */
5423 static int
zfs_setextattr(struct vop_setextattr_args * ap)5424 zfs_setextattr(struct vop_setextattr_args *ap)
5425 {
5426 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
5427 struct thread *td = ap->a_td;
5428 struct nameidata nd;
5429 char attrname[255];
5430 struct vattr va;
5431 vnode_t *xvp = NULL, *vp;
5432 int error, flags;
5433
5434 /*
5435 * If the xattr property is off, refuse the request.
5436 */
5437 if (!(zfsvfs->z_flags & ZSB_XATTR)) {
5438 return (SET_ERROR(EOPNOTSUPP));
5439 }
5440
5441 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5442 ap->a_cred, ap->a_td, VWRITE);
5443 if (error != 0)
5444 return (error);
5445 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5446 sizeof (attrname));
5447 if (error != 0)
5448 return (error);
5449
5450 ZFS_ENTER(zfsvfs);
5451
5452 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
5453 LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE);
5454 if (error != 0) {
5455 ZFS_EXIT(zfsvfs);
5456 return (error);
5457 }
5458
5459 flags = FFLAGS(O_WRONLY | O_CREAT);
5460 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
5461 xvp, td);
5462 error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
5463 NULL);
5464 vp = nd.ni_vp;
5465 NDFREE(&nd, NDF_ONLY_PNBUF);
5466 if (error != 0) {
5467 ZFS_EXIT(zfsvfs);
5468 return (error);
5469 }
5470
5471 VATTR_NULL(&va);
5472 va.va_size = 0;
5473 error = VOP_SETATTR(vp, &va, ap->a_cred);
5474 if (error == 0)
5475 VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5476
5477 VOP_UNLOCK1(vp);
5478 vn_close(vp, flags, ap->a_cred, td);
5479 ZFS_EXIT(zfsvfs);
5480 return (error);
5481 }
5482
5483 #ifndef _SYS_SYSPROTO_H_
5484 struct vop_listextattr {
5485 IN struct vnode *a_vp;
5486 IN int a_attrnamespace;
5487 INOUT struct uio *a_uio;
5488 OUT size_t *a_size;
5489 IN struct ucred *a_cred;
5490 IN struct thread *a_td;
5491 };
5492 #endif
5493
5494 /*
5495 * Vnode operation to retrieve extended attributes on a vnode.
5496 */
5497 static int
zfs_listextattr(struct vop_listextattr_args * ap)5498 zfs_listextattr(struct vop_listextattr_args *ap)
5499 {
5500 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
5501 struct thread *td = ap->a_td;
5502 struct nameidata nd;
5503 char attrprefix[16];
5504 uint8_t dirbuf[sizeof (struct dirent)];
5505 struct dirent *dp;
5506 struct iovec aiov;
5507 struct uio auio, *uio = ap->a_uio;
5508 size_t *sizep = ap->a_size;
5509 size_t plen;
5510 vnode_t *xvp = NULL, *vp;
5511 int done, error, eof, pos;
5512
5513 /*
5514 * If the xattr property is off, refuse the request.
5515 */
5516 if (!(zfsvfs->z_flags & ZSB_XATTR)) {
5517 return (SET_ERROR(EOPNOTSUPP));
5518 }
5519
5520 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5521 ap->a_cred, ap->a_td, VREAD);
5522 if (error != 0)
5523 return (error);
5524
5525 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
5526 sizeof (attrprefix));
5527 if (error != 0)
5528 return (error);
5529 plen = strlen(attrprefix);
5530
5531 ZFS_ENTER(zfsvfs);
5532
5533 if (sizep != NULL)
5534 *sizep = 0;
5535
5536 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
5537 LOOKUP_XATTR, B_FALSE);
5538 if (error != 0) {
5539 ZFS_EXIT(zfsvfs);
5540 /*
5541 * ENOATTR means that the EA directory does not yet exist,
5542 * i.e. there are no extended attributes there.
5543 */
5544 if (error == ENOATTR)
5545 error = 0;
5546 return (error);
5547 }
5548
5549 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5550 UIO_SYSSPACE, ".", xvp, td);
5551 error = namei(&nd);
5552 vp = nd.ni_vp;
5553 NDFREE(&nd, NDF_ONLY_PNBUF);
5554 if (error != 0) {
5555 ZFS_EXIT(zfsvfs);
5556 return (error);
5557 }
5558
5559 auio.uio_iov = &aiov;
5560 auio.uio_iovcnt = 1;
5561 auio.uio_segflg = UIO_SYSSPACE;
5562 auio.uio_td = td;
5563 auio.uio_rw = UIO_READ;
5564 auio.uio_offset = 0;
5565
5566 do {
5567 uint8_t nlen;
5568
5569 aiov.iov_base = (void *)dirbuf;
5570 aiov.iov_len = sizeof (dirbuf);
5571 auio.uio_resid = sizeof (dirbuf);
5572 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
5573 done = sizeof (dirbuf) - auio.uio_resid;
5574 if (error != 0)
5575 break;
5576 for (pos = 0; pos < done; ) {
5577 dp = (struct dirent *)(dirbuf + pos);
5578 pos += dp->d_reclen;
5579 /*
5580 * XXX: Temporarily we also accept DT_UNKNOWN, as this
5581 * is what we get when attribute was created on Solaris.
5582 */
5583 if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
5584 continue;
5585 if (plen == 0 &&
5586 strncmp(dp->d_name, "freebsd:", 8) == 0)
5587 continue;
5588 else if (strncmp(dp->d_name, attrprefix, plen) != 0)
5589 continue;
5590 nlen = dp->d_namlen - plen;
5591 if (sizep != NULL)
5592 *sizep += 1 + nlen;
5593 else if (uio != NULL) {
5594 /*
5595 * Format of extattr name entry is one byte for
5596 * length and the rest for name.
5597 */
5598 error = uiomove(&nlen, 1, uio->uio_rw, uio);
5599 if (error == 0) {
5600 error = uiomove(dp->d_name + plen, nlen,
5601 uio->uio_rw, uio);
5602 }
5603 if (error != 0)
5604 break;
5605 }
5606 }
5607 } while (!eof && error == 0);
5608
5609 vput(vp);
5610 ZFS_EXIT(zfsvfs);
5611
5612 return (error);
5613 }
5614
5615 #ifndef _SYS_SYSPROTO_H_
5616 struct vop_getacl_args {
5617 struct vnode *vp;
5618 acl_type_t type;
5619 struct acl *aclp;
5620 struct ucred *cred;
5621 struct thread *td;
5622 };
5623 #endif
5624
5625 static int
zfs_freebsd_getacl(struct vop_getacl_args * ap)5626 zfs_freebsd_getacl(struct vop_getacl_args *ap)
5627 {
5628 int error;
5629 vsecattr_t vsecattr;
5630
5631 if (ap->a_type != ACL_TYPE_NFS4)
5632 return (EINVAL);
5633
5634 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
5635 if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
5636 &vsecattr, 0, ap->a_cred)))
5637 return (error);
5638
5639 error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
5640 vsecattr.vsa_aclcnt);
5641 if (vsecattr.vsa_aclentp != NULL)
5642 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);
5643
5644 return (error);
5645 }
5646
5647 #ifndef _SYS_SYSPROTO_H_
5648 struct vop_setacl_args {
5649 struct vnode *vp;
5650 acl_type_t type;
5651 struct acl *aclp;
5652 struct ucred *cred;
5653 struct thread *td;
5654 };
5655 #endif
5656
5657 static int
zfs_freebsd_setacl(struct vop_setacl_args * ap)5658 zfs_freebsd_setacl(struct vop_setacl_args *ap)
5659 {
5660 int error;
5661 vsecattr_t vsecattr;
5662 int aclbsize; /* size of acl list in bytes */
5663 aclent_t *aaclp;
5664
5665 if (ap->a_type != ACL_TYPE_NFS4)
5666 return (EINVAL);
5667
5668 if (ap->a_aclp == NULL)
5669 return (EINVAL);
5670
5671 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
5672 return (EINVAL);
5673
5674 /*
5675 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
5676 * splitting every entry into two and appending "canonical six"
5677 * entries at the end. Don't allow for setting an ACL that would
5678 * cause chmod(2) to run out of ACL entries.
5679 */
5680 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
5681 return (ENOSPC);
5682
5683 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
5684 if (error != 0)
5685 return (error);
5686
5687 vsecattr.vsa_mask = VSA_ACE;
5688 aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t);
5689 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
5690 aaclp = vsecattr.vsa_aclentp;
5691 vsecattr.vsa_aclentsz = aclbsize;
5692
5693 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
5694 error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred);
5695 kmem_free(aaclp, aclbsize);
5696
5697 return (error);
5698 }
5699
5700 #ifndef _SYS_SYSPROTO_H_
5701 struct vop_aclcheck_args {
5702 struct vnode *vp;
5703 acl_type_t type;
5704 struct acl *aclp;
5705 struct ucred *cred;
5706 struct thread *td;
5707 };
5708 #endif
5709
5710 static int
zfs_freebsd_aclcheck(struct vop_aclcheck_args * ap)5711 zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
5712 {
5713
5714 return (EOPNOTSUPP);
5715 }
5716
/*
 * VOP_VPTOCNP handler: report the parent vnode and the name component of
 * ap->a_vp.
 *
 * On success the parent vnode is stored in *ap->a_vpp.  The name is copied,
 * without a terminating NUL, into ap->a_buf so that it ends at the offset
 * *ap->a_buflen had on entry, and *ap->a_buflen is lowered by the name
 * length.
 *
 * Two cases are handled:
 *  - vp is not the root znode of its zfsvfs, or zfsvfs->z_parent is the
 *    zfsvfs itself: parent and name come from zfs_znode_parent_and_name().
 *  - otherwise (the "snapshot mounted under .zfs" case): the request is
 *    forwarded to the vnode covered by this mount.
 *
 * Returns 0 on success, ENOMEM if the name does not fit into the remaining
 * buffer, ENOENT if vp is found doomed after being re-locked, or the error
 * from zfs_znode_parent_and_name(), the vget step, or the forwarded
 * VOP_VPTOCNP().
 */
static int
zfs_vptocnp(struct vop_vptocnp_args *ap)
{
	vnode_t *covered_vp;
	vnode_t *vp = ap->a_vp;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	znode_t *zp = VTOZ(vp);
	int ltype;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * If we are a snapshot mounted under .zfs, run the operation
	 * on the covered vnode.
	 *
	 * The branch below is the opposite, ordinary case: vp is not the
	 * root of this zfsvfs, or the zfsvfs is its own z_parent.
	 */
	if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
		char name[MAXNAMLEN + 1];
		znode_t *dzp;
		size_t len;

		error = zfs_znode_parent_and_name(zp, &dzp, name);
		if (error == 0) {
			len = strlen(name);
			/* The name must fit in what is left of the buffer. */
			if (*ap->a_buflen < len)
				error = SET_ERROR(ENOMEM);
		}
		if (error == 0) {
			/*
			 * Claim the last len bytes of the remaining buffer
			 * and place the name there (no NUL is copied).
			 */
			*ap->a_buflen -= len;
			bcopy(name, ap->a_buf + *ap->a_buflen, len);
			*ap->a_vpp = ZTOV(dzp);
		}
		/*
		 * NOTE(review): if zfs_znode_parent_and_name() returns dzp
		 * with a vnode reference held, the ENOMEM path above leaves
		 * without releasing it -- confirm against the helper.
		 */
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	ZFS_EXIT(zfsvfs);

	/*
	 * Snapshot-root case: forward to the vnode this mount covers.
	 * A hold on covered_vp is taken (vget_prep() or vhold()) before
	 * vp is unlocked, and is turned into a shared-locked reference
	 * only after vp has been unlocked.
	 */
	covered_vp = vp->v_mount->mnt_vnodecovered;
#if __FreeBSD_version >= 1300045
	enum vgetstate vs = vget_prep(covered_vp);
#else
	vhold(covered_vp);
#endif
	/* Remember how vp was locked so the same lock type can be retaken. */
	ltype = VOP_ISLOCKED(vp);
	VOP_UNLOCK1(vp);
#if __FreeBSD_version >= 1300045
	error = vget_finish(covered_vp, LK_SHARED, vs);
#else
	error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread);
#endif
	if (error == 0) {
		/* The VOP_VPTOCNP() signature lost its cred argument in 1300123. */
#if __FreeBSD_version >= 1300123
		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf,
		    ap->a_buflen);
#else
		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred,
		    ap->a_buf, ap->a_buflen);
#endif
		/* Drop the lock and reference obtained by vget above. */
		vput(covered_vp);
	}
	/*
	 * Retake the lock on vp in its original mode.  If vp is doomed by
	 * now, that overrides whatever result was computed above.
	 */
	vn_lock(vp, ltype | LK_RETRY);
	if (VN_IS_DOOMED(vp))
		error = SET_ERROR(ENOENT);
	return (error);
}
5783
/*
 * Tentative definitions of the three vnode operation tables; each one is
 * given its initializer further down in this file.
 */
struct vop_vector zfs_vnodeops;
struct vop_vector zfs_fifoops;
struct vop_vector zfs_shareops;
5787
5788 struct vop_vector zfs_vnodeops = {
5789 .vop_default = &default_vnodeops,
5790 .vop_inactive = zfs_freebsd_inactive,
5791 #if __FreeBSD_version >= 1300042
5792 .vop_need_inactive = zfs_freebsd_need_inactive,
5793 #endif
5794 .vop_reclaim = zfs_freebsd_reclaim,
5795 #if __FreeBSD_version >= 1300102
5796 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
5797 #endif
5798 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
5799 .vop_access = zfs_freebsd_access,
5800 .vop_allocate = VOP_EINVAL,
5801 .vop_lookup = zfs_cache_lookup,
5802 .vop_cachedlookup = zfs_freebsd_cachedlookup,
5803 .vop_getattr = zfs_freebsd_getattr,
5804 .vop_setattr = zfs_freebsd_setattr,
5805 .vop_create = zfs_freebsd_create,
5806 .vop_mknod = (vop_mknod_t *)zfs_freebsd_create,
5807 .vop_mkdir = zfs_freebsd_mkdir,
5808 .vop_readdir = zfs_freebsd_readdir,
5809 .vop_fsync = zfs_freebsd_fsync,
5810 .vop_open = zfs_freebsd_open,
5811 .vop_close = zfs_freebsd_close,
5812 .vop_rmdir = zfs_freebsd_rmdir,
5813 .vop_ioctl = zfs_freebsd_ioctl,
5814 .vop_link = zfs_freebsd_link,
5815 .vop_symlink = zfs_freebsd_symlink,
5816 .vop_readlink = zfs_freebsd_readlink,
5817 .vop_read = zfs_freebsd_read,
5818 .vop_write = zfs_freebsd_write,
5819 .vop_remove = zfs_freebsd_remove,
5820 .vop_rename = zfs_freebsd_rename,
5821 .vop_pathconf = zfs_freebsd_pathconf,
5822 .vop_bmap = zfs_freebsd_bmap,
5823 .vop_fid = zfs_freebsd_fid,
5824 .vop_getextattr = zfs_getextattr,
5825 .vop_deleteextattr = zfs_deleteextattr,
5826 .vop_setextattr = zfs_setextattr,
5827 .vop_listextattr = zfs_listextattr,
5828 .vop_getacl = zfs_freebsd_getacl,
5829 .vop_setacl = zfs_freebsd_setacl,
5830 .vop_aclcheck = zfs_freebsd_aclcheck,
5831 .vop_getpages = zfs_freebsd_getpages,
5832 .vop_putpages = zfs_freebsd_putpages,
5833 .vop_vptocnp = zfs_vptocnp,
5834 #if __FreeBSD_version >= 1300064
5835 .vop_lock1 = vop_lock,
5836 .vop_unlock = vop_unlock,
5837 .vop_islocked = vop_islocked,
5838 #endif
5839 };
5840 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
5841
5842 struct vop_vector zfs_fifoops = {
5843 .vop_default = &fifo_specops,
5844 .vop_fsync = zfs_freebsd_fsync,
5845 #if __FreeBSD_version >= 1300102
5846 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
5847 #endif
5848 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
5849 .vop_access = zfs_freebsd_access,
5850 .vop_getattr = zfs_freebsd_getattr,
5851 .vop_inactive = zfs_freebsd_inactive,
5852 .vop_read = VOP_PANIC,
5853 .vop_reclaim = zfs_freebsd_reclaim,
5854 .vop_setattr = zfs_freebsd_setattr,
5855 .vop_write = VOP_PANIC,
5856 .vop_pathconf = zfs_freebsd_pathconf,
5857 .vop_fid = zfs_freebsd_fid,
5858 .vop_getacl = zfs_freebsd_getacl,
5859 .vop_setacl = zfs_freebsd_setacl,
5860 .vop_aclcheck = zfs_freebsd_aclcheck,
5861 };
5862 VFS_VOP_VECTOR_REGISTER(zfs_fifoops);
5863
5864 /*
5865 * special share hidden files vnode operations template
5866 */
5867 struct vop_vector zfs_shareops = {
5868 .vop_default = &default_vnodeops,
5869 #if __FreeBSD_version >= 1300121
5870 .vop_fplookup_vexec = VOP_EAGAIN,
5871 #endif
5872 .vop_fplookup_symlink = VOP_EAGAIN,
5873 .vop_access = zfs_freebsd_access,
5874 .vop_inactive = zfs_freebsd_inactive,
5875 .vop_reclaim = zfs_freebsd_reclaim,
5876 .vop_fid = zfs_freebsd_fid,
5877 .vop_pathconf = zfs_freebsd_pathconf,
5878 };
5879 VFS_VOP_VECTOR_REGISTER(zfs_shareops);
5880