1 /*
2 * Copyright (c) 2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*-
25 * Portions Copyright (c) 1992, 1993, 1995
26 * The Regents of the University of California. All rights reserved.
27 *
28 * This code is derived from software donated to Berkeley by
29 * Jan-Simon Pendry.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)null_vfsops.c 8.2 (Berkeley) 1/21/94
56 *
57 * @(#)lofs_vfsops.c 1.2 (Berkeley) 6/18/92
58 * $FreeBSD$
59 */
60
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/fcntl.h>
64 #include <sys/kernel.h>
65 #include <sys/lock.h>
66 #include <sys/malloc.h>
67 #include <sys/mount.h>
68 #include <sys/mount_internal.h>
69 #include <sys/namei.h>
70 #include <sys/proc.h>
71 #include <sys/vnode.h>
72 #include <sys/vnode_internal.h>
73 #include <security/mac_internal.h>
74
75 #include <sys/param.h>
76
77 #include <IOKit/IOBSD.h>
78
79 #include "bindfs.h"
80
/* Entitlement a task must hold to be allowed to mount a bindfs volume. */
#define BINDFS_ENTITLEMENT "com.apple.private.bindfs-allow"

/* Size in bytes of `member` inside `type`, computed without needing an instance. */
#define SIZEOF_MEMBER(type, member) (sizeof(((type *)NULL)->member))
/* Capacity of the f_mntfromname buffer in struct vfsstatfs. */
#define MAX_MNT_FROM_LENGTH         (SIZEOF_MEMBER(struct vfsstatfs, f_mntfromname))
85
86 static int
bindfs_vfs_getlowerattr(mount_t mp,struct vfs_attr * vfap,vfs_context_t ctx)87 bindfs_vfs_getlowerattr(mount_t mp, struct vfs_attr * vfap, vfs_context_t ctx)
88 {
89 memset(vfap, 0, sizeof(*vfap));
90 VFSATTR_INIT(vfap);
91 VFSATTR_WANTED(vfap, f_bsize);
92 VFSATTR_WANTED(vfap, f_iosize);
93 VFSATTR_WANTED(vfap, f_blocks);
94 VFSATTR_WANTED(vfap, f_bfree);
95 VFSATTR_WANTED(vfap, f_bavail);
96 VFSATTR_WANTED(vfap, f_bused);
97 VFSATTR_WANTED(vfap, f_files);
98 VFSATTR_WANTED(vfap, f_ffree);
99 VFSATTR_WANTED(vfap, f_capabilities);
100
101 return vfs_getattr(mp, vfap, ctx);
102 }
103
104 /*
105 * Mount bind layer
106 */
107 static int
bindfs_mount(struct mount * mp,__unused vnode_t devvp,user_addr_t user_data,vfs_context_t ctx)108 bindfs_mount(struct mount * mp, __unused vnode_t devvp, user_addr_t user_data, vfs_context_t ctx)
109 {
110 int error = 0;
111 struct vnode *lowerrootvp = NULL, *vp = NULL;
112 struct vfsstatfs * sp = NULL;
113 struct bind_mount * xmp = NULL;
114 char data[MAXPATHLEN];
115 size_t count;
116 struct vfs_attr vfa;
117 /* set defaults (arbitrary since this file system is readonly) */
118 uint32_t bsize = BLKDEV_IOSIZE;
119 size_t iosize = BLKDEV_IOSIZE;
120 uint64_t blocks = 4711 * 4711;
121 uint64_t bfree = 0;
122 uint64_t bavail = 0;
123 uint64_t bused = 4711;
124 uint64_t files = 4711;
125 uint64_t ffree = 0;
126
127 kauth_cred_t cred = vfs_context_ucred(ctx);
128
129 BINDFSDEBUG("mp = %p %llx\n", (void *)mp, vfs_flags(mp));
130
131 if (vfs_flags(mp) & MNT_ROOTFS) {
132 return EOPNOTSUPP;
133 }
134
135 /*
136 * Update is a no-op
137 */
138 if (vfs_isupdate(mp)) {
139 return ENOTSUP;
140 }
141
142 /* check entitlement */
143 if (!IOCurrentTaskHasEntitlement(BINDFS_ENTITLEMENT)) {
144 return EPERM;
145 }
146
147 /*
148 * Get argument
149 */
150 error = copyinstr(user_data, data, MAXPATHLEN - 1, &count);
151 if (error) {
152 BINDFSERROR("error copying data from user %d\n", error);
153 goto error;
154 }
155
156 /* This could happen if the system is configured for 32 bit inodes instead of
157 * 64 bit */
158 if (count > MAX_MNT_FROM_LENGTH) {
159 error = EINVAL;
160 BINDFSERROR("path to mount too large for this system %zu vs %lu\n", count, MAX_MNT_FROM_LENGTH);
161 goto error;
162 }
163
164 error = vnode_lookup(data, 0, &lowerrootvp, ctx);
165 if (error) {
166 BINDFSERROR("lookup of %s failed error: %d\n", data, error);
167 goto error;
168 }
169
170 /* lowervrootvp has an iocount after vnode_lookup, drop that for a usecount.
171 * Keep this to signal what we want to keep around the thing we are mirroring.
172 * Drop it in unmount.*/
173 error = vnode_ref(lowerrootvp);
174 vnode_put(lowerrootvp);
175 if (error) {
176 // If vnode_ref failed, then bind it out so it can't be used anymore in cleanup.
177 lowerrootvp = NULL;
178 goto error;
179 }
180
181 BINDFSDEBUG("mount %s\n", data);
182
183 xmp = kalloc_type(struct bind_mount, Z_WAITOK | Z_ZERO | Z_NOFAIL);
184
185 /*
186 * Save reference to underlying FS
187 */
188 xmp->bindm_lowerrootvp = lowerrootvp;
189 xmp->bindm_lowerrootvid = vnode_vid(lowerrootvp);
190
191 error = bind_nodeget(mp, lowerrootvp, NULL, &vp, NULL, 1);
192 if (error) {
193 goto error;
194 }
195 /* After bind_nodeget our root vnode is in the hash table and we have to usecounts on lowerrootvp
196 * One use count will get dropped when we reclaim the root during unmount.
197 * The other will get dropped in unmount */
198
199
200 /* vp has an iocount on it from vnode_create. drop that for a usecount. This
201 * is our root vnode so we drop the ref in unmount
202 *
203 * Assuming for now that because we created this vnode and we aren't finished mounting we can get a ref*/
204 vnode_ref(vp);
205 vnode_put(vp);
206
207 xmp->bindm_rootvp = vp;
208
209 /* read the flags the user set, but then ignore some of them, we will only
210 * allow them if they are set on the lower file system */
211 uint64_t flags = vfs_flags(mp) & (~(MNT_IGNORE_OWNERSHIP | MNT_LOCAL));
212 uint64_t lowerflags = vfs_flags(vnode_mount(lowerrootvp)) & (MNT_LOCAL | MNT_QUARANTINE | MNT_IGNORE_OWNERSHIP | MNT_NOEXEC);
213
214 if (lowerflags) {
215 flags |= lowerflags;
216 }
217
218 /* force these flags */
219 flags |= (MNT_DONTBROWSE | MNT_MULTILABEL | MNT_NOSUID | MNT_RDONLY);
220 vfs_setflags(mp, flags);
221
222 vfs_setfsprivate(mp, xmp);
223 vfs_getnewfsid(mp);
224 vfs_setlocklocal(mp);
225
226 /* fill in the stat block */
227 sp = vfs_statfs(mp);
228 strlcpy(sp->f_mntfromname, data, MAX_MNT_FROM_LENGTH);
229
230 sp->f_flags = flags;
231
232 xmp->bindm_flags = BINDM_CASEINSENSITIVE; /* default to case insensitive */
233
234 error = bindfs_vfs_getlowerattr(vnode_mount(lowerrootvp), &vfa, ctx);
235 if (error == 0) {
236 if (VFSATTR_IS_SUPPORTED(&vfa, f_bsize)) {
237 bsize = vfa.f_bsize;
238 }
239 if (VFSATTR_IS_SUPPORTED(&vfa, f_iosize)) {
240 iosize = vfa.f_iosize;
241 }
242 if (VFSATTR_IS_SUPPORTED(&vfa, f_blocks)) {
243 blocks = vfa.f_blocks;
244 }
245 if (VFSATTR_IS_SUPPORTED(&vfa, f_bfree)) {
246 bfree = vfa.f_bfree;
247 }
248 if (VFSATTR_IS_SUPPORTED(&vfa, f_bavail)) {
249 bavail = vfa.f_bavail;
250 }
251 if (VFSATTR_IS_SUPPORTED(&vfa, f_bused)) {
252 bused = vfa.f_bused;
253 }
254 if (VFSATTR_IS_SUPPORTED(&vfa, f_files)) {
255 files = vfa.f_files;
256 }
257 if (VFSATTR_IS_SUPPORTED(&vfa, f_ffree)) {
258 ffree = vfa.f_ffree;
259 }
260 if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) {
261 if ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE)) &&
262 (vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE))) {
263 xmp->bindm_flags &= ~BINDM_CASEINSENSITIVE;
264 }
265 }
266 } else {
267 goto error;
268 }
269
270 sp->f_bsize = bsize;
271 sp->f_iosize = iosize;
272 sp->f_blocks = blocks;
273 sp->f_bfree = bfree;
274 sp->f_bavail = bavail;
275 sp->f_bused = bused;
276 sp->f_files = files;
277 sp->f_ffree = ffree;
278
279 /* Associate the mac label information from the mirrored filesystem with the
280 * mirror */
281 MAC_PERFORM(mount_label_associate, cred, vnode_mount(lowerrootvp), vfs_mntlabel(mp));
282
283 BINDFSDEBUG("lower %s, alias at %s\n", sp->f_mntfromname, sp->f_mntonname);
284 return 0;
285
286 error:
287 if (xmp) {
288 kfree_type(struct bind_mount, xmp);
289 }
290 if (lowerrootvp) {
291 vnode_getwithref(lowerrootvp);
292 vnode_rele(lowerrootvp);
293 vnode_put(lowerrootvp);
294 }
295 if (vp) {
296 /* we made the root vnode but the mount is failed, so clean it up */
297 vnode_getwithref(vp);
298 vnode_rele(vp);
299 /* give vp back */
300 vnode_recycle(vp);
301 vnode_put(vp);
302 }
303 return error;
304 }
305
306 /*
307 * Free reference to bind layer
308 */
309 static int
bindfs_unmount(struct mount * mp,int mntflags,__unused vfs_context_t ctx)310 bindfs_unmount(struct mount * mp, int mntflags, __unused vfs_context_t ctx)
311 {
312 struct bind_mount * mntdata;
313 struct vnode * vp;
314 int error, flags;
315
316 BINDFSDEBUG("mp = %p\n", (void *)mp);
317
318 /* check entitlement or superuser*/
319 if (!IOCurrentTaskHasEntitlement(BINDFS_ENTITLEMENT) &&
320 vfs_context_suser(ctx) != 0) {
321 return EPERM;
322 }
323
324 if (mntflags & MNT_FORCE) {
325 flags = FORCECLOSE;
326 } else {
327 flags = 0;
328 }
329
330 mntdata = MOUNTTOBINDMOUNT(mp);
331 vp = mntdata->bindm_rootvp;
332
333 // release our reference on the root before flushing.
334 // it will get pulled out of the mount structure by reclaim
335 vnode_getalways(vp);
336
337 error = vflush(mp, vp, flags);
338 if (error) {
339 vnode_put(vp);
340 return error;
341 }
342
343 if (vnode_isinuse(vp, 1) && flags == 0) {
344 vnode_put(vp);
345 return EBUSY;
346 }
347
348 vnode_rele(vp); // Drop reference taken by bindfs_mount
349 vnode_put(vp); // Drop ref taken above
350
351 //Force close to get rid of the last vnode
352 (void)vflush(mp, NULL, FORCECLOSE);
353
354 /* no more vnodes, so tear down the mountpoint */
355
356 vfs_setfsprivate(mp, NULL);
357
358 vnode_getalways(mntdata->bindm_lowerrootvp);
359 vnode_rele(mntdata->bindm_lowerrootvp);
360 vnode_put(mntdata->bindm_lowerrootvp);
361
362 kfree_type(struct bind_mount, mntdata);
363
364 uint64_t vflags = vfs_flags(mp);
365 vfs_setflags(mp, vflags & ~MNT_LOCAL);
366
367 return 0;
368 }
369
370 static int
bindfs_root(struct mount * mp,struct vnode ** vpp,__unused vfs_context_t ctx)371 bindfs_root(struct mount * mp, struct vnode ** vpp, __unused vfs_context_t ctx)
372 {
373 struct vnode * vp;
374 int error;
375
376 BINDFSDEBUG("mp = %p, vp = %p\n", (void *)mp, (void *)MOUNTTOBINDMOUNT(mp)->bindm_rootvp);
377
378 /*
379 * Return locked reference to root.
380 */
381 vp = MOUNTTOBINDMOUNT(mp)->bindm_rootvp;
382
383 error = vnode_get(vp);
384 if (error) {
385 return error;
386 }
387
388 *vpp = vp;
389 return 0;
390 }
391
392 static int
bindfs_vfs_getattr(struct mount * mp,struct vfs_attr * vfap,vfs_context_t ctx)393 bindfs_vfs_getattr(struct mount * mp, struct vfs_attr * vfap, vfs_context_t ctx)
394 {
395 struct vnode * coveredvp = NULL;
396 struct vfs_attr vfa;
397 struct bind_mount * bind_mp = MOUNTTOBINDMOUNT(mp);
398 vol_capabilities_attr_t capabilities;
399 struct vfsstatfs * sp = vfs_statfs(mp);
400
401 struct timespec tzero = {.tv_sec = 0, .tv_nsec = 0};
402
403 BINDFSDEBUG("\n");
404
405 /* Set default capabilities in case the lower file system is gone */
406 memset(&capabilities, 0, sizeof(capabilities));
407 capabilities.capabilities[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_FAST_STATFS | VOL_CAP_FMT_HIDDEN_FILES;
408 capabilities.valid[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_FAST_STATFS | VOL_CAP_FMT_HIDDEN_FILES;
409
410 if (bindfs_vfs_getlowerattr(vnode_mount(bind_mp->bindm_lowerrootvp), &vfa, ctx) == 0) {
411 if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) {
412 memcpy(&capabilities, &vfa.f_capabilities, sizeof(capabilities));
413 /* don't support vget */
414 capabilities.capabilities[VOL_CAPABILITIES_FORMAT] &= ~(VOL_CAP_FMT_PERSISTENTOBJECTIDS | VOL_CAP_FMT_PATH_FROM_ID);
415
416 capabilities.capabilities[VOL_CAPABILITIES_FORMAT] |= VOL_CAP_FMT_HIDDEN_FILES; /* Always support UF_HIDDEN */
417
418 capabilities.valid[VOL_CAPABILITIES_FORMAT] &= ~(VOL_CAP_FMT_PERSISTENTOBJECTIDS | VOL_CAP_FMT_PATH_FROM_ID);
419
420 capabilities.valid[VOL_CAPABILITIES_FORMAT] |= VOL_CAP_FMT_HIDDEN_FILES; /* Always support UF_HIDDEN */
421
422 /* dont' support interfaces that only make sense on a writable file system
423 * or one with specific vnops implemented */
424 capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] = 0;
425
426 capabilities.valid[VOL_CAPABILITIES_INTERFACES] &=
427 ~(VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_ATTRLIST | VOL_CAP_INT_READDIRATTR | VOL_CAP_INT_EXCHANGEDATA |
428 VOL_CAP_INT_COPYFILE | VOL_CAP_INT_ALLOCATE | VOL_CAP_INT_VOL_RENAME | VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK);
429 }
430 }
431
432 if (VFSATTR_IS_ACTIVE(vfap, f_create_time)) {
433 VFSATTR_RETURN(vfap, f_create_time, tzero);
434 }
435
436 if (VFSATTR_IS_ACTIVE(vfap, f_modify_time)) {
437 VFSATTR_RETURN(vfap, f_modify_time, tzero);
438 }
439
440 if (VFSATTR_IS_ACTIVE(vfap, f_access_time)) {
441 VFSATTR_RETURN(vfap, f_access_time, tzero);
442 }
443
444 if (VFSATTR_IS_ACTIVE(vfap, f_bsize)) {
445 VFSATTR_RETURN(vfap, f_bsize, sp->f_bsize);
446 }
447
448 if (VFSATTR_IS_ACTIVE(vfap, f_iosize)) {
449 VFSATTR_RETURN(vfap, f_iosize, sp->f_iosize);
450 }
451
452 if (VFSATTR_IS_ACTIVE(vfap, f_owner)) {
453 VFSATTR_RETURN(vfap, f_owner, 0);
454 }
455
456 if (VFSATTR_IS_ACTIVE(vfap, f_blocks)) {
457 VFSATTR_RETURN(vfap, f_blocks, sp->f_blocks);
458 }
459
460 if (VFSATTR_IS_ACTIVE(vfap, f_bfree)) {
461 VFSATTR_RETURN(vfap, f_bfree, sp->f_bfree);
462 }
463
464 if (VFSATTR_IS_ACTIVE(vfap, f_bavail)) {
465 VFSATTR_RETURN(vfap, f_bavail, sp->f_bavail);
466 }
467
468 if (VFSATTR_IS_ACTIVE(vfap, f_bused)) {
469 VFSATTR_RETURN(vfap, f_bused, sp->f_bused);
470 }
471
472 if (VFSATTR_IS_ACTIVE(vfap, f_files)) {
473 VFSATTR_RETURN(vfap, f_files, sp->f_files);
474 }
475
476 if (VFSATTR_IS_ACTIVE(vfap, f_ffree)) {
477 VFSATTR_RETURN(vfap, f_ffree, sp->f_ffree);
478 }
479
480 if (VFSATTR_IS_ACTIVE(vfap, f_fssubtype)) {
481 VFSATTR_RETURN(vfap, f_fssubtype, 0);
482 }
483
484 if (VFSATTR_IS_ACTIVE(vfap, f_capabilities)) {
485 memcpy(&vfap->f_capabilities, &capabilities, sizeof(vol_capabilities_attr_t));
486
487 VFSATTR_SET_SUPPORTED(vfap, f_capabilities);
488 }
489
490 if (VFSATTR_IS_ACTIVE(vfap, f_attributes)) {
491 vol_attributes_attr_t * volattr = &vfap->f_attributes;
492
493 volattr->validattr.commonattr = 0;
494 volattr->validattr.volattr = ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
495 volattr->validattr.dirattr = 0;
496 volattr->validattr.fileattr = 0;
497 volattr->validattr.forkattr = 0;
498
499 volattr->nativeattr.commonattr = 0;
500 volattr->nativeattr.volattr = ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
501 volattr->nativeattr.dirattr = 0;
502 volattr->nativeattr.fileattr = 0;
503 volattr->nativeattr.forkattr = 0;
504
505 VFSATTR_SET_SUPPORTED(vfap, f_attributes);
506 }
507
508 if (VFSATTR_IS_ACTIVE(vfap, f_vol_name)) {
509 /* The name of the volume is the same as the directory we mounted on */
510 coveredvp = vfs_vnodecovered(mp);
511 if (coveredvp) {
512 const char * name = vnode_getname_printable(coveredvp);
513 strlcpy(vfap->f_vol_name, name, MAXPATHLEN);
514 vnode_putname_printable(name);
515
516 VFSATTR_SET_SUPPORTED(vfap, f_vol_name);
517 vnode_put(coveredvp);
518 }
519 }
520
521 return 0;
522 }
523
524 static int
bindfs_sync(__unused struct mount * mp,__unused int waitfor,__unused vfs_context_t ctx)525 bindfs_sync(__unused struct mount * mp, __unused int waitfor, __unused vfs_context_t ctx)
526 {
527 return 0;
528 }
529
530
531
532 static int
bindfs_vfs_start(__unused struct mount * mp,__unused int flags,__unused vfs_context_t ctx)533 bindfs_vfs_start(__unused struct mount * mp, __unused int flags, __unused vfs_context_t ctx)
534 {
535 BINDFSDEBUG("\n");
536 return 0;
537 }
538
/* vnode operation vector description, defined outside this file */
extern const struct vnodeopv_desc bindfs_vnodeop_opv_desc;

/* Table of vnode operation vector descriptions for bindfs (a single entry). */
const struct vnodeopv_desc * bindfs_vnodeopv_descs[] = {
	&bindfs_vnodeop_opv_desc,
};
544
545 struct vfsops bindfs_vfsops = {
546 .vfs_mount = bindfs_mount,
547 .vfs_unmount = bindfs_unmount,
548 .vfs_start = bindfs_vfs_start,
549 .vfs_root = bindfs_root,
550 .vfs_getattr = bindfs_vfs_getattr,
551 .vfs_sync = bindfs_sync,
552 .vfs_init = bindfs_init,
553 .vfs_sysctl = NULL,
554 .vfs_setattr = NULL,
555 };
556