1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2000-2004 5 * Poul-Henning Kamp. All rights reserved. 6 * Copyright (c) 1989, 1992-1993, 1995 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software donated to Berkeley by 10 * Jan-Simon Pendry. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95 34 * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43 35 * 36 * $FreeBSD$ 37 */ 38 39 /* 40 * TODO: 41 * mkdir: want it ? 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/ttycom.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

/*
 * Declared ahead of the devfs headers included below.
 * NOTE(review): presumably defined further down in this file -- confirm.
 */
static struct vop_vector devfs_vnodeops;
static struct vop_vector devfs_specops;
static struct fileops devfs_ops_f;

#include <fs/devfs/devfs.h>
#include <fs/devfs/devfs_int.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>

/* Malloc type used for the per-file struct cdev_privdata records. */
static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data");

/* Held around updates of the de->de_vnode / vp->v_data linkage. */
struct mtx	devfs_de_interlock;
MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
/* Taken shared around the dev_clone event handler invocation in lookup. */
struct sx	clone_drain_lock;
SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock");
/* Held while fp->f_cdevpriv and the cdp_fdpriv list are modified. */
struct mtx	cdevpriv_mtx;
MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF);

SYSCTL_DECL(_vfs_devfs);

/*
 * When non-zero, devfs_timestamp() uses vfs_timestamp(); otherwise it
 * stores time_second with tv_nsec cleared.
 */
static int devfs_dotimes;
SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW,
    &devfs_dotimes, 0, "Update timestamps on DEVFS with default precision");

/*
 * Update devfs node timestamp.  Note that updates are unlocked and
 * stat(2) could see partially updated times.
101 */ 102 static void 103 devfs_timestamp(struct timespec *tsp) 104 { 105 time_t ts; 106 107 if (devfs_dotimes) { 108 vfs_timestamp(tsp); 109 } else { 110 ts = time_second; 111 if (tsp->tv_sec != ts) { 112 tsp->tv_sec = ts; 113 tsp->tv_nsec = 0; 114 } 115 } 116 } 117 118 static int 119 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp, 120 int *ref) 121 { 122 123 *dswp = devvn_refthread(fp->f_vnode, devp, ref); 124 if (*devp != fp->f_data) { 125 if (*dswp != NULL) 126 dev_relthread(*devp, *ref); 127 return (ENXIO); 128 } 129 KASSERT((*devp)->si_refcount > 0, 130 ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp))); 131 if (*dswp == NULL) 132 return (ENXIO); 133 curthread->td_fpop = fp; 134 return (0); 135 } 136 137 int 138 devfs_get_cdevpriv(void **datap) 139 { 140 struct file *fp; 141 struct cdev_privdata *p; 142 int error; 143 144 fp = curthread->td_fpop; 145 if (fp == NULL) 146 return (EBADF); 147 p = fp->f_cdevpriv; 148 if (p != NULL) { 149 error = 0; 150 *datap = p->cdpd_data; 151 } else 152 error = ENOENT; 153 return (error); 154 } 155 156 int 157 devfs_set_cdevpriv(void *priv, d_priv_dtor_t *priv_dtr) 158 { 159 struct file *fp; 160 struct cdev_priv *cdp; 161 struct cdev_privdata *p; 162 int error; 163 164 fp = curthread->td_fpop; 165 if (fp == NULL) 166 return (ENOENT); 167 cdp = cdev2priv((struct cdev *)fp->f_data); 168 p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK); 169 p->cdpd_data = priv; 170 p->cdpd_dtr = priv_dtr; 171 p->cdpd_fp = fp; 172 mtx_lock(&cdevpriv_mtx); 173 if (fp->f_cdevpriv == NULL) { 174 LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list); 175 fp->f_cdevpriv = p; 176 mtx_unlock(&cdevpriv_mtx); 177 error = 0; 178 } else { 179 mtx_unlock(&cdevpriv_mtx); 180 free(p, M_CDEVPDATA); 181 error = EBUSY; 182 } 183 return (error); 184 } 185 186 void 187 devfs_destroy_cdevpriv(struct cdev_privdata *p) 188 { 189 190 mtx_assert(&cdevpriv_mtx, MA_OWNED); 191 KASSERT(p->cdpd_fp->f_cdevpriv == p, 192 
("devfs_destoy_cdevpriv %p != %p", p->cdpd_fp->f_cdevpriv, p)); 193 p->cdpd_fp->f_cdevpriv = NULL; 194 LIST_REMOVE(p, cdpd_list); 195 mtx_unlock(&cdevpriv_mtx); 196 (p->cdpd_dtr)(p->cdpd_data); 197 free(p, M_CDEVPDATA); 198 } 199 200 static void 201 devfs_fpdrop(struct file *fp) 202 { 203 struct cdev_privdata *p; 204 205 mtx_lock(&cdevpriv_mtx); 206 if ((p = fp->f_cdevpriv) == NULL) { 207 mtx_unlock(&cdevpriv_mtx); 208 return; 209 } 210 devfs_destroy_cdevpriv(p); 211 } 212 213 void 214 devfs_clear_cdevpriv(void) 215 { 216 struct file *fp; 217 218 fp = curthread->td_fpop; 219 if (fp == NULL) 220 return; 221 devfs_fpdrop(fp); 222 } 223 224 /* 225 * On success devfs_populate_vp() returns with dmp->dm_lock held. 226 */ 227 static int 228 devfs_populate_vp(struct vnode *vp) 229 { 230 struct devfs_dirent *de; 231 struct devfs_mount *dmp; 232 int locked; 233 234 ASSERT_VOP_LOCKED(vp, "devfs_populate_vp"); 235 236 dmp = VFSTODEVFS(vp->v_mount); 237 locked = VOP_ISLOCKED(vp); 238 239 sx_xlock(&dmp->dm_lock); 240 DEVFS_DMP_HOLD(dmp); 241 242 /* Can't call devfs_populate() with the vnode lock held. 
*/ 243 VOP_UNLOCK(vp, 0); 244 devfs_populate(dmp); 245 246 sx_xunlock(&dmp->dm_lock); 247 vn_lock(vp, locked | LK_RETRY); 248 sx_xlock(&dmp->dm_lock); 249 if (DEVFS_DMP_DROP(dmp)) { 250 sx_xunlock(&dmp->dm_lock); 251 devfs_unmount_final(dmp); 252 return (ERESTART); 253 } 254 if ((vp->v_iflag & VI_DOOMED) != 0) { 255 sx_xunlock(&dmp->dm_lock); 256 return (ERESTART); 257 } 258 de = vp->v_data; 259 KASSERT(de != NULL, 260 ("devfs_populate_vp: vp->v_data == NULL but vnode not doomed")); 261 if ((de->de_flags & DE_DOOMED) != 0) { 262 sx_xunlock(&dmp->dm_lock); 263 return (ERESTART); 264 } 265 266 return (0); 267 } 268 269 static int 270 devfs_vptocnp(struct vop_vptocnp_args *ap) 271 { 272 struct vnode *vp = ap->a_vp; 273 struct vnode **dvp = ap->a_vpp; 274 struct devfs_mount *dmp; 275 char *buf = ap->a_buf; 276 int *buflen = ap->a_buflen; 277 struct devfs_dirent *dd, *de; 278 int i, error; 279 280 dmp = VFSTODEVFS(vp->v_mount); 281 282 error = devfs_populate_vp(vp); 283 if (error != 0) 284 return (error); 285 286 i = *buflen; 287 dd = vp->v_data; 288 289 if (vp->v_type == VCHR) { 290 i -= strlen(dd->de_cdp->cdp_c.si_name); 291 if (i < 0) { 292 error = ENOMEM; 293 goto finished; 294 } 295 bcopy(dd->de_cdp->cdp_c.si_name, buf + i, 296 strlen(dd->de_cdp->cdp_c.si_name)); 297 de = dd->de_dir; 298 } else if (vp->v_type == VDIR) { 299 if (dd == dmp->dm_rootdir) { 300 *dvp = vp; 301 vref(*dvp); 302 goto finished; 303 } 304 i -= dd->de_dirent->d_namlen; 305 if (i < 0) { 306 error = ENOMEM; 307 goto finished; 308 } 309 bcopy(dd->de_dirent->d_name, buf + i, 310 dd->de_dirent->d_namlen); 311 de = dd; 312 } else { 313 error = ENOENT; 314 goto finished; 315 } 316 *buflen = i; 317 de = devfs_parent_dirent(de); 318 if (de == NULL) { 319 error = ENOENT; 320 goto finished; 321 } 322 mtx_lock(&devfs_de_interlock); 323 *dvp = de->de_vnode; 324 if (*dvp != NULL) { 325 VI_LOCK(*dvp); 326 mtx_unlock(&devfs_de_interlock); 327 vholdl(*dvp); 328 VI_UNLOCK(*dvp); 329 vref(*dvp); 330 vdrop(*dvp); 
331 } else { 332 mtx_unlock(&devfs_de_interlock); 333 error = ENOENT; 334 } 335 finished: 336 sx_xunlock(&dmp->dm_lock); 337 return (error); 338 } 339 340 /* 341 * Construct the fully qualified path name relative to the mountpoint. 342 * If a NULL cnp is provided, no '/' is appended to the resulting path. 343 */ 344 char * 345 devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd, 346 struct componentname *cnp) 347 { 348 int i; 349 struct devfs_dirent *de; 350 351 sx_assert(&dmp->dm_lock, SA_LOCKED); 352 353 i = SPECNAMELEN; 354 buf[i] = '\0'; 355 if (cnp != NULL) 356 i -= cnp->cn_namelen; 357 if (i < 0) 358 return (NULL); 359 if (cnp != NULL) 360 bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen); 361 de = dd; 362 while (de != dmp->dm_rootdir) { 363 if (cnp != NULL || i < SPECNAMELEN) { 364 i--; 365 if (i < 0) 366 return (NULL); 367 buf[i] = '/'; 368 } 369 i -= de->de_dirent->d_namlen; 370 if (i < 0) 371 return (NULL); 372 bcopy(de->de_dirent->d_name, buf + i, 373 de->de_dirent->d_namlen); 374 de = devfs_parent_dirent(de); 375 if (de == NULL) 376 return (NULL); 377 } 378 return (buf + i); 379 } 380 381 static int 382 devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp, 383 struct devfs_dirent *de) 384 { 385 int not_found; 386 387 not_found = 0; 388 if (de->de_flags & DE_DOOMED) 389 not_found = 1; 390 if (DEVFS_DE_DROP(de)) { 391 KASSERT(not_found == 1, ("DEVFS de dropped but not doomed")); 392 devfs_dirent_free(de); 393 } 394 if (DEVFS_DMP_DROP(dmp)) { 395 KASSERT(not_found == 1, 396 ("DEVFS mount struct freed before dirent")); 397 not_found = 2; 398 sx_xunlock(&dmp->dm_lock); 399 devfs_unmount_final(dmp); 400 } 401 if (not_found == 1 || (drop_dm_lock && not_found != 2)) 402 sx_unlock(&dmp->dm_lock); 403 return (not_found); 404 } 405 406 static void 407 devfs_insmntque_dtr(struct vnode *vp, void *arg) 408 { 409 struct devfs_dirent *de; 410 411 de = (struct devfs_dirent *)arg; 412 mtx_lock(&devfs_de_interlock); 413 vp->v_data = NULL; 414 
de->de_vnode = NULL; 415 mtx_unlock(&devfs_de_interlock); 416 vgone(vp); 417 vput(vp); 418 } 419 420 /* 421 * devfs_allocv shall be entered with dmp->dm_lock held, and it drops 422 * it on return. 423 */ 424 int 425 devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode, 426 struct vnode **vpp) 427 { 428 int error; 429 struct vnode *vp; 430 struct cdev *dev; 431 struct devfs_mount *dmp; 432 struct cdevsw *dsw; 433 434 dmp = VFSTODEVFS(mp); 435 if (de->de_flags & DE_DOOMED) { 436 sx_xunlock(&dmp->dm_lock); 437 return (ENOENT); 438 } 439 loop: 440 DEVFS_DE_HOLD(de); 441 DEVFS_DMP_HOLD(dmp); 442 mtx_lock(&devfs_de_interlock); 443 vp = de->de_vnode; 444 if (vp != NULL) { 445 VI_LOCK(vp); 446 mtx_unlock(&devfs_de_interlock); 447 sx_xunlock(&dmp->dm_lock); 448 vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread); 449 sx_xlock(&dmp->dm_lock); 450 if (devfs_allocv_drop_refs(0, dmp, de)) { 451 vput(vp); 452 return (ENOENT); 453 } 454 else if ((vp->v_iflag & VI_DOOMED) != 0) { 455 mtx_lock(&devfs_de_interlock); 456 if (de->de_vnode == vp) { 457 de->de_vnode = NULL; 458 vp->v_data = NULL; 459 } 460 mtx_unlock(&devfs_de_interlock); 461 vput(vp); 462 goto loop; 463 } 464 sx_xunlock(&dmp->dm_lock); 465 *vpp = vp; 466 return (0); 467 } 468 mtx_unlock(&devfs_de_interlock); 469 if (de->de_dirent->d_type == DT_CHR) { 470 if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) { 471 devfs_allocv_drop_refs(1, dmp, de); 472 return (ENOENT); 473 } 474 dev = &de->de_cdp->cdp_c; 475 } else { 476 dev = NULL; 477 } 478 error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp); 479 if (error != 0) { 480 devfs_allocv_drop_refs(1, dmp, de); 481 printf("devfs_allocv: failed to allocate new vnode\n"); 482 return (error); 483 } 484 485 if (de->de_dirent->d_type == DT_CHR) { 486 vp->v_type = VCHR; 487 VI_LOCK(vp); 488 dev_lock(); 489 dev_refl(dev); 490 /* XXX: v_rdev should be protect by vnode lock */ 491 vp->v_rdev = dev; 492 KASSERT(vp->v_usecount == 1, 493 ("%s %d (%d)\n", __func__, __LINE__, 
vp->v_usecount)); 494 dev->si_usecount += vp->v_usecount; 495 /* Special casing of ttys for deadfs. Probably redundant. */ 496 dsw = dev->si_devsw; 497 if (dsw != NULL && (dsw->d_flags & D_TTY) != 0) 498 vp->v_vflag |= VV_ISTTY; 499 dev_unlock(); 500 VI_UNLOCK(vp); 501 if ((dev->si_flags & SI_ETERNAL) != 0) 502 vp->v_vflag |= VV_ETERNALDEV; 503 vp->v_op = &devfs_specops; 504 } else if (de->de_dirent->d_type == DT_DIR) { 505 vp->v_type = VDIR; 506 } else if (de->de_dirent->d_type == DT_LNK) { 507 vp->v_type = VLNK; 508 } else { 509 vp->v_type = VBAD; 510 } 511 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS); 512 VN_LOCK_ASHARE(vp); 513 mtx_lock(&devfs_de_interlock); 514 vp->v_data = de; 515 de->de_vnode = vp; 516 mtx_unlock(&devfs_de_interlock); 517 error = insmntque1(vp, mp, devfs_insmntque_dtr, de); 518 if (error != 0) { 519 (void) devfs_allocv_drop_refs(1, dmp, de); 520 return (error); 521 } 522 if (devfs_allocv_drop_refs(0, dmp, de)) { 523 vput(vp); 524 return (ENOENT); 525 } 526 #ifdef MAC 527 mac_devfs_vnode_associate(mp, de, vp); 528 #endif 529 sx_xunlock(&dmp->dm_lock); 530 *vpp = vp; 531 return (0); 532 } 533 534 static int 535 devfs_access(struct vop_access_args *ap) 536 { 537 struct vnode *vp = ap->a_vp; 538 struct devfs_dirent *de; 539 struct proc *p; 540 int error; 541 542 de = vp->v_data; 543 if (vp->v_type == VDIR) 544 de = de->de_dir; 545 546 error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid, 547 ap->a_accmode, ap->a_cred, NULL); 548 if (error == 0) 549 return (0); 550 if (error != EACCES) 551 return (error); 552 p = ap->a_td->td_proc; 553 /* We do, however, allow access to the controlling terminal */ 554 PROC_LOCK(p); 555 if (!(p->p_flag & P_CONTROLT)) { 556 PROC_UNLOCK(p); 557 return (error); 558 } 559 if (p->p_session->s_ttydp == de->de_cdp) 560 error = 0; 561 PROC_UNLOCK(p); 562 return (error); 563 } 564 565 _Static_assert(((FMASK | FCNTLFLAGS) & (FLASTCLOSE | FREVOKE)) == 0, 566 "devfs-only flag reuse failed"); 567 568 static 
int 569 devfs_close(struct vop_close_args *ap) 570 { 571 struct vnode *vp = ap->a_vp, *oldvp; 572 struct thread *td = ap->a_td; 573 struct proc *p; 574 struct cdev *dev = vp->v_rdev; 575 struct cdevsw *dsw; 576 int dflags, error, ref, vp_locked; 577 578 /* 579 * XXX: Don't call d_close() if we were called because of 580 * XXX: insmntque1() failure. 581 */ 582 if (vp->v_data == NULL) 583 return (0); 584 585 /* 586 * Hack: a tty device that is a controlling terminal 587 * has a reference from the session structure. 588 * We cannot easily tell that a character device is 589 * a controlling terminal, unless it is the closing 590 * process' controlling terminal. In that case, 591 * if the reference count is 2 (this last descriptor 592 * plus the session), release the reference from the session. 593 */ 594 if (td != NULL) { 595 p = td->td_proc; 596 PROC_LOCK(p); 597 if (vp == p->p_session->s_ttyvp) { 598 PROC_UNLOCK(p); 599 oldvp = NULL; 600 sx_xlock(&proctree_lock); 601 if (vp == p->p_session->s_ttyvp) { 602 SESS_LOCK(p->p_session); 603 VI_LOCK(vp); 604 if (count_dev(dev) == 2 && 605 (vp->v_iflag & VI_DOOMED) == 0) { 606 p->p_session->s_ttyvp = NULL; 607 p->p_session->s_ttydp = NULL; 608 oldvp = vp; 609 } 610 VI_UNLOCK(vp); 611 SESS_UNLOCK(p->p_session); 612 } 613 sx_xunlock(&proctree_lock); 614 if (oldvp != NULL) 615 vrele(oldvp); 616 } else 617 PROC_UNLOCK(p); 618 } 619 /* 620 * We do not want to really close the device if it 621 * is still in use unless we are trying to close it 622 * forcibly. Since every use (buffer, vnode, swap, cmap) 623 * holds a reference to the vnode, and because we mark 624 * any other vnodes that alias this device, when the 625 * sum of the reference counts on all the aliased 626 * vnodes descends to one, we are on last close. 627 */ 628 dsw = dev_refthread(dev, &ref); 629 if (dsw == NULL) 630 return (ENXIO); 631 dflags = 0; 632 VI_LOCK(vp); 633 if (vp->v_iflag & VI_DOOMED) { 634 /* Forced close. 
*/ 635 dflags |= FREVOKE | FNONBLOCK; 636 } else if (dsw->d_flags & D_TRACKCLOSE) { 637 /* Keep device updated on status. */ 638 } else if (count_dev(dev) > 1) { 639 VI_UNLOCK(vp); 640 dev_relthread(dev, ref); 641 return (0); 642 } 643 if (count_dev(dev) == 1) 644 dflags |= FLASTCLOSE; 645 vholdl(vp); 646 VI_UNLOCK(vp); 647 vp_locked = VOP_ISLOCKED(vp); 648 VOP_UNLOCK(vp, 0); 649 KASSERT(dev->si_refcount > 0, 650 ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev))); 651 error = dsw->d_close(dev, ap->a_fflag | dflags, S_IFCHR, td); 652 dev_relthread(dev, ref); 653 vn_lock(vp, vp_locked | LK_RETRY); 654 vdrop(vp); 655 return (error); 656 } 657 658 static int 659 devfs_close_f(struct file *fp, struct thread *td) 660 { 661 int error; 662 struct file *fpop; 663 664 /* 665 * NB: td may be NULL if this descriptor is closed due to 666 * garbage collection from a closed UNIX domain socket. 667 */ 668 fpop = curthread->td_fpop; 669 curthread->td_fpop = fp; 670 error = vnops.fo_close(fp, td); 671 curthread->td_fpop = fpop; 672 673 /* 674 * The f_cdevpriv cannot be assigned non-NULL value while we 675 * are destroying the file. 
676 */ 677 if (fp->f_cdevpriv != NULL) 678 devfs_fpdrop(fp); 679 return (error); 680 } 681 682 static int 683 devfs_getattr(struct vop_getattr_args *ap) 684 { 685 struct vnode *vp = ap->a_vp; 686 struct vattr *vap = ap->a_vap; 687 struct devfs_dirent *de; 688 struct devfs_mount *dmp; 689 struct cdev *dev; 690 struct timeval boottime; 691 int error; 692 693 error = devfs_populate_vp(vp); 694 if (error != 0) 695 return (error); 696 697 dmp = VFSTODEVFS(vp->v_mount); 698 sx_xunlock(&dmp->dm_lock); 699 700 de = vp->v_data; 701 KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp)); 702 if (vp->v_type == VDIR) { 703 de = de->de_dir; 704 KASSERT(de != NULL, 705 ("Null dir dirent in devfs_getattr vp=%p", vp)); 706 } 707 vap->va_uid = de->de_uid; 708 vap->va_gid = de->de_gid; 709 vap->va_mode = de->de_mode; 710 if (vp->v_type == VLNK) 711 vap->va_size = strlen(de->de_symlink); 712 else if (vp->v_type == VDIR) 713 vap->va_size = vap->va_bytes = DEV_BSIZE; 714 else 715 vap->va_size = 0; 716 if (vp->v_type != VDIR) 717 vap->va_bytes = 0; 718 vap->va_blocksize = DEV_BSIZE; 719 vap->va_type = vp->v_type; 720 721 getboottime(&boottime); 722 #define fix(aa) \ 723 do { \ 724 if ((aa).tv_sec <= 3600) { \ 725 (aa).tv_sec = boottime.tv_sec; \ 726 (aa).tv_nsec = boottime.tv_usec * 1000; \ 727 } \ 728 } while (0) 729 730 if (vp->v_type != VCHR) { 731 fix(de->de_atime); 732 vap->va_atime = de->de_atime; 733 fix(de->de_mtime); 734 vap->va_mtime = de->de_mtime; 735 fix(de->de_ctime); 736 vap->va_ctime = de->de_ctime; 737 } else { 738 dev = vp->v_rdev; 739 fix(dev->si_atime); 740 vap->va_atime = dev->si_atime; 741 fix(dev->si_mtime); 742 vap->va_mtime = dev->si_mtime; 743 fix(dev->si_ctime); 744 vap->va_ctime = dev->si_ctime; 745 746 vap->va_rdev = cdev2priv(dev)->cdp_inode; 747 } 748 vap->va_gen = 0; 749 vap->va_flags = 0; 750 vap->va_filerev = 0; 751 vap->va_nlink = de->de_links; 752 vap->va_fileid = de->de_inode; 753 754 return (error); 755 } 756 757 /* ARGSUSED */ 758 static 
int 759 devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td) 760 { 761 struct file *fpop; 762 int error; 763 764 fpop = td->td_fpop; 765 td->td_fpop = fp; 766 error = vnops.fo_ioctl(fp, com, data, cred, td); 767 td->td_fpop = fpop; 768 return (error); 769 } 770 771 static int 772 devfs_ioctl(struct vop_ioctl_args *ap) 773 { 774 struct fiodgname_arg *fgn; 775 struct vnode *vpold, *vp; 776 struct cdevsw *dsw; 777 struct thread *td; 778 struct cdev *dev; 779 int error, ref, i; 780 const char *p; 781 u_long com; 782 783 vp = ap->a_vp; 784 com = ap->a_command; 785 td = ap->a_td; 786 787 dsw = devvn_refthread(vp, &dev, &ref); 788 if (dsw == NULL) 789 return (ENXIO); 790 KASSERT(dev->si_refcount > 0, 791 ("devfs: un-referenced struct cdev *(%s)", devtoname(dev))); 792 793 if (com == FIODTYPE) { 794 *(int *)ap->a_data = dsw->d_flags & D_TYPEMASK; 795 error = 0; 796 goto out; 797 } else if (com == FIODGNAME) { 798 fgn = ap->a_data; 799 p = devtoname(dev); 800 i = strlen(p) + 1; 801 if (i > fgn->len) 802 error = EINVAL; 803 else 804 error = copyout(p, fgn->buf, i); 805 goto out; 806 } 807 808 error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td); 809 810 out: 811 dev_relthread(dev, ref); 812 if (error == ENOIOCTL) 813 error = ENOTTY; 814 815 if (error == 0 && com == TIOCSCTTY) { 816 /* Do nothing if reassigning same control tty */ 817 sx_slock(&proctree_lock); 818 if (td->td_proc->p_session->s_ttyvp == vp) { 819 sx_sunlock(&proctree_lock); 820 return (0); 821 } 822 823 vpold = td->td_proc->p_session->s_ttyvp; 824 VREF(vp); 825 SESS_LOCK(td->td_proc->p_session); 826 td->td_proc->p_session->s_ttyvp = vp; 827 td->td_proc->p_session->s_ttydp = cdev2priv(dev); 828 SESS_UNLOCK(td->td_proc->p_session); 829 830 sx_sunlock(&proctree_lock); 831 832 /* Get rid of reference to old control tty */ 833 if (vpold) 834 vrele(vpold); 835 } 836 return (error); 837 } 838 839 /* ARGSUSED */ 840 static int 841 devfs_kqfilter_f(struct file *fp, struct 
knote *kn) 842 { 843 struct cdev *dev; 844 struct cdevsw *dsw; 845 int error, ref; 846 struct file *fpop; 847 struct thread *td; 848 849 td = curthread; 850 fpop = td->td_fpop; 851 error = devfs_fp_check(fp, &dev, &dsw, &ref); 852 if (error) 853 return (error); 854 error = dsw->d_kqfilter(dev, kn); 855 td->td_fpop = fpop; 856 dev_relthread(dev, ref); 857 return (error); 858 } 859 860 static inline int 861 devfs_prison_check(struct devfs_dirent *de, struct thread *td) 862 { 863 struct cdev_priv *cdp; 864 struct ucred *dcr; 865 struct proc *p; 866 int error; 867 868 cdp = de->de_cdp; 869 if (cdp == NULL) 870 return (0); 871 dcr = cdp->cdp_c.si_cred; 872 if (dcr == NULL) 873 return (0); 874 875 error = prison_check(td->td_ucred, dcr); 876 if (error == 0) 877 return (0); 878 /* We do, however, allow access to the controlling terminal */ 879 p = td->td_proc; 880 PROC_LOCK(p); 881 if (!(p->p_flag & P_CONTROLT)) { 882 PROC_UNLOCK(p); 883 return (error); 884 } 885 if (p->p_session->s_ttydp == cdp) 886 error = 0; 887 PROC_UNLOCK(p); 888 return (error); 889 } 890 891 static int 892 devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock) 893 { 894 struct componentname *cnp; 895 struct vnode *dvp, **vpp; 896 struct thread *td; 897 struct devfs_dirent *de, *dd; 898 struct devfs_dirent **dde; 899 struct devfs_mount *dmp; 900 struct mount *mp; 901 struct cdev *cdev; 902 int error, flags, nameiop, dvplocked; 903 char specname[SPECNAMELEN + 1], *pname; 904 905 cnp = ap->a_cnp; 906 vpp = ap->a_vpp; 907 dvp = ap->a_dvp; 908 pname = cnp->cn_nameptr; 909 td = cnp->cn_thread; 910 flags = cnp->cn_flags; 911 nameiop = cnp->cn_nameiop; 912 mp = dvp->v_mount; 913 dmp = VFSTODEVFS(mp); 914 dd = dvp->v_data; 915 *vpp = NULLVP; 916 917 if ((flags & ISLASTCN) && nameiop == RENAME) 918 return (EOPNOTSUPP); 919 920 if (dvp->v_type != VDIR) 921 return (ENOTDIR); 922 923 if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT)) 924 return (EIO); 925 926 error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td); 
927 if (error) 928 return (error); 929 930 if (cnp->cn_namelen == 1 && *pname == '.') { 931 if ((flags & ISLASTCN) && nameiop != LOOKUP) 932 return (EINVAL); 933 *vpp = dvp; 934 VREF(dvp); 935 return (0); 936 } 937 938 if (flags & ISDOTDOT) { 939 if ((flags & ISLASTCN) && nameiop != LOOKUP) 940 return (EINVAL); 941 de = devfs_parent_dirent(dd); 942 if (de == NULL) 943 return (ENOENT); 944 dvplocked = VOP_ISLOCKED(dvp); 945 VOP_UNLOCK(dvp, 0); 946 error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK, 947 vpp); 948 *dm_unlock = 0; 949 vn_lock(dvp, dvplocked | LK_RETRY); 950 return (error); 951 } 952 953 dd = dvp->v_data; 954 de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0); 955 while (de == NULL) { /* While(...) so we can use break */ 956 957 if (nameiop == DELETE) 958 return (ENOENT); 959 960 /* 961 * OK, we didn't have an entry for the name we were asked for 962 * so we try to see if anybody can create it on demand. 963 */ 964 pname = devfs_fqpn(specname, dmp, dd, cnp); 965 if (pname == NULL) 966 break; 967 968 cdev = NULL; 969 DEVFS_DMP_HOLD(dmp); 970 sx_xunlock(&dmp->dm_lock); 971 sx_slock(&clone_drain_lock); 972 EVENTHANDLER_INVOKE(dev_clone, 973 td->td_ucred, pname, strlen(pname), &cdev); 974 sx_sunlock(&clone_drain_lock); 975 976 if (cdev == NULL) 977 sx_xlock(&dmp->dm_lock); 978 else if (devfs_populate_vp(dvp) != 0) { 979 *dm_unlock = 0; 980 sx_xlock(&dmp->dm_lock); 981 if (DEVFS_DMP_DROP(dmp)) { 982 sx_xunlock(&dmp->dm_lock); 983 devfs_unmount_final(dmp); 984 } else 985 sx_xunlock(&dmp->dm_lock); 986 dev_rel(cdev); 987 return (ENOENT); 988 } 989 if (DEVFS_DMP_DROP(dmp)) { 990 *dm_unlock = 0; 991 sx_xunlock(&dmp->dm_lock); 992 devfs_unmount_final(dmp); 993 if (cdev != NULL) 994 dev_rel(cdev); 995 return (ENOENT); 996 } 997 998 if (cdev == NULL) 999 break; 1000 1001 dev_lock(); 1002 dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx]; 1003 if (dde != NULL && *dde != NULL) 1004 de = *dde; 1005 dev_unlock(); 1006 dev_rel(cdev); 1007 break; 1008 } 
1009 1010 if (de == NULL || de->de_flags & DE_WHITEOUT) { 1011 if ((nameiop == CREATE || nameiop == RENAME) && 1012 (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) { 1013 cnp->cn_flags |= SAVENAME; 1014 return (EJUSTRETURN); 1015 } 1016 return (ENOENT); 1017 } 1018 1019 if (devfs_prison_check(de, td)) 1020 return (ENOENT); 1021 1022 if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) { 1023 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); 1024 if (error) 1025 return (error); 1026 if (*vpp == dvp) { 1027 VREF(dvp); 1028 *vpp = dvp; 1029 return (0); 1030 } 1031 } 1032 error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK, vpp); 1033 *dm_unlock = 0; 1034 return (error); 1035 } 1036 1037 static int 1038 devfs_lookup(struct vop_lookup_args *ap) 1039 { 1040 int j; 1041 struct devfs_mount *dmp; 1042 int dm_unlock; 1043 1044 if (devfs_populate_vp(ap->a_dvp) != 0) 1045 return (ENOTDIR); 1046 1047 dmp = VFSTODEVFS(ap->a_dvp->v_mount); 1048 dm_unlock = 1; 1049 j = devfs_lookupx(ap, &dm_unlock); 1050 if (dm_unlock == 1) 1051 sx_xunlock(&dmp->dm_lock); 1052 return (j); 1053 } 1054 1055 static int 1056 devfs_mknod(struct vop_mknod_args *ap) 1057 { 1058 struct componentname *cnp; 1059 struct vnode *dvp, **vpp; 1060 struct devfs_dirent *dd, *de; 1061 struct devfs_mount *dmp; 1062 int error; 1063 1064 /* 1065 * The only type of node we should be creating here is a 1066 * character device, for anything else return EOPNOTSUPP. 
1067 */ 1068 if (ap->a_vap->va_type != VCHR) 1069 return (EOPNOTSUPP); 1070 dvp = ap->a_dvp; 1071 dmp = VFSTODEVFS(dvp->v_mount); 1072 1073 cnp = ap->a_cnp; 1074 vpp = ap->a_vpp; 1075 dd = dvp->v_data; 1076 1077 error = ENOENT; 1078 sx_xlock(&dmp->dm_lock); 1079 TAILQ_FOREACH(de, &dd->de_dlist, de_list) { 1080 if (cnp->cn_namelen != de->de_dirent->d_namlen) 1081 continue; 1082 if (de->de_dirent->d_type == DT_CHR && 1083 (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0) 1084 continue; 1085 if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name, 1086 de->de_dirent->d_namlen) != 0) 1087 continue; 1088 if (de->de_flags & DE_WHITEOUT) 1089 break; 1090 goto notfound; 1091 } 1092 if (de == NULL) 1093 goto notfound; 1094 de->de_flags &= ~DE_WHITEOUT; 1095 error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp); 1096 return (error); 1097 notfound: 1098 sx_xunlock(&dmp->dm_lock); 1099 return (error); 1100 } 1101 1102 /* ARGSUSED */ 1103 static int 1104 devfs_open(struct vop_open_args *ap) 1105 { 1106 struct thread *td = ap->a_td; 1107 struct vnode *vp = ap->a_vp; 1108 struct cdev *dev = vp->v_rdev; 1109 struct file *fp = ap->a_fp; 1110 int error, ref, vlocked; 1111 struct cdevsw *dsw; 1112 struct file *fpop; 1113 struct mtx *mtxp; 1114 1115 if (vp->v_type == VBLK) 1116 return (ENXIO); 1117 1118 if (dev == NULL) 1119 return (ENXIO); 1120 1121 /* Make this field valid before any I/O in d_open. 
*/ 1122 if (dev->si_iosize_max == 0) 1123 dev->si_iosize_max = DFLTPHYS; 1124 1125 dsw = dev_refthread(dev, &ref); 1126 if (dsw == NULL) 1127 return (ENXIO); 1128 if (fp == NULL && dsw->d_fdopen != NULL) { 1129 dev_relthread(dev, ref); 1130 return (ENXIO); 1131 } 1132 1133 vlocked = VOP_ISLOCKED(vp); 1134 VOP_UNLOCK(vp, 0); 1135 1136 fpop = td->td_fpop; 1137 td->td_fpop = fp; 1138 if (fp != NULL) { 1139 fp->f_data = dev; 1140 fp->f_vnode = vp; 1141 } 1142 if (dsw->d_fdopen != NULL) 1143 error = dsw->d_fdopen(dev, ap->a_mode, td, fp); 1144 else 1145 error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); 1146 /* Clean up any cdevpriv upon error. */ 1147 if (error != 0) 1148 devfs_clear_cdevpriv(); 1149 td->td_fpop = fpop; 1150 1151 vn_lock(vp, vlocked | LK_RETRY); 1152 dev_relthread(dev, ref); 1153 if (error != 0) { 1154 if (error == ERESTART) 1155 error = EINTR; 1156 return (error); 1157 } 1158 1159 #if 0 /* /dev/console */ 1160 KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp")); 1161 #else 1162 if (fp == NULL) 1163 return (error); 1164 #endif 1165 if (fp->f_ops == &badfileops) 1166 finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f); 1167 mtxp = mtx_pool_find(mtxpool_sleep, fp); 1168 1169 /* 1170 * Hint to the dofilewrite() to not force the buffer draining 1171 * on the writer to the file. Most likely, the write would 1172 * not need normal buffers. 
1173 */ 1174 mtx_lock(mtxp); 1175 fp->f_vnread_flags |= FDEVFS_VNODE; 1176 mtx_unlock(mtxp); 1177 return (error); 1178 } 1179 1180 static int 1181 devfs_pathconf(struct vop_pathconf_args *ap) 1182 { 1183 1184 switch (ap->a_name) { 1185 case _PC_FILESIZEBITS: 1186 *ap->a_retval = 64; 1187 return (0); 1188 case _PC_NAME_MAX: 1189 *ap->a_retval = NAME_MAX; 1190 return (0); 1191 case _PC_LINK_MAX: 1192 *ap->a_retval = INT_MAX; 1193 return (0); 1194 case _PC_SYMLINK_MAX: 1195 *ap->a_retval = MAXPATHLEN; 1196 return (0); 1197 case _PC_MAX_CANON: 1198 if (ap->a_vp->v_vflag & VV_ISTTY) { 1199 *ap->a_retval = MAX_CANON; 1200 return (0); 1201 } 1202 return (EINVAL); 1203 case _PC_MAX_INPUT: 1204 if (ap->a_vp->v_vflag & VV_ISTTY) { 1205 *ap->a_retval = MAX_INPUT; 1206 return (0); 1207 } 1208 return (EINVAL); 1209 case _PC_VDISABLE: 1210 if (ap->a_vp->v_vflag & VV_ISTTY) { 1211 *ap->a_retval = _POSIX_VDISABLE; 1212 return (0); 1213 } 1214 return (EINVAL); 1215 case _PC_MAC_PRESENT: 1216 #ifdef MAC 1217 /* 1218 * If MAC is enabled, devfs automatically supports 1219 * trivial non-persistant label storage. 1220 */ 1221 *ap->a_retval = 1; 1222 #else 1223 *ap->a_retval = 0; 1224 #endif 1225 return (0); 1226 case _PC_CHOWN_RESTRICTED: 1227 *ap->a_retval = 1; 1228 return (0); 1229 default: 1230 return (vop_stdpathconf(ap)); 1231 } 1232 /* NOTREACHED */ 1233 } 1234 1235 /* ARGSUSED */ 1236 static int 1237 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td) 1238 { 1239 struct cdev *dev; 1240 struct cdevsw *dsw; 1241 int error, ref; 1242 struct file *fpop; 1243 1244 fpop = td->td_fpop; 1245 error = devfs_fp_check(fp, &dev, &dsw, &ref); 1246 if (error != 0) { 1247 error = vnops.fo_poll(fp, events, cred, td); 1248 return (error); 1249 } 1250 error = dsw->d_poll(dev, events, td); 1251 td->td_fpop = fpop; 1252 dev_relthread(dev, ref); 1253 return(error); 1254 } 1255 1256 /* 1257 * Print out the contents of a special device vnode. 
1258 */ 1259 static int 1260 devfs_print(struct vop_print_args *ap) 1261 { 1262 1263 printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev)); 1264 return (0); 1265 } 1266 1267 static int 1268 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, 1269 int flags, struct thread *td) 1270 { 1271 struct cdev *dev; 1272 int ioflag, error, ref; 1273 ssize_t resid; 1274 struct cdevsw *dsw; 1275 struct file *fpop; 1276 1277 if (uio->uio_resid > DEVFS_IOSIZE_MAX) 1278 return (EINVAL); 1279 fpop = td->td_fpop; 1280 error = devfs_fp_check(fp, &dev, &dsw, &ref); 1281 if (error != 0) { 1282 error = vnops.fo_read(fp, uio, cred, flags, td); 1283 return (error); 1284 } 1285 resid = uio->uio_resid; 1286 ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT); 1287 if (ioflag & O_DIRECT) 1288 ioflag |= IO_DIRECT; 1289 1290 foffset_lock_uio(fp, uio, flags | FOF_NOLOCK); 1291 error = dsw->d_read(dev, uio, ioflag); 1292 if (uio->uio_resid != resid || (error == 0 && resid != 0)) 1293 devfs_timestamp(&dev->si_atime); 1294 td->td_fpop = fpop; 1295 dev_relthread(dev, ref); 1296 1297 foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF); 1298 return (error); 1299 } 1300 1301 static int 1302 devfs_readdir(struct vop_readdir_args *ap) 1303 { 1304 int error; 1305 struct uio *uio; 1306 struct dirent *dp; 1307 struct devfs_dirent *dd; 1308 struct devfs_dirent *de; 1309 struct devfs_mount *dmp; 1310 off_t off; 1311 int *tmp_ncookies = NULL; 1312 1313 if (ap->a_vp->v_type != VDIR) 1314 return (ENOTDIR); 1315 1316 uio = ap->a_uio; 1317 if (uio->uio_offset < 0) 1318 return (EINVAL); 1319 1320 /* 1321 * XXX: This is a temporary hack to get around this filesystem not 1322 * supporting cookies. We store the location of the ncookies pointer 1323 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent() 1324 * and set the number of cookies to 0. We then set the pointer to 1325 * NULL so that vfs_read_dirent doesn't try to call realloc() on 1326 * ap->a_cookies. 
Later in this function, we restore the ap->a_ncookies 1327 * pointer to its original location before returning to the caller. 1328 */ 1329 if (ap->a_ncookies != NULL) { 1330 tmp_ncookies = ap->a_ncookies; 1331 *ap->a_ncookies = 0; 1332 ap->a_ncookies = NULL; 1333 } 1334 1335 dmp = VFSTODEVFS(ap->a_vp->v_mount); 1336 if (devfs_populate_vp(ap->a_vp) != 0) { 1337 if (tmp_ncookies != NULL) 1338 ap->a_ncookies = tmp_ncookies; 1339 return (EIO); 1340 } 1341 error = 0; 1342 de = ap->a_vp->v_data; 1343 off = 0; 1344 TAILQ_FOREACH(dd, &de->de_dlist, de_list) { 1345 KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__)); 1346 if (dd->de_flags & (DE_COVERED | DE_WHITEOUT)) 1347 continue; 1348 if (devfs_prison_check(dd, uio->uio_td)) 1349 continue; 1350 if (dd->de_dirent->d_type == DT_DIR) 1351 de = dd->de_dir; 1352 else 1353 de = dd; 1354 dp = dd->de_dirent; 1355 MPASS(dp->d_reclen == GENERIC_DIRSIZ(dp)); 1356 if (dp->d_reclen > uio->uio_resid) 1357 break; 1358 dp->d_fileno = de->de_inode; 1359 /* NOTE: d_off is the offset for the *next* entry. */ 1360 dp->d_off = off + dp->d_reclen; 1361 if (off >= uio->uio_offset) { 1362 error = vfs_read_dirent(ap, dp, off); 1363 if (error) 1364 break; 1365 } 1366 off += dp->d_reclen; 1367 } 1368 sx_xunlock(&dmp->dm_lock); 1369 uio->uio_offset = off; 1370 1371 /* 1372 * Restore ap->a_ncookies if it wasn't originally NULL in the first 1373 * place. 
1374 */ 1375 if (tmp_ncookies != NULL) 1376 ap->a_ncookies = tmp_ncookies; 1377 1378 return (error); 1379 } 1380 1381 static int 1382 devfs_readlink(struct vop_readlink_args *ap) 1383 { 1384 struct devfs_dirent *de; 1385 1386 de = ap->a_vp->v_data; 1387 return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio)); 1388 } 1389 1390 static int 1391 devfs_reclaim(struct vop_reclaim_args *ap) 1392 { 1393 struct vnode *vp; 1394 struct devfs_dirent *de; 1395 1396 vp = ap->a_vp; 1397 mtx_lock(&devfs_de_interlock); 1398 de = vp->v_data; 1399 if (de != NULL) { 1400 de->de_vnode = NULL; 1401 vp->v_data = NULL; 1402 } 1403 mtx_unlock(&devfs_de_interlock); 1404 vnode_destroy_vobject(vp); 1405 return (0); 1406 } 1407 1408 static int 1409 devfs_reclaim_vchr(struct vop_reclaim_args *ap) 1410 { 1411 struct vnode *vp; 1412 struct cdev *dev; 1413 1414 vp = ap->a_vp; 1415 MPASS(vp->v_type == VCHR); 1416 1417 devfs_reclaim(ap); 1418 1419 VI_LOCK(vp); 1420 dev_lock(); 1421 dev = vp->v_rdev; 1422 vp->v_rdev = NULL; 1423 if (dev != NULL) 1424 dev->si_usecount -= vp->v_usecount; 1425 dev_unlock(); 1426 VI_UNLOCK(vp); 1427 if (dev != NULL) 1428 dev_rel(dev); 1429 return (0); 1430 } 1431 1432 static int 1433 devfs_remove(struct vop_remove_args *ap) 1434 { 1435 struct vnode *dvp = ap->a_dvp; 1436 struct vnode *vp = ap->a_vp; 1437 struct devfs_dirent *dd; 1438 struct devfs_dirent *de, *de_covered; 1439 struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount); 1440 1441 ASSERT_VOP_ELOCKED(dvp, "devfs_remove"); 1442 ASSERT_VOP_ELOCKED(vp, "devfs_remove"); 1443 1444 sx_xlock(&dmp->dm_lock); 1445 dd = ap->a_dvp->v_data; 1446 de = vp->v_data; 1447 if (de->de_cdp == NULL) { 1448 TAILQ_REMOVE(&dd->de_dlist, de, de_list); 1449 if (de->de_dirent->d_type == DT_LNK) { 1450 de_covered = devfs_find(dd, de->de_dirent->d_name, 1451 de->de_dirent->d_namlen, 0); 1452 if (de_covered != NULL) 1453 de_covered->de_flags &= ~DE_COVERED; 1454 } 1455 /* We need to unlock dvp because devfs_delete() may lock it. 
*/ 1456 VOP_UNLOCK(vp, 0); 1457 if (dvp != vp) 1458 VOP_UNLOCK(dvp, 0); 1459 devfs_delete(dmp, de, 0); 1460 sx_xunlock(&dmp->dm_lock); 1461 if (dvp != vp) 1462 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); 1463 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1464 } else { 1465 de->de_flags |= DE_WHITEOUT; 1466 sx_xunlock(&dmp->dm_lock); 1467 } 1468 return (0); 1469 } 1470 1471 /* 1472 * Revoke is called on a tty when a terminal session ends. The vnode 1473 * is orphaned by setting v_op to deadfs so we need to let go of it 1474 * as well so that we create a new one next time around. 1475 * 1476 */ 1477 static int 1478 devfs_revoke(struct vop_revoke_args *ap) 1479 { 1480 struct vnode *vp = ap->a_vp, *vp2; 1481 struct cdev *dev; 1482 struct cdev_priv *cdp; 1483 struct devfs_dirent *de; 1484 u_int i; 1485 1486 KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL")); 1487 1488 dev = vp->v_rdev; 1489 cdp = cdev2priv(dev); 1490 1491 dev_lock(); 1492 cdp->cdp_inuse++; 1493 dev_unlock(); 1494 1495 vhold(vp); 1496 vgone(vp); 1497 vdrop(vp); 1498 1499 VOP_UNLOCK(vp,0); 1500 loop: 1501 for (;;) { 1502 mtx_lock(&devfs_de_interlock); 1503 dev_lock(); 1504 vp2 = NULL; 1505 for (i = 0; i <= cdp->cdp_maxdirent; i++) { 1506 de = cdp->cdp_dirents[i]; 1507 if (de == NULL) 1508 continue; 1509 1510 vp2 = de->de_vnode; 1511 if (vp2 != NULL) { 1512 dev_unlock(); 1513 VI_LOCK(vp2); 1514 mtx_unlock(&devfs_de_interlock); 1515 if (vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, 1516 curthread)) 1517 goto loop; 1518 vhold(vp2); 1519 vgone(vp2); 1520 vdrop(vp2); 1521 vput(vp2); 1522 break; 1523 } 1524 } 1525 if (vp2 != NULL) { 1526 continue; 1527 } 1528 dev_unlock(); 1529 mtx_unlock(&devfs_de_interlock); 1530 break; 1531 } 1532 dev_lock(); 1533 cdp->cdp_inuse--; 1534 if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) { 1535 TAILQ_REMOVE(&cdevp_list, cdp, cdp_list); 1536 dev_unlock(); 1537 dev_rel(&cdp->cdp_c); 1538 } else 1539 dev_unlock(); 1540 1541 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1542 return 
(0); 1543 } 1544 1545 static int 1546 devfs_rioctl(struct vop_ioctl_args *ap) 1547 { 1548 struct vnode *vp; 1549 struct devfs_mount *dmp; 1550 int error; 1551 1552 vp = ap->a_vp; 1553 vn_lock(vp, LK_SHARED | LK_RETRY); 1554 if (vp->v_iflag & VI_DOOMED) { 1555 VOP_UNLOCK(vp, 0); 1556 return (EBADF); 1557 } 1558 dmp = VFSTODEVFS(vp->v_mount); 1559 sx_xlock(&dmp->dm_lock); 1560 VOP_UNLOCK(vp, 0); 1561 DEVFS_DMP_HOLD(dmp); 1562 devfs_populate(dmp); 1563 if (DEVFS_DMP_DROP(dmp)) { 1564 sx_xunlock(&dmp->dm_lock); 1565 devfs_unmount_final(dmp); 1566 return (ENOENT); 1567 } 1568 error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td); 1569 sx_xunlock(&dmp->dm_lock); 1570 return (error); 1571 } 1572 1573 static int 1574 devfs_rread(struct vop_read_args *ap) 1575 { 1576 1577 if (ap->a_vp->v_type != VDIR) 1578 return (EINVAL); 1579 return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL)); 1580 } 1581 1582 static int 1583 devfs_setattr(struct vop_setattr_args *ap) 1584 { 1585 struct devfs_dirent *de; 1586 struct vattr *vap; 1587 struct vnode *vp; 1588 struct thread *td; 1589 int c, error; 1590 uid_t uid; 1591 gid_t gid; 1592 1593 vap = ap->a_vap; 1594 vp = ap->a_vp; 1595 td = curthread; 1596 if ((vap->va_type != VNON) || 1597 (vap->va_nlink != VNOVAL) || 1598 (vap->va_fsid != VNOVAL) || 1599 (vap->va_fileid != VNOVAL) || 1600 (vap->va_blocksize != VNOVAL) || 1601 (vap->va_flags != VNOVAL && vap->va_flags != 0) || 1602 (vap->va_rdev != VNOVAL) || 1603 ((int)vap->va_bytes != VNOVAL) || 1604 (vap->va_gen != VNOVAL)) { 1605 return (EINVAL); 1606 } 1607 1608 error = devfs_populate_vp(vp); 1609 if (error != 0) 1610 return (error); 1611 1612 de = vp->v_data; 1613 if (vp->v_type == VDIR) 1614 de = de->de_dir; 1615 1616 c = 0; 1617 if (vap->va_uid == (uid_t)VNOVAL) 1618 uid = de->de_uid; 1619 else 1620 uid = vap->va_uid; 1621 if (vap->va_gid == (gid_t)VNOVAL) 1622 gid = de->de_gid; 1623 else 1624 gid = vap->va_gid; 1625 if (uid != de->de_uid || gid != 
de->de_gid) { 1626 if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid || 1627 (gid != de->de_gid && !groupmember(gid, ap->a_cred))) { 1628 error = priv_check(td, PRIV_VFS_CHOWN); 1629 if (error != 0) 1630 goto ret; 1631 } 1632 de->de_uid = uid; 1633 de->de_gid = gid; 1634 c = 1; 1635 } 1636 1637 if (vap->va_mode != (mode_t)VNOVAL) { 1638 if (ap->a_cred->cr_uid != de->de_uid) { 1639 error = priv_check(td, PRIV_VFS_ADMIN); 1640 if (error != 0) 1641 goto ret; 1642 } 1643 de->de_mode = vap->va_mode; 1644 c = 1; 1645 } 1646 1647 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 1648 error = vn_utimes_perm(vp, vap, ap->a_cred, td); 1649 if (error != 0) 1650 goto ret; 1651 if (vap->va_atime.tv_sec != VNOVAL) { 1652 if (vp->v_type == VCHR) 1653 vp->v_rdev->si_atime = vap->va_atime; 1654 else 1655 de->de_atime = vap->va_atime; 1656 } 1657 if (vap->va_mtime.tv_sec != VNOVAL) { 1658 if (vp->v_type == VCHR) 1659 vp->v_rdev->si_mtime = vap->va_mtime; 1660 else 1661 de->de_mtime = vap->va_mtime; 1662 } 1663 c = 1; 1664 } 1665 1666 if (c) { 1667 if (vp->v_type == VCHR) 1668 vfs_timestamp(&vp->v_rdev->si_ctime); 1669 else 1670 vfs_timestamp(&de->de_mtime); 1671 } 1672 1673 ret: 1674 sx_xunlock(&VFSTODEVFS(vp->v_mount)->dm_lock); 1675 return (error); 1676 } 1677 1678 #ifdef MAC 1679 static int 1680 devfs_setlabel(struct vop_setlabel_args *ap) 1681 { 1682 struct vnode *vp; 1683 struct devfs_dirent *de; 1684 1685 vp = ap->a_vp; 1686 de = vp->v_data; 1687 1688 mac_vnode_relabel(ap->a_cred, vp, ap->a_label); 1689 mac_devfs_update(vp->v_mount, de, vp); 1690 1691 return (0); 1692 } 1693 #endif 1694 1695 static int 1696 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td) 1697 { 1698 1699 return (vnops.fo_stat(fp, sb, cred, td)); 1700 } 1701 1702 static int 1703 devfs_symlink(struct vop_symlink_args *ap) 1704 { 1705 int i, error; 1706 struct devfs_dirent *dd; 1707 struct devfs_dirent *de, *de_covered, *de_dotdot; 1708 struct 
devfs_mount *dmp; 1709 1710 error = priv_check(curthread, PRIV_DEVFS_SYMLINK); 1711 if (error) 1712 return(error); 1713 dmp = VFSTODEVFS(ap->a_dvp->v_mount); 1714 if (devfs_populate_vp(ap->a_dvp) != 0) 1715 return (ENOENT); 1716 1717 dd = ap->a_dvp->v_data; 1718 de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen); 1719 de->de_flags = DE_USER; 1720 de->de_uid = 0; 1721 de->de_gid = 0; 1722 de->de_mode = 0755; 1723 de->de_inode = alloc_unr(devfs_inos); 1724 de->de_dir = dd; 1725 de->de_dirent->d_type = DT_LNK; 1726 i = strlen(ap->a_target) + 1; 1727 de->de_symlink = malloc(i, M_DEVFS, M_WAITOK); 1728 bcopy(ap->a_target, de->de_symlink, i); 1729 #ifdef MAC 1730 mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de); 1731 #endif 1732 de_covered = devfs_find(dd, de->de_dirent->d_name, 1733 de->de_dirent->d_namlen, 0); 1734 if (de_covered != NULL) { 1735 if ((de_covered->de_flags & DE_USER) != 0) { 1736 devfs_delete(dmp, de, DEVFS_DEL_NORECURSE); 1737 sx_xunlock(&dmp->dm_lock); 1738 return (EEXIST); 1739 } 1740 KASSERT((de_covered->de_flags & DE_COVERED) == 0, 1741 ("devfs_symlink: entry %p already covered", de_covered)); 1742 de_covered->de_flags |= DE_COVERED; 1743 } 1744 1745 de_dotdot = TAILQ_FIRST(&dd->de_dlist); /* "." */ 1746 de_dotdot = TAILQ_NEXT(de_dotdot, de_list); /* ".." 
*/ 1747 TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list); 1748 devfs_dir_ref_de(dmp, dd); 1749 devfs_rules_apply(dmp, de); 1750 1751 return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp)); 1752 } 1753 1754 static int 1755 devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td) 1756 { 1757 1758 return (vnops.fo_truncate(fp, length, cred, td)); 1759 } 1760 1761 static int 1762 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, 1763 int flags, struct thread *td) 1764 { 1765 struct cdev *dev; 1766 int error, ioflag, ref; 1767 ssize_t resid; 1768 struct cdevsw *dsw; 1769 struct file *fpop; 1770 1771 if (uio->uio_resid > DEVFS_IOSIZE_MAX) 1772 return (EINVAL); 1773 fpop = td->td_fpop; 1774 error = devfs_fp_check(fp, &dev, &dsw, &ref); 1775 if (error != 0) { 1776 error = vnops.fo_write(fp, uio, cred, flags, td); 1777 return (error); 1778 } 1779 KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); 1780 ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC); 1781 if (ioflag & O_DIRECT) 1782 ioflag |= IO_DIRECT; 1783 foffset_lock_uio(fp, uio, flags | FOF_NOLOCK); 1784 1785 resid = uio->uio_resid; 1786 1787 error = dsw->d_write(dev, uio, ioflag); 1788 if (uio->uio_resid != resid || (error == 0 && resid != 0)) { 1789 devfs_timestamp(&dev->si_ctime); 1790 dev->si_mtime = dev->si_ctime; 1791 } 1792 td->td_fpop = fpop; 1793 dev_relthread(dev, ref); 1794 1795 foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF); 1796 return (error); 1797 } 1798 1799 static int 1800 devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, 1801 vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, 1802 struct thread *td) 1803 { 1804 struct cdev *dev; 1805 struct cdevsw *dsw; 1806 struct mount *mp; 1807 struct vnode *vp; 1808 struct file *fpop; 1809 vm_object_t object; 1810 vm_prot_t maxprot; 1811 int error, ref; 1812 1813 vp = fp->f_vnode; 1814 1815 /* 1816 * 
Ensure that file and memory protections are 1817 * compatible. 1818 */ 1819 mp = vp->v_mount; 1820 if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) { 1821 maxprot = VM_PROT_NONE; 1822 if ((prot & VM_PROT_EXECUTE) != 0) 1823 return (EACCES); 1824 } else 1825 maxprot = VM_PROT_EXECUTE; 1826 if ((fp->f_flag & FREAD) != 0) 1827 maxprot |= VM_PROT_READ; 1828 else if ((prot & VM_PROT_READ) != 0) 1829 return (EACCES); 1830 1831 /* 1832 * If we are sharing potential changes via MAP_SHARED and we 1833 * are trying to get write permission although we opened it 1834 * without asking for it, bail out. 1835 * 1836 * Note that most character devices always share mappings. 1837 * The one exception is that D_MMAP_ANON devices 1838 * (i.e. /dev/zero) permit private writable mappings. 1839 * 1840 * Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests 1841 * as well as updating maxprot to permit writing for 1842 * D_MMAP_ANON devices rather than doing that here. 1843 */ 1844 if ((flags & MAP_SHARED) != 0) { 1845 if ((fp->f_flag & FWRITE) != 0) 1846 maxprot |= VM_PROT_WRITE; 1847 else if ((prot & VM_PROT_WRITE) != 0) 1848 return (EACCES); 1849 } 1850 maxprot &= cap_maxprot; 1851 1852 fpop = td->td_fpop; 1853 error = devfs_fp_check(fp, &dev, &dsw, &ref); 1854 if (error != 0) 1855 return (error); 1856 1857 error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff, 1858 &object); 1859 td->td_fpop = fpop; 1860 dev_relthread(dev, ref); 1861 if (error != 0) 1862 return (error); 1863 1864 error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, 1865 foff, FALSE, td); 1866 if (error != 0) 1867 vm_object_deallocate(object); 1868 return (error); 1869 } 1870 1871 dev_t 1872 dev2udev(struct cdev *x) 1873 { 1874 if (x == NULL) 1875 return (NODEV); 1876 return (cdev2priv(x)->cdp_inode); 1877 } 1878 1879 static struct fileops devfs_ops_f = { 1880 .fo_read = devfs_read_f, 1881 .fo_write = devfs_write_f, 1882 .fo_truncate = devfs_truncate_f, 1883 .fo_ioctl = 
devfs_ioctl_f, 1884 .fo_poll = devfs_poll_f, 1885 .fo_kqfilter = devfs_kqfilter_f, 1886 .fo_stat = devfs_stat_f, 1887 .fo_close = devfs_close_f, 1888 .fo_chmod = vn_chmod, 1889 .fo_chown = vn_chown, 1890 .fo_sendfile = vn_sendfile, 1891 .fo_seek = vn_seek, 1892 .fo_fill_kinfo = vn_fill_kinfo, 1893 .fo_mmap = devfs_mmap_f, 1894 .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE 1895 }; 1896 1897 /* Vops for non-CHR vnodes in /dev. */ 1898 static struct vop_vector devfs_vnodeops = { 1899 .vop_default = &default_vnodeops, 1900 1901 .vop_access = devfs_access, 1902 .vop_getattr = devfs_getattr, 1903 .vop_ioctl = devfs_rioctl, 1904 .vop_lookup = devfs_lookup, 1905 .vop_mknod = devfs_mknod, 1906 .vop_pathconf = devfs_pathconf, 1907 .vop_read = devfs_rread, 1908 .vop_readdir = devfs_readdir, 1909 .vop_readlink = devfs_readlink, 1910 .vop_reclaim = devfs_reclaim, 1911 .vop_remove = devfs_remove, 1912 .vop_revoke = devfs_revoke, 1913 .vop_setattr = devfs_setattr, 1914 #ifdef MAC 1915 .vop_setlabel = devfs_setlabel, 1916 #endif 1917 .vop_symlink = devfs_symlink, 1918 .vop_vptocnp = devfs_vptocnp, 1919 }; 1920 1921 /* Vops for VCHR vnodes in /dev. 
*/ 1922 static struct vop_vector devfs_specops = { 1923 .vop_default = &default_vnodeops, 1924 1925 .vop_access = devfs_access, 1926 .vop_bmap = VOP_PANIC, 1927 .vop_close = devfs_close, 1928 .vop_create = VOP_PANIC, 1929 .vop_fsync = vop_stdfsync, 1930 .vop_getattr = devfs_getattr, 1931 .vop_ioctl = devfs_ioctl, 1932 .vop_link = VOP_PANIC, 1933 .vop_mkdir = VOP_PANIC, 1934 .vop_mknod = VOP_PANIC, 1935 .vop_open = devfs_open, 1936 .vop_pathconf = devfs_pathconf, 1937 .vop_poll = dead_poll, 1938 .vop_print = devfs_print, 1939 .vop_read = dead_read, 1940 .vop_readdir = VOP_PANIC, 1941 .vop_readlink = VOP_PANIC, 1942 .vop_reallocblks = VOP_PANIC, 1943 .vop_reclaim = devfs_reclaim_vchr, 1944 .vop_remove = devfs_remove, 1945 .vop_rename = VOP_PANIC, 1946 .vop_revoke = devfs_revoke, 1947 .vop_rmdir = VOP_PANIC, 1948 .vop_setattr = devfs_setattr, 1949 #ifdef MAC 1950 .vop_setlabel = devfs_setlabel, 1951 #endif 1952 .vop_strategy = VOP_PANIC, 1953 .vop_symlink = VOP_PANIC, 1954 .vop_vptocnp = devfs_vptocnp, 1955 .vop_write = dead_write, 1956 }; 1957 1958 /* 1959 * Our calling convention to the device drivers used to be that we passed 1960 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_ 1961 * flags instead since that's what open(), close() and ioctl() takes and 1962 * we don't really want vnode.h in device drivers. 1963 * We solved the source compatibility by redefining some vnode flags to 1964 * be the same as the fcntl ones and by sending down the bitwise OR of 1965 * the respective fcntl/vnode flags. These CTASSERTS make sure nobody 1966 * pulls the rug out under this. 1967 */ 1968 CTASSERT(O_NONBLOCK == IO_NDELAY); 1969 CTASSERT(O_FSYNC == IO_SYNC); 1970