1 /*-
2 * modified for EXT2FS support in Lites 1.1
3 *
4 * Aug 1995, Godmar Back ([email protected])
5 * University of Utah, Department of Computer Science
6 */
7 /*-
8 * SPDX-License-Identifier: BSD-3-Clause
9 *
10 * Copyright (c) 1989, 1991, 1993, 1994
11 * The Regents of the University of California. All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94
38 */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <sys/mount.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/conf.h>
51 #include <sys/endian.h>
52 #include <sys/fcntl.h>
53 #include <sys/malloc.h>
54 #include <sys/sdt.h>
55 #include <sys/stat.h>
56 #include <sys/mutex.h>
57
58 #include <geom/geom.h>
59 #include <geom/geom_vfs.h>
60
61 #include <fs/ext2fs/fs.h>
62 #include <fs/ext2fs/ext2_mount.h>
63 #include <fs/ext2fs/inode.h>
64
65 #include <fs/ext2fs/ext2fs.h>
66 #include <fs/ext2fs/ext2_dinode.h>
67 #include <fs/ext2fs/ext2_extern.h>
68 #include <fs/ext2fs/ext2_extents.h>
69
SDT_PROVIDER_DECLARE(ext2fs);
/*
 * ext2fs trace probe:
 * arg0: verbosity. Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*");
/*
 * Fired by ext2_cg_validate() when a group descriptor fails a check:
 * arg0: Textual description of the failed check
 * arg1: Index of the block group whose descriptor was rejected
 */
SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int");
/*
 * Fired by ext2_compute_sb_data() when a superblock field is rejected:
 * arg0: Textual description of the failed check
 */
SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*");
79
/* File-local helpers used by the VFS entry points below. */
static int	ext2_flushfiles(struct mount *mp, int flags, struct thread *td);
static int	ext2_mountfs(struct vnode *, struct mount *);
static int	ext2_reload(struct mount *mp, struct thread *td);
static int	ext2_sbupdate(struct ext2mount *, int);
static int	ext2_cgupdate(struct ext2mount *, int);
/* VFS entry points installed in ext2fs_vfsops. */
static vfs_unmount_t	ext2_unmount;
static vfs_root_t	ext2_root;
static vfs_statfs_t	ext2_statfs;
static vfs_sync_t	ext2_sync;
static vfs_vget_t	ext2_vget;
static vfs_fhtovp_t	ext2_fhtovp;
static vfs_mount_t	ext2_mount;

/* Malloc type for the per-vnode private data (not file-local). */
MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part");
/*
 * Malloc type for everything hanging off the mount: struct ext2mount,
 * the in-memory and on-disk superblock copies, group descriptors and
 * the cluster summary arrays.
 */
static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure");
95
/* VFS operations vector for ext2fs. */
static struct vfsops ext2fs_vfsops = {
	.vfs_fhtovp =		ext2_fhtovp,
	.vfs_mount =		ext2_mount,
	.vfs_root =		ext2_root,	/* root inode via vget */
	.vfs_statfs =		ext2_statfs,
	.vfs_sync =		ext2_sync,
	.vfs_unmount =		ext2_unmount,
	.vfs_vget =		ext2_vget,
};

/* Register the vector above under the filesystem type name "ext2fs". */
VFS_SET(ext2fs_vfsops, ext2fs, 0);
107
/* Superblock validation helpers, defined further down in this file. */
static int	ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev,
		    int ronly);
static int	ext2_compute_sb_data(struct vnode * devvp,
		    struct ext2fs * es, struct m_ext2fs * fs);

/*
 * NULL-terminated list of mount option names handed to vfs_filteropt()
 * by ext2_mount(); a non-zero result there fails the mount with EINVAL.
 */
static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr",
    "noclusterw", "noexec", "export", "force", "from", "multilabel",
    "suiddir", "nosymfollow", "sync", "union", NULL };
116
117 /*
118 * VFS Operations.
119 *
120 * mount system call
121 */
122 static int
ext2_mount(struct mount * mp)123 ext2_mount(struct mount *mp)
124 {
125 struct vfsoptlist *opts;
126 struct vnode *devvp;
127 struct thread *td;
128 struct ext2mount *ump = NULL;
129 struct m_ext2fs *fs;
130 struct nameidata nd, *ndp = &nd;
131 accmode_t accmode;
132 char *path, *fspec;
133 int error, flags, len;
134
135 td = curthread;
136 opts = mp->mnt_optnew;
137
138 if (vfs_filteropt(opts, ext2_opts))
139 return (EINVAL);
140
141 vfs_getopt(opts, "fspath", (void **)&path, NULL);
142 /* Double-check the length of path.. */
143 if (strlen(path) >= MAXMNTLEN)
144 return (ENAMETOOLONG);
145
146 fspec = NULL;
147 error = vfs_getopt(opts, "from", (void **)&fspec, &len);
148 if (!error && fspec[len - 1] != '\0')
149 return (EINVAL);
150
151 /*
152 * If updating, check whether changing from read-only to
153 * read/write; if there is no device name, that's all we do.
154 */
155 if (mp->mnt_flag & MNT_UPDATE) {
156 ump = VFSTOEXT2(mp);
157 fs = ump->um_e2fs;
158 error = 0;
159 if (fs->e2fs_ronly == 0 &&
160 vfs_flagopt(opts, "ro", NULL, 0)) {
161 error = VFS_SYNC(mp, MNT_WAIT);
162 if (error)
163 return (error);
164 flags = WRITECLOSE;
165 if (mp->mnt_flag & MNT_FORCE)
166 flags |= FORCECLOSE;
167 error = ext2_flushfiles(mp, flags, td);
168 if (error == 0 && fs->e2fs_wasvalid &&
169 ext2_cgupdate(ump, MNT_WAIT) == 0) {
170 fs->e2fs->e2fs_state =
171 htole16((le16toh(fs->e2fs->e2fs_state) |
172 E2FS_ISCLEAN));
173 ext2_sbupdate(ump, MNT_WAIT);
174 }
175 fs->e2fs_ronly = 1;
176 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY);
177 g_topology_lock();
178 g_access(ump->um_cp, 0, -1, 0);
179 g_topology_unlock();
180 }
181 if (!error && (mp->mnt_flag & MNT_RELOAD))
182 error = ext2_reload(mp, td);
183 if (error)
184 return (error);
185 devvp = ump->um_devvp;
186 if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) {
187 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0))
188 return (EPERM);
189
190 /*
191 * If upgrade to read-write by non-root, then verify
192 * that user has necessary permissions on the device.
193 */
194 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
195 error = VOP_ACCESS(devvp, VREAD | VWRITE,
196 td->td_ucred, td);
197 if (error)
198 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
199 if (error) {
200 VOP_UNLOCK(devvp);
201 return (error);
202 }
203 VOP_UNLOCK(devvp);
204 g_topology_lock();
205 error = g_access(ump->um_cp, 0, 1, 0);
206 g_topology_unlock();
207 if (error)
208 return (error);
209
210 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 ||
211 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) {
212 if (mp->mnt_flag & MNT_FORCE) {
213 printf(
214 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt);
215 } else {
216 printf(
217 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
218 fs->e2fs_fsmnt);
219 return (EPERM);
220 }
221 }
222 fs->e2fs->e2fs_state =
223 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN);
224 (void)ext2_cgupdate(ump, MNT_WAIT);
225 fs->e2fs_ronly = 0;
226 MNT_ILOCK(mp);
227 mp->mnt_flag &= ~MNT_RDONLY;
228 MNT_IUNLOCK(mp);
229 }
230 if (vfs_flagopt(opts, "export", NULL, 0)) {
231 /* Process export requests in vfs_mount.c. */
232 return (error);
233 }
234 }
235
236 /*
237 * Not an update, or updating the name: look up the name
238 * and verify that it refers to a sensible disk device.
239 */
240 if (fspec == NULL)
241 return (EINVAL);
242 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec);
243 if ((error = namei(ndp)) != 0)
244 return (error);
245 NDFREE_PNBUF(ndp);
246 devvp = ndp->ni_vp;
247
248 if (!vn_isdisk_error(devvp, &error)) {
249 vput(devvp);
250 return (error);
251 }
252
253 /*
254 * If mount by non-root, then verify that user has necessary
255 * permissions on the device.
256 *
257 * XXXRW: VOP_ACCESS() enough?
258 */
259 accmode = VREAD;
260 if ((mp->mnt_flag & MNT_RDONLY) == 0)
261 accmode |= VWRITE;
262 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
263 if (error)
264 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
265 if (error) {
266 vput(devvp);
267 return (error);
268 }
269
270 if ((mp->mnt_flag & MNT_UPDATE) == 0) {
271 error = ext2_mountfs(devvp, mp);
272 } else {
273 if (devvp != ump->um_devvp) {
274 vput(devvp);
275 return (EINVAL); /* needs translation */
276 } else
277 vput(devvp);
278 }
279 if (error) {
280 vrele(devvp);
281 return (error);
282 }
283 ump = VFSTOEXT2(mp);
284 fs = ump->um_e2fs;
285
286 /*
287 * Note that this strncpy() is ok because of a check at the start
288 * of ext2_mount().
289 */
290 strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN);
291 fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0';
292 vfs_mountedfrom(mp, fspec);
293 return (0);
294 }
295
296 static int
ext2_check_sb_compat(struct ext2fs * es,struct cdev * dev,int ronly)297 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly)
298 {
299 uint32_t i, mask;
300
301 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) {
302 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n",
303 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC);
304 return (1);
305 }
306 if (le32toh(es->e2fs_rev) > E2FS_REV0) {
307 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP);
308 if (mask) {
309 printf("WARNING: mount of %s denied due to "
310 "unsupported optional features:\n", devtoname(dev));
311 for (i = 0;
312 i < sizeof(incompat)/sizeof(struct ext2_feature);
313 i++)
314 if (mask & incompat[i].mask)
315 printf("%s ", incompat[i].name);
316 printf("\n");
317 return (1);
318 }
319 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP;
320 if (!ronly && mask) {
321 printf("WARNING: R/W mount of %s denied due to "
322 "unsupported optional features:\n", devtoname(dev));
323 for (i = 0;
324 i < sizeof(ro_compat)/sizeof(struct ext2_feature);
325 i++)
326 if (mask & ro_compat[i].mask)
327 printf("%s ", ro_compat[i].name);
328 printf("\n");
329 return (1);
330 }
331 }
332 return (0);
333 }
334
335 static e4fs_daddr_t
ext2_cg_location(struct m_ext2fs * fs,int number)336 ext2_cg_location(struct m_ext2fs *fs, int number)
337 {
338 int cg, descpb, logical_sb, has_super = 0;
339
340 /*
341 * Adjust logical superblock block number.
342 * Godmar thinks: if the blocksize is greater than 1024, then
343 * the superblock is logically part of block zero.
344 */
345 logical_sb = fs->e2fs_bsize > SBLOCKSIZE ? 0 : 1;
346
347 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
348 number < le32toh(fs->e2fs->e3fs_first_meta_bg))
349 return (logical_sb + number + 1);
350
351 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT))
352 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd);
353 else
354 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE;
355
356 cg = descpb * number;
357
358 if (ext2_cg_has_sb(fs, cg))
359 has_super = 1;
360
361 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) +
362 le32toh(fs->e2fs->e2fs_first_dblock));
363 }
364
365 static int
ext2_cg_validate(struct m_ext2fs * fs)366 ext2_cg_validate(struct m_ext2fs *fs)
367 {
368 uint64_t b_bitmap;
369 uint64_t i_bitmap;
370 uint64_t i_tables;
371 uint64_t first_block, last_block, last_cg_block;
372 struct ext2_gd *gd;
373 unsigned int i, cg_count;
374
375 first_block = le32toh(fs->e2fs->e2fs_first_dblock);
376 last_cg_block = ext2_cg_number_gdb(fs, 0);
377 cg_count = fs->e2fs_gcount;
378
379 for (i = 0; i < fs->e2fs_gcount; i++) {
380 gd = &fs->e2fs_gd[i];
381
382 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
383 i == fs->e2fs_gcount - 1) {
384 last_block = fs->e2fs_bcount - 1;
385 } else {
386 last_block = first_block +
387 (EXT2_BLOCKS_PER_GROUP(fs) - 1);
388 }
389
390 if ((cg_count == fs->e2fs_gcount) &&
391 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED))
392 cg_count = i;
393
394 b_bitmap = e2fs_gd_get_b_bitmap(gd);
395 if (b_bitmap == 0) {
396 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
397 "block bitmap is zero", i);
398 return (EINVAL);
399 }
400 if (b_bitmap <= last_cg_block) {
401 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
402 "block bitmap overlaps gds", i);
403 return (EINVAL);
404 }
405 if (b_bitmap < first_block || b_bitmap > last_block) {
406 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
407 "block bitmap not in group", i);
408 return (EINVAL);
409 }
410
411 i_bitmap = e2fs_gd_get_i_bitmap(gd);
412 if (i_bitmap == 0) {
413 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
414 "inode bitmap is zero", i);
415 return (EINVAL);
416 }
417 if (i_bitmap <= last_cg_block) {
418 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
419 "inode bitmap overlaps gds", i);
420 return (EINVAL);
421 }
422 if (i_bitmap < first_block || i_bitmap > last_block) {
423 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
424 "inode bitmap not in group blk", i);
425 return (EINVAL);
426 }
427
428 i_tables = e2fs_gd_get_i_tables(gd);
429 if (i_tables == 0) {
430 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
431 "inode table is zero", i);
432 return (EINVAL);
433 }
434 if (i_tables <= last_cg_block) {
435 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
436 "inode tables overlaps gds", i);
437 return (EINVAL);
438 }
439 if (i_tables < first_block ||
440 i_tables + fs->e2fs_itpg - 1 > last_block) {
441 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
442 "inode tables not in group blk", i);
443 return (EINVAL);
444 }
445
446 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG))
447 first_block += EXT2_BLOCKS_PER_GROUP(fs);
448 }
449
450 return (0);
451 }
452
/*
 * This computes the fields of the m_ext2fs structure from the
 * data in the ext2fs structure read in.
 *
 * Along the way the superblock values are sanity-checked, the group
 * descriptor blocks are read from `devvp' into a freshly allocated
 * fs->e2fs_gd array, and the descriptors are validated.
 *
 * Returns 0 on success.  A rejected superblock field yields EINVAL
 * after the ext2_compute_sb_data_error probe has fired; errors from
 * bread(), ext2_cg_validate() and the checksum helpers are passed
 * through.  fs->e2fs_gd and fs->e2fs_contigdirs are allocated here and
 * are NOT released on failure; that is left to the caller.
 */
static int
ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es,
    struct m_ext2fs *fs)
{
	struct buf *bp;
	uint32_t e2fs_descpb, e2fs_gdbcount_alloc;
	int i, j;
	int g_count = 0;
	int error;

	/* Check checksum features */
	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) &&
	    EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "incorrect checksum features combination");
		return (EINVAL);
	}

	/* Precompute checksum seed for all metadata */
	ext2_sb_csum_set_seed(fs);

	/* Verify sb csum if possible */
	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
		error = ext2_sb_csum_verify(fs);
		if (error) {
			return (error);
		}
	}

	/* Check for block size = 1K|2K|4K */
	if (le32toh(es->e2fs_log_bsize) > 2) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "bad block size");
		return (EINVAL);
	}

	/* Derive the block size and the masks/shifts that depend on it. */
	fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize);
	fs->e2fs_bsize = 1U << fs->e2fs_bshift;
	fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1;
	fs->e2fs_qbmask = fs->e2fs_bsize - 1;

	/* Check for fragment size */
	if (le32toh(es->e2fs_log_fsize) >
	    (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "invalid log cluster size");
		return (EINVAL);
	}

	/* Only fragment size == block size is accepted. */
	fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize);
	if (fs->e2fs_fsize != fs->e2fs_bsize) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "fragment size != block size");
		return (EINVAL);
	}

	fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize;

	/* Check reserved gdt blocks for future filesystem expansion */
	if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "number of reserved GDT blocks too large");
		return (EINVAL);
	}

	/* Revision 0 uses a fixed inode size; later ones store it. */
	if (le32toh(es->e2fs_rev) == E2FS_REV0) {
		fs->e2fs_isize = E2FS_REV0_INODE_SIZE;
	} else {
		fs->e2fs_isize = le16toh(es->e2fs_inode_size);

		/*
		 * Check first ino.
		 */
		if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) {
			SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
			    "invalid first ino");
			return (EINVAL);
		}

		/*
		 * Simple sanity check for superblock inode size value.
		 * It must be a power of two between the revision 0 size
		 * and the block size.
		 */
		if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE ||
		    EXT2_INODE_SIZE(fs) > fs->e2fs_bsize ||
		    (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) {
			SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
			    "invalid inode size");
			return (EINVAL);
		}
	}

	/* Check group descriptors */
	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) &&
	    le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "unsupported 64bit descriptor size");
		return (EINVAL);
	}

	fs->e2fs_bpg = le32toh(es->e2fs_bpg);
	fs->e2fs_fpg = le32toh(es->e2fs_fpg);
	if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "zero blocks/fragments per group");
		return (EINVAL);
	} else if (fs->e2fs_bpg != fs->e2fs_fpg) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "blocks per group not equal fragments per group");
		return (EINVAL);
	}

	/* A group must span exactly (block size * 8) blocks. */
	if (fs->e2fs_bpg != fs->e2fs_bsize * 8) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "non-standard group size unsupported");
		return (EINVAL);
	}

	fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs);
	if (fs->e2fs_ipb == 0 ||
	    fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "bad inodes per block size");
		return (EINVAL);
	}

	fs->e2fs_ipg = le32toh(es->e2fs_ipg);
	if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "invalid inodes per group");
		return (EINVAL);
	}

	/* Number of inode table blocks per group. */
	fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb;

	/* Block counters; the high halves only exist with 64BIT. */
	fs->e2fs_bcount = le32toh(es->e2fs_bcount);
	fs->e2fs_rbcount = le32toh(es->e2fs_rbcount);
	fs->e2fs_fbcount = le32toh(es->e2fs_fbcount);
	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
		fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32;
		fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32;
		fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32;
	}
	if (fs->e2fs_rbcount > fs->e2fs_bcount ||
	    fs->e2fs_fbcount > fs->e2fs_bcount) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "invalid block count");
		return (EINVAL);
	}

	fs->e2fs_ficount = le32toh(es->e2fs_ficount);
	if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "invalid number of free inodes");
		return (EINVAL);
	}

	/* First data block is 1 for 1K blocks, 0 for larger blocks. */
	if (le32toh(es->e2fs_first_dblock) != (fs->e2fs_bsize > 1024 ? 0 : 1) ||
	    le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "first data block out of range");
		return (EINVAL);
	}

	fs->e2fs_gcount = howmany(fs->e2fs_bcount -
	    le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs));
	if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "groups count too large");
		return (EINVAL);
	}

	/* Check for extra isize in big inodes. */
	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) &&
	    EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "no space for extra inode timestamps");
		return (EINVAL);
	}

	/* s_resuid / s_resgid ? */

	/*
	 * e2fs_descpb is the number of on-disk descriptors per block.
	 * The in-memory array always holds struct ext2_gd entries, so in
	 * the non-64bit case the allocation is sized by
	 * sizeof(struct ext2_gd) rather than by E2FS_REV0_GD_SIZE.
	 */
	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
		e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE;
		e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb);
	} else {
		e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE;
		e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount,
		    fs->e2fs_bsize / sizeof(struct ext2_gd));
	}
	fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb);
	fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize,
	    M_EXT2MNT, M_WAITOK | M_ZERO);
	fs->e2fs_contigdirs = malloc(fs->e2fs_gcount *
	    sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO);

	/* Read every group descriptor block into fs->e2fs_gd. */
	for (i = 0; i < fs->e2fs_gdbcount; i++) {
		error = bread(devvp,
		    fsbtodb(fs, ext2_cg_location(fs, i)),
		    fs->e2fs_bsize, NOCRED, &bp);
		if (error) {
			/*
			 * fs->e2fs_gd and fs->e2fs_contigdirs
			 * will be freed later by the caller,
			 * because this function could be called from
			 * MNT_UPDATE path.
			 */
			return (error);
		}
		if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
			/* Whole block is copied as-is. */
			memcpy(&fs->e2fs_gd[
			    i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
			    bp->b_data, fs->e2fs_bsize);
		} else {
			/*
			 * Copy one E2FS_REV0_GD_SIZE descriptor at a time
			 * into the leading bytes of each struct ext2_gd
			 * slot; the rest of the slot stays zero (M_ZERO).
			 */
			for (j = 0; j < e2fs_descpb &&
			    g_count < fs->e2fs_gcount; j++, g_count++)
				memcpy(&fs->e2fs_gd[g_count],
				    bp->b_data + j * E2FS_REV0_GD_SIZE,
				    E2FS_REV0_GD_SIZE);
		}
		brelse(bp);
		bp = NULL;
	}

	/* Validate cgs consistency */
	error = ext2_cg_validate(fs);
	if (error)
		return (error);

	/* Verify cgs csum */
	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
	    EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
		error = ext2_gd_csum_verify(fs, devvp->v_rdev);
		if (error)
			return (error);
	}
	/* Initialization for the ext2 Orlov allocator variant. */
	fs->e2fs_total_dir = 0;
	for (i = 0; i < fs->e2fs_gcount; i++)
		fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]);

	/* Maximum file size depends on LARGEFILE / HUGE_FILE. */
	if (le32toh(es->e2fs_rev) == E2FS_REV0 ||
	    !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE))
		fs->e2fs_maxfilesize = 0x7fffffff;
	else {
		fs->e2fs_maxfilesize = 0xffffffffffff;
		if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE))
			fs->e2fs_maxfilesize = 0x7fffffffffffffff;
	}
	/*
	 * Hash signedness: honour an explicit flag in the superblock;
	 * if neither flag is set, record the signedness of `char' on
	 * this platform in es->e4fs_flags.
	 */
	if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) {
		fs->e2fs_uhash = 3;
	} else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) {
#ifdef __CHAR_UNSIGNED__
		es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH);
		fs->e2fs_uhash = 3;
#else
		es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH);
#endif
	}
	/* Re-run the superblock checksum verification before returning. */
	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
		error = ext2_sb_csum_verify(fs);

	return (error);
}
720
721 /*
722 * Reload all incore data for a filesystem (used after running fsck on
723 * the root filesystem and finding things to fix). The filesystem must
724 * be mounted read-only.
725 *
726 * Things to do to update the mount:
727 * 1) invalidate all cached meta-data.
728 * 2) re-read superblock from disk.
729 * 3) invalidate all cluster summary information.
730 * 4) invalidate all inactive vnodes.
731 * 5) invalidate all cached file data.
732 * 6) re-read inode data for all active vnodes.
733 * XXX we are missing some steps, in particular # 3, this has to be reviewed.
734 */
735 static int
ext2_reload(struct mount * mp,struct thread * td)736 ext2_reload(struct mount *mp, struct thread *td)
737 {
738 struct vnode *vp, *mvp, *devvp;
739 struct inode *ip;
740 struct buf *bp;
741 struct ext2fs *es;
742 struct m_ext2fs *fs;
743 struct csum *sump;
744 int error, i;
745 int32_t *lp;
746
747 if ((mp->mnt_flag & MNT_RDONLY) == 0)
748 return (EINVAL);
749 /*
750 * Step 1: invalidate all cached meta-data.
751 */
752 devvp = VFSTOEXT2(mp)->um_devvp;
753 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
754 if (vinvalbuf(devvp, 0, 0, 0) != 0)
755 panic("ext2_reload: dirty1");
756 VOP_UNLOCK(devvp);
757
758 /*
759 * Step 2: re-read superblock from disk.
760 * constants have been adjusted for ext2
761 */
762 if ((error = bread(devvp, SBLOCK, SBLOCKBLKSIZE, NOCRED, &bp)) != 0)
763 return (error);
764 es = (struct ext2fs *)((char *)bp->b_data + SBLOCKOFFSET);
765 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) {
766 brelse(bp);
767 return (EIO); /* XXX needs translation */
768 }
769 fs = VFSTOEXT2(mp)->um_e2fs;
770 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs));
771
772 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) {
773 brelse(bp);
774 return (error);
775 }
776
777 brelse(bp);
778
779 /*
780 * Step 3: invalidate all cluster summary information.
781 */
782 if (fs->e2fs_contigsumsize > 0) {
783 lp = fs->e2fs_maxcluster;
784 sump = fs->e2fs_clustersum;
785 for (i = 0; i < fs->e2fs_gcount; i++, sump++) {
786 *lp++ = fs->e2fs_contigsumsize;
787 sump->cs_init = 0;
788 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1);
789 }
790 }
791
792 loop:
793 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
794 /*
795 * Step 4: invalidate all cached file data.
796 */
797 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
798 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
799 goto loop;
800 }
801 if (vinvalbuf(vp, 0, 0, 0))
802 panic("ext2_reload: dirty2");
803
804 /*
805 * Step 5: re-read inode data for all active vnodes.
806 */
807 ip = VTOI(vp);
808 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
809 (int)fs->e2fs_bsize, NOCRED, &bp);
810 if (error) {
811 vput(vp);
812 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
813 return (error);
814 }
815
816 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data +
817 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip);
818
819 brelse(bp);
820 vput(vp);
821
822 if (error) {
823 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
824 return (error);
825 }
826 }
827 return (0);
828 }
829
/*
 * Common code for mount and mountroot.
 *
 * Opens `devvp' through GEOM, reads and validates the superblock,
 * allocates the ext2mount / m_ext2fs / ext2fs structures and the
 * cluster summary arrays, and attaches the result to `mp'.
 *
 * devvp is unlocked on every return path once g_vfs_open() has been
 * attempted.  Returns 0 on success; on failure the GEOM consumer is
 * closed, everything allocated here is freed and an errno value is
 * returned (EINVAL for an unsupported page/sector size or superblock,
 * EPERM for a writable mount of an unclean filesystem without
 * MNT_FORCE, otherwise the helper's error).
 */
static int
ext2_mountfs(struct vnode *devvp, struct mount *mp)
{
	struct ext2mount *ump;
	struct buf *bp;
	struct m_ext2fs *fs;
	struct ext2fs *es;
	struct cdev *dev = devvp->v_rdev;
	struct g_consumer *cp;
	struct bufobj *bo;
	struct csum *sump;
	int error;
	int ronly;
	int i;
	u_long size;
	int32_t *lp;
	int32_t e2fs_maxcontig;

	bp = NULL;
	ump = NULL;

	ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0);
	/* XXX: use VOP_ACCESS to check FS perms */
	g_topology_lock();
	/* Request write access to the provider only for a r/w mount. */
	error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1);
	g_topology_unlock();
	VOP_UNLOCK(devvp);
	if (error)
		return (error);

	if (PAGE_SIZE != SBLOCKBLKSIZE) {
		printf("WARNING: Unsupported page size %d\n", PAGE_SIZE);
		error = EINVAL;
		goto out;
	}
	if (cp->provider->sectorsize > PAGE_SIZE) {
		printf("WARNING: Device sectorsize(%d) is more than %d\n",
		    cp->provider->sectorsize, PAGE_SIZE);
		error = EINVAL;
		goto out;
	}

	/* Route buffer I/O on the device vnode through the consumer. */
	bo = &devvp->v_bufobj;
	bo->bo_private = cp;
	bo->bo_ops = g_vfs_bufops;
	if (devvp->v_rdev->si_iosize_max != 0)
		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
	if (mp->mnt_iosize_max > maxphys)
		mp->mnt_iosize_max = maxphys;
	if ((error = bread(devvp, SBLOCK, SBLOCKBLKSIZE, NOCRED, &bp)) != 0)
		goto out;
	/* The superblock sits SBLOCKOFFSET bytes into the buffer. */
	es = (struct ext2fs *)((char *)bp->b_data + SBLOCKOFFSET);
	if (ext2_check_sb_compat(es, dev, ronly) != 0) {
		error = EINVAL;		/* XXX needs translation */
		goto out;
	}
	if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 ||
	    (le16toh(es->e2fs_state) & E2FS_ERRORS)) {
		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
			printf(
"WARNING: Filesystem was not properly dismounted\n");
		} else {
			printf(
"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n");
			error = EPERM;
			goto out;
		}
	}
	ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO);

	/*
	 * I don't know whether this is the right strategy. Note that
	 * we dynamically allocate both an m_ext2fs and an ext2fs
	 * while Linux keeps the super block in a locked buffer.
	 */
	ump->um_e2fs = malloc(sizeof(struct m_ext2fs),
	    M_EXT2MNT, M_WAITOK | M_ZERO);
	ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs),
	    M_EXT2MNT, M_WAITOK);
	mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF);
	bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs));
	/* From here on the `out' path also frees e2fs_gd/e2fs_contigdirs. */
	if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs)))
		goto out;

	/*
	 * Calculate the maximum contiguous blocks and size of cluster summary
	 * array. In FFS this is done by newfs; however, the superblock
	 * in ext2fs doesn't have these variables, so we can calculate
	 * them here.
	 */
	e2fs_maxcontig = MAX(1, maxphys / ump->um_e2fs->e2fs_bsize);
	ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG);
	ump->um_e2fs->e2fs_maxsymlinklen = EXT2_MAXSYMLINKLEN;
	if (ump->um_e2fs->e2fs_contigsumsize > 0) {
		/* One int32_t and one struct csum per block group. */
		size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t);
		ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK);
		size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum);
		ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK);
		lp = ump->um_e2fs->e2fs_maxcluster;
		sump = ump->um_e2fs->e2fs_clustersum;
		for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) {
			*lp++ = ump->um_e2fs->e2fs_contigsumsize;
			sump->cs_init = 0;
			/* cs_sum: (contigsumsize + 1) zeroed int32_t. */
			sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) *
			    sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO);
		}
	}

	brelse(bp);
	bp = NULL;
	fs = ump->um_e2fs;
	fs->e2fs_ronly = ronly;	/* ronly is set according to mnt_flags */

	/*
	 * If the fs is not mounted read-only, make sure the super block is
	 * always written back on a sync().
	 */
	/* Remember whether the filesystem was clean when mounted. */
	fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0;
	if (ronly == 0) {
		fs->e2fs_fmod = 1;	/* mark it modified and set fs invalid */
		fs->e2fs->e2fs_state =
		    htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN);
	}
	mp->mnt_data = ump;
	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	MNT_IUNLOCK(mp);
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_bo = &devvp->v_bufobj;
	ump->um_cp = cp;

	/*
	 * Setting those two parameters allowed us to use
	 * ufs_bmap w/o changes!
	 */
	ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs);
	ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1;
	ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs);
	/* Write the superblock (now without E2FS_ISCLEAN) for r/w mounts. */
	if (ronly == 0)
		ext2_sbupdate(ump, MNT_WAIT);
	/*
	 * Initialize filesystem stat information in mount struct.
	 */
	MNT_ILOCK(mp);
	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
	    MNTK_USES_BCACHE;
	MNT_IUNLOCK(mp);
	return (0);
out:
	/* Error unwind: release whatever was acquired so far. */
	if (bp)
		brelse(bp);
	if (cp != NULL) {
		g_topology_lock();
		g_vfs_close(cp);
		g_topology_unlock();
	}
	if (ump) {
		mtx_destroy(EXT2_MTX(ump));
		free(ump->um_e2fs->e2fs_gd, M_EXT2MNT);
		free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT);
		free(ump->um_e2fs->e2fs, M_EXT2MNT);
		free(ump->um_e2fs, M_EXT2MNT);
		free(ump, M_EXT2MNT);
		mp->mnt_data = NULL;
	}
	return (error);
}
1004
1005 /*
1006 * Unmount system call.
1007 */
1008 static int
ext2_unmount(struct mount * mp,int mntflags)1009 ext2_unmount(struct mount *mp, int mntflags)
1010 {
1011 struct ext2mount *ump;
1012 struct m_ext2fs *fs;
1013 struct csum *sump;
1014 int error, flags, i, ronly;
1015
1016 flags = 0;
1017 if (mntflags & MNT_FORCE) {
1018 if (mp->mnt_flag & MNT_ROOTFS)
1019 return (EINVAL);
1020 flags |= FORCECLOSE;
1021 }
1022 if ((error = ext2_flushfiles(mp, flags, curthread)) != 0)
1023 return (error);
1024 ump = VFSTOEXT2(mp);
1025 fs = ump->um_e2fs;
1026 ronly = fs->e2fs_ronly;
1027 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) {
1028 if (fs->e2fs_wasvalid)
1029 fs->e2fs->e2fs_state =
1030 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN);
1031 ext2_sbupdate(ump, MNT_WAIT);
1032 }
1033
1034 g_topology_lock();
1035 g_vfs_close(ump->um_cp);
1036 g_topology_unlock();
1037 vrele(ump->um_devvp);
1038 sump = fs->e2fs_clustersum;
1039 for (i = 0; i < fs->e2fs_gcount; i++, sump++)
1040 free(sump->cs_sum, M_EXT2MNT);
1041 free(fs->e2fs_clustersum, M_EXT2MNT);
1042 free(fs->e2fs_maxcluster, M_EXT2MNT);
1043 free(fs->e2fs_gd, M_EXT2MNT);
1044 free(fs->e2fs_contigdirs, M_EXT2MNT);
1045 free(fs->e2fs, M_EXT2MNT);
1046 free(fs, M_EXT2MNT);
1047 free(ump, M_EXT2MNT);
1048 mp->mnt_data = NULL;
1049 return (error);
1050 }
1051
/*
 * Flush out all the files in a filesystem.
 *
 * Hands the mount, the caller's flags and the thread straight to vflush()
 * (with 0 as its second argument) and returns whatever vflush() returns.
 */
static int
ext2_flushfiles(struct mount *mp, int flags, struct thread *td)
{

	return (vflush(mp, 0, flags, td));
}
1063
1064 /*
1065 * Get filesystem statistics.
1066 */
1067 int
ext2_statfs(struct mount * mp,struct statfs * sbp)1068 ext2_statfs(struct mount *mp, struct statfs *sbp)
1069 {
1070 struct ext2mount *ump;
1071 struct m_ext2fs *fs;
1072 uint32_t overhead, overhead_per_group, ngdb;
1073 int i, ngroups;
1074
1075 ump = VFSTOEXT2(mp);
1076 fs = ump->um_e2fs;
1077 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC)
1078 panic("ext2_statfs");
1079
1080 /*
1081 * Compute the overhead (FS structures)
1082 */
1083 overhead_per_group =
1084 1 /* block bitmap */ +
1085 1 /* inode bitmap */ +
1086 fs->e2fs_itpg;
1087 overhead = le32toh(fs->e2fs->e2fs_first_dblock) +
1088 fs->e2fs_gcount * overhead_per_group;
1089 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 &&
1090 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) {
1091 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) {
1092 if (ext2_cg_has_sb(fs, i))
1093 ngroups++;
1094 }
1095 } else {
1096 ngroups = fs->e2fs_gcount;
1097 }
1098 ngdb = fs->e2fs_gdbcount;
1099 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 &&
1100 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE)
1101 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb);
1102 overhead += ngroups * (1 /* superblock */ + ngdb);
1103
1104 sbp->f_bsize = EXT2_FRAG_SIZE(fs);
1105 sbp->f_iosize = EXT2_BLOCK_SIZE(fs);
1106 sbp->f_blocks = fs->e2fs_bcount - overhead;
1107 sbp->f_bfree = fs->e2fs_fbcount;
1108 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount;
1109 sbp->f_files = le32toh(fs->e2fs->e2fs_icount);
1110 sbp->f_ffree = fs->e2fs_ficount;
1111 return (0);
1112 }
1113
1114 /*
1115 * Go through the disk queues to initiate sandbagged IO;
1116 * go through the inodes to write those that have been modified;
1117 * initiate the writing of the super block if it has been modified.
1118 *
1119 * Note: we are always called with the filesystem marked `MPBUSY'.
1120 */
1121 static int
ext2_sync(struct mount * mp,int waitfor)1122 ext2_sync(struct mount *mp, int waitfor)
1123 {
1124 struct vnode *mvp, *vp;
1125 struct thread *td;
1126 struct inode *ip;
1127 struct ext2mount *ump = VFSTOEXT2(mp);
1128 struct m_ext2fs *fs;
1129 int error, allerror = 0;
1130
1131 td = curthread;
1132 fs = ump->um_e2fs;
1133 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */
1134 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt);
1135 }
1136
1137 /*
1138 * Write back each (modified) inode.
1139 */
1140 loop:
1141 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1142 if (vp->v_type == VNON) {
1143 VI_UNLOCK(vp);
1144 continue;
1145 }
1146 ip = VTOI(vp);
1147 if ((ip->i_flag &
1148 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1149 (vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1150 waitfor == MNT_LAZY)) {
1151 VI_UNLOCK(vp);
1152 continue;
1153 }
1154 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
1155 if (error) {
1156 if (error == ENOENT) {
1157 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1158 goto loop;
1159 }
1160 continue;
1161 }
1162 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0)
1163 allerror = error;
1164 vput(vp);
1165 }
1166
1167 /*
1168 * Force stale filesystem control information to be flushed.
1169 */
1170 if (waitfor != MNT_LAZY) {
1171 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1172 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0)
1173 allerror = error;
1174 VOP_UNLOCK(ump->um_devvp);
1175 }
1176
1177 /*
1178 * Write back modified superblock.
1179 */
1180 if (fs->e2fs_fmod != 0) {
1181 fs->e2fs_fmod = 0;
1182 fs->e2fs->e2fs_wtime = htole32(time_second);
1183 if ((error = ext2_cgupdate(ump, waitfor)) != 0)
1184 allerror = error;
1185 }
1186 return (allerror);
1187 }
1188
1189 /*
1190 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it
1191 * in from disk. If it is in core, wait for the lock bit to clear, then
1192 * return the inode locked. Detection and handling of mount points must be
1193 * done by the calling routine.
1194 */
1195 static int
ext2_vget(struct mount * mp,ino_t ino,int flags,struct vnode ** vpp)1196 ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
1197 {
1198 struct m_ext2fs *fs;
1199 struct inode *ip;
1200 struct ext2mount *ump;
1201 struct buf *bp;
1202 struct vnode *vp;
1203 struct thread *td;
1204 unsigned int i, used_blocks;
1205 int error;
1206
1207 td = curthread;
1208 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL);
1209 if (error || *vpp != NULL)
1210 return (error);
1211
1212 ump = VFSTOEXT2(mp);
1213 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO);
1214
1215 /* Allocate a new vnode/inode. */
1216 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) {
1217 *vpp = NULL;
1218 free(ip, M_EXT2NODE);
1219 return (error);
1220 }
1221 vp->v_data = ip;
1222 ip->i_vnode = vp;
1223 ip->i_e2fs = fs = ump->um_e2fs;
1224 ip->i_ump = ump;
1225 ip->i_number = ino;
1226 cluster_init_vn(&ip->i_clusterw);
1227
1228 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
1229 error = insmntque(vp, mp);
1230 if (error != 0) {
1231 free(ip, M_EXT2NODE);
1232 *vpp = NULL;
1233 return (error);
1234 }
1235 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
1236 if (error || *vpp != NULL)
1237 return (error);
1238
1239 /* Read in the disk contents for the inode, copy into the inode. */
1240 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1241 (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) {
1242 /*
1243 * The inode does not contain anything useful, so it would
1244 * be misleading to leave it on its hash chain. With mode
1245 * still zero, it will be unlinked and returned to the free
1246 * list by vput().
1247 */
1248 brelse(bp);
1249 vput(vp);
1250 *vpp = NULL;
1251 return (error);
1252 }
1253 /* convert ext2 inode to dinode */
1254 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data +
1255 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip);
1256 if (error) {
1257 brelse(bp);
1258 vput(vp);
1259 *vpp = NULL;
1260 return (error);
1261 }
1262 ip->i_block_group = ino_to_cg(fs, ino);
1263 ip->i_next_alloc_block = 0;
1264 ip->i_next_alloc_goal = 0;
1265
1266 /*
1267 * Now we want to make sure that block pointers for unused
1268 * blocks are zeroed out - ext2_balloc depends on this
1269 * although for regular files and directories only
1270 *
1271 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed
1272 * out because we could corrupt the extent tree.
1273 */
1274 if (!(ip->i_flag & IN_E4EXTENTS) &&
1275 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) {
1276 used_blocks = howmany(ip->i_size, fs->e2fs_bsize);
1277 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
1278 ip->i_db[i] = 0;
1279 }
1280
1281 bqrelse(bp);
1282
1283 #ifdef EXT2FS_PRINT_EXTENTS
1284 ext2_print_inode(ip);
1285 error = ext4_ext_walk(ip);
1286 if (error) {
1287 vput(vp);
1288 *vpp = NULL;
1289 return (error);
1290 }
1291 #endif
1292
1293 /*
1294 * Initialize the vnode from the inode, check for aliases.
1295 * Note that the underlying vnode may have changed.
1296 */
1297 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) {
1298 vput(vp);
1299 *vpp = NULL;
1300 return (error);
1301 }
1302
1303 /*
1304 * Finish inode initialization.
1305 */
1306
1307 vn_set_state(vp, VSTATE_CONSTRUCTED);
1308 *vpp = vp;
1309 return (0);
1310 }
1311
1312 /*
1313 * File handle to vnode
1314 *
1315 * Have to be really careful about stale file handles:
1316 * - check that the inode number is valid
1317 * - call ext2_vget() to get the locked inode
1318 * - check for an unallocated inode (i_mode == 0)
1319 * - check that the given client host has export rights and return
1320 * those rights via. exflagsp and credanonp
1321 */
1322 static int
ext2_fhtovp(struct mount * mp,struct fid * fhp,int flags,struct vnode ** vpp)1323 ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
1324 {
1325 struct inode *ip;
1326 struct ufid *ufhp;
1327 struct vnode *nvp;
1328 struct m_ext2fs *fs;
1329 int error;
1330
1331 ufhp = (struct ufid *)fhp;
1332 fs = VFSTOEXT2(mp)->um_e2fs;
1333 if (ufhp->ufid_ino < EXT2_ROOTINO ||
1334 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg)
1335 return (ESTALE);
1336
1337 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp);
1338 if (error) {
1339 *vpp = NULLVP;
1340 return (error);
1341 }
1342 ip = VTOI(nvp);
1343 if (ip->i_mode == 0 ||
1344 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) {
1345 vput(nvp);
1346 *vpp = NULLVP;
1347 return (ESTALE);
1348 }
1349 *vpp = nvp;
1350 vnode_create_vobject(*vpp, 0, curthread);
1351 return (0);
1352 }
1353
1354 /*
1355 * Write a superblock and associated information back to disk.
1356 */
1357 static int
ext2_sbupdate(struct ext2mount * mp,int waitfor)1358 ext2_sbupdate(struct ext2mount *mp, int waitfor)
1359 {
1360 struct m_ext2fs *fs = mp->um_e2fs;
1361 struct ext2fs *es = fs->e2fs;
1362 struct buf *bp;
1363 int error = 0;
1364
1365 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff);
1366 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff);
1367 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff);
1368 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
1369 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32);
1370 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32);
1371 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32);
1372 }
1373
1374 es->e2fs_ficount = htole32(fs->e2fs_ficount);
1375
1376 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
1377 ext2_sb_csum_set(fs);
1378
1379 error = bread(mp->um_devvp, SBLOCK, SBLOCKBLKSIZE, NOCRED, &bp);
1380 if (error != 0)
1381 return (error);
1382
1383 memcpy((char *)bp->b_data + SBLOCKOFFSET, (caddr_t)es,
1384 (u_int)sizeof(struct ext2fs));
1385 if (waitfor == MNT_WAIT)
1386 error = bwrite(bp);
1387 else
1388 bawrite(bp);
1389
1390 /*
1391 * The buffers for group descriptors, inode bitmaps and block bitmaps
1392 * are not busy at this point and are (hopefully) written by the
1393 * usual sync mechanism. No need to write them here.
1394 */
1395 return (error);
1396 }
1397 int
ext2_cgupdate(struct ext2mount * mp,int waitfor)1398 ext2_cgupdate(struct ext2mount *mp, int waitfor)
1399 {
1400 struct m_ext2fs *fs = mp->um_e2fs;
1401 struct buf *bp;
1402 int i, j, g_count = 0, error = 0, allerror = 0;
1403
1404 allerror = ext2_sbupdate(mp, waitfor);
1405
1406 /* Update gd csums */
1407 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
1408 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
1409 ext2_gd_csum_set(fs);
1410
1411 for (i = 0; i < fs->e2fs_gdbcount; i++) {
1412 bp = getblk(mp->um_devvp, fsbtodb(fs,
1413 ext2_cg_location(fs, i)),
1414 fs->e2fs_bsize, 0, 0, 0);
1415 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
1416 memcpy(bp->b_data, &fs->e2fs_gd[
1417 i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
1418 fs->e2fs_bsize);
1419 } else {
1420 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE &&
1421 g_count < fs->e2fs_gcount; j++, g_count++)
1422 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE,
1423 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE);
1424 }
1425 if (waitfor == MNT_WAIT)
1426 error = bwrite(bp);
1427 else
1428 bawrite(bp);
1429 }
1430
1431 if (!allerror && error)
1432 allerror = error;
1433 return (allerror);
1434 }
1435
1436 /*
1437 * Return the root of a filesystem.
1438 */
1439 static int
ext2_root(struct mount * mp,int flags,struct vnode ** vpp)1440 ext2_root(struct mount *mp, int flags, struct vnode **vpp)
1441 {
1442 struct vnode *nvp;
1443 int error;
1444
1445 error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp);
1446 if (error)
1447 return (error);
1448 *vpp = nvp;
1449 return (0);
1450 }
1451