// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2001 Andrea Arcangeli <[email protected]> SuSE
 * Copyright (C) 2016 - 2020 Christoph Hellwig
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/device_cgroup.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/backing-dev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/magic.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/part_stat.h>
#include <linux/uaccess.h>
#include <linux/stat.h>
#include "../fs/internal.h"
#include "blk.h"

/* Should we allow writing to mounted block devices? */
static bool bdev_allow_write_mounted = IS_ENABLED(CONFIG_BLK_DEV_WRITE_MOUNTED);

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

static inline struct inode *BD_INODE(struct block_device *bdev)
{
	return &container_of(bdev, struct bdev_inode, bdev)->vfs_inode;
}

struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}
EXPORT_SYMBOL(I_BDEV);

struct block_device *file_bdev(struct file *bdev_file)
{
	return I_BDEV(bdev_file->f_mapping->host);
}
EXPORT_SYMBOL(file_bdev);

static void bdev_write_inode(struct block_device *bdev)
{
	struct inode *inode = BD_INODE(bdev);
	int ret;

	spin_lock(&inode->i_lock);
	while (inode->i_state & I_DIRTY) {
		spin_unlock(&inode->i_lock);
		ret = write_inode_now(inode, true);
		if (ret)
			pr_warn_ratelimited(
				"VFS: Dirty inode writeback failed for block device %pg (err=%d).\n",
				bdev, ret);
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
}

/* Kill _all_ buffers and page cache, dirty or not. */
static void kill_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_mapping;

	if (mapping_empty(mapping))
		return;

	invalidate_bh_lrus();
	truncate_inode_pages(mapping, 0);
}

/* Invalidate clean unused buffers and pagecache. */
void invalidate_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_mapping;

	if (mapping->nrpages) {
		invalidate_bh_lrus();
		lru_add_drain_all();	/* make sure all lru add caches are flushed */
		invalidate_mapping_pages(mapping, 0, -1);
	}
}
EXPORT_SYMBOL(invalidate_bdev);
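
/*
 * Illustrative sketch (an editorial addition, not from the original
 * source; "new_nr_sectors" is a hypothetical value): a driver that
 * resized the media behind the page cache could drop stale clean pages
 * with:
 *
 *	set_capacity(disk, new_nr_sectors);
 *	invalidate_bdev(disk->part0);
 *
 * Unlike kill_bdev() above, invalidate_bdev() deliberately leaves dirty
 * pages and buffers alone.
 */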

/*
 * Drop all buffers & page cache for given bdev range. This function bails
 * out with an error if the bdev has another exclusive owner (such as a
 * filesystem).
 */
int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
			loff_t lstart, loff_t lend)
{
	/*
	 * If we don't hold exclusive handle for the device, upgrade to it
	 * while we discard the buffer cache to avoid discarding buffers
	 * under live filesystem.
	 */
	if (!(mode & BLK_OPEN_EXCL)) {
		int err = bd_prepare_to_claim(bdev, truncate_bdev_range, NULL);
		if (err)
			goto invalidate;
	}

	truncate_inode_pages_range(bdev->bd_mapping, lstart, lend);
	if (!(mode & BLK_OPEN_EXCL))
		bd_abort_claiming(bdev, truncate_bdev_range);
	return 0;

invalidate:
	/*
	 * Someone else has the handle exclusively open. Try invalidating
	 * instead. The 'end' argument is inclusive so the rounding is safe.
	 */
	return invalidate_inode_pages2_range(bdev->bd_mapping,
					     lstart >> PAGE_SHIFT,
					     lend >> PAGE_SHIFT);
}
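
/*
 * Example (an illustrative sketch, not part of the original source): a
 * discard-style ioctl typically drops the affected page cache range via
 * truncate_bdev_range() before issuing the discard, so the cache cannot
 * serve stale data afterwards:
 *
 *	err = truncate_bdev_range(bdev, mode, start, start + len - 1);
 *	if (err)
 *		return err;
 *	err = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
 *				   len >> SECTOR_SHIFT, GFP_KERNEL);
 */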

static void set_init_blocksize(struct block_device *bdev)
{
	unsigned int bsize = bdev_logical_block_size(bdev);
	loff_t size = i_size_read(BD_INODE(bdev));

	while (bsize < PAGE_SIZE) {
		if (size & bsize)
			break;
		bsize <<= 1;
	}
	BD_INODE(bdev)->i_blkbits = blksize_bits(bsize);
	mapping_set_folio_min_order(BD_INODE(bdev)->i_mapping,
				    get_order(bsize));
}

/**
 * bdev_validate_blocksize - check that this block size is acceptable
 * @bdev: blockdevice to check
 * @block_size: block size to check
 *
 * For block device users that do not use buffer heads or the block device
 * page cache, make sure that this block size can be used with the device.
 *
 * Return: On success zero is returned, negative error code on failure.
 */
int bdev_validate_blocksize(struct block_device *bdev, int block_size)
{
	if (blk_validate_block_size(block_size))
		return -EINVAL;

	/* Size cannot be smaller than the size supported by the device */
	if (block_size < bdev_logical_block_size(bdev))
		return -EINVAL;

	return 0;
}
EXPORT_SYMBOL_GPL(bdev_validate_blocksize);
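
/*
 * Example (a hedged sketch; "fs_block_size" is a hypothetical mount
 * parameter, not defined here): users doing direct I/O to the device can
 * validate a configured block size up front:
 *
 *	error = bdev_validate_blocksize(sb->s_bdev, fs_block_size);
 *	if (error)
 *		return error;
 */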

int set_blocksize(struct file *file, int size)
{
	struct inode *inode = file->f_mapping->host;
	struct block_device *bdev = I_BDEV(inode);
	int ret;

	ret = bdev_validate_blocksize(bdev, size);
	if (ret)
		return ret;

	if (!file->private_data)
		return -EINVAL;

	/* Don't change the size if it is the same as the current one */
	if (inode->i_blkbits != blksize_bits(size)) {
		/*
		 * Flush and truncate the pagecache before we reconfigure the
		 * mapping geometry because folio sizes are variable now. If a
		 * reader has already allocated a folio whose size is smaller
		 * than the new min_order but invokes readahead after the new
		 * min_order becomes visible, readahead will think there are
		 * "zero" blocks per folio and crash. Take the inode and
		 * invalidation locks to avoid racing with
		 * read/write/fallocate.
		 */
		inode_lock(inode);
		filemap_invalidate_lock(inode->i_mapping);

		sync_blockdev(bdev);
		kill_bdev(bdev);

		inode->i_blkbits = blksize_bits(size);
		mapping_set_folio_min_order(inode->i_mapping, get_order(size));
		kill_bdev(bdev);
		filemap_invalidate_unlock(inode->i_mapping);
		inode_unlock(inode);
	}
	return 0;
}

EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
	if (!(sb->s_type->fs_flags & FS_LBS) && size > PAGE_SIZE)
		return 0;
	if (set_blocksize(sb->s_bdev_file, size))
		return 0;
	/* If we get here, we know size is validated */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);
	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);
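
/*
 * Illustrative mount-time usage (a sketch; "raw_sb" is a hypothetical
 * on-disk superblock structure, not something defined here): filesystems
 * commonly probe with the smallest supported size, read their superblock,
 * then switch to the real block size:
 *
 *	if (!sb_min_blocksize(sb, BLOCK_SIZE))
 *		return -EINVAL;
 *	...read the on-disk superblock...
 *	if (!sb_set_blocksize(sb, le32_to_cpu(raw_sb->s_blocksize)))
 *		return -EINVAL;
 */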

int sync_blockdev_nowait(struct block_device *bdev)
{
	if (!bdev)
		return 0;
	return filemap_flush(bdev->bd_mapping);
}
EXPORT_SYMBOL_GPL(sync_blockdev_nowait);

/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping. Does not take the superblock lock.
 */
int sync_blockdev(struct block_device *bdev)
{
	if (!bdev)
		return 0;
	return filemap_write_and_wait(bdev->bd_mapping);
}
EXPORT_SYMBOL(sync_blockdev);

int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend)
{
	return filemap_write_and_wait_range(bdev->bd_mapping,
					    lstart, lend);
}
EXPORT_SYMBOL(sync_blockdev_range);
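
/*
 * Example (an assumption-level sketch, not from this file): making a byte
 * range stable before reusing it. Note that @lend is inclusive:
 *
 *	error = sync_blockdev_range(sb->s_bdev, pos, pos + count - 1);
 */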

/**
 * bdev_freeze - lock a filesystem and force it into a consistent state
 * @bdev: blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can actually unfreeze the frozen filesystem when
 * multiple freeze requests arrive simultaneously. It counts up in
 * bdev_freeze() and counts down in bdev_thaw(). When it reaches 0,
 * bdev_thaw() actually unfreezes the filesystem.
 *
 * Return: On success zero is returned, negative error code on failure.
 */
int bdev_freeze(struct block_device *bdev)
{
	int error = 0;

	mutex_lock(&bdev->bd_fsfreeze_mutex);

	if (atomic_inc_return(&bdev->bd_fsfreeze_count) > 1) {
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return 0;
	}

	mutex_lock(&bdev->bd_holder_lock);
	if (bdev->bd_holder_ops && bdev->bd_holder_ops->freeze) {
		error = bdev->bd_holder_ops->freeze(bdev);
		lockdep_assert_not_held(&bdev->bd_holder_lock);
	} else {
		mutex_unlock(&bdev->bd_holder_lock);
		error = sync_blockdev(bdev);
	}

	if (error)
		atomic_dec(&bdev->bd_fsfreeze_count);

	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return error;
}
EXPORT_SYMBOL(bdev_freeze);

/**
 * bdev_thaw - unlock filesystem
 * @bdev: blockdevice to unlock
 *
 * Unlocks the filesystem and marks it writeable again after bdev_freeze().
 *
 * Return: On success zero is returned, negative error code on failure.
 */
int bdev_thaw(struct block_device *bdev)
{
	int error = -EINVAL, nr_freeze;

	mutex_lock(&bdev->bd_fsfreeze_mutex);

	/*
	 * If this returns < 0 it means that @bd_fsfreeze_count was
	 * already 0 and no decrement was performed.
	 */
	nr_freeze = atomic_dec_if_positive(&bdev->bd_fsfreeze_count);
	if (nr_freeze < 0)
		goto out;

	error = 0;
	if (nr_freeze > 0)
		goto out;

	mutex_lock(&bdev->bd_holder_lock);
	if (bdev->bd_holder_ops && bdev->bd_holder_ops->thaw) {
		error = bdev->bd_holder_ops->thaw(bdev);
		lockdep_assert_not_held(&bdev->bd_holder_lock);
	} else {
		mutex_unlock(&bdev->bd_holder_lock);
	}

	if (error)
		atomic_inc(&bdev->bd_fsfreeze_count);
out:
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return error;
}
EXPORT_SYMBOL(bdev_thaw);
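
/*
 * Typical pairing of the two calls above (an illustrative sketch, not
 * part of the original source):
 *
 *	error = bdev_freeze(bdev);
 *	if (error)
 *		return error;
 *	...create the snapshot...
 *	bdev_thaw(bdev);
 *
 * Freezes nest: bd_fsfreeze_count ensures only the final bdev_thaw()
 * actually unfreezes the filesystem.
 */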

/*
 * pseudo-fs
 */

static __cacheline_aligned_in_smp DEFINE_MUTEX(bdev_lock);
static struct kmem_cache *bdev_cachep __ro_after_init;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
	struct bdev_inode *ei = alloc_inode_sb(sb, bdev_cachep, GFP_KERNEL);

	if (!ei)
		return NULL;
	memset(&ei->bdev, 0, sizeof(ei->bdev));

	if (security_bdev_alloc(&ei->bdev)) {
		kmem_cache_free(bdev_cachep, ei);
		return NULL;
	}
	return &ei->vfs_inode;
}

static void bdev_free_inode(struct inode *inode)
{
	struct block_device *bdev = I_BDEV(inode);

	free_percpu(bdev->bd_stats);
	kfree(bdev->bd_meta_info);
	security_bdev_free(bdev);

	if (!bdev_is_partition(bdev)) {
		if (bdev->bd_disk && bdev->bd_disk->bdi)
			bdi_put(bdev->bd_disk->bdi);
		kfree(bdev->bd_disk);
	}

	if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
		blk_free_ext_minor(MINOR(bdev->bd_dev));

	kmem_cache_free(bdev_cachep, BDEV_I(inode));
}

static void init_once(void *data)
{
	struct bdev_inode *ei = data;

	inode_init_once(&ei->vfs_inode);
}

static void bdev_evict_inode(struct inode *inode)
{
	truncate_inode_pages_final(&inode->i_data);
	invalidate_inode_buffers(inode); /* is it needed here? */
	clear_inode(inode);
}

static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.free_inode = bdev_free_inode,
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};

static int bd_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
	if (!ctx)
		return -ENOMEM;
	fc->s_iflags |= SB_I_CGROUPWB;
	ctx->ops = &bdev_sops;
	return 0;
}

static struct file_system_type bd_type = {
	.name		= "bdev",
	.init_fs_context = bd_init_fs_context,
	.kill_sb	= kill_anon_super,
};

struct super_block *blockdev_superblock __ro_after_init;
static struct vfsmount *blockdev_mnt __ro_after_init;
EXPORT_SYMBOL_GPL(blockdev_superblock);

void __init bdev_cache_init(void)
{
	int err;

	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_ACCOUNT|SLAB_PANIC),
			init_once);
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	blockdev_mnt = kern_mount(&bd_type);
	if (IS_ERR(blockdev_mnt))
		panic("Cannot create bdev pseudo-fs");
	blockdev_superblock = blockdev_mnt->mnt_sb;	/* For writeback */
}

struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = new_inode(blockdev_superblock);
	if (!inode)
		return NULL;
	inode->i_mode = S_IFBLK;
	inode->i_rdev = 0;
	inode->i_data.a_ops = &def_blk_aops;
	mapping_set_gfp_mask(&inode->i_data, GFP_USER);

	bdev = I_BDEV(inode);
	mutex_init(&bdev->bd_fsfreeze_mutex);
	spin_lock_init(&bdev->bd_size_lock);
	mutex_init(&bdev->bd_holder_lock);
	atomic_set(&bdev->__bd_flags, partno);
	bdev->bd_mapping = &inode->i_data;
	bdev->bd_queue = disk->queue;
	if (partno && bdev_test_flag(disk->part0, BD_HAS_SUBMIT_BIO))
		bdev_set_flag(bdev, BD_HAS_SUBMIT_BIO);
	bdev->bd_stats = alloc_percpu(struct disk_stats);
	if (!bdev->bd_stats) {
		iput(inode);
		return NULL;
	}
	bdev->bd_disk = disk;
	return bdev;
}

void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
{
	spin_lock(&bdev->bd_size_lock);
	i_size_write(BD_INODE(bdev), (loff_t)sectors << SECTOR_SHIFT);
	bdev->bd_nr_sectors = sectors;
	spin_unlock(&bdev->bd_size_lock);
}

void bdev_add(struct block_device *bdev, dev_t dev)
{
	struct inode *inode = BD_INODE(bdev);
	if (bdev_stable_writes(bdev))
		mapping_set_stable_writes(bdev->bd_mapping);
	bdev->bd_dev = dev;
	inode->i_rdev = dev;
	inode->i_ino = dev;
	insert_inode_hash(inode);
}

void bdev_unhash(struct block_device *bdev)
{
	remove_inode_hash(BD_INODE(bdev));
}

void bdev_drop(struct block_device *bdev)
{
	iput(BD_INODE(bdev));
}

long nr_blockdev_pages(void)
{
	struct inode *inode;
	long ret = 0;

	spin_lock(&blockdev_superblock->s_inode_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
		ret += inode->i_mapping->nrpages;
	spin_unlock(&blockdev_superblock->s_inode_list_lock);

	return ret;
}

/**
 * bd_may_claim - test whether a block device can be claimed
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 * @hops: holder ops
 *
 * Test whether @bdev can be claimed by @holder.
 *
 * RETURNS:
 * %true if @bdev can be claimed, %false otherwise.
 */
static bool bd_may_claim(struct block_device *bdev, void *holder,
		const struct blk_holder_ops *hops)
{
	struct block_device *whole = bdev_whole(bdev);

	lockdep_assert_held(&bdev_lock);

	if (bdev->bd_holder) {
		/*
		 * The same holder can always re-claim.
		 */
		if (bdev->bd_holder == holder) {
			if (WARN_ON_ONCE(bdev->bd_holder_ops != hops))
				return false;
			return true;
		}
		return false;
	}

	/*
	 * If the whole device's holder is set to bd_may_claim, a partition on
	 * the device is claimed, but not the whole device.
	 */
	if (whole != bdev &&
	    whole->bd_holder && whole->bd_holder != bd_may_claim)
		return false;
	return true;
}

/**
 * bd_prepare_to_claim - claim a block device
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 * @hops: holder ops.
 *
 * Claim @bdev. This function fails if @bdev is already claimed by another
 * holder and waits if another claiming is in progress. On successful
 * return, the caller has ownership of bd_claiming and bd_holder[s].
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */
int bd_prepare_to_claim(struct block_device *bdev, void *holder,
		const struct blk_holder_ops *hops)
{
	struct block_device *whole = bdev_whole(bdev);

	if (WARN_ON_ONCE(!holder))
		return -EINVAL;
retry:
	mutex_lock(&bdev_lock);
	/* if someone else claimed, fail */
	if (!bd_may_claim(bdev, holder, hops)) {
		mutex_unlock(&bdev_lock);
		return -EBUSY;
	}

	/* if claiming is already in progress, wait for it to finish */
	if (whole->bd_claiming) {
		wait_queue_head_t *wq = __var_waitqueue(&whole->bd_claiming);
		DEFINE_WAIT(wait);

		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		mutex_unlock(&bdev_lock);
		schedule();
		finish_wait(wq, &wait);
		goto retry;
	}

	/* yay, all mine */
	whole->bd_claiming = holder;
	mutex_unlock(&bdev_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
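
/*
 * Claiming protocol, sketched for illustration (hypothetical_setup() is a
 * placeholder, not a real function): a temporary exclusive claim without
 * an open, as truncate_bdev_range() above does, looks like:
 *
 *	err = bd_prepare_to_claim(bdev, holder, NULL);
 *	if (err)
 *		return err;
 *	err = hypothetical_setup(bdev);
 *	bd_abort_claiming(bdev, holder);
 *
 * The open path instead completes the claim via bd_finish_claiming()
 * below.
 */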

static void bd_clear_claiming(struct block_device *whole, void *holder)
{
	lockdep_assert_held(&bdev_lock);
	/* tell others that we're done */
	BUG_ON(whole->bd_claiming != holder);
	whole->bd_claiming = NULL;
	wake_up_var(&whole->bd_claiming);
}

/**
 * bd_finish_claiming - finish claiming of a block device
 * @bdev: block device of interest
 * @holder: holder that has claimed @bdev
 * @hops: block device holder operations
 *
 * Finish exclusive open of a block device. Mark the device as exclusively
 * open by the holder and wake up all waiters for exclusive open to finish.
 */
static void bd_finish_claiming(struct block_device *bdev, void *holder,
		const struct blk_holder_ops *hops)
{
	struct block_device *whole = bdev_whole(bdev);

	mutex_lock(&bdev_lock);
	BUG_ON(!bd_may_claim(bdev, holder, hops));
	/*
	 * Note that for a whole device bd_holders will be incremented twice,
	 * and bd_holder will be set to bd_may_claim before being set to holder
	 */
	whole->bd_holders++;
	whole->bd_holder = bd_may_claim;
	bdev->bd_holders++;
	mutex_lock(&bdev->bd_holder_lock);
	bdev->bd_holder = holder;
	bdev->bd_holder_ops = hops;
	mutex_unlock(&bdev->bd_holder_lock);
	bd_clear_claiming(whole, holder);
	mutex_unlock(&bdev_lock);
}

/**
 * bd_abort_claiming - abort claiming of a block device
 * @bdev: block device of interest
 * @holder: holder that has claimed @bdev
 *
 * Abort claiming of a block device when the exclusive open failed. This can be
 * also used when exclusive open is not actually desired and we just needed
 * to block other exclusive openers for a while.
 */
void bd_abort_claiming(struct block_device *bdev, void *holder)
{
	mutex_lock(&bdev_lock);
	bd_clear_claiming(bdev_whole(bdev), holder);
	mutex_unlock(&bdev_lock);
}
EXPORT_SYMBOL(bd_abort_claiming);

static void bd_end_claim(struct block_device *bdev, void *holder)
{
	struct block_device *whole = bdev_whole(bdev);
	bool unblock = false;

	/*
	 * Release a claim on the device. The holder fields are protected with
	 * bdev_lock. open_mutex is used to synchronize disk_holder unlinking.
	 */
	mutex_lock(&bdev_lock);
	WARN_ON_ONCE(bdev->bd_holder != holder);
	WARN_ON_ONCE(--bdev->bd_holders < 0);
	WARN_ON_ONCE(--whole->bd_holders < 0);
	if (!bdev->bd_holders) {
		mutex_lock(&bdev->bd_holder_lock);
		bdev->bd_holder = NULL;
		bdev->bd_holder_ops = NULL;
		mutex_unlock(&bdev->bd_holder_lock);
		if (bdev_test_flag(bdev, BD_WRITE_HOLDER))
			unblock = true;
	}
	if (!whole->bd_holders)
		whole->bd_holder = NULL;
	mutex_unlock(&bdev_lock);

	/*
	 * If this was the last claim, remove the holder link and unblock
	 * event polling if it was a write holder.
	 */
	if (unblock) {
		disk_unblock_events(bdev->bd_disk);
		bdev_clear_flag(bdev, BD_WRITE_HOLDER);
	}
}

static void blkdev_flush_mapping(struct block_device *bdev)
{
	WARN_ON_ONCE(bdev->bd_holders);
	sync_blockdev(bdev);
	kill_bdev(bdev);
	bdev_write_inode(bdev);
}

static void blkdev_put_whole(struct block_device *bdev)
{
	if (atomic_dec_and_test(&bdev->bd_openers))
		blkdev_flush_mapping(bdev);
	if (bdev->bd_disk->fops->release)
		bdev->bd_disk->fops->release(bdev->bd_disk);
}

static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
{
	struct gendisk *disk = bdev->bd_disk;
	int ret;

	if (disk->fops->open) {
		ret = disk->fops->open(disk, mode);
		if (ret) {
			/* avoid ghost partitions on a removed medium */
			if (ret == -ENOMEDIUM &&
			    test_bit(GD_NEED_PART_SCAN, &disk->state))
				bdev_disk_changed(disk, true);
			return ret;
		}
	}

	if (!atomic_read(&bdev->bd_openers))
		set_init_blocksize(bdev);
	atomic_inc(&bdev->bd_openers);
	if (test_bit(GD_NEED_PART_SCAN, &disk->state)) {
		/*
		 * Only return scanning errors if we are called from contexts
		 * that explicitly want them, e.g. the BLKRRPART ioctl.
		 */
		ret = bdev_disk_changed(disk, false);
		if (ret && (mode & BLK_OPEN_STRICT_SCAN)) {
			blkdev_put_whole(bdev);
			return ret;
		}
	}
	return 0;
}

static int blkdev_get_part(struct block_device *part, blk_mode_t mode)
{
	struct gendisk *disk = part->bd_disk;
	int ret;

	ret = blkdev_get_whole(bdev_whole(part), mode);
	if (ret)
		return ret;

	ret = -ENXIO;
	if (!bdev_nr_sectors(part))
		goto out_blkdev_put;

	if (!atomic_read(&part->bd_openers)) {
		disk->open_partitions++;
		set_init_blocksize(part);
	}
	atomic_inc(&part->bd_openers);
	return 0;

out_blkdev_put:
	blkdev_put_whole(bdev_whole(part));
	return ret;
}

int bdev_permission(dev_t dev, blk_mode_t mode, void *holder)
{
	int ret;

	ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
			MAJOR(dev), MINOR(dev),
			((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) |
			((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0));
	if (ret)
		return ret;

	/* Blocking writes requires an exclusive opener */
	if (mode & BLK_OPEN_RESTRICT_WRITES && !holder)
		return -EINVAL;

	/*
	 * We're using error pointers to indicate to ->release() when we
	 * failed to open that block device, so an error pointer makes no
	 * sense as a holder here.
	 */
	if (WARN_ON_ONCE(IS_ERR(holder)))
		return -EINVAL;

	return 0;
}

static void blkdev_put_part(struct block_device *part)
{
	struct block_device *whole = bdev_whole(part);

	if (atomic_dec_and_test(&part->bd_openers)) {
		blkdev_flush_mapping(part);
		whole->bd_disk->open_partitions--;
	}
	blkdev_put_whole(whole);
}

struct block_device *blkdev_get_no_open(dev_t dev, bool autoload)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = ilookup(blockdev_superblock, dev);
	if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
		blk_request_module(dev);
		inode = ilookup(blockdev_superblock, dev);
		if (inode)
			pr_warn_ratelimited(
"block device autoloading is deprecated and will be removed.\n");
	}
	if (!inode)
		return NULL;

	/* switch from the inode reference to a device model one: */
	bdev = &BDEV_I(inode)->bdev;
	if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
		bdev = NULL;
	iput(inode);
	return bdev;
}

void blkdev_put_no_open(struct block_device *bdev)
{
	put_device(&bdev->bd_device);
}

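/*
 * Sketch of the lookup/put pairing (an illustrative addition, not from
 * the original source): look up a device without opening it, then drop
 * the device-model reference:
 *
 *	bdev = blkdev_get_no_open(dev, false);
 *	if (!bdev)
 *		return -ENXIO;
 *	...inspect the device; no ->open() has been performed...
 *	blkdev_put_no_open(bdev);
 */
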
bdev_writes_blocked(struct block_device * bdev)847ed5cc702SJan Kara static bool bdev_writes_blocked(struct block_device *bdev)
848ed5cc702SJan Kara {
8493ff56e28SChristian Brauner return bdev->bd_writers < 0;
850ed5cc702SJan Kara }
851ed5cc702SJan Kara
bdev_block_writes(struct block_device * bdev)852ed5cc702SJan Kara static void bdev_block_writes(struct block_device *bdev)
853ed5cc702SJan Kara {
8543ff56e28SChristian Brauner bdev->bd_writers--;
855ed5cc702SJan Kara }
856ed5cc702SJan Kara
bdev_unblock_writes(struct block_device * bdev)857ed5cc702SJan Kara static void bdev_unblock_writes(struct block_device *bdev)
858ed5cc702SJan Kara {
8593ff56e28SChristian Brauner bdev->bd_writers++;
860ed5cc702SJan Kara }
861ed5cc702SJan Kara
bdev_may_open(struct block_device * bdev,blk_mode_t mode)862ed5cc702SJan Kara static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode)
863ed5cc702SJan Kara {
864ed5cc702SJan Kara if (bdev_allow_write_mounted)
865ed5cc702SJan Kara return true;
866ed5cc702SJan Kara /* Writes blocked? */
867ed5cc702SJan Kara if (mode & BLK_OPEN_WRITE && bdev_writes_blocked(bdev))
868ed5cc702SJan Kara return false;
869ed5cc702SJan Kara if (mode & BLK_OPEN_RESTRICT_WRITES && bdev->bd_writers > 0)
870ed5cc702SJan Kara return false;
871ed5cc702SJan Kara return true;
872ed5cc702SJan Kara }
873ed5cc702SJan Kara
bdev_claim_write_access(struct block_device * bdev,blk_mode_t mode)874ed5cc702SJan Kara static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode)
875ed5cc702SJan Kara {
876ed5cc702SJan Kara if (bdev_allow_write_mounted)
877ed5cc702SJan Kara return;
878ed5cc702SJan Kara
879ed5cc702SJan Kara /* Claim exclusive or shared write access. */
880ed5cc702SJan Kara if (mode & BLK_OPEN_RESTRICT_WRITES)
881ed5cc702SJan Kara bdev_block_writes(bdev);
882ed5cc702SJan Kara else if (mode & BLK_OPEN_WRITE)
883ed5cc702SJan Kara bdev->bd_writers++;
884ed5cc702SJan Kara }
885ed5cc702SJan Kara
bdev_unclaimed(const struct file * bdev_file)88622650a99SChristian Brauner static inline bool bdev_unclaimed(const struct file *bdev_file)
88722650a99SChristian Brauner {
88822650a99SChristian Brauner return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host);
88922650a99SChristian Brauner }
89022650a99SChristian Brauner
bdev_yield_write_access(struct file * bdev_file)891ab838b3fSChristian Brauner static void bdev_yield_write_access(struct file *bdev_file)
892ed5cc702SJan Kara {
893321de651SChristian Brauner struct block_device *bdev;
894321de651SChristian Brauner
895ed5cc702SJan Kara if (bdev_allow_write_mounted)
896ed5cc702SJan Kara return;
897ed5cc702SJan Kara
89822650a99SChristian Brauner if (bdev_unclaimed(bdev_file))
89922650a99SChristian Brauner return;
90022650a99SChristian Brauner
901321de651SChristian Brauner bdev = file_bdev(bdev_file);
902ddd65e19SChristian Brauner
903ddd65e19SChristian Brauner if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED)
904ed5cc702SJan Kara bdev_unblock_writes(bdev);
905ddd65e19SChristian Brauner else if (bdev_file->f_mode & FMODE_WRITE)
906ed5cc702SJan Kara bdev->bd_writers--;
907ed5cc702SJan Kara }
908ed5cc702SJan Kara
/**
 * bdev_open - open a block device
 * @bdev: block device to open
 * @mode: open mode (BLK_OPEN_*)
 * @holder: exclusive holder identifier
 * @hops: holder operations
 * @bdev_file: file for the block device
 *
 * Open the block device. If @holder is not %NULL, the block device is opened
 * with exclusive access. Exclusive opens may nest for the same @holder.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * zero on success, -errno on failure.
 */
int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
	      const struct blk_holder_ops *hops, struct file *bdev_file)
{
	bool unblock_events = true;
	struct gendisk *disk = bdev->bd_disk;
	int ret;

	if (holder) {
		mode |= BLK_OPEN_EXCL;
		ret = bd_prepare_to_claim(bdev, holder, hops);
		if (ret)
			return ret;
	} else {
		if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL))
			return -EIO;
	}

	disk_block_events(disk);

	mutex_lock(&disk->open_mutex);
	ret = -ENXIO;
	if (!disk_live(disk))
		goto abort_claiming;
	if (!try_module_get(disk->fops->owner))
		goto abort_claiming;
	ret = -EBUSY;
	if (!bdev_may_open(bdev, mode))
		goto put_module;
	if (bdev_is_partition(bdev))
		ret = blkdev_get_part(bdev, mode);
	else
		ret = blkdev_get_whole(bdev, mode);
	if (ret)
		goto put_module;
	bdev_claim_write_access(bdev, mode);
	if (holder) {
		bd_finish_claiming(bdev, holder, hops);

		/*
		 * Block event polling for write claims if requested. Any write
		 * holder makes the write_holder state stick until all are
		 * released. This is good enough and tracking individual
		 * writeable reference is too fragile given the way @mode is
		 * used in blkdev_get/put().
		 */
		if ((mode & BLK_OPEN_WRITE) &&
		    !bdev_test_flag(bdev, BD_WRITE_HOLDER) &&
		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
			bdev_set_flag(bdev, BD_WRITE_HOLDER);
			unblock_events = false;
		}
	}
	mutex_unlock(&disk->open_mutex);

	if (unblock_events)
		disk_unblock_events(disk);

	bdev_file->f_flags |= O_LARGEFILE;
	bdev_file->f_mode |= FMODE_CAN_ODIRECT;
	if (bdev_nowait(bdev))
		bdev_file->f_mode |= FMODE_NOWAIT;
	if (mode & BLK_OPEN_RESTRICT_WRITES)
		bdev_file->f_mode |= FMODE_WRITE_RESTRICTED;
	bdev_file->f_mapping = bdev->bd_mapping;
	bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
	bdev_file->private_data = holder;

	return 0;
put_module:
	module_put(disk->fops->owner);
abort_claiming:
	if (holder)
		bd_abort_claiming(bdev, holder);
	mutex_unlock(&disk->open_mutex);
	disk_unblock_events(disk);
	return ret;
}

/*
 * If BLK_OPEN_WRITE_IOCTL is set then this is a historical quirk
 * associated with the floppy driver where it has allowed ioctls if the
 * file was opened for writing, but does not allow reads or writes.
 * Make sure that this quirk is reflected in @f_flags.
 *
 * It can also happen if a block device is opened as O_RDWR | O_WRONLY.
 */
static unsigned blk_to_file_flags(blk_mode_t mode)
{
	unsigned int flags = 0;

	if ((mode & (BLK_OPEN_READ | BLK_OPEN_WRITE)) ==
	    (BLK_OPEN_READ | BLK_OPEN_WRITE))
		flags |= O_RDWR;
	else if (mode & BLK_OPEN_WRITE_IOCTL)
		flags |= O_RDWR | O_WRONLY;
	else if (mode & BLK_OPEN_WRITE)
		flags |= O_WRONLY;
	else if (mode & BLK_OPEN_READ)
		flags |= O_RDONLY; /* homeopathic, because O_RDONLY is 0 */
	else
		WARN_ON_ONCE(true);

	if (mode & BLK_OPEN_NDELAY)
		flags |= O_NDELAY;

	return flags;
}

1034f3a60882SChristian Brauner struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
1035f3a60882SChristian Brauner const struct blk_holder_ops *hops)
1036f3a60882SChristian Brauner {
1037f3a60882SChristian Brauner struct file *bdev_file;
1038a56aefcaSChristian Brauner struct block_device *bdev;
1039f3a60882SChristian Brauner unsigned int flags;
1040a56aefcaSChristian Brauner int ret;
1041f3a60882SChristian Brauner
1042a56aefcaSChristian Brauner ret = bdev_permission(dev, mode, holder);
1043a56aefcaSChristian Brauner if (ret)
1044a56aefcaSChristian Brauner return ERR_PTR(ret);
1045a56aefcaSChristian Brauner
1046*5f33b522SChristoph Hellwig bdev = blkdev_get_no_open(dev, true);
1047a56aefcaSChristian Brauner if (!bdev)
1048a56aefcaSChristian Brauner return ERR_PTR(-ENXIO);
1049f3a60882SChristian Brauner
1050f3a60882SChristian Brauner flags = blk_to_file_flags(mode);
1051df65f166SAl Viro bdev_file = alloc_file_pseudo_noaccount(BD_INODE(bdev),
1052f3a60882SChristian Brauner blockdev_mnt, "", flags | O_LARGEFILE, &def_blk_fops);
1053f3a60882SChristian Brauner if (IS_ERR(bdev_file)) {
1054a56aefcaSChristian Brauner blkdev_put_no_open(bdev);
1055f3a60882SChristian Brauner return bdev_file;
1056f3a60882SChristian Brauner }
1057df65f166SAl Viro ihold(BD_INODE(bdev));
1058f3a60882SChristian Brauner
1059a56aefcaSChristian Brauner ret = bdev_open(bdev, mode, holder, hops, bdev_file);
1060a56aefcaSChristian Brauner if (ret) {
1061ab838b3fSChristian Brauner /* We failed to open the block device. Let ->release() know. */
1062ab838b3fSChristian Brauner bdev_file->private_data = ERR_PTR(ret);
1063a56aefcaSChristian Brauner fput(bdev_file);
1064a56aefcaSChristian Brauner return ERR_PTR(ret);
1065a56aefcaSChristian Brauner }
1066f3a60882SChristian Brauner return bdev_file;
1067f3a60882SChristian Brauner }
1068f3a60882SChristian Brauner EXPORT_SYMBOL(bdev_file_open_by_dev);
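
/*
 * Illustrative sketch (not part of this file): a typical in-kernel consumer
 * that already knows the dev_t opens the device exclusively and releases it
 * with bdev_fput(). example_open_by_dev() and its holder cookie are
 * hypothetical; error handling follows the ERR_PTR() convention above.
 */
static int __maybe_unused example_open_by_dev(dev_t dev, void *holder)
{
	struct file *bdev_file;

	bdev_file = bdev_file_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
					  holder, NULL);
	if (IS_ERR(bdev_file))
		return PTR_ERR(bdev_file);

	/* ... submit I/O against file_bdev(bdev_file) ... */

	bdev_fput(bdev_file);
	return 0;
}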
1069f3a60882SChristian Brauner
1070f3a60882SChristian Brauner struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
1071f3a60882SChristian Brauner void *holder,
1072f3a60882SChristian Brauner const struct blk_holder_ops *hops)
1073f3a60882SChristian Brauner {
10747c09a4edSChristian Brauner struct file *file;
1075f3a60882SChristian Brauner dev_t dev;
1076f3a60882SChristian Brauner int error;
1077f3a60882SChristian Brauner
1078f3a60882SChristian Brauner error = lookup_bdev(path, &dev);
1079f3a60882SChristian Brauner if (error)
1080f3a60882SChristian Brauner return ERR_PTR(error);
1081f3a60882SChristian Brauner
10827c09a4edSChristian Brauner file = bdev_file_open_by_dev(dev, mode, holder, hops);
10837c09a4edSChristian Brauner if (!IS_ERR(file) && (mode & BLK_OPEN_WRITE)) {
10847c09a4edSChristian Brauner if (bdev_read_only(file_bdev(file))) {
10857c09a4edSChristian Brauner fput(file);
10867c09a4edSChristian Brauner file = ERR_PTR(-EACCES);
1087f3a60882SChristian Brauner }
1088f3a60882SChristian Brauner }
1089f3a60882SChristian Brauner
10907c09a4edSChristian Brauner return file;
1091f3a60882SChristian Brauner }
1092f3a60882SChristian Brauner EXPORT_SYMBOL(bdev_file_open_by_path);
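
/*
 * Illustrative sketch: the path-based variant as a filesystem might use it
 * at mount time; "/dev/vdb" and example_open_by_path() are hypothetical.
 * Note the -EACCES above when a read-only device is opened with
 * BLK_OPEN_WRITE.
 */
static struct file * __maybe_unused example_open_by_path(void *holder)
{
	return bdev_file_open_by_path("/dev/vdb",
			BLK_OPEN_READ | BLK_OPEN_WRITE, holder, NULL);
}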
1093f3a60882SChristian Brauner
109422650a99SChristian Brauner static inline void bd_yield_claim(struct file *bdev_file)
109522650a99SChristian Brauner {
109622650a99SChristian Brauner struct block_device *bdev = file_bdev(bdev_file);
109722650a99SChristian Brauner void *holder = bdev_file->private_data;
109822650a99SChristian Brauner
109922650a99SChristian Brauner lockdep_assert_held(&bdev->bd_disk->open_mutex);
110022650a99SChristian Brauner
110122650a99SChristian Brauner if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder)))
110222650a99SChristian Brauner return;
110322650a99SChristian Brauner
110422650a99SChristian Brauner if (!bdev_unclaimed(bdev_file))
110522650a99SChristian Brauner bd_end_claim(bdev, holder);
110622650a99SChristian Brauner }
110722650a99SChristian Brauner
11087c09a4edSChristian Brauner void bdev_release(struct file *bdev_file)
11090dca4462SChristoph Hellwig {
11107c09a4edSChristian Brauner struct block_device *bdev = file_bdev(bdev_file);
1111ab838b3fSChristian Brauner void *holder = bdev_file->private_data;
11120dca4462SChristoph Hellwig struct gendisk *disk = bdev->bd_disk;
11130dca4462SChristoph Hellwig
1114ab838b3fSChristian Brauner /* We failed to open that block device. */
1115ab838b3fSChristian Brauner if (IS_ERR(holder))
1116ab838b3fSChristian Brauner goto put_no_open;
1117ab838b3fSChristian Brauner
11180dca4462SChristoph Hellwig /*
11190dca4462SChristoph Hellwig * Sync early if it looks like we're the last one. If someone else
11200dca4462SChristoph Hellwig * opens the block device between now and the decrement of bd_openers
11210dca4462SChristoph Hellwig * then we did a sync that we didn't need to, but that's not the end
11220dca4462SChristoph Hellwig * of the world and we want to avoid long (could be several minutes)
11230dca4462SChristoph Hellwig * syncs while holding the mutex.
11240dca4462SChristoph Hellwig */
11259acf381fSChristoph Hellwig if (atomic_read(&bdev->bd_openers) == 1)
11260dca4462SChristoph Hellwig sync_blockdev(bdev);
11270dca4462SChristoph Hellwig
11280dca4462SChristoph Hellwig mutex_lock(&disk->open_mutex);
1129ab838b3fSChristian Brauner bdev_yield_write_access(bdev_file);
1130ed5cc702SJan Kara
1131ab838b3fSChristian Brauner if (holder)
113222650a99SChristian Brauner bd_yield_claim(bdev_file);
11330dca4462SChristoph Hellwig
11340dca4462SChristoph Hellwig /*
11350dca4462SChristoph Hellwig * Trigger event checking and tell drivers to flush MEDIA_CHANGE
11360dca4462SChristoph Hellwig * event. This is to ensure detection of media removal commanded
11370dca4462SChristoph Hellwig * from userland - e.g. eject(1).
11380dca4462SChristoph Hellwig */
11390dca4462SChristoph Hellwig disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
11400dca4462SChristoph Hellwig
11410dca4462SChristoph Hellwig if (bdev_is_partition(bdev))
1142ae220766SChristoph Hellwig blkdev_put_part(bdev);
11430dca4462SChristoph Hellwig else
1144ae220766SChristoph Hellwig blkdev_put_whole(bdev);
11450dca4462SChristoph Hellwig mutex_unlock(&disk->open_mutex);
11460dca4462SChristoph Hellwig
1147efcf5932SMing Lei module_put(disk->fops->owner);
1148ab838b3fSChristian Brauner put_no_open:
11490dca4462SChristoph Hellwig blkdev_put_no_open(bdev);
1150e719b4d1SJan Kara }
1151e719b4d1SJan Kara
11520dca4462SChristoph Hellwig /**
115322650a99SChristian Brauner * bdev_fput - yield claim to the block device and put the file
115422650a99SChristian Brauner * @bdev_file: open block device
115522650a99SChristian Brauner *
115622650a99SChristian Brauner * Yield the claim on the block device and put the file. Ensure that the
115722650a99SChristian Brauner * block device can be reclaimed before the file is closed, which is a
115822650a99SChristian Brauner * deferred operation.
115922650a99SChristian Brauner */
116022650a99SChristian Brauner void bdev_fput(struct file *bdev_file)
116122650a99SChristian Brauner {
116222650a99SChristian Brauner if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops))
116322650a99SChristian Brauner return;
116422650a99SChristian Brauner
116522650a99SChristian Brauner if (bdev_file->private_data) {
116622650a99SChristian Brauner struct block_device *bdev = file_bdev(bdev_file);
116722650a99SChristian Brauner struct gendisk *disk = bdev->bd_disk;
116822650a99SChristian Brauner
116922650a99SChristian Brauner mutex_lock(&disk->open_mutex);
117022650a99SChristian Brauner bdev_yield_write_access(bdev_file);
117122650a99SChristian Brauner bd_yield_claim(bdev_file);
117222650a99SChristian Brauner /*
117322650a99SChristian Brauner * Tell release that we already gave up our hold on the
117422650a99SChristian Brauner * device and, if write restrictions are in effect, that
117522650a99SChristian Brauner * we already gave up write access to the device.
117622650a99SChristian Brauner */
117722650a99SChristian Brauner bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host);
117822650a99SChristian Brauner mutex_unlock(&disk->open_mutex);
117922650a99SChristian Brauner }
118022650a99SChristian Brauner
118122650a99SChristian Brauner fput(bdev_file);
118222650a99SChristian Brauner }
118322650a99SChristian Brauner EXPORT_SYMBOL(bdev_fput);
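
/*
 * Illustrative note: a claiming opener pairs the open with bdev_fput()
 * rather than plain fput(), so the claim is dropped synchronously under
 * open_mutex and a subsequent claimant does not race the deferred
 * __fput() work, e.g. (hypothetical unmount path):
 *
 *	bdev_fput(sb->s_bdev_file);
 */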
118422650a99SChristian Brauner
118522650a99SChristian Brauner /**
11860ba4566cSMatthew Wilcox (Oracle) * lookup_bdev() - Look up a struct block_device by name.
11870ba4566cSMatthew Wilcox (Oracle) * @pathname: Name of the block device in the filesystem.
11880ba4566cSMatthew Wilcox (Oracle) * @dev: Pointer to the block device's dev_t, if found.
11890dca4462SChristoph Hellwig *
1190057178cfSJackie Liu * Look up the block device's dev_t at @pathname in the current
11910ba4566cSMatthew Wilcox (Oracle) * namespace if possible and return it in @dev.
1192057178cfSJackie Liu *
11930ba4566cSMatthew Wilcox (Oracle) * Context: May sleep.
11940ba4566cSMatthew Wilcox (Oracle) * Return: 0 on success, negative errno otherwise.
11950dca4462SChristoph Hellwig */
11960dca4462SChristoph Hellwig int lookup_bdev(const char *pathname, dev_t *dev)
11970dca4462SChristoph Hellwig {
11980dca4462SChristoph Hellwig struct inode *inode;
11990dca4462SChristoph Hellwig struct path path;
12000dca4462SChristoph Hellwig int error;
12010dca4462SChristoph Hellwig
12020dca4462SChristoph Hellwig if (!pathname || !*pathname)
12030dca4462SChristoph Hellwig return -EINVAL;
12040dca4462SChristoph Hellwig
12050dca4462SChristoph Hellwig error = kern_path(pathname, LOOKUP_FOLLOW, &path);
12060dca4462SChristoph Hellwig if (error)
12070dca4462SChristoph Hellwig return error;
12080dca4462SChristoph Hellwig
12090dca4462SChristoph Hellwig inode = d_backing_inode(path.dentry);
12100dca4462SChristoph Hellwig error = -ENOTBLK;
12110dca4462SChristoph Hellwig if (!S_ISBLK(inode->i_mode))
12120dca4462SChristoph Hellwig goto out_path_put;
12130dca4462SChristoph Hellwig error = -EACCES;
12140dca4462SChristoph Hellwig if (!may_open_dev(&path))
12150dca4462SChristoph Hellwig goto out_path_put;
12160dca4462SChristoph Hellwig
12170dca4462SChristoph Hellwig *dev = inode->i_rdev;
12180dca4462SChristoph Hellwig error = 0;
12190dca4462SChristoph Hellwig out_path_put:
12200dca4462SChristoph Hellwig path_put(&path);
12210dca4462SChristoph Hellwig return error;
12220dca4462SChristoph Hellwig }
12230dca4462SChristoph Hellwig EXPORT_SYMBOL(lookup_bdev);
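
/*
 * Illustrative sketch: resolving a device node path to a dev_t; the
 * example_resolve() wrapper is hypothetical.
 */
static int __maybe_unused example_resolve(const char *path)
{
	dev_t dev;
	int error = lookup_bdev(path, &dev);

	if (error)
		return error;
	pr_info("%s is device %u:%u\n", path, MAJOR(dev), MINOR(dev));
	return 0;
}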
12240dca4462SChristoph Hellwig
1225560e20e4SChristoph Hellwig /**
1226560e20e4SChristoph Hellwig * bdev_mark_dead - mark a block device as dead
1227560e20e4SChristoph Hellwig * @bdev: block device to operate on
1228560e20e4SChristoph Hellwig * @surprise: indicate a surprise removal
1229560e20e4SChristoph Hellwig *
1230560e20e4SChristoph Hellwig * Tell the file system that this device or media is dead. If @surprise is set
1231560e20e4SChristoph Hellwig * to %true the device or media is already gone, if not we are preparing for an
1232560e20e4SChristoph Hellwig * orderly removal.
1233560e20e4SChristoph Hellwig *
1234d8530de5SChristoph Hellwig * This calls into the file system, which then typically syncs out all dirty
1235d8530de5SChristoph Hellwig * data, writes back inodes and invalidates any cached data in the inodes on
1236d8530de5SChristoph Hellwig * the file system. In addition we also invalidate the block device mapping.
1237560e20e4SChristoph Hellwig */
1238560e20e4SChristoph Hellwig void bdev_mark_dead(struct block_device *bdev, bool surprise)
12390dca4462SChristoph Hellwig {
1240d8530de5SChristoph Hellwig mutex_lock(&bdev->bd_holder_lock);
1241d8530de5SChristoph Hellwig if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
1242d8530de5SChristoph Hellwig bdev->bd_holder_ops->mark_dead(bdev, surprise);
1243fd146410SJan Kara else {
1244d8530de5SChristoph Hellwig mutex_unlock(&bdev->bd_holder_lock);
1245fd146410SJan Kara sync_blockdev(bdev);
1246fd146410SJan Kara }
1247d8530de5SChristoph Hellwig
12480dca4462SChristoph Hellwig invalidate_bdev(bdev);
12490dca4462SChristoph Hellwig }
1250560e20e4SChristoph Hellwig /*
12516e57236eSChristoph Hellwig * New drivers should not use this directly. There are some drivers however
12526e57236eSChristoph Hellwig * that need this for historical reasons. For example, the DASD driver has
12536e57236eSChristoph Hellwig * historically had a shutdown to offline mode that doesn't actually remove the
12546e57236eSChristoph Hellwig * gendisk but otherwise looks a lot like a safe device removal.
1255560e20e4SChristoph Hellwig */
1256560e20e4SChristoph Hellwig EXPORT_SYMBOL_GPL(bdev_mark_dead);
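
/*
 * Illustrative sketch: a holder receives the mark_dead notification above
 * by passing blk_holder_ops when claiming the device; example_mark_dead()
 * and example_holder_ops are hypothetical.
 */
static void example_mark_dead(struct block_device *bdev, bool surprise)
{
	/* quiesce the consumer; the device or media is gone or going away */
}

static const struct blk_holder_ops __maybe_unused example_holder_ops = {
	.mark_dead	= example_mark_dead,
};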
12570dca4462SChristoph Hellwig
12581e03a36bSChristoph Hellwig void sync_bdevs(bool wait)
12590dca4462SChristoph Hellwig {
12600dca4462SChristoph Hellwig struct inode *inode, *old_inode = NULL;
12610dca4462SChristoph Hellwig
12620dca4462SChristoph Hellwig spin_lock(&blockdev_superblock->s_inode_list_lock);
12630dca4462SChristoph Hellwig list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
12640dca4462SChristoph Hellwig struct address_space *mapping = inode->i_mapping;
12650dca4462SChristoph Hellwig struct block_device *bdev;
12660dca4462SChristoph Hellwig
12670dca4462SChristoph Hellwig spin_lock(&inode->i_lock);
12680dca4462SChristoph Hellwig if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
12690dca4462SChristoph Hellwig mapping->nrpages == 0) {
12700dca4462SChristoph Hellwig spin_unlock(&inode->i_lock);
12710dca4462SChristoph Hellwig continue;
12720dca4462SChristoph Hellwig }
12730dca4462SChristoph Hellwig __iget(inode);
12740dca4462SChristoph Hellwig spin_unlock(&inode->i_lock);
12750dca4462SChristoph Hellwig spin_unlock(&blockdev_superblock->s_inode_list_lock);
12760dca4462SChristoph Hellwig /*
12770dca4462SChristoph Hellwig * We hold a reference to 'inode' so it couldn't have been
12780dca4462SChristoph Hellwig * removed from the s_inodes list while we dropped the
12790dca4462SChristoph Hellwig * s_inode_list_lock. We cannot iput the inode now as we can
12800dca4462SChristoph Hellwig * be holding the last reference and we cannot iput it under
12810dca4462SChristoph Hellwig * s_inode_list_lock. So we keep the reference and iput it
12820dca4462SChristoph Hellwig * later.
12830dca4462SChristoph Hellwig */
12840dca4462SChristoph Hellwig iput(old_inode);
12850dca4462SChristoph Hellwig old_inode = inode;
12860dca4462SChristoph Hellwig bdev = I_BDEV(inode);
12870dca4462SChristoph Hellwig
12880dca4462SChristoph Hellwig mutex_lock(&bdev->bd_disk->open_mutex);
12899acf381fSChristoph Hellwig if (!atomic_read(&bdev->bd_openers)) {
12901e03a36bSChristoph Hellwig ; /* skip */
12911e03a36bSChristoph Hellwig } else if (wait) {
12921e03a36bSChristoph Hellwig /*
12931e03a36bSChristoph Hellwig * We keep the error status of each individual mapping so
12941e03a36bSChristoph Hellwig * that applications can catch the writeback error using
12951e03a36bSChristoph Hellwig * fsync(2). See filemap_fdatawait_keep_errors() for
12961e03a36bSChristoph Hellwig * details.
12971e03a36bSChristoph Hellwig */
12981e03a36bSChristoph Hellwig filemap_fdatawait_keep_errors(inode->i_mapping);
12991e03a36bSChristoph Hellwig } else {
13001e03a36bSChristoph Hellwig filemap_fdatawrite(inode->i_mapping);
13011e03a36bSChristoph Hellwig }
13020dca4462SChristoph Hellwig mutex_unlock(&bdev->bd_disk->open_mutex);
13030dca4462SChristoph Hellwig
13040dca4462SChristoph Hellwig spin_lock(&blockdev_superblock->s_inode_list_lock);
13050dca4462SChristoph Hellwig }
13060dca4462SChristoph Hellwig spin_unlock(&blockdev_superblock->s_inode_list_lock);
13070dca4462SChristoph Hellwig iput(old_inode);
13080dca4462SChristoph Hellwig }
13092d985f8cSEric Biggers
13102d985f8cSEric Biggers /*
13119abcfbd2SPrasad Singamsetty * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
13122d985f8cSEric Biggers */
13133e781988SLinus Torvalds void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask)
13149abcfbd2SPrasad Singamsetty {
13152d985f8cSEric Biggers struct block_device *bdev;
13162d985f8cSEric Biggers
13172d985f8cSEric Biggers /*
13189abcfbd2SPrasad Singamsetty * Note that d_backing_inode() returns the block device node inode, not
1319d13b7090SChristoph Hellwig * the block device's internal inode. Therefore it is *not* valid to
1320d13b7090SChristoph Hellwig * use I_BDEV() here; the block device has to be looked up by i_rdev
1321d13b7090SChristoph Hellwig * instead.
13229abcfbd2SPrasad Singamsetty */
13239abcfbd2SPrasad Singamsetty bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false);
1324*5f33b522SChristoph Hellwig if (!bdev)
13252d985f8cSEric Biggers return;
13262d985f8cSEric Biggers
13272d985f8cSEric Biggers if (request_mask & STATX_DIOALIGN) {
13289abcfbd2SPrasad Singamsetty stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
13292d985f8cSEric Biggers stat->dio_offset_align = bdev_logical_block_size(bdev);
13302d985f8cSEric Biggers stat->result_mask |= STATX_DIOALIGN;
13312d985f8cSEric Biggers }
13329abcfbd2SPrasad Singamsetty
13339abcfbd2SPrasad Singamsetty if (request_mask & STATX_WRITE_ATOMIC && bdev_can_atomic_write(bdev)) {
13349abcfbd2SPrasad Singamsetty struct request_queue *bd_queue = bdev->bd_queue;
13359abcfbd2SPrasad Singamsetty
13369abcfbd2SPrasad Singamsetty generic_fill_statx_atomic_writes(stat,
13379abcfbd2SPrasad Singamsetty queue_atomic_write_unit_min_bytes(bd_queue),
13389abcfbd2SPrasad Singamsetty queue_atomic_write_unit_max_bytes(bd_queue));
13399abcfbd2SPrasad Singamsetty }
13409abcfbd2SPrasad Singamsetty
13412d985f8cSEric Biggers stat->blksize = bdev_io_min(bdev);
1342425fbcd6SLuis Chamberlain
1343425fbcd6SLuis Chamberlain blkdev_put_no_open(bdev);
13442d985f8cSEric Biggers }
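
/*
 * Illustrative userspace counterpart (sketch, not kernel code): the fields
 * filled in above surface through statx(2), e.g.:
 *
 *	struct statx stx;
 *
 *	if (!statx(AT_FDCWD, "/dev/nvme0n1", 0, STATX_DIOALIGN, &stx) &&
 *	    (stx.stx_mask & STATX_DIOALIGN))
 *		printf("dio mem align %u, offset align %u\n",
 *		       stx.stx_dio_mem_align, stx.stx_dio_offset_align);
 */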
13452d985f8cSEric Biggers
1346ed5cc702SJan Kara bool disk_live(struct gendisk *disk)
1347186ddac2SYu Kuai {
1348186ddac2SYu Kuai return !inode_unhashed(BD_INODE(disk->part0));
1349df65f166SAl Viro }
1350186ddac2SYu Kuai EXPORT_SYMBOL_GPL(disk_live);
1351186ddac2SYu Kuai
1352186ddac2SYu Kuai unsigned int block_size(struct block_device *bdev)
1353186ddac2SYu Kuai {
1354186ddac2SYu Kuai return 1 << BD_INODE(bdev)->i_blkbits;
1355df65f166SAl Viro }
1356186ddac2SYu Kuai EXPORT_SYMBOL_GPL(block_size);
1357186ddac2SYu Kuai
1358186ddac2SYu Kuai static int __init setup_bdev_allow_write_mounted(char *str)
1359ed5cc702SJan Kara {
1360ed5cc702SJan Kara if (kstrtobool(str, &bdev_allow_write_mounted))
1361ed5cc702SJan Kara pr_warn("Invalid option string for bdev_allow_write_mounted: '%s'\n", str);
1363ed5cc702SJan Kara return 1;
1364ed5cc702SJan Kara }
1365ed5cc702SJan Kara __setup("bdev_allow_write_mounted=", setup_bdev_allow_write_mounted);
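
/*
 * Illustrative note: booting with "bdev_allow_write_mounted=0" on the
 * kernel command line forbids writes to mounted block devices regardless
 * of CONFIG_BLK_DEV_WRITE_MOUNTED; kstrtobool() also accepts y/n/on/off.
 */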
1366ed5cc702SJan Kara