xref: /linux-6.15/include/linux/dax.h (revision bbb03029)
1 #ifndef _LINUX_DAX_H
2 #define _LINUX_DAX_H
3 
4 #include <linux/fs.h>
5 #include <linux/mm.h>
6 #include <linux/radix-tree.h>
7 #include <asm/pgtable.h>
8 
9 struct iomap_ops;
10 struct dax_device;
11 struct dax_operations {
12 	/*
13 	 * direct_access: translate a device-relative
14 	 * logical-page-offset into an absolute physical pfn. Return the
15 	 * number of pages available for DAX at that pfn.
16 	 */
17 	long (*direct_access)(struct dax_device *, pgoff_t, long,
18 			void **, pfn_t *);
19 	/* copy_from_iter: required operation for fs-dax direct-i/o */
20 	size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
21 			struct iov_iter *);
22 	/* flush: optional driver-specific cache management after writes */
23 	void (*flush)(struct dax_device *, pgoff_t, void *, size_t);
24 };
25 
26 extern struct attribute_group dax_attribute_group;
27 
28 #if IS_ENABLED(CONFIG_DAX)
/*
 * Real implementations, declared only when CONFIG_DAX is enabled.
 * dax_get_by_host() looks a dax_device up by its host string.
 * NOTE(review): presumably put_dax() drops the reference taken by
 * dax_get_by_host() -- confirm against the implementation.
 */
29 struct dax_device *dax_get_by_host(const char *host);
30 void put_dax(struct dax_device *dax_dev);
31 #else
/* CONFIG_DAX disabled: lookup stub that ignores @host and returns NULL. */
32 static inline struct dax_device *dax_get_by_host(const char *host)
33 {
34 	return NULL;
35 }
36 
/* CONFIG_DAX disabled: empty stub, @dax_dev is ignored. */
37 static inline void put_dax(struct dax_device *dax_dev)
38 {
39 }
40 #endif
41 
42 int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
43 #if IS_ENABLED(CONFIG_FS_DAX)
/*
 * CONFIG_FS_DAX enabled: the check itself lives out of line in
 * __bdev_dax_supported(); bdev_dax_supported() forwards @sb and @blocksize
 * to it unchanged and returns its result.
 */
44 int __bdev_dax_supported(struct super_block *sb, int blocksize);
45 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
46 {
47 	return __bdev_dax_supported(sb, blocksize);
48 }
49 
/* CONFIG_FS_DAX enabled: plain pass-through to dax_get_by_host(). */
50 static inline struct dax_device *fs_dax_get_by_host(const char *host)
51 {
52 	return dax_get_by_host(host);
53 }
54 
/* CONFIG_FS_DAX enabled: plain pass-through to put_dax(). */
55 static inline void fs_put_dax(struct dax_device *dax_dev)
56 {
57 	put_dax(dax_dev);
58 }
59 
60 #else
/*
 * CONFIG_FS_DAX disabled: both arguments are ignored and the stub always
 * returns -EOPNOTSUPP.
 */
61 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
62 {
63 	return -EOPNOTSUPP;
64 }
65 
/* CONFIG_FS_DAX disabled: lookup stub that ignores @host and returns NULL. */
66 static inline struct dax_device *fs_dax_get_by_host(const char *host)
67 {
68 	return NULL;
69 }
70 
/* CONFIG_FS_DAX disabled: empty stub, @dax_dev is ignored. */
71 static inline void fs_put_dax(struct dax_device *dax_dev)
72 {
73 }
74 #endif
75 
/*
 * Core dax_device interface.  Everything below is declared unconditionally
 * and implemented outside this header.
 */

/*
 * NOTE(review): the int returned by dax_read_lock() is presumably the @id
 * that must be handed back to dax_read_unlock() -- confirm.
 */
76 int dax_read_lock(void);
77 void dax_read_unlock(int id);
/*
 * alloc_dax() associates caller-private data, a host string and a
 * struct dax_operations table with a new dax_device; dax_get_private()
 * returns a void pointer for a given device.
 * NOTE(review): presumably the same @private pointer -- confirm.
 */
78 struct dax_device *alloc_dax(void *private, const char *host,
79 		const struct dax_operations *ops);
80 bool dax_alive(struct dax_device *dax_dev);
81 void kill_dax(struct dax_device *dax_dev);
82 void *dax_get_private(struct dax_device *dax_dev);
/*
 * The next three take exactly the argument lists of the direct_access,
 * copy_from_iter and flush callbacks in struct dax_operations.
 * NOTE(review): presumably they dispatch to those callbacks -- confirm.
 */
83 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
84 		void **kaddr, pfn_t *pfn);
85 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
86 		size_t bytes, struct iov_iter *i);
87 void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
88 		size_t size);
/* Setter/getter pair for a per-device boolean write-cache setting. */
89 void dax_write_cache(struct dax_device *dax_dev, bool wc);
90 bool dax_write_cache_enabled(struct dax_device *dax_dev);
91 
92 /*
93  * We use lowest available bit in exceptional entry for locking, one bit for
94  * the entry size (PMD) and two more to tell us if the entry is a huge zero
95  * page (HZP) or an empty entry that is just used for locking.  In total four
96  * special bits.
97  *
98  * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the HZP and
99  * EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
100  * block allocation.
101  */
/* Payload starts above the four special bits described above. */
102 #define RADIX_DAX_SHIFT	(RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
/* Lock bit: the lowest available bit of the exceptional entry. */
103 #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
/* Entry-size bit: set for a PMD-sized entry, clear for PAGE_SIZE. */
104 #define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
/* Entry is a huge zero page (HZP). */
105 #define RADIX_DAX_HZP (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
/* Empty entry that is just used for locking. */
106 #define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
107 
108 static inline unsigned long dax_radix_sector(void *entry)
109 {
110 	return (unsigned long)entry >> RADIX_DAX_SHIFT;
111 }
112 
113 static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
114 {
115 	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
116 			((unsigned long)sector << RADIX_DAX_SHIFT) |
117 			RADIX_DAX_ENTRY_LOCK);
118 }
119 
/*
 * fs-dax entry points implemented outside this header.  dax_iomap_rw() and
 * dax_iomap_fault() both take a caller-supplied struct iomap_ops.
 */
120 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
121 		const struct iomap_ops *ops);
122 int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
123 		    const struct iomap_ops *ops);
/*
 * Helpers that operate on the entry at @index of @mapping.
 * NOTE(review): return-value conventions are not visible here -- confirm
 * against the definitions before relying on them.
 */
124 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
125 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
126 				      pgoff_t index);
127 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
128 		pgoff_t index, void *entry, bool wake_all);
129 
130 #ifdef CONFIG_FS_DAX
131 int __dax_zero_page_range(struct block_device *bdev,
132 		struct dax_device *dax_dev, sector_t sector,
133 		unsigned int offset, unsigned int length);
134 #else
135 static inline int __dax_zero_page_range(struct block_device *bdev,
136 		struct dax_device *dax_dev, sector_t sector,
137 		unsigned int offset, unsigned int length)
138 {
139 	return -ENXIO;
140 }
141 #endif
142 
/*
 * dax_radix_order() reports the order (log2 of the page count) covered by a
 * DAX radix tree entry.
 */
#ifdef CONFIG_FS_DAX_PMD
/* PMD support built in: RADIX_DAX_PMD entries span PMD_SHIFT - PAGE_SHIFT. */
static inline unsigned int dax_radix_order(void *entry)
{
	unsigned long bits = (unsigned long)entry;

	return (bits & RADIX_DAX_PMD) ? PMD_SHIFT - PAGE_SHIFT : 0;
}
#else
/* No PMD support: every entry is reported as order 0. */
static inline unsigned int dax_radix_order(void *entry)
{
	return 0;
}
#endif
/*
 * Takes the struct vm_fault describing the fault; implemented outside this
 * header.
 * NOTE(review): the name suggests a pfn_mkwrite handler for DAX mappings --
 * confirm against the definition.
 */
156 int dax_pfn_mkwrite(struct vm_fault *vmf);
157 
158 static inline bool dax_mapping(struct address_space *mapping)
159 {
160 	return mapping->host && IS_DAX(mapping->host);
161 }
162 
/*
 * Forward declaration: only a pointer to struct writeback_control is needed
 * by the prototype below.
 */
163 struct writeback_control;
/*
 * Implemented outside this header.
 * NOTE(review): presumably writes back the DAX entries of @mapping that fall
 * in the range described by @wbc, against @bdev -- confirm.
 */
164 int dax_writeback_mapping_range(struct address_space *mapping,
165 		struct block_device *bdev, struct writeback_control *wbc);
166 #endif
167