1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * include/linux/userfaultfd_k.h 4 * 5 * Copyright (C) 2015 Red Hat, Inc. 6 * 7 */ 8 9 #ifndef _LINUX_USERFAULTFD_K_H 10 #define _LINUX_USERFAULTFD_K_H 11 12 #ifdef CONFIG_USERFAULTFD 13 14 #include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */ 15 16 #include <linux/fcntl.h> 17 #include <linux/mm.h> 18 #include <linux/swap.h> 19 #include <linux/swapops.h> 20 #include <asm-generic/pgtable_uffd.h> 21 #include <linux/hugetlb_inline.h> 22 23 /* The set of all possible UFFD-related VM flags. */ 24 #define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR) 25 26 /* 27 * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining 28 * new flags, since they might collide with O_* ones. We want 29 * to re-use O_* flags that couldn't possibly have a meaning 30 * from userfaultfd, in order to leave a free define-space for 31 * shared O_* flags. 32 */ 33 #define UFFD_CLOEXEC O_CLOEXEC 34 #define UFFD_NONBLOCK O_NONBLOCK 35 36 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) 37 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) 38 39 /* 40 * Start with fault_pending_wqh and fault_wqh so they're more likely 41 * to be in the same cacheline. 42 * 43 * Locking order: 44 * fd_wqh.lock 45 * fault_pending_wqh.lock 46 * fault_wqh.lock 47 * event_wqh.lock 48 * 49 * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, 50 * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's 51 * also taken in IRQ context. 52 */ 53 struct userfaultfd_ctx { 54 /* waitqueue head for the pending (i.e. not read) userfaults */ 55 wait_queue_head_t fault_pending_wqh; 56 /* waitqueue head for the userfaults */ 57 wait_queue_head_t fault_wqh; 58 /* waitqueue head for the pseudo fd to wakeup poll/read */ 59 wait_queue_head_t fd_wqh; 60 /* waitqueue head for events */ 61 wait_queue_head_t event_wqh; 62 /* a refile sequence protected by fault_pending_wqh lock */ 63 seqcount_spinlock_t refile_seq; 64 /* pseudo fd refcounting */ 65 refcount_t refcount; 66 /* userfaultfd syscall flags */ 67 unsigned int flags; 68 /* features requested from the userspace */ 69 unsigned int features; 70 /* released */ 71 bool released; 72 /* memory mappings are changing because of non-cooperative event */ 73 atomic_t mmap_changing; 74 /* mm with one ore more vmas attached to this userfaultfd_ctx */ 75 struct mm_struct *mm; 76 }; 77 78 extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); 79 80 /* A combined operation mode + behavior flags. */ 81 typedef unsigned int __bitwise uffd_flags_t; 82 83 /* Mutually exclusive modes of operation. */ 84 enum mfill_atomic_mode { 85 MFILL_ATOMIC_COPY, 86 MFILL_ATOMIC_ZEROPAGE, 87 MFILL_ATOMIC_CONTINUE, 88 MFILL_ATOMIC_POISON, 89 NR_MFILL_ATOMIC_MODES, 90 }; 91 92 #define MFILL_ATOMIC_MODE_BITS (const_ilog2(NR_MFILL_ATOMIC_MODES - 1) + 1) 93 #define MFILL_ATOMIC_BIT(nr) BIT(MFILL_ATOMIC_MODE_BITS + (nr)) 94 #define MFILL_ATOMIC_FLAG(nr) ((__force uffd_flags_t) MFILL_ATOMIC_BIT(nr)) 95 #define MFILL_ATOMIC_MODE_MASK ((__force uffd_flags_t) (MFILL_ATOMIC_BIT(0) - 1)) 96 97 static inline bool uffd_flags_mode_is(uffd_flags_t flags, enum mfill_atomic_mode expected) 98 { 99 return (flags & MFILL_ATOMIC_MODE_MASK) == ((__force uffd_flags_t) expected); 100 } 101 102 static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_atomic_mode mode) 103 { 104 flags &= ~MFILL_ATOMIC_MODE_MASK; 105 return flags | ((__force uffd_flags_t) mode); 106 } 107 108 /* Flags controlling behavior. These behavior changes are mode-independent. */ 109 #define MFILL_ATOMIC_WP MFILL_ATOMIC_FLAG(0) 110 111 extern int mfill_atomic_install_pte(pmd_t *dst_pmd, 112 struct vm_area_struct *dst_vma, 113 unsigned long dst_addr, struct page *page, 114 bool newly_allocated, uffd_flags_t flags); 115 116 extern ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start, 117 unsigned long src_start, unsigned long len, 118 atomic_t *mmap_changing, uffd_flags_t flags); 119 extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm, 120 unsigned long dst_start, 121 unsigned long len, 122 atomic_t *mmap_changing); 123 extern ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long dst_start, 124 unsigned long len, atomic_t *mmap_changing, 125 uffd_flags_t flags); 126 extern ssize_t mfill_atomic_poison(struct mm_struct *dst_mm, unsigned long start, 127 unsigned long len, atomic_t *mmap_changing, 128 uffd_flags_t flags); 129 extern int mwriteprotect_range(struct mm_struct *dst_mm, 130 unsigned long start, unsigned long len, 131 bool enable_wp, atomic_t *mmap_changing); 132 extern long uffd_wp_range(struct vm_area_struct *vma, 133 unsigned long start, unsigned long len, bool enable_wp); 134 135 /* move_pages */ 136 void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2); 137 void double_pt_unlock(spinlock_t *ptl1, spinlock_t *ptl2); 138 ssize_t move_pages(struct userfaultfd_ctx *ctx, struct mm_struct *mm, 139 unsigned long dst_start, unsigned long src_start, 140 unsigned long len, __u64 flags); 141 int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval, 142 struct vm_area_struct *dst_vma, 143 struct vm_area_struct *src_vma, 144 unsigned long dst_addr, unsigned long src_addr); 145 146 /* mm helpers */ 147 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, 148 struct vm_userfaultfd_ctx vm_ctx) 149 { 150 return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx; 151 } 152 153 /* 154 * Never enable huge pmd sharing on some uffd registered vmas: 155 * 156 * - VM_UFFD_WP VMAs, because write protect information is per pgtable entry. 157 * 158 * - VM_UFFD_MINOR VMAs, because otherwise we would never get minor faults for 159 * VMAs which share huge pmds. (If you have two mappings to the same 160 * underlying pages, and fault in the non-UFFD-registered one with a write, 161 * with huge pmd sharing this would *also* setup the second UFFD-registered 162 * mapping, and we'd not get minor faults.) 163 */ 164 static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma) 165 { 166 return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); 167 } 168 169 /* 170 * Don't do fault around for either WP or MINOR registered uffd range. For 171 * MINOR registered range, fault around will be a total disaster and ptes can 172 * be installed without notifications; for WP it should mostly be fine as long 173 * as the fault around checks for pte_none() before the installation, however 174 * to be super safe we just forbid it. 175 */ 176 static inline bool uffd_disable_fault_around(struct vm_area_struct *vma) 177 { 178 return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); 179 } 180 181 static inline bool userfaultfd_missing(struct vm_area_struct *vma) 182 { 183 return vma->vm_flags & VM_UFFD_MISSING; 184 } 185 186 static inline bool userfaultfd_wp(struct vm_area_struct *vma) 187 { 188 return vma->vm_flags & VM_UFFD_WP; 189 } 190 191 static inline bool userfaultfd_minor(struct vm_area_struct *vma) 192 { 193 return vma->vm_flags & VM_UFFD_MINOR; 194 } 195 196 static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, 197 pte_t pte) 198 { 199 return userfaultfd_wp(vma) && pte_uffd_wp(pte); 200 } 201 202 static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, 203 pmd_t pmd) 204 { 205 return userfaultfd_wp(vma) && pmd_uffd_wp(pmd); 206 } 207 208 static inline bool userfaultfd_armed(struct vm_area_struct *vma) 209 { 210 return vma->vm_flags & __VM_UFFD_FLAGS; 211 } 212 213 static inline bool vma_can_userfault(struct vm_area_struct *vma, 214 unsigned long vm_flags, 215 bool wp_async) 216 { 217 vm_flags &= __VM_UFFD_FLAGS; 218 219 if ((vm_flags & VM_UFFD_MINOR) && 220 (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) 221 return false; 222 223 /* 224 * If wp async enabled, and WP is the only mode enabled, allow any 225 * memory type. 226 */ 227 if (wp_async && (vm_flags == VM_UFFD_WP)) 228 return true; 229 230 #ifndef CONFIG_PTE_MARKER_UFFD_WP 231 /* 232 * If user requested uffd-wp but not enabled pte markers for 233 * uffd-wp, then shmem & hugetlbfs are not supported but only 234 * anonymous. 235 */ 236 if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) 237 return false; 238 #endif 239 240 /* By default, allow any of anon|shmem|hugetlb */ 241 return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || 242 vma_is_shmem(vma); 243 } 244 245 extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); 246 extern void dup_userfaultfd_complete(struct list_head *); 247 248 extern void mremap_userfaultfd_prep(struct vm_area_struct *, 249 struct vm_userfaultfd_ctx *); 250 extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, 251 unsigned long from, unsigned long to, 252 unsigned long len); 253 254 extern bool userfaultfd_remove(struct vm_area_struct *vma, 255 unsigned long start, 256 unsigned long end); 257 258 extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, 259 unsigned long start, unsigned long end, struct list_head *uf); 260 extern void userfaultfd_unmap_complete(struct mm_struct *mm, 261 struct list_head *uf); 262 extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); 263 extern bool userfaultfd_wp_async(struct vm_area_struct *vma); 264 265 #else /* CONFIG_USERFAULTFD */ 266 267 /* mm helpers */ 268 static inline vm_fault_t handle_userfault(struct vm_fault *vmf, 269 unsigned long reason) 270 { 271 return VM_FAULT_SIGBUS; 272 } 273 274 static inline long uffd_wp_range(struct vm_area_struct *vma, 275 unsigned long start, unsigned long len, 276 bool enable_wp) 277 { 278 return false; 279 } 280 281 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, 282 struct vm_userfaultfd_ctx vm_ctx) 283 { 284 return true; 285 } 286 287 static inline bool userfaultfd_missing(struct vm_area_struct *vma) 288 { 289 return false; 290 } 291 292 static inline bool userfaultfd_wp(struct vm_area_struct *vma) 293 { 294 return false; 295 } 296 297 static inline bool userfaultfd_minor(struct vm_area_struct *vma) 298 { 299 return false; 300 } 301 302 static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, 303 pte_t pte) 304 { 305 return false; 306 } 307 308 static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, 309 pmd_t pmd) 310 { 311 return false; 312 } 313 314 315 static inline bool userfaultfd_armed(struct vm_area_struct *vma) 316 { 317 return false; 318 } 319 320 static inline int dup_userfaultfd(struct vm_area_struct *vma, 321 struct list_head *l) 322 { 323 return 0; 324 } 325 326 static inline void dup_userfaultfd_complete(struct list_head *l) 327 { 328 } 329 330 static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma, 331 struct vm_userfaultfd_ctx *ctx) 332 { 333 } 334 335 static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, 336 unsigned long from, 337 unsigned long to, 338 unsigned long len) 339 { 340 } 341 342 static inline bool userfaultfd_remove(struct vm_area_struct *vma, 343 unsigned long start, 344 unsigned long end) 345 { 346 return true; 347 } 348 349 static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, 350 unsigned long start, unsigned long end, 351 struct list_head *uf) 352 { 353 return 0; 354 } 355 356 static inline void userfaultfd_unmap_complete(struct mm_struct *mm, 357 struct list_head *uf) 358 { 359 } 360 361 static inline bool uffd_disable_fault_around(struct vm_area_struct *vma) 362 { 363 return false; 364 } 365 366 static inline bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma) 367 { 368 return false; 369 } 370 371 static inline bool userfaultfd_wp_async(struct vm_area_struct *vma) 372 { 373 return false; 374 } 375 376 #endif /* CONFIG_USERFAULTFD */ 377 378 static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) 379 { 380 /* Only wr-protect mode uses pte markers */ 381 if (!userfaultfd_wp(vma)) 382 return false; 383 384 /* File-based uffd-wp always need markers */ 385 if (!vma_is_anonymous(vma)) 386 return true; 387 388 /* 389 * Anonymous uffd-wp only needs the markers if WP_UNPOPULATED 390 * enabled (to apply markers on zero pages). 391 */ 392 return userfaultfd_wp_unpopulated(vma); 393 } 394 395 static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry) 396 { 397 #ifdef CONFIG_PTE_MARKER_UFFD_WP 398 return is_pte_marker_entry(entry) && 399 (pte_marker_get(entry) & PTE_MARKER_UFFD_WP); 400 #else 401 return false; 402 #endif 403 } 404 405 static inline bool pte_marker_uffd_wp(pte_t pte) 406 { 407 #ifdef CONFIG_PTE_MARKER_UFFD_WP 408 swp_entry_t entry; 409 410 if (!is_swap_pte(pte)) 411 return false; 412 413 entry = pte_to_swp_entry(pte); 414 415 return pte_marker_entry_uffd_wp(entry); 416 #else 417 return false; 418 #endif 419 } 420 421 /* 422 * Returns true if this is a swap pte and was uffd-wp wr-protected in either 423 * forms (pte marker or a normal swap pte), false otherwise. 424 */ 425 static inline bool pte_swp_uffd_wp_any(pte_t pte) 426 { 427 #ifdef CONFIG_PTE_MARKER_UFFD_WP 428 if (!is_swap_pte(pte)) 429 return false; 430 431 if (pte_swp_uffd_wp(pte)) 432 return true; 433 434 if (pte_marker_uffd_wp(pte)) 435 return true; 436 #endif 437 return false; 438 } 439 440 #endif /* _LINUX_USERFAULTFD_K_H */ 441