/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
#include <linux/srcu.h>

struct mmu_notifier;
struct mmu_notifier_ops;

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu_notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and it's released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
	/* all mmu notifiers registered in this mm are queued in this list */
	struct hlist_head list;
	/* to serialize the list modifications and hlist_unhashed */
	spinlock_t lock;
};

struct mmu_notifier_ops {
	/*
	 * Called either by mmu_notifier_unregister or when the mm is
	 * being destroyed by exit_mmap, always before all pages are
	 * freed. This can run concurrently with other mmu notifier
	 * methods (the ones invoked outside the mm context) and it
	 * should tear down all secondary mmu mappings and freeze the
	 * secondary mmu. If this method isn't implemented you have to
	 * be sure that nothing could possibly write to the pages
	 * through the secondary mmu by the time the last thread with
	 * tsk->mm == mm exits.
	 *
	 * As a side note: the pages freed after ->release returns could
	 * be immediately reallocated by the gart at an alias physical
	 * address with a different cache model, so if ->release isn't
	 * implemented because all _software_ driven memory accesses
	 * through the secondary mmu are terminated by the time the
	 * last thread of this mm quits, you also have to be sure that
	 * speculative _hardware_ operations can't allocate dirty
	 * cachelines in the cpu that could not be snooped and made
	 * coherent with the other read and write operations happening
	 * through the gart alias address, thus leading to memory
	 * corruption.
	 */
	void (*release)(struct mmu_notifier *mn,
			struct mm_struct *mm);

	/*
	 * clear_flush_young is called after the VM is
	 * test-and-clearing the young/accessed bitflag in the
	 * pte. This way the VM will provide proper aging to the
	 * accesses to the page through the secondary MMUs and not
	 * only to the ones through the Linux pte.
	 * Start-end is necessary in case the secondary MMU is mapping the page
	 * at a smaller granularity than the primary MMU.
	 */
	int (*clear_flush_young)(struct mmu_notifier *mn,
				 struct mm_struct *mm,
				 unsigned long start,
				 unsigned long end);

	/*
	 * clear_young is a lightweight version of clear_flush_young. Like the
	 * latter, it is supposed to test-and-clear the young/accessed bitflag
	 * in the secondary pte, but it may omit flushing the secondary tlb.
	 */
	int (*clear_young)(struct mmu_notifier *mn,
			   struct mm_struct *mm,
			   unsigned long start,
			   unsigned long end);

	/*
	 * test_young is called to check the young/accessed bitflag in
	 * the secondary pte. This is used to know if the page is
	 * frequently used without actually clearing the flag or tearing
	 * down the secondary mapping on the page.
	 */
	int (*test_young)(struct mmu_notifier *mn,
			  struct mm_struct *mm,
			  unsigned long address);
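
	/*
	 * Illustrative sketch (hypothetical driver code, not part of this
	 * header): an implementation would walk the secondary page tables
	 * for [start, end) and report the accessed state found there.
	 * struct my_mmu, my_spte_test_and_clear_young() and
	 * my_flush_tlb_range() are assumptions for the example:
	 *
	 *	static int my_clear_flush_young(struct mmu_notifier *mn,
	 *					struct mm_struct *mm,
	 *					unsigned long start,
	 *					unsigned long end)
	 *	{
	 *		struct my_mmu *mmu = container_of(mn, struct my_mmu, mn);
	 *		int young;
	 *
	 *		young = my_spte_test_and_clear_young(mmu, start, end);
	 *		if (young)
	 *			my_flush_tlb_range(mmu, start, end);
	 *		return young;
	 *	}
	 *
	 * A ->clear_young() implementation would look the same but could skip
	 * the secondary TLB flush; ->test_young() would only test the bit
	 * without clearing it.
	 */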

	/*
	 * change_pte is called in cases where the pte mapping to a page is
	 * changed: for example, when ksm remaps the pte to point to a new
	 * shared page.
	 */
	void (*change_pte)(struct mmu_notifier *mn,
			   struct mm_struct *mm,
			   unsigned long address,
			   pte_t pte);

	/*
	 * invalidate_range_start() and invalidate_range_end() must be
	 * paired and are called only when the mmap_sem and/or the
	 * locks protecting the reverse maps are held. If the subsystem
	 * can't guarantee that no additional references are taken to
	 * the pages in the range, it has to implement the
	 * invalidate_range() notifier to remove any references taken
	 * after invalidate_range_start().
	 *
	 * Invalidation of multiple concurrent ranges may be
	 * optionally permitted by the driver. Either way the
	 * establishment of sptes is forbidden in the range passed to
	 * invalidate_range_start/end for the whole duration of the
	 * invalidate_range_start/end critical section.
	 *
	 * invalidate_range_start() is called when all pages in the
	 * range are still mapped and have at least a refcount of one.
	 *
	 * invalidate_range_end() is called when all pages in the
	 * range have been unmapped and the pages have been freed by
	 * the VM.
	 *
	 * The VM will remove the page table entries and potentially
	 * the page between invalidate_range_start() and
	 * invalidate_range_end(). If the page must not be freed
	 * because of pending I/O or other circumstances then the
	 * invalidate_range_start() callback (or the initial mapping
	 * by the driver) must make sure that the refcount is kept
	 * elevated.
	 *
	 * If the driver increases the refcount when the pages are
	 * initially mapped into an address space then either
	 * invalidate_range_start() or invalidate_range_end() may
	 * decrease the refcount. If the refcount is decreased on
	 * invalidate_range_start() then the VM can free pages as page
	 * table entries are removed. If the refcount is only
	 * dropped on invalidate_range_end() then the driver itself
	 * will drop the last refcount but it must take care to flush
	 * any secondary tlb before doing the final free on the
	 * page. Pages will no longer be referenced by the linux
	 * address space but may still be referenced by sptes until
	 * the last refcount is dropped.
	 *
	 * If the blockable argument is set to false then the callback cannot
	 * sleep and has to return with -EAGAIN. 0 should be returned
	 * otherwise. Please note that if invalidate_range_start approves
	 * a non-blocking behavior then the same applies to
	 * invalidate_range_end.
	 */
	int (*invalidate_range_start)(struct mmu_notifier *mn,
				      struct mm_struct *mm,
				      unsigned long start, unsigned long end,
				      bool blockable);
	void (*invalidate_range_end)(struct mmu_notifier *mn,
				     struct mm_struct *mm,
				     unsigned long start, unsigned long end);
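
	/*
	 * Illustrative sketch (hypothetical driver code, not part of this
	 * header): a driver that takes a page reference when it maps a page
	 * could handle the pair roughly as below, dropping its references in
	 * _start() and honouring the blockable argument.  struct my_mmu,
	 * my_zap_range() and the mutex are assumptions for the example:
	 *
	 *	static int my_inv_start(struct mmu_notifier *mn,
	 *				struct mm_struct *mm,
	 *				unsigned long start, unsigned long end,
	 *				bool blockable)
	 *	{
	 *		struct my_mmu *mmu = container_of(mn, struct my_mmu, mn);
	 *
	 *		if (blockable)
	 *			mutex_lock(&mmu->lock);
	 *		else if (!mutex_trylock(&mmu->lock))
	 *			return -EAGAIN;
	 *
	 *		my_zap_range(mmu, start, end);	// tear down sptes and
	 *						// put_page() the pages
	 *		return 0;
	 *	}
	 *
	 *	static void my_inv_end(struct mmu_notifier *mn,
	 *			       struct mm_struct *mm,
	 *			       unsigned long start, unsigned long end)
	 *	{
	 *		struct my_mmu *mmu = container_of(mn, struct my_mmu, mn);
	 *
	 *		mutex_unlock(&mmu->lock);	// re-allow spte faults
	 *	}
	 *
	 * Holding the driver lock across the _start()/_end() critical section
	 * is one way to keep new sptes from being established in the range
	 * while the VM is invalidating it.
	 */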

	/*
	 * invalidate_range() is either called between
	 * invalidate_range_start() and invalidate_range_end() when the
	 * VM has to free pages that were unmapped, but before the
	 * pages are actually freed, or outside of _start()/_end() when
	 * a (remote) TLB flush is necessary.
	 *
	 * If invalidate_range() is used to manage a non-CPU TLB with
	 * shared page-tables, it is not necessary to implement the
	 * invalidate_range_start()/end() notifiers, as
	 * invalidate_range() already catches the points in time when an
	 * external TLB range needs to be flushed. For a more in-depth
	 * discussion on this see Documentation/vm/mmu_notifier.rst
	 *
	 * Note that this function might be called with just a sub-range
	 * of what was passed to invalidate_range_start()/end(), if
	 * called between those functions.
	 */
	void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
				 unsigned long start, unsigned long end);
};

/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
 * 3. No other concurrent thread can access the list (release)
 */
struct mmu_notifier {
	struct hlist_node hlist;
	const struct mmu_notifier_ops *ops;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
	return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
				 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
				   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
				    struct mm_struct *mm);
extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
					       struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
					    unsigned long start,
					    unsigned long end);
extern int __mmu_notifier_clear_young(struct mm_struct *mm,
				      unsigned long start,
				      unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
				     unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
				      unsigned long address, pte_t pte);
extern int __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
				  unsigned long start, unsigned long end,
				  bool blockable);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
				  unsigned long start, unsigned long end,
				  bool only_end);
extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
				  unsigned long start, unsigned long end);
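
/*
 * Illustrative sketch of registration (hypothetical driver code, not part
 * of this header): a driver embeds a struct mmu_notifier in its own per-mm
 * state, points ->ops at the callbacks it implements and registers against
 * the mm it wants to mirror.  struct my_mmu, my_mmu_notifier_ops and
 * my_attach() are assumptions for the example:
 *
 *	struct my_mmu {
 *		struct mmu_notifier mn;
 *		// driver-private secondary MMU state
 *	};
 *
 *	static const struct mmu_notifier_ops my_mmu_notifier_ops = {
 *		.release		= my_release,
 *		.invalidate_range_start	= my_inv_start,
 *		.invalidate_range_end	= my_inv_end,
 *	};
 *
 *	static int my_attach(struct my_mmu *mmu)
 *	{
 *		mmu->mn.ops = &my_mmu_notifier_ops;
 *		return mmu_notifier_register(&mmu->mn, current->mm);
 *	}
 *
 * mmu_notifier_register() takes mmap_sem internally and may sleep;
 * __mmu_notifier_register() expects the caller to already hold it.
 * Teardown goes through mmu_notifier_unregister(), which invokes
 * ->release(), or mmu_notifier_unregister_no_release() when the driver has
 * already torn down its secondary mappings itself.
 */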

static inline void mmu_notifier_release(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
						 unsigned long start,
						 unsigned long end)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_clear_flush_young(mm, start, end);
	return 0;
}

static inline int mmu_notifier_clear_young(struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_clear_young(mm, start, end);
	return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
					  unsigned long address)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_test_young(mm, address);
	return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
					   unsigned long address, pte_t pte)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_change_pte(mm, address, pte);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_start(mm, start, end, true);
}

static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_invalidate_range_start(mm, start, end, false);
	return 0;
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_end(mm, start, end, false);
}

static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_end(mm, start, end, true);
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range(mm, start, end);
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
	mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_mm_destroy(mm);
}

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)	\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = ptep_clear_flush_young(___vma, ___address, __ptep);	\
	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
						  ___address,		\
						  ___address +		\
							PAGE_SIZE);	\
	__young;							\
})

#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)	\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = pmdp_clear_flush_young(___vma, ___address, __pmdp);	\
	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
						  ___address,		\
						  ___address +		\
							PMD_SIZE);	\
	__young;							\
})

#define ptep_clear_young_notify(__vma, __address, __ptep)		\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = ptep_test_and_clear_young(___vma, ___address, __ptep);\
	__young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,	\
					    ___address + PAGE_SIZE);	\
	__young;							\
})

#define pmdp_clear_young_notify(__vma, __address, __pmdp)		\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\
	__young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,	\
					    ___address + PMD_SIZE);	\
	__young;							\
})

#define ptep_clear_flush_notify(__vma, __address, __ptep)		\
({									\
	unsigned long ___addr = __address & PAGE_MASK;			\
	struct mm_struct *___mm = (__vma)->vm_mm;			\
	pte_t ___pte;							\
									\
	___pte = ptep_clear_flush(__vma, __address, __ptep);		\
	mmu_notifier_invalidate_range(___mm, ___addr,			\
				      ___addr + PAGE_SIZE);		\
									\
	___pte;								\
})
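
/*
 * Illustrative sketch (hypothetical caller, not kernel code): the _notify
 * helpers above keep the primary and the secondary MMUs in sync for a
 * single entry.  For example, an aging walk over one pte could combine
 * both views of the accessed bit; my_page_referenced_one() is an
 * assumption for the example:
 *
 *	static bool my_page_referenced_one(struct vm_area_struct *vma,
 *					   unsigned long address, pte_t *ptep)
 *	{
 *		// clears and flushes the young bit in the Linux pte and
 *		// ORs in ->clear_flush_young() for the same PAGE_SIZE range
 *		return ptep_clear_flush_young_notify(vma, address, ptep);
 *	}
 *
 * ptep_clear_flush_notify() above similarly pairs the primary pte flush
 * with mmu_notifier_invalidate_range(); callers that go on to free the
 * page are typically still expected to bracket the operation with
 * mmu_notifier_invalidate_range_start()/end().
 */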

#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd)		\
({									\
	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
	struct mm_struct *___mm = (__vma)->vm_mm;			\
	pmd_t ___pmd;							\
									\
	___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd);		\
	mmu_notifier_invalidate_range(___mm, ___haddr,			\
				      ___haddr + HPAGE_PMD_SIZE);	\
									\
	___pmd;								\
})

#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud)		\
({									\
	unsigned long ___haddr = __haddr & HPAGE_PUD_MASK;		\
	struct mm_struct *___mm = (__vma)->vm_mm;			\
	pud_t ___pud;							\
									\
	___pud = pudp_huge_clear_flush(__vma, __haddr, __pud);		\
	mmu_notifier_invalidate_range(___mm, ___haddr,			\
				      ___haddr + HPAGE_PUD_SIZE);	\
									\
	___pud;								\
})

/*
 * set_pte_at_notify() sets the pte _after_ running the notifier.
 * This is safe to start by updating the secondary MMUs, because the primary MMU
 * pte invalidate must have already happened with a ptep_clear_flush() before
 * set_pte_at_notify() has been invoked. Updating the secondary MMUs first is
 * required when we change both the protection of the mapping from read-only to
 * read-write and the pfn (like during copy on write page faults). Otherwise the
 * old page would remain mapped readonly in the secondary MMUs after the new
 * page is already writable by some CPU through the primary MMU.
 */
#define set_pte_at_notify(__mm, __address, __ptep, __pte)		\
({									\
	struct mm_struct *___mm = __mm;					\
	unsigned long ___address = __address;				\
	pte_t ___pte = __pte;						\
									\
	mmu_notifier_change_pte(___mm, ___address, ___pte);		\
	set_pte_at(___mm, ___address, __ptep, ___pte);			\
})
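
/*
 * Illustrative sketch of the ordering described above (hypothetical
 * write-protect fault path, not kernel code); orig_pte, new_pte and the
 * surrounding page-table locking are assumptions for the example:
 *
 *	// 1. invalidate the primary MMU pte and the secondary TLBs
 *	orig_pte = ptep_clear_flush_notify(vma, address, ptep);
 *	// 2. update the secondary MMUs via ->change_pte(), then install
 *	//    the new pte in the primary MMU
 *	set_pte_at_notify(vma->vm_mm, address, ptep, new_pte);
 *
 * Because step 1 already removed the old read-only mapping from the
 * secondary MMUs, running the notifier before set_pte_at() cannot leave a
 * stale read-only spte pointing at the old page while some CPU can already
 * write to the new page through the primary MMU.
 */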

extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
				   void (*func)(struct rcu_head *rcu));
extern void mmu_notifier_synchronize(void);

#else /* CONFIG_MMU_NOTIFIER */

static inline int mm_has_notifiers(struct mm_struct *mm)
{
	return 0;
}

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
						 unsigned long start,
						 unsigned long end)
{
	return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
					  unsigned long address)
{
	return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
					   unsigned long address, pte_t pte)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
}

static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	return 0;
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define ptep_clear_young_notify ptep_test_and_clear_young
#define pmdp_clear_young_notify pmdp_test_and_clear_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
#define set_pte_at_notify set_pte_at

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */