// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2002 Richard Henderson
 * Copyright (C) 2001 Rusty Russell, 2002, 2010 Rusty Russell IBM.
 * Copyright (C) 2023 Luis Chamberlain <[email protected]>
 * Copyright (C) 2024 Mike Rapoport IBM.
 */

#define pr_fmt(fmt) "execmem: " fmt

#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/execmem.h>
#include <linux/maple_tree.h>
#include <linux/set_memory.h>
#include <linux/moduleloader.h>
#include <linux/text-patching.h>

#include <asm/tlbflush.h>

#include "internal.h"

static struct execmem_info *execmem_info __ro_after_init;
static struct execmem_info default_execmem_info __ro_after_init;

#ifdef CONFIG_MMU
static void *execmem_vmalloc(struct execmem_range *range, size_t size,
                             pgprot_t pgprot, unsigned long vm_flags)
{
        bool kasan = range->flags & EXECMEM_KASAN_SHADOW;
        gfp_t gfp_flags = GFP_KERNEL | __GFP_NOWARN;
        unsigned int align = range->alignment;
        unsigned long start = range->start;
        unsigned long end = range->end;
        void *p;

        if (kasan)
                vm_flags |= VM_DEFER_KMEMLEAK;

        if (vm_flags & VM_ALLOW_HUGE_VMAP)
                align = PMD_SIZE;

        p = __vmalloc_node_range(size, align, start, end, gfp_flags,
                                 pgprot, vm_flags, NUMA_NO_NODE,
                                 __builtin_return_address(0));
        if (!p && range->fallback_start) {
                start = range->fallback_start;
                end = range->fallback_end;
                p = __vmalloc_node_range(size, align, start, end, gfp_flags,
                                         pgprot, vm_flags, NUMA_NO_NODE,
                                         __builtin_return_address(0));
        }

        if (!p) {
                pr_warn_ratelimited("unable to allocate memory\n");
                return NULL;
        }

        if (kasan && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
                vfree(p);
                return NULL;
        }

        return p;
}
#else
static void *execmem_vmalloc(struct execmem_range *range, size_t size,
                             pgprot_t pgprot, unsigned long vm_flags)
{
        return vmalloc(size);
}
#endif /* CONFIG_MMU */

#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX
struct execmem_cache {
        struct mutex mutex;
        struct maple_tree busy_areas;
        struct maple_tree free_areas;
};

static struct execmem_cache execmem_cache = {
        .mutex = __MUTEX_INITIALIZER(execmem_cache.mutex),
        .busy_areas = MTREE_INIT_EXT(busy_areas, MT_FLAGS_LOCK_EXTERN,
                                     execmem_cache.mutex),
        .free_areas = MTREE_INIT_EXT(free_areas, MT_FLAGS_LOCK_EXTERN,
                                     execmem_cache.mutex),
};

static inline unsigned long mas_range_len(struct ma_state *mas)
{
        return mas->last - mas->index + 1;
}

static int execmem_set_direct_map_valid(struct vm_struct *vm, bool valid)
{
        unsigned int nr = (1 << get_vm_area_page_order(vm));
        unsigned int updated = 0;
        int err = 0;

        for (int i = 0; i < vm->nr_pages; i += nr) {
                err = set_direct_map_valid_noflush(vm->pages[i], nr, valid);
                if (err)
                        goto err_restore;
                updated += nr;
        }

        return 0;

err_restore:
        for (int i = 0; i < updated; i += nr)
                set_direct_map_valid_noflush(vm->pages[i], nr, !valid);

        return err;
}
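
/*
 * Deferred cache maintenance: areas in free_areas whose start and size are
 * both PMD-aligned get their direct map entries restored and are returned
 * to vmalloc.
 */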
static void execmem_cache_clean(struct work_struct *work)
{
        struct maple_tree *free_areas = &execmem_cache.free_areas;
        struct mutex *mutex = &execmem_cache.mutex;
        MA_STATE(mas, free_areas, 0, ULONG_MAX);
        void *area;

        mutex_lock(mutex);
        mas_for_each(&mas, area, ULONG_MAX) {
                size_t size = mas_range_len(&mas);

                if (IS_ALIGNED(size, PMD_SIZE) &&
                    IS_ALIGNED(mas.index, PMD_SIZE)) {
                        struct vm_struct *vm = find_vm_area(area);

                        execmem_set_direct_map_valid(vm, true);
                        mas_store_gfp(&mas, NULL, GFP_KERNEL);
                        vfree(area);
                }
        }
        mutex_unlock(mutex);
}

static DECLARE_WORK(execmem_cache_clean_work, execmem_cache_clean);

static int execmem_cache_add(void *ptr, size_t size)
{
        struct maple_tree *free_areas = &execmem_cache.free_areas;
        struct mutex *mutex = &execmem_cache.mutex;
        unsigned long addr = (unsigned long)ptr;
        MA_STATE(mas, free_areas, addr - 1, addr + 1);
        unsigned long lower, upper;
        void *area = NULL;
        int err;

        lower = addr;
        upper = addr + size - 1;

        mutex_lock(mutex);
        area = mas_walk(&mas);
        if (area && mas.last == addr - 1)
                lower = mas.index;

        area = mas_next(&mas, ULONG_MAX);
        if (area && mas.index == addr + size)
                upper = mas.last;

        mas_set_range(&mas, lower, upper);
        err = mas_store_gfp(&mas, (void *)lower, GFP_KERNEL);
        mutex_unlock(mutex);
        if (err)
                return err;

        return 0;
}

static bool within_range(struct execmem_range *range, struct ma_state *mas,
                         size_t size)
{
        unsigned long addr = mas->index;

        if (addr >= range->start && addr + size < range->end)
                return true;

        if (range->fallback_start &&
            addr >= range->fallback_start && addr + size < range->fallback_end)
                return true;

        return false;
}

static void *__execmem_cache_alloc(struct execmem_range *range, size_t size)
{
        struct maple_tree *free_areas = &execmem_cache.free_areas;
        struct maple_tree *busy_areas = &execmem_cache.busy_areas;
        MA_STATE(mas_free, free_areas, 0, ULONG_MAX);
        MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX);
        struct mutex *mutex = &execmem_cache.mutex;
        unsigned long addr, last, area_size = 0;
        void *area, *ptr = NULL;
        int err;

        mutex_lock(mutex);
        mas_for_each(&mas_free, area, ULONG_MAX) {
                area_size = mas_range_len(&mas_free);

                if (area_size >= size && within_range(range, &mas_free, size))
                        break;
        }

        if (area_size < size)
                goto out_unlock;

        addr = mas_free.index;
        last = mas_free.last;

        /* insert allocated size to busy_areas at range [addr, addr + size) */
        mas_set_range(&mas_busy, addr, addr + size - 1);
        err = mas_store_gfp(&mas_busy, (void *)addr, GFP_KERNEL);
        if (err)
                goto out_unlock;

        mas_store_gfp(&mas_free, NULL, GFP_KERNEL);
        if (area_size > size) {
                void *ptr = (void *)(addr + size);

                /*
                 * re-insert remaining free size to free_areas at range
                 * [addr + size, last]
                 */
                mas_set_range(&mas_free, addr + size, last);
                err = mas_store_gfp(&mas_free, ptr, GFP_KERNEL);
                if (err) {
                        mas_store_gfp(&mas_busy, NULL, GFP_KERNEL);
                        goto out_unlock;
                }
        }
        ptr = (void *)addr;

out_unlock:
        mutex_unlock(mutex);
        return ptr;
}
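
/*
 * Populate the ROX cache with a fresh PMD-aligned allocation: fill it with
 * trapping instructions, unmap the initial writable mapping, drop the pages
 * from the direct map and remap the area as large pages with the range's
 * permissions before adding it to free_areas.
 */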
static int execmem_cache_populate(struct execmem_range *range, size_t size)
{
        unsigned long vm_flags = VM_ALLOW_HUGE_VMAP;
        unsigned long start, end;
        struct vm_struct *vm;
        size_t alloc_size;
        int err = -ENOMEM;
        void *p;

        alloc_size = round_up(size, PMD_SIZE);
        p = execmem_vmalloc(range, alloc_size, PAGE_KERNEL, vm_flags);
        if (!p)
                return err;

        vm = find_vm_area(p);
        if (!vm)
                goto err_free_mem;

        /* fill memory with instructions that will trap */
        execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true);

        start = (unsigned long)p;
        end = start + alloc_size;

        vunmap_range(start, end);

        err = execmem_set_direct_map_valid(vm, false);
        if (err)
                goto err_free_mem;

        err = vmap_pages_range_noflush(start, end, range->pgprot, vm->pages,
                                       PMD_SHIFT);
        if (err)
                goto err_free_mem;

        err = execmem_cache_add(p, alloc_size);
        if (err)
                goto err_free_mem;

        return 0;

err_free_mem:
        vfree(p);
        return err;
}

static void *execmem_cache_alloc(struct execmem_range *range, size_t size)
{
        void *p;
        int err;

        p = __execmem_cache_alloc(range, size);
        if (p)
                return p;

        err = execmem_cache_populate(range, size);
        if (err)
                return NULL;

        return __execmem_cache_alloc(range, size);
}

static bool execmem_cache_free(void *ptr)
{
        struct maple_tree *busy_areas = &execmem_cache.busy_areas;
        struct mutex *mutex = &execmem_cache.mutex;
        unsigned long addr = (unsigned long)ptr;
        MA_STATE(mas, busy_areas, addr, addr);
        size_t size;
        void *area;

        mutex_lock(mutex);
        area = mas_walk(&mas);
        if (!area) {
                mutex_unlock(mutex);
                return false;
        }
        size = mas_range_len(&mas);

        mas_store_gfp(&mas, NULL, GFP_KERNEL);
        mutex_unlock(mutex);

        execmem_fill_trapping_insns(ptr, size, /* writable = */ false);

        execmem_cache_add(ptr, size);

        schedule_work(&execmem_cache_clean_work);

        return true;
}
#else /* CONFIG_ARCH_HAS_EXECMEM_ROX */
static void *execmem_cache_alloc(struct execmem_range *range, size_t size)
{
        return NULL;
}

static bool execmem_cache_free(void *ptr)
{
        return false;
}
#endif /* CONFIG_ARCH_HAS_EXECMEM_ROX */
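
/*
 * Allocate memory of the given type: ranges flagged with EXECMEM_ROX_CACHE
 * are served from the ROX cache, everything else comes straight from
 * vmalloc with VM_FLUSH_RESET_PERMS.
 */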
void *execmem_alloc(enum execmem_type type, size_t size)
{
        struct execmem_range *range = &execmem_info->ranges[type];
        bool use_cache = range->flags & EXECMEM_ROX_CACHE;
        unsigned long vm_flags = VM_FLUSH_RESET_PERMS;
        pgprot_t pgprot = range->pgprot;
        void *p;

        if (use_cache)
                p = execmem_cache_alloc(range, size);
        else
                p = execmem_vmalloc(range, size, pgprot, vm_flags);

        return kasan_reset_tag(p);
}

void execmem_free(void *ptr)
{
        /*
         * This memory may be RO, and freeing RO memory in an interrupt is not
         * supported by vmalloc.
         */
        WARN_ON(in_interrupt());

        if (!execmem_cache_free(ptr))
                vfree(ptr);
}

void *execmem_update_copy(void *dst, const void *src, size_t size)
{
        return text_poke_copy(dst, src, size);
}

bool execmem_is_rox(enum execmem_type type)
{
        return !!(execmem_info->ranges[type].flags & EXECMEM_ROX_CACHE);
}

static bool execmem_validate(struct execmem_info *info)
{
        struct execmem_range *r = &info->ranges[EXECMEM_DEFAULT];

        if (!r->alignment || !r->start || !r->end || !pgprot_val(r->pgprot)) {
                pr_crit("Invalid parameters for execmem allocator, module loading will fail");
                return false;
        }

        if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX)) {
                for (int i = EXECMEM_DEFAULT; i < EXECMEM_TYPE_MAX; i++) {
                        r = &info->ranges[i];

                        if (r->flags & EXECMEM_ROX_CACHE) {
                                pr_warn_once("ROX cache is not supported\n");
                                r->flags &= ~EXECMEM_ROX_CACHE;
                        }
                }
        }

        return true;
}

static void execmem_init_missing(struct execmem_info *info)
{
        struct execmem_range *default_range = &info->ranges[EXECMEM_DEFAULT];

        for (int i = EXECMEM_DEFAULT + 1; i < EXECMEM_TYPE_MAX; i++) {
                struct execmem_range *r = &info->ranges[i];

                if (!r->start) {
                        if (i == EXECMEM_MODULE_DATA)
                                r->pgprot = PAGE_KERNEL;
                        else
                                r->pgprot = default_range->pgprot;
                        r->alignment = default_range->alignment;
                        r->start = default_range->start;
                        r->end = default_range->end;
                        r->flags = default_range->flags;
                        r->fallback_start = default_range->fallback_start;
                        r->fallback_end = default_range->fallback_end;
                }
        }
}

struct execmem_info * __weak execmem_arch_setup(void)
{
        return NULL;
}

static void __init __execmem_init(void)
{
        struct execmem_info *info = execmem_arch_setup();

        if (!info) {
                info = execmem_info = &default_execmem_info;
                info->ranges[EXECMEM_DEFAULT].start = VMALLOC_START;
                info->ranges[EXECMEM_DEFAULT].end = VMALLOC_END;
                info->ranges[EXECMEM_DEFAULT].pgprot = PAGE_KERNEL_EXEC;
                info->ranges[EXECMEM_DEFAULT].alignment = 1;
        }

        if (!execmem_validate(info))
                return;

        execmem_init_missing(info);

        execmem_info = info;
}

#ifdef CONFIG_ARCH_WANTS_EXECMEM_LATE
static int __init execmem_late_init(void)
{
        __execmem_init();
        return 0;
}
core_initcall(execmem_late_init);
#else
void __init execmem_init(void)
{
        __execmem_init();
}
#endif
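
/*
 * Illustrative usage sketch (not part of this file), assuming a caller such
 * as a module loader; "code" and "size" are placeholders, and the execmem
 * type is whichever enum execmem_type value fits the caller:
 *
 *	buf = execmem_alloc(EXECMEM_MODULE_TEXT, size);
 *	if (!buf)
 *		return -ENOMEM;
 *
 *	if (execmem_is_rox(EXECMEM_MODULE_TEXT))
 *		execmem_update_copy(buf, code, size);
 *	else
 *		memcpy(buf, code, size);
 *	...
 *	execmem_free(buf);
 */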