// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stack depot - a stack trace storage that avoids duplication.
 *
 * Internally, stack depot maintains a hash table of unique stacktraces. The
 * stack traces themselves are stored contiguously one after another in a set
 * of separate page allocations.
 *
 * Author: Alexander Potapenko <[email protected]>
 * Copyright (C) 2016 Google, Inc.
 *
 * Based on the code by Dmitry Chernenkov.
 */

#define pr_fmt(fmt) "stackdepot: " fmt

#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/kmsan.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/memblock.h>
#include <linux/kasan-enabled.h>

#define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8)

#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */
#define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER))
#define DEPOT_STACK_ALIGN 4
#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN)
#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
			       STACK_DEPOT_EXTRA_BITS)
#define DEPOT_POOLS_CAP 8192
#define DEPOT_MAX_POOLS \
	(((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \
	 (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP)

/* Compact structure that stores a reference to a stack. */
union handle_parts {
	depot_stack_handle_t handle;
	struct {
		u32 pool_index	: DEPOT_POOL_INDEX_BITS;
		u32 offset	: DEPOT_OFFSET_BITS;
		u32 extra	: STACK_DEPOT_EXTRA_BITS;
	};
};

struct stack_record {
	struct stack_record *next;	/* Link in the hash table */
	u32 hash;			/* Hash in the hash table */
	u32 size;			/* Number of stored frames */
	union handle_parts handle;
	unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES];	/* Frames */
};

#define DEPOT_STACK_RECORD_SIZE \
	ALIGN(sizeof(struct stack_record), 1 << DEPOT_STACK_ALIGN)

static bool stack_depot_disabled;
static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
static bool __stack_depot_early_init_passed __initdata;

/* Use one hash table bucket per 16 KB of memory. */
#define STACK_HASH_TABLE_SCALE 14
/* Limit the number of buckets between 4K and 1M. */
#define STACK_BUCKET_NUMBER_ORDER_MIN 12
#define STACK_BUCKET_NUMBER_ORDER_MAX 20
/* Initial seed for jhash2. */
#define STACK_HASH_SEED 0x9747b28c

/* Hash table of pointers to stored stack traces. */
static struct stack_record **stack_table;
/* Fixed order of the number of table buckets. Used when KASAN is enabled. */
static unsigned int stack_bucket_number_order;
/* Hash mask for indexing the table. */
static unsigned int stack_hash_mask;

/* Array of memory regions that store stack traces. */
static void *stack_pools[DEPOT_MAX_POOLS];
/* Currently used pool in stack_pools. */
static int pool_index;
/* Offset to the unused space in the currently used pool. */
static size_t pool_offset;
/* Lock that protects the variables above. */
static DEFINE_RAW_SPINLOCK(pool_lock);
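
/*
 * Worked example of the handle encoding above (union handle_parts), assuming
 * 4 KB pages (PAGE_SHIFT == 12) and STACK_DEPOT_EXTRA_BITS == 5; both values
 * are configuration-dependent:
 *
 *	DEPOT_POOL_SIZE       = 4 pages         = 16 KB
 *	DEPOT_OFFSET_BITS     = 2 + 12 - 4      = 10 bits
 *	DEPOT_POOL_INDEX_BITS = 32 - 10 - 5     = 17 bits
 *	DEPOT_MAX_POOLS       = min(2^17, 8192) = 8192 pools
 *
 * Offsets are stored right-shifted by DEPOT_STACK_ALIGN, so 10 offset bits
 * address all 16 KB of a pool at a 16-byte granularity.
 */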

/*
 * Stack depot tries to keep an extra pool allocated even before it runs out
 * of space in the currently used pool. This flag marks whether this extra
 * pool needs to be allocated. It is set to 0 once an extra pool has been
 * saved for future use or once the limit on the number of pools is reached.
 */
static int new_pool_required = 1;

static int __init disable_stack_depot(char *str)
{
	return kstrtobool(str, &stack_depot_disabled);
}
early_param("stack_depot_disable", disable_stack_depot);

void __init stack_depot_request_early_init(void)
{
	/* Too late to request early init now. */
	WARN_ON(__stack_depot_early_init_passed);

	__stack_depot_early_init_requested = true;
}

/* Allocates a hash table via memblock. Can only be used during early boot. */
int __init stack_depot_early_init(void)
{
	unsigned long entries = 0;

	/* This function must be called only once, from mm_init(). */
	if (WARN_ON(__stack_depot_early_init_passed))
		return 0;
	__stack_depot_early_init_passed = true;

	/*
	 * Print the disabled message even if early init has not been
	 * requested: stack_depot_init() will not print one.
	 */
	if (stack_depot_disabled) {
		pr_info("disabled\n");
		return 0;
	}

	/*
	 * If KASAN is enabled, use the maximum order: KASAN is frequently used
	 * in fuzzing scenarios, which leads to a large number of different
	 * stack traces being stored in stack depot.
	 */
	if (kasan_enabled() && !stack_bucket_number_order)
		stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX;

	/*
	 * Check if early init has been requested after setting
	 * stack_bucket_number_order: stack_depot_init() uses its value.
	 */
	if (!__stack_depot_early_init_requested)
		return 0;

	/*
	 * If stack_bucket_number_order is not set, leave entries as 0 to rely
	 * on the automatic calculations performed by alloc_large_system_hash.
	 */
	if (stack_bucket_number_order)
		entries = 1UL << stack_bucket_number_order;
	pr_info("allocating hash table via alloc_large_system_hash\n");
	stack_table = alloc_large_system_hash("stackdepot",
						sizeof(struct stack_record *),
						entries,
						STACK_HASH_TABLE_SCALE,
						HASH_EARLY | HASH_ZERO,
						NULL,
						&stack_hash_mask,
						1UL << STACK_BUCKET_NUMBER_ORDER_MIN,
						1UL << STACK_BUCKET_NUMBER_ORDER_MAX);
	if (!stack_table) {
		pr_err("hash table allocation failed, disabling\n");
		stack_depot_disabled = true;
		return -ENOMEM;
	}

	return 0;
}

/* Allocates a hash table via kvcalloc. Can be used after boot. */
int stack_depot_init(void)
{
	static DEFINE_MUTEX(stack_depot_init_mutex);
	unsigned long entries;
	int ret = 0;

	mutex_lock(&stack_depot_init_mutex);

	if (stack_depot_disabled || stack_table)
		goto out_unlock;

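	/*
	 * Rough worked example of the automatic scaling below, assuming
	 * 4 KB pages: with ~8 GB of usable memory, nr_free_buffer_pages()
	 * is on the order of 2^21, so the table gets 2^21 >> (14 - 12) =
	 * 2^19 buckets, i.e. one bucket per 16 KB of memory, within the
	 * [2^12, 2^20] clamp applied below.
	 */
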
	/*
	 * Similarly to stack_depot_early_init, use stack_bucket_number_order
	 * if assigned, and rely on automatic scaling otherwise.
	 */
	if (stack_bucket_number_order) {
		entries = 1UL << stack_bucket_number_order;
	} else {
		int scale = STACK_HASH_TABLE_SCALE;

		entries = nr_free_buffer_pages();
		entries = roundup_pow_of_two(entries);

		if (scale > PAGE_SHIFT)
			entries >>= (scale - PAGE_SHIFT);
		else
			entries <<= (PAGE_SHIFT - scale);
	}

	if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN)
		entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN;
	if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX)
		entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX;

	pr_info("allocating hash table of %lu entries via kvcalloc\n", entries);
	stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL);
	if (!stack_table) {
		pr_err("hash table allocation failed, disabling\n");
		stack_depot_disabled = true;
		ret = -ENOMEM;
		goto out_unlock;
	}
	stack_hash_mask = entries - 1;

out_unlock:
	mutex_unlock(&stack_depot_init_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(stack_depot_init);

/* Keeps the preallocated memory to be used for a new stack depot pool. */
static void depot_keep_new_pool(void **prealloc)
{
	/*
	 * If a new pool is already saved or the maximum number of
	 * pools is reached, do not use the preallocated memory.
	 * Access new_pool_required non-atomically, as there are no concurrent
	 * write accesses to this variable.
	 */
	if (!new_pool_required)
		return;

	/*
	 * Use the preallocated memory for the new pool
	 * as long as we do not exceed the maximum number of pools.
	 */
	if (pool_index + 1 < DEPOT_MAX_POOLS) {
		stack_pools[pool_index + 1] = *prealloc;
		*prealloc = NULL;
	}

	/*
	 * At this point, either a new pool is kept or the maximum
	 * number of pools is reached. In either case, take note that
	 * keeping another pool is not required.
	 * smp_store_release() pairs with smp_load_acquire() in
	 * stack_depot_save().
	 */
	smp_store_release(&new_pool_required, 0);
}

/* Updates references to the current and the next stack depot pools. */
static bool depot_update_pools(size_t required_size, void **prealloc)
{
	/* Check if there is not enough space in the current pool. */
	if (unlikely(pool_offset + required_size > DEPOT_POOL_SIZE)) {
		/* Bail out if we reached the pool limit. */
		if (unlikely(pool_index + 1 >= DEPOT_MAX_POOLS)) {
			WARN_ONCE(1, "Stack depot reached limit capacity");
			return false;
		}

		/*
		 * Move on to the new pool.
		 * WRITE_ONCE() pairs with potential concurrent read in
		 * stack_depot_fetch().
		 */
		WRITE_ONCE(pool_index, pool_index + 1);
		pool_offset = 0;

		/*
		 * If the maximum number of pools is not reached, take note
		 * that yet another new pool needs to be allocated.
		 * smp_store_release() pairs with smp_load_acquire() in
		 * stack_depot_save().
		 */
		if (pool_index + 1 < DEPOT_MAX_POOLS)
			smp_store_release(&new_pool_required, 1);
	}

	/* Check if the current pool is not yet allocated. */
	if (*prealloc && stack_pools[pool_index] == NULL) {
		/* Use the preallocated memory for the current pool. */
		stack_pools[pool_index] = *prealloc;
		*prealloc = NULL;
		return true;
	}

	/* Otherwise, try using the preallocated memory for a new pool. */
	if (*prealloc)
		depot_keep_new_pool(prealloc);
	return true;
}

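/*
 * Rough capacity sketch, assuming a 64-bit build with the default
 * CONFIG_STACKDEPOT_MAX_FRAMES of 64: sizeof(struct stack_record) is about
 * 536 bytes, so DEPOT_STACK_RECORD_SIZE rounds up to 544 bytes and each
 * 16 KB pool holds roughly 30 records. With DEPOT_POOLS_CAP pools, the depot
 * tops out at roughly 245000 unique stack traces (~128 MB of pools), after
 * which depot_update_pools() warns and new traces are dropped.
 */
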
/* Allocates a new stack in a stack depot pool. */
static struct stack_record *
depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
{
	struct stack_record *stack;
	size_t required_size = DEPOT_STACK_RECORD_SIZE;

	/* Update current and new pools if required and possible. */
	if (!depot_update_pools(required_size, prealloc))
		return NULL;

	/* Check if we have a pool to save the stack trace. */
	if (stack_pools[pool_index] == NULL)
		return NULL;

	/* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
	if (size > CONFIG_STACKDEPOT_MAX_FRAMES)
		size = CONFIG_STACKDEPOT_MAX_FRAMES;

	/* Save the stack trace. */
	stack = stack_pools[pool_index] + pool_offset;
	stack->hash = hash;
	stack->size = size;
	stack->handle.pool_index = pool_index;
	stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
	stack->handle.extra = 0;
	memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
	pool_offset += required_size;

	/*
	 * Let KMSAN know the stored stack record is initialized. This shall
	 * prevent false positive reports if instrumented code accesses it.
	 */
	kmsan_unpoison_memory(stack, required_size);

	return stack;
}

static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
{
	union handle_parts parts = { .handle = handle };
	/*
	 * READ_ONCE() pairs with potential concurrent write in
	 * depot_update_pools().
	 */
	int pool_index_cached = READ_ONCE(pool_index);
	void *pool;
	size_t offset = parts.offset << DEPOT_STACK_ALIGN;
	struct stack_record *stack;

	if (parts.pool_index > pool_index_cached) {
		WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
		     parts.pool_index, pool_index_cached, handle);
		return NULL;
	}

	pool = stack_pools[parts.pool_index];
	if (!pool)
		return NULL;

	stack = pool + offset;
	return stack;
}

/* Calculates the hash for a stack. */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
{
	return jhash2((u32 *)entries,
		      array_size(size, sizeof(*entries)) / sizeof(u32),
		      STACK_HASH_SEED);
}

/*
 * Non-instrumented version of memcmp().
 * Does not check the lexicographical order, only the equality.
 */
static inline
int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
		      unsigned int n)
{
	for ( ; n-- ; u1++, u2++) {
		if (*u1 != *u2)
			return 1;
	}
	return 0;
}

/* Finds a stack in a bucket of the hash table. */
static inline struct stack_record *find_stack(struct stack_record *bucket,
					      unsigned long *entries, int size,
					      u32 hash)
{
	struct stack_record *found;

	for (found = bucket; found; found = found->next) {
		if (found->hash == hash &&
		    found->size == size &&
		    !stackdepot_memcmp(entries, found->entries, size))
			return found;
	}
	return NULL;
}

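/*
 * Typical caller-side usage, shown here only as an illustrative sketch (the
 * depot does not capture stack traces itself; callers combine it with the
 * stacktrace API):
 *
 *	unsigned long entries[16];
 *	unsigned int nr_entries;
 *	depot_stack_handle_t handle;
 *
 *	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
 *	handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
 *
 * and later, for example when printing an error report:
 *
 *	if (handle)
 *		stack_depot_print(handle);
 *
 * A handle value of 0 means the trace could not be stored (empty trace, depot
 * disabled, or no memory for a new pool).
 */
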
depot_stack_handle_t __stack_depot_save(unsigned long *entries,
					unsigned int nr_entries,
					gfp_t alloc_flags, bool can_alloc)
{
	struct stack_record *found = NULL, **bucket;
	depot_stack_handle_t handle = 0;
	struct page *page = NULL;
	void *prealloc = NULL;
	unsigned long flags;
	u32 hash;

	/*
	 * If this stack trace is from an interrupt, including the part of the
	 * trace before interrupt entry usually leads to unbounded stack depot
	 * growth.
	 *
	 * Since use of filter_irq_stacks() is a requirement to ensure stack
	 * depot can efficiently deduplicate interrupt stacks, always
	 * filter_irq_stacks() to simplify all callers' use of stack depot.
	 */
	nr_entries = filter_irq_stacks(entries, nr_entries);

	if (unlikely(nr_entries == 0) || stack_depot_disabled)
		return 0;

	hash = hash_stack(entries, nr_entries);
	bucket = &stack_table[hash & stack_hash_mask];

	/*
	 * Fast path: look the stack trace up without locking.
	 * smp_load_acquire() pairs with smp_store_release() to |bucket| below.
	 */
	found = find_stack(smp_load_acquire(bucket), entries, nr_entries, hash);
	if (found)
		goto exit;

	/*
	 * Check if another stack pool needs to be allocated. If so, allocate
	 * the memory now: we won't be able to do that under the lock.
	 *
	 * smp_load_acquire() pairs with smp_store_release() in
	 * depot_update_pools() and depot_keep_new_pool().
	 */
	if (unlikely(can_alloc && smp_load_acquire(&new_pool_required))) {
		/*
		 * Zero out zone modifiers, as we don't have specific zone
		 * requirements. Keep the flags related to allocation in atomic
		 * contexts and I/O.
		 */
		alloc_flags &= ~GFP_ZONEMASK;
		alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
		alloc_flags |= __GFP_NOWARN;
		page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER);
		if (page)
			prealloc = page_address(page);
	}

	raw_spin_lock_irqsave(&pool_lock, flags);

	found = find_stack(*bucket, entries, nr_entries, hash);
	if (!found) {
		struct stack_record *new =
			depot_alloc_stack(entries, nr_entries, hash, &prealloc);

		if (new) {
			new->next = *bucket;
			/*
			 * smp_store_release() pairs with smp_load_acquire()
			 * from |bucket| above.
			 */
			smp_store_release(bucket, new);
			found = new;
		}
	} else if (prealloc) {
		/*
		 * Stack depot already contains this stack trace, but let's
		 * keep the preallocated memory for the future.
		 */
		depot_keep_new_pool(&prealloc);
	}

	raw_spin_unlock_irqrestore(&pool_lock, flags);
exit:
	if (prealloc) {
		/* Stack depot didn't use this memory, free it. */
		free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER);
	}
	if (found)
		handle = found->handle.handle;
	return handle;
}
EXPORT_SYMBOL_GPL(__stack_depot_save);

depot_stack_handle_t stack_depot_save(unsigned long *entries,
				      unsigned int nr_entries,
				      gfp_t alloc_flags)
{
	return __stack_depot_save(entries, nr_entries, alloc_flags, true);
}
EXPORT_SYMBOL_GPL(stack_depot_save);

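/*
 * Illustrative note on the can_alloc argument of __stack_depot_save() above:
 * callers that may run in contexts where allocating pages is not safe (for
 * example, KASAN saving a stack while holding raw locks) pass can_alloc as
 * false. Deduplication against already-allocated pools still works; only the
 * preallocation of a new pool is skipped, so saving a not-yet-stored trace
 * may fail and return 0.
 */
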
513 */ 514 kmsan_unpoison_memory(entries, sizeof(*entries)); 515 516 if (!handle || stack_depot_disabled) 517 return 0; 518 519 stack = depot_fetch_stack(handle); 520 521 *entries = stack->entries; 522 return stack->size; 523 } 524 EXPORT_SYMBOL_GPL(stack_depot_fetch); 525 526 void stack_depot_print(depot_stack_handle_t stack) 527 { 528 unsigned long *entries; 529 unsigned int nr_entries; 530 531 nr_entries = stack_depot_fetch(stack, &entries); 532 if (nr_entries > 0) 533 stack_trace_print(entries, nr_entries, 0); 534 } 535 EXPORT_SYMBOL_GPL(stack_depot_print); 536 537 int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, 538 int spaces) 539 { 540 unsigned long *entries; 541 unsigned int nr_entries; 542 543 nr_entries = stack_depot_fetch(handle, &entries); 544 return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries, 545 spaces) : 0; 546 } 547 EXPORT_SYMBOL_GPL(stack_depot_snprint); 548 549 depot_stack_handle_t __must_check stack_depot_set_extra_bits( 550 depot_stack_handle_t handle, unsigned int extra_bits) 551 { 552 union handle_parts parts = { .handle = handle }; 553 554 /* Don't set extra bits on empty handles. */ 555 if (!handle) 556 return 0; 557 558 parts.extra = extra_bits; 559 return parts.handle; 560 } 561 EXPORT_SYMBOL(stack_depot_set_extra_bits); 562 563 unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) 564 { 565 union handle_parts parts = { .handle = handle }; 566 567 return parts.extra; 568 } 569 EXPORT_SYMBOL(stack_depot_get_extra_bits); 570