1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Stack depot - a stack trace storage that avoids duplication. 4 * 5 * Internally, stack depot maintains a hash table of unique stacktraces. The 6 * stack traces themselves are stored contiguously one after another in a set 7 * of separate page allocations. 8 * 9 * Author: Alexander Potapenko <[email protected]> 10 * Copyright (C) 2016 Google, Inc. 11 * 12 * Based on the code by Dmitry Chernenkov. 13 */ 14 15 #define pr_fmt(fmt) "stackdepot: " fmt 16 17 #include <linux/debugfs.h> 18 #include <linux/gfp.h> 19 #include <linux/jhash.h> 20 #include <linux/kernel.h> 21 #include <linux/kmsan.h> 22 #include <linux/list.h> 23 #include <linux/mm.h> 24 #include <linux/mutex.h> 25 #include <linux/poison.h> 26 #include <linux/printk.h> 27 #include <linux/rculist.h> 28 #include <linux/rcupdate.h> 29 #include <linux/refcount.h> 30 #include <linux/slab.h> 31 #include <linux/spinlock.h> 32 #include <linux/stacktrace.h> 33 #include <linux/stackdepot.h> 34 #include <linux/string.h> 35 #include <linux/types.h> 36 #include <linux/memblock.h> 37 #include <linux/kasan-enabled.h> 38 39 #define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8) 40 41 #define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */ 42 #define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER)) 43 #define DEPOT_STACK_ALIGN 4 44 #define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) 45 #define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ 46 STACK_DEPOT_EXTRA_BITS) 47 #define DEPOT_POOLS_CAP 8192 48 /* The pool_index is offset by 1 so the first record does not have a 0 handle. */ 49 #define DEPOT_MAX_POOLS \ 50 (((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \ 51 (1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP) 52 53 /* Compact structure that stores a reference to a stack. */ 54 union handle_parts { 55 depot_stack_handle_t handle; 56 struct { 57 u32 pool_index : DEPOT_POOL_INDEX_BITS; /* pool_index is offset by 1 */ 58 u32 offset : DEPOT_OFFSET_BITS; 59 u32 extra : STACK_DEPOT_EXTRA_BITS; 60 }; 61 }; 62 63 struct stack_record { 64 struct list_head hash_list; /* Links in the hash table */ 65 u32 hash; /* Hash in hash table */ 66 u32 size; /* Number of stored frames */ 67 union handle_parts handle; /* Constant after initialization */ 68 refcount_t count; 69 union { 70 unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ 71 struct { 72 /* 73 * An important invariant of the implementation is to 74 * only place a stack record onto the freelist iff its 75 * refcount is zero. Because stack records with a zero 76 * refcount are never considered as valid, it is safe to 77 * union @entries and freelist management state below. 78 * Conversely, as soon as an entry is off the freelist 79 * and its refcount becomes non-zero, the below must not 80 * be accessed until being placed back on the freelist. 81 */ 82 struct list_head free_list; /* Links in the freelist */ 83 unsigned long rcu_state; /* RCU cookie */ 84 }; 85 }; 86 }; 87 88 static bool stack_depot_disabled; 89 static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); 90 static bool __stack_depot_early_init_passed __initdata; 91 92 /* Use one hash table bucket per 16 KB of memory. */ 93 #define STACK_HASH_TABLE_SCALE 14 94 /* Limit the number of buckets between 4K and 1M. */ 95 #define STACK_BUCKET_NUMBER_ORDER_MIN 12 96 #define STACK_BUCKET_NUMBER_ORDER_MAX 20 97 /* Initial seed for jhash2. */ 98 #define STACK_HASH_SEED 0x9747b28c 99 100 /* Hash table of stored stack records. */ 101 static struct list_head *stack_table; 102 /* Fixed order of the number of table buckets. Used when KASAN is enabled. */ 103 static unsigned int stack_bucket_number_order; 104 /* Hash mask for indexing the table. */ 105 static unsigned int stack_hash_mask; 106 107 /* Array of memory regions that store stack records. */ 108 static void *stack_pools[DEPOT_MAX_POOLS]; 109 /* Newly allocated pool that is not yet added to stack_pools. */ 110 static void *new_pool; 111 /* Number of pools in stack_pools. */ 112 static int pools_num; 113 /* Offset to the unused space in the currently used pool. */ 114 static size_t pool_offset = DEPOT_POOL_SIZE; 115 /* Freelist of stack records within stack_pools. */ 116 static LIST_HEAD(free_stacks); 117 /* The lock must be held when performing pool or freelist modifications. */ 118 static DEFINE_RAW_SPINLOCK(pool_lock); 119 120 /* Statistics counters for debugfs. */ 121 enum depot_counter_id { 122 DEPOT_COUNTER_REFD_ALLOCS, 123 DEPOT_COUNTER_REFD_FREES, 124 DEPOT_COUNTER_REFD_INUSE, 125 DEPOT_COUNTER_FREELIST_SIZE, 126 DEPOT_COUNTER_PERSIST_COUNT, 127 DEPOT_COUNTER_PERSIST_BYTES, 128 DEPOT_COUNTER_COUNT, 129 }; 130 static long counters[DEPOT_COUNTER_COUNT]; 131 static const char *const counter_names[] = { 132 [DEPOT_COUNTER_REFD_ALLOCS] = "refcounted_allocations", 133 [DEPOT_COUNTER_REFD_FREES] = "refcounted_frees", 134 [DEPOT_COUNTER_REFD_INUSE] = "refcounted_in_use", 135 [DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size", 136 [DEPOT_COUNTER_PERSIST_COUNT] = "persistent_count", 137 [DEPOT_COUNTER_PERSIST_BYTES] = "persistent_bytes", 138 }; 139 static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT); 140 141 static int __init disable_stack_depot(char *str) 142 { 143 return kstrtobool(str, &stack_depot_disabled); 144 } 145 early_param("stack_depot_disable", disable_stack_depot); 146 147 void __init stack_depot_request_early_init(void) 148 { 149 /* Too late to request early init now. */ 150 WARN_ON(__stack_depot_early_init_passed); 151 152 __stack_depot_early_init_requested = true; 153 } 154 155 /* Initialize list_head's within the hash table. */ 156 static void init_stack_table(unsigned long entries) 157 { 158 unsigned long i; 159 160 for (i = 0; i < entries; i++) 161 INIT_LIST_HEAD(&stack_table[i]); 162 } 163 164 /* Allocates a hash table via memblock. Can only be used during early boot. */ 165 int __init stack_depot_early_init(void) 166 { 167 unsigned long entries = 0; 168 169 /* This function must be called only once, from mm_init(). */ 170 if (WARN_ON(__stack_depot_early_init_passed)) 171 return 0; 172 __stack_depot_early_init_passed = true; 173 174 /* 175 * Print disabled message even if early init has not been requested: 176 * stack_depot_init() will not print one. 177 */ 178 if (stack_depot_disabled) { 179 pr_info("disabled\n"); 180 return 0; 181 } 182 183 /* 184 * If KASAN is enabled, use the maximum order: KASAN is frequently used 185 * in fuzzing scenarios, which leads to a large number of different 186 * stack traces being stored in stack depot. 187 */ 188 if (kasan_enabled() && !stack_bucket_number_order) 189 stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX; 190 191 /* 192 * Check if early init has been requested after setting 193 * stack_bucket_number_order: stack_depot_init() uses its value. 194 */ 195 if (!__stack_depot_early_init_requested) 196 return 0; 197 198 /* 199 * If stack_bucket_number_order is not set, leave entries as 0 to rely 200 * on the automatic calculations performed by alloc_large_system_hash(). 201 */ 202 if (stack_bucket_number_order) 203 entries = 1UL << stack_bucket_number_order; 204 pr_info("allocating hash table via alloc_large_system_hash\n"); 205 stack_table = alloc_large_system_hash("stackdepot", 206 sizeof(struct list_head), 207 entries, 208 STACK_HASH_TABLE_SCALE, 209 HASH_EARLY, 210 NULL, 211 &stack_hash_mask, 212 1UL << STACK_BUCKET_NUMBER_ORDER_MIN, 213 1UL << STACK_BUCKET_NUMBER_ORDER_MAX); 214 if (!stack_table) { 215 pr_err("hash table allocation failed, disabling\n"); 216 stack_depot_disabled = true; 217 return -ENOMEM; 218 } 219 if (!entries) { 220 /* 221 * Obtain the number of entries that was calculated by 222 * alloc_large_system_hash(). 223 */ 224 entries = stack_hash_mask + 1; 225 } 226 init_stack_table(entries); 227 228 return 0; 229 } 230 231 /* Allocates a hash table via kvcalloc. Can be used after boot. */ 232 int stack_depot_init(void) 233 { 234 static DEFINE_MUTEX(stack_depot_init_mutex); 235 unsigned long entries; 236 int ret = 0; 237 238 mutex_lock(&stack_depot_init_mutex); 239 240 if (stack_depot_disabled || stack_table) 241 goto out_unlock; 242 243 /* 244 * Similarly to stack_depot_early_init, use stack_bucket_number_order 245 * if assigned, and rely on automatic scaling otherwise. 246 */ 247 if (stack_bucket_number_order) { 248 entries = 1UL << stack_bucket_number_order; 249 } else { 250 int scale = STACK_HASH_TABLE_SCALE; 251 252 entries = nr_free_buffer_pages(); 253 entries = roundup_pow_of_two(entries); 254 255 if (scale > PAGE_SHIFT) 256 entries >>= (scale - PAGE_SHIFT); 257 else 258 entries <<= (PAGE_SHIFT - scale); 259 } 260 261 if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN) 262 entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN; 263 if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX) 264 entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX; 265 266 pr_info("allocating hash table of %lu entries via kvcalloc\n", entries); 267 stack_table = kvcalloc(entries, sizeof(struct list_head), GFP_KERNEL); 268 if (!stack_table) { 269 pr_err("hash table allocation failed, disabling\n"); 270 stack_depot_disabled = true; 271 ret = -ENOMEM; 272 goto out_unlock; 273 } 274 stack_hash_mask = entries - 1; 275 init_stack_table(entries); 276 277 out_unlock: 278 mutex_unlock(&stack_depot_init_mutex); 279 280 return ret; 281 } 282 EXPORT_SYMBOL_GPL(stack_depot_init); 283 284 /* 285 * Initializes new stack pool, and updates the list of pools. 286 */ 287 static bool depot_init_pool(void **prealloc) 288 { 289 lockdep_assert_held(&pool_lock); 290 291 if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { 292 /* Bail out if we reached the pool limit. */ 293 WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */ 294 WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */ 295 WARN_ONCE(1, "Stack depot reached limit capacity"); 296 return false; 297 } 298 299 if (!new_pool && *prealloc) { 300 /* We have preallocated memory, use it. */ 301 WRITE_ONCE(new_pool, *prealloc); 302 *prealloc = NULL; 303 } 304 305 if (!new_pool) 306 return false; /* new_pool and *prealloc are NULL */ 307 308 /* Save reference to the pool to be used by depot_fetch_stack(). */ 309 stack_pools[pools_num] = new_pool; 310 311 /* 312 * Stack depot tries to keep an extra pool allocated even before it runs 313 * out of space in the currently used pool. 314 * 315 * To indicate that a new preallocation is needed new_pool is reset to 316 * NULL; do not reset to NULL if we have reached the maximum number of 317 * pools. 318 */ 319 if (pools_num < DEPOT_MAX_POOLS) 320 WRITE_ONCE(new_pool, NULL); 321 else 322 WRITE_ONCE(new_pool, STACK_DEPOT_POISON); 323 324 /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */ 325 WRITE_ONCE(pools_num, pools_num + 1); 326 ASSERT_EXCLUSIVE_WRITER(pools_num); 327 328 pool_offset = 0; 329 330 return true; 331 } 332 333 /* Keeps the preallocated memory to be used for a new stack depot pool. */ 334 static void depot_keep_new_pool(void **prealloc) 335 { 336 lockdep_assert_held(&pool_lock); 337 338 /* 339 * If a new pool is already saved or the maximum number of 340 * pools is reached, do not use the preallocated memory. 341 */ 342 if (new_pool) 343 return; 344 345 WRITE_ONCE(new_pool, *prealloc); 346 *prealloc = NULL; 347 } 348 349 /* 350 * Try to initialize a new stack record from the current pool, a cached pool, or 351 * the current pre-allocation. 352 */ 353 static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size) 354 { 355 struct stack_record *stack; 356 void *current_pool; 357 u32 pool_index; 358 359 lockdep_assert_held(&pool_lock); 360 361 if (pool_offset + size > DEPOT_POOL_SIZE) { 362 if (!depot_init_pool(prealloc)) 363 return NULL; 364 } 365 366 if (WARN_ON_ONCE(pools_num < 1)) 367 return NULL; 368 pool_index = pools_num - 1; 369 current_pool = stack_pools[pool_index]; 370 if (WARN_ON_ONCE(!current_pool)) 371 return NULL; 372 373 stack = current_pool + pool_offset; 374 375 /* Pre-initialize handle once. */ 376 stack->handle.pool_index = pool_index + 1; 377 stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN; 378 stack->handle.extra = 0; 379 INIT_LIST_HEAD(&stack->hash_list); 380 381 pool_offset += size; 382 383 return stack; 384 } 385 386 /* Try to find next free usable entry from the freelist. */ 387 static struct stack_record *depot_pop_free(void) 388 { 389 struct stack_record *stack; 390 391 lockdep_assert_held(&pool_lock); 392 393 if (list_empty(&free_stacks)) 394 return NULL; 395 396 /* 397 * We maintain the invariant that the elements in front are least 398 * recently used, and are therefore more likely to be associated with an 399 * RCU grace period in the past. Consequently it is sufficient to only 400 * check the first entry. 401 */ 402 stack = list_first_entry(&free_stacks, struct stack_record, free_list); 403 if (!poll_state_synchronize_rcu(stack->rcu_state)) 404 return NULL; 405 406 list_del(&stack->free_list); 407 counters[DEPOT_COUNTER_FREELIST_SIZE]--; 408 409 return stack; 410 } 411 412 static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries) 413 { 414 const size_t used = flex_array_size(s, entries, nr_entries); 415 const size_t unused = sizeof(s->entries) - used; 416 417 WARN_ON_ONCE(sizeof(s->entries) < used); 418 419 return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN); 420 } 421 422 /* Allocates a new stack in a stack depot pool. */ 423 static struct stack_record * 424 depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc) 425 { 426 struct stack_record *stack = NULL; 427 size_t record_size; 428 429 lockdep_assert_held(&pool_lock); 430 431 /* This should already be checked by public API entry points. */ 432 if (WARN_ON_ONCE(!nr_entries)) 433 return NULL; 434 435 /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ 436 if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES) 437 nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES; 438 439 if (flags & STACK_DEPOT_FLAG_GET) { 440 /* 441 * Evictable entries have to allocate the max. size so they may 442 * safely be re-used by differently sized allocations. 443 */ 444 record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES); 445 stack = depot_pop_free(); 446 } else { 447 record_size = depot_stack_record_size(stack, nr_entries); 448 } 449 450 if (!stack) { 451 stack = depot_pop_free_pool(prealloc, record_size); 452 if (!stack) 453 return NULL; 454 } 455 456 /* Save the stack trace. */ 457 stack->hash = hash; 458 stack->size = nr_entries; 459 /* stack->handle is already filled in by depot_pop_free_pool(). */ 460 memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries)); 461 462 if (flags & STACK_DEPOT_FLAG_GET) { 463 refcount_set(&stack->count, 1); 464 counters[DEPOT_COUNTER_REFD_ALLOCS]++; 465 counters[DEPOT_COUNTER_REFD_INUSE]++; 466 } else { 467 /* Warn on attempts to switch to refcounting this entry. */ 468 refcount_set(&stack->count, REFCOUNT_SATURATED); 469 counters[DEPOT_COUNTER_PERSIST_COUNT]++; 470 counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size; 471 } 472 473 /* 474 * Let KMSAN know the stored stack record is initialized. This shall 475 * prevent false positive reports if instrumented code accesses it. 476 */ 477 kmsan_unpoison_memory(stack, record_size); 478 479 return stack; 480 } 481 482 static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) 483 { 484 const int pools_num_cached = READ_ONCE(pools_num); 485 union handle_parts parts = { .handle = handle }; 486 void *pool; 487 u32 pool_index = parts.pool_index - 1; 488 size_t offset = parts.offset << DEPOT_STACK_ALIGN; 489 struct stack_record *stack; 490 491 lockdep_assert_not_held(&pool_lock); 492 493 if (pool_index > pools_num_cached) { 494 WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", 495 pool_index, pools_num_cached, handle); 496 return NULL; 497 } 498 499 pool = stack_pools[pool_index]; 500 if (WARN_ON(!pool)) 501 return NULL; 502 503 stack = pool + offset; 504 if (WARN_ON(!refcount_read(&stack->count))) 505 return NULL; 506 507 return stack; 508 } 509 510 /* Links stack into the freelist. */ 511 static void depot_free_stack(struct stack_record *stack) 512 { 513 unsigned long flags; 514 515 lockdep_assert_not_held(&pool_lock); 516 517 raw_spin_lock_irqsave(&pool_lock, flags); 518 printk_deferred_enter(); 519 520 /* 521 * Remove the entry from the hash list. Concurrent list traversal may 522 * still observe the entry, but since the refcount is zero, this entry 523 * will no longer be considered as valid. 524 */ 525 list_del_rcu(&stack->hash_list); 526 527 /* 528 * Due to being used from constrained contexts such as the allocators, 529 * NMI, or even RCU itself, stack depot cannot rely on primitives that 530 * would sleep (such as synchronize_rcu()) or recursively call into 531 * stack depot again (such as call_rcu()). 532 * 533 * Instead, get an RCU cookie, so that we can ensure this entry isn't 534 * moved onto another list until the next grace period, and concurrent 535 * RCU list traversal remains safe. 536 */ 537 stack->rcu_state = get_state_synchronize_rcu(); 538 539 /* 540 * Add the entry to the freelist tail, so that older entries are 541 * considered first - their RCU cookie is more likely to no longer be 542 * associated with the current grace period. 543 */ 544 list_add_tail(&stack->free_list, &free_stacks); 545 546 counters[DEPOT_COUNTER_FREELIST_SIZE]++; 547 counters[DEPOT_COUNTER_REFD_FREES]++; 548 counters[DEPOT_COUNTER_REFD_INUSE]--; 549 550 printk_deferred_exit(); 551 raw_spin_unlock_irqrestore(&pool_lock, flags); 552 } 553 554 /* Calculates the hash for a stack. */ 555 static inline u32 hash_stack(unsigned long *entries, unsigned int size) 556 { 557 return jhash2((u32 *)entries, 558 array_size(size, sizeof(*entries)) / sizeof(u32), 559 STACK_HASH_SEED); 560 } 561 562 /* 563 * Non-instrumented version of memcmp(). 564 * Does not check the lexicographical order, only the equality. 565 */ 566 static inline 567 int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, 568 unsigned int n) 569 { 570 for ( ; n-- ; u1++, u2++) { 571 if (*u1 != *u2) 572 return 1; 573 } 574 return 0; 575 } 576 577 /* Finds a stack in a bucket of the hash table. */ 578 static inline struct stack_record *find_stack(struct list_head *bucket, 579 unsigned long *entries, int size, 580 u32 hash, depot_flags_t flags) 581 { 582 struct stack_record *stack, *ret = NULL; 583 584 /* 585 * Stack depot may be used from instrumentation that instruments RCU or 586 * tracing itself; use variant that does not call into RCU and cannot be 587 * traced. 588 * 589 * Note: Such use cases must take care when using refcounting to evict 590 * unused entries, because the stack record free-then-reuse code paths 591 * do call into RCU. 592 */ 593 rcu_read_lock_sched_notrace(); 594 595 list_for_each_entry_rcu(stack, bucket, hash_list) { 596 if (stack->hash != hash || stack->size != size) 597 continue; 598 599 /* 600 * This may race with depot_free_stack() accessing the freelist 601 * management state unioned with @entries. The refcount is zero 602 * in that case and the below refcount_inc_not_zero() will fail. 603 */ 604 if (data_race(stackdepot_memcmp(entries, stack->entries, size))) 605 continue; 606 607 /* 608 * Try to increment refcount. If this succeeds, the stack record 609 * is valid and has not yet been freed. 610 * 611 * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior 612 * to then call stack_depot_put() later, and we can assume that 613 * a stack record is never placed back on the freelist. 614 */ 615 if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count)) 616 continue; 617 618 ret = stack; 619 break; 620 } 621 622 rcu_read_unlock_sched_notrace(); 623 624 return ret; 625 } 626 627 depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, 628 unsigned int nr_entries, 629 gfp_t alloc_flags, 630 depot_flags_t depot_flags) 631 { 632 struct list_head *bucket; 633 struct stack_record *found = NULL; 634 depot_stack_handle_t handle = 0; 635 struct page *page = NULL; 636 void *prealloc = NULL; 637 bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC; 638 unsigned long flags; 639 u32 hash; 640 641 if (WARN_ON(depot_flags & ~STACK_DEPOT_FLAGS_MASK)) 642 return 0; 643 644 /* 645 * If this stack trace is from an interrupt, including anything before 646 * interrupt entry usually leads to unbounded stack depot growth. 647 * 648 * Since use of filter_irq_stacks() is a requirement to ensure stack 649 * depot can efficiently deduplicate interrupt stacks, always 650 * filter_irq_stacks() to simplify all callers' use of stack depot. 651 */ 652 nr_entries = filter_irq_stacks(entries, nr_entries); 653 654 if (unlikely(nr_entries == 0) || stack_depot_disabled) 655 return 0; 656 657 hash = hash_stack(entries, nr_entries); 658 bucket = &stack_table[hash & stack_hash_mask]; 659 660 /* Fast path: look the stack trace up without locking. */ 661 found = find_stack(bucket, entries, nr_entries, hash, depot_flags); 662 if (found) 663 goto exit; 664 665 /* 666 * Allocate memory for a new pool if required now: 667 * we won't be able to do that under the lock. 668 */ 669 if (unlikely(can_alloc && !READ_ONCE(new_pool))) { 670 /* 671 * Zero out zone modifiers, as we don't have specific zone 672 * requirements. Keep the flags related to allocation in atomic 673 * contexts and I/O. 674 */ 675 alloc_flags &= ~GFP_ZONEMASK; 676 alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); 677 alloc_flags |= __GFP_NOWARN; 678 page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER); 679 if (page) 680 prealloc = page_address(page); 681 } 682 683 raw_spin_lock_irqsave(&pool_lock, flags); 684 printk_deferred_enter(); 685 686 /* Try to find again, to avoid concurrently inserting duplicates. */ 687 found = find_stack(bucket, entries, nr_entries, hash, depot_flags); 688 if (!found) { 689 struct stack_record *new = 690 depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc); 691 692 if (new) { 693 /* 694 * This releases the stack record into the bucket and 695 * makes it visible to readers in find_stack(). 696 */ 697 list_add_rcu(&new->hash_list, bucket); 698 found = new; 699 } 700 } 701 702 if (prealloc) { 703 /* 704 * Either stack depot already contains this stack trace, or 705 * depot_alloc_stack() did not consume the preallocated memory. 706 * Try to keep the preallocated memory for future. 707 */ 708 depot_keep_new_pool(&prealloc); 709 } 710 711 printk_deferred_exit(); 712 raw_spin_unlock_irqrestore(&pool_lock, flags); 713 exit: 714 if (prealloc) { 715 /* Stack depot didn't use this memory, free it. */ 716 free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER); 717 } 718 if (found) 719 handle = found->handle.handle; 720 return handle; 721 } 722 EXPORT_SYMBOL_GPL(stack_depot_save_flags); 723 724 depot_stack_handle_t stack_depot_save(unsigned long *entries, 725 unsigned int nr_entries, 726 gfp_t alloc_flags) 727 { 728 return stack_depot_save_flags(entries, nr_entries, alloc_flags, 729 STACK_DEPOT_FLAG_CAN_ALLOC); 730 } 731 EXPORT_SYMBOL_GPL(stack_depot_save); 732 733 unsigned int stack_depot_fetch(depot_stack_handle_t handle, 734 unsigned long **entries) 735 { 736 struct stack_record *stack; 737 738 *entries = NULL; 739 /* 740 * Let KMSAN know *entries is initialized. This shall prevent false 741 * positive reports if instrumented code accesses it. 742 */ 743 kmsan_unpoison_memory(entries, sizeof(*entries)); 744 745 if (!handle || stack_depot_disabled) 746 return 0; 747 748 stack = depot_fetch_stack(handle); 749 /* 750 * Should never be NULL, otherwise this is a use-after-put (or just a 751 * corrupt handle). 752 */ 753 if (WARN(!stack, "corrupt handle or use after stack_depot_put()")) 754 return 0; 755 756 *entries = stack->entries; 757 return stack->size; 758 } 759 EXPORT_SYMBOL_GPL(stack_depot_fetch); 760 761 void stack_depot_put(depot_stack_handle_t handle) 762 { 763 struct stack_record *stack; 764 765 if (!handle || stack_depot_disabled) 766 return; 767 768 stack = depot_fetch_stack(handle); 769 /* 770 * Should always be able to find the stack record, otherwise this is an 771 * unbalanced put attempt (or corrupt handle). 772 */ 773 if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()")) 774 return; 775 776 if (refcount_dec_and_test(&stack->count)) 777 depot_free_stack(stack); 778 } 779 EXPORT_SYMBOL_GPL(stack_depot_put); 780 781 void stack_depot_print(depot_stack_handle_t stack) 782 { 783 unsigned long *entries; 784 unsigned int nr_entries; 785 786 nr_entries = stack_depot_fetch(stack, &entries); 787 if (nr_entries > 0) 788 stack_trace_print(entries, nr_entries, 0); 789 } 790 EXPORT_SYMBOL_GPL(stack_depot_print); 791 792 int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, 793 int spaces) 794 { 795 unsigned long *entries; 796 unsigned int nr_entries; 797 798 nr_entries = stack_depot_fetch(handle, &entries); 799 return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries, 800 spaces) : 0; 801 } 802 EXPORT_SYMBOL_GPL(stack_depot_snprint); 803 804 depot_stack_handle_t __must_check stack_depot_set_extra_bits( 805 depot_stack_handle_t handle, unsigned int extra_bits) 806 { 807 union handle_parts parts = { .handle = handle }; 808 809 /* Don't set extra bits on empty handles. */ 810 if (!handle) 811 return 0; 812 813 parts.extra = extra_bits; 814 return parts.handle; 815 } 816 EXPORT_SYMBOL(stack_depot_set_extra_bits); 817 818 unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) 819 { 820 union handle_parts parts = { .handle = handle }; 821 822 return parts.extra; 823 } 824 EXPORT_SYMBOL(stack_depot_get_extra_bits); 825 826 static int stats_show(struct seq_file *seq, void *v) 827 { 828 /* 829 * data race ok: These are just statistics counters, and approximate 830 * statistics are ok for debugging. 831 */ 832 seq_printf(seq, "pools: %d\n", data_race(pools_num)); 833 for (int i = 0; i < DEPOT_COUNTER_COUNT; i++) 834 seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i])); 835 836 return 0; 837 } 838 DEFINE_SHOW_ATTRIBUTE(stats); 839 840 static int depot_debugfs_init(void) 841 { 842 struct dentry *dir; 843 844 if (stack_depot_disabled) 845 return 0; 846 847 dir = debugfs_create_dir("stackdepot", NULL); 848 debugfs_create_file("stats", 0444, dir, NULL, &stats_fops); 849 return 0; 850 } 851 late_initcall(depot_debugfs_init); 852