//===-- memprof_allocator.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler, a memory profiler.
//
// Implementation of MemProf's memory allocator, which uses the allocator
// from sanitizer_common.
//
//===----------------------------------------------------------------------===//

#include "memprof_allocator.h"
#include "memprof_mapping.h"
#include "memprof_stack.h"
#include "memprof_thread.h"
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_allocator_report.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_file.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_list.h"
#include "sanitizer_common/sanitizer_stackdepot.h"

#include <sched.h>
#include <stdlib.h>
#include <time.h>

namespace __memprof {

static int GetCpuId(void) {
  // _memprof_preinit is called via the preinit_array, which subsequently calls
  // malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu
  // will seg fault as the address of __vdso_getcpu will be null.
  if (!memprof_init_done)
    return -1;
  return sched_getcpu();
}

// Compute the timestamp in ms.
static int GetTimestamp(void) {
  // timespec_get will segfault if called from dl_init
  if (!memprof_timestamp_inited) {
    // By returning 0, this will be effectively treated as being
    // timestamped at memprof init time (when memprof_init_timestamp_s
    // is initialized).
    return 0;
  }
  timespec ts;
  clock_gettime(CLOCK_REALTIME, &ts);
  return (ts.tv_sec - memprof_init_timestamp_s) * 1000 + ts.tv_nsec / 1000000;
}

static MemprofAllocator &get_allocator();

// The memory chunk allocated from the underlying allocator looks like this:
// H H U U U U U U
//   H -- ChunkHeader (32 bytes)
//   U -- user memory.

// If there is left padding before the ChunkHeader (due to use of memalign),
// we store a magic value in the first uptr word of the memory block and
// store the address of ChunkHeader in the next uptr.
// M B L L L L L L L L L  H H U U U U U U
//   |                    ^
//   ---------------------|
//   M -- magic value kAllocBegMagic
//   B -- address of ChunkHeader pointing to the first 'H'

constexpr uptr kMaxAllowedMallocBits = 40;

// Should be no more than 32 bytes.
struct ChunkHeader {
  // 1-st 4 bytes.
  u32 alloc_context_id;
  // 2-nd 4 bytes
  u32 cpu_id;
  // 3-rd 4 bytes
  u32 timestamp_ms;
  // 4-th 4 bytes
  // Note only 1 bit is needed for this flag if we need space in the future for
  // more fields.
  u32 from_memalign;
  // 5-th and 6-th 4 bytes
  // The max size of an allocation is 2^40 (kMaxAllowedMallocSize), so this
  // could be shrunk to kMaxAllowedMallocBits if we need space in the future for
  // more fields.
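  // Stored and read atomically: Allocate() publishes the size with a release
  // store, and Deallocate() claims the chunk by exchanging it back to 0.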
  atomic_uint64_t user_requested_size;
  // 23 bits available
  // 7-th and 8-th 4 bytes
  u64 data_type_id; // TODO: hash of type name
};

static const uptr kChunkHeaderSize = sizeof(ChunkHeader);
COMPILER_CHECK(kChunkHeaderSize == 32);

struct MemprofChunk : ChunkHeader {
  uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; }
  uptr UsedSize() {
    return atomic_load(&user_requested_size, memory_order_relaxed);
  }
  void *AllocBeg() {
    if (from_memalign)
      return get_allocator().GetBlockBegin(reinterpret_cast<void *>(this));
    return reinterpret_cast<void *>(this);
  }
};

class LargeChunkHeader {
  static constexpr uptr kAllocBegMagic =
      FIRST_32_SECOND_64(0xCC6E96B9, 0xCC6E96B9CC6E96B9ULL);
  atomic_uintptr_t magic;
  MemprofChunk *chunk_header;

public:
  MemprofChunk *Get() const {
    return atomic_load(&magic, memory_order_acquire) == kAllocBegMagic
               ? chunk_header
               : nullptr;
  }

  void Set(MemprofChunk *p) {
    if (p) {
      chunk_header = p;
      atomic_store(&magic, kAllocBegMagic, memory_order_release);
      return;
    }

    uptr old = kAllocBegMagic;
    if (!atomic_compare_exchange_strong(&magic, &old, 0,
                                        memory_order_release)) {
      CHECK_EQ(old, kAllocBegMagic);
    }
  }
};

void FlushUnneededMemProfShadowMemory(uptr p, uptr size) {
  // Since memprof's mapping is compacting, the shadow chunk may not be
  // page-aligned, so we only flush the page-aligned portion.
  ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size));
}

void MemprofMapUnmapCallback::OnMap(uptr p, uptr size) const {
  // Statistics.
  MemprofStats &thread_stats = GetCurrentThreadStats();
  thread_stats.mmaps++;
  thread_stats.mmaped += size;
}
void MemprofMapUnmapCallback::OnUnmap(uptr p, uptr size) const {
  // We are about to unmap a chunk of user memory.
  // Mark the corresponding shadow memory as not needed.
  FlushUnneededMemProfShadowMemory(p, size);
  // Statistics.
  MemprofStats &thread_stats = GetCurrentThreadStats();
  thread_stats.munmaps++;
  thread_stats.munmaped += size;
}

AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) {
  CHECK(ms);
  return &ms->allocator_cache;
}

struct MemInfoBlock {
  u32 alloc_count;
  u64 total_access_count, min_access_count, max_access_count;
  u64 total_size;
  u32 min_size, max_size;
  u32 alloc_timestamp, dealloc_timestamp;
  u64 total_lifetime;
  u32 min_lifetime, max_lifetime;
  u32 alloc_cpu_id, dealloc_cpu_id;
  u32 num_migrated_cpu;

  // Only compared to prior deallocated object currently.
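  // See Merge() below for how these counters are updated as each newly
  // deallocated object is folded in.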
  u32 num_lifetime_overlaps;
  u32 num_same_alloc_cpu;
  u32 num_same_dealloc_cpu;

  u64 data_type_id; // TODO: hash of type name

  MemInfoBlock() : alloc_count(0) {}

  MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp,
               u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu)
      : alloc_count(1), total_access_count(access_count),
        min_access_count(access_count), max_access_count(access_count),
        total_size(size), min_size(size), max_size(size),
        alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp),
        total_lifetime(dealloc_timestamp - alloc_timestamp),
        min_lifetime(total_lifetime), max_lifetime(total_lifetime),
        alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu),
        num_lifetime_overlaps(0), num_same_alloc_cpu(0),
        num_same_dealloc_cpu(0) {
    num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id;
  }

  void Print(u64 id) {
    u64 p;
    if (flags()->print_terse) {
      p = total_size * 100 / alloc_count;
      Printf("MIB:%llu/%u/%llu.%02llu/%u/%u/", id, alloc_count, p / 100, p % 100,
             min_size, max_size);
      p = total_access_count * 100 / alloc_count;
      Printf("%llu.%02llu/%llu/%llu/", p / 100, p % 100, min_access_count,
             max_access_count);
      p = total_lifetime * 100 / alloc_count;
      Printf("%llu.%02llu/%u/%u/", p / 100, p % 100, min_lifetime, max_lifetime);
      Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps,
             num_same_alloc_cpu, num_same_dealloc_cpu);
    } else {
      p = total_size * 100 / alloc_count;
      Printf("Memory allocation stack id = %llu\n", id);
      Printf("\talloc_count %u, size (ave/min/max) %llu.%02llu / %u / %u\n",
             alloc_count, p / 100, p % 100, min_size, max_size);
      p = total_access_count * 100 / alloc_count;
      Printf("\taccess_count (ave/min/max): %llu.%02llu / %llu / %llu\n", p / 100,
             p % 100, min_access_count, max_access_count);
      p = total_lifetime * 100 / alloc_count;
      Printf("\tlifetime (ave/min/max): %llu.%02llu / %u / %u\n", p / 100, p % 100,
             min_lifetime, max_lifetime);
      Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc "
             "cpu: %u, num same dealloc_cpu: %u\n",
             num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu,
             num_same_dealloc_cpu);
    }
  }

  static void printHeader() {
    CHECK(flags()->print_terse);
    Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/"
           "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/"
           "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/"
           "NumSameDeallocCpu\n");
  }

  void Merge(MemInfoBlock &newMIB) {
    alloc_count += newMIB.alloc_count;

    total_access_count += newMIB.total_access_count;
    min_access_count = Min(min_access_count, newMIB.min_access_count);
    max_access_count = Max(max_access_count, newMIB.max_access_count);

    total_size += newMIB.total_size;
    min_size = Min(min_size, newMIB.min_size);
    max_size = Max(max_size, newMIB.max_size);

    total_lifetime += newMIB.total_lifetime;
    min_lifetime = Min(min_lifetime, newMIB.min_lifetime);
    max_lifetime = Max(max_lifetime, newMIB.max_lifetime);

    // We know newMIB was deallocated later, so we just need to check whether
    // it was allocated before the last one was deallocated.
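    // After the comparison, the timestamps (and the CPU ids below) are
    // overwritten with the newest MIB's values, so the next Merge compares
    // against the most recently deallocated object.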
    num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp;
    alloc_timestamp = newMIB.alloc_timestamp;
    dealloc_timestamp = newMIB.dealloc_timestamp;

    num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id;
    num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id;
    alloc_cpu_id = newMIB.alloc_cpu_id;
    dealloc_cpu_id = newMIB.dealloc_cpu_id;
  }
};

struct SetEntry {
  SetEntry() : id(0), MIB() {}
  bool Empty() { return id == 0; }
  void Print() {
    CHECK(!Empty());
    MIB.Print(id);
  }
  // The stack id
  u64 id;
  MemInfoBlock MIB;
};

struct CacheSet {
  enum { kSetSize = 4 };

  void PrintAll() {
    for (int i = 0; i < kSetSize; i++) {
      if (Entries[i].Empty())
        continue;
      Entries[i].Print();
    }
  }
  void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
    SpinMutexLock l(&SetMutex);
    AccessCount++;

    for (int i = 0; i < kSetSize; i++) {
      auto id = Entries[i].id;
      // Check if this is a hit or an empty entry. Since we always move any
      // filled locations to the front of the array (see below), we don't need
      // to look past the first empty entry.
      if (id == new_id || !id) {
        if (id == 0) {
          Entries[i].id = new_id;
          Entries[i].MIB = newMIB;
        } else {
          Entries[i].MIB.Merge(newMIB);
        }
        // Assuming some id locality, we try to swap the matching entry
        // into the first set position.
        if (i != 0) {
          auto tmp = Entries[0];
          Entries[0] = Entries[i];
          Entries[i] = tmp;
        }
        return;
      }
    }

    // Miss
    MissCount++;

    // We try to find the entry with the lowest alloc count to be evicted:
    int min_idx = 0;
    u64 min_count = Entries[0].MIB.alloc_count;
    for (int i = 1; i < kSetSize; i++) {
      CHECK(!Entries[i].Empty());
      if (Entries[i].MIB.alloc_count < min_count) {
        min_idx = i;
        min_count = Entries[i].MIB.alloc_count;
      }
    }

    // Print the evicted entry's profile information.
    if (!flags()->print_terse)
      Printf("Evicted:\n");
    Entries[min_idx].Print();

    // Similar to the hit case, put the new MIB in the first set position.
    if (min_idx != 0)
      Entries[min_idx] = Entries[0];
    Entries[0].id = new_id;
    Entries[0].MIB = newMIB;
  }

  void PrintMissRate(int i) {
    u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0;
    Printf("Set %d miss rate: %d / %d = %5llu.%02llu%%\n", i, MissCount,
           AccessCount, p / 100, p % 100);
  }

  SetEntry Entries[kSetSize];
  u32 AccessCount = 0;
  u32 MissCount = 0;
  SpinMutex SetMutex;
};

struct MemInfoBlockCache {
  MemInfoBlockCache() {
    if (common_flags()->print_module_map)
      DumpProcessMap();
    if (flags()->print_terse)
      MemInfoBlock::printHeader();
    Sets =
        (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries);
    Constructed = true;
  }

  ~MemInfoBlockCache() { free(Sets); }

  void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
    u64 hv = new_id;

    // Use the mod method; the number of entries should be a prime close to a
    // power of 2.
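    // new_id is the stack depot id of the allocation context, so a simple mod
    // maps it onto one of the cache sets.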
    hv %= flags()->mem_info_cache_entries;

    return Sets[hv].insertOrMerge(new_id, newMIB);
  }

  void PrintAll() {
    for (int i = 0; i < flags()->mem_info_cache_entries; i++) {
      Sets[i].PrintAll();
    }
  }

  void PrintMissRate() {
    if (!flags()->print_mem_info_cache_miss_rate)
      return;
    u64 MissCountSum = 0;
    u64 AccessCountSum = 0;
    for (int i = 0; i < flags()->mem_info_cache_entries; i++) {
      MissCountSum += Sets[i].MissCount;
      AccessCountSum += Sets[i].AccessCount;
    }
    u64 p = AccessCountSum ? MissCountSum * 10000ULL / AccessCountSum : 0;
    Printf("Overall miss rate: %llu / %llu = %5llu.%02llu%%\n", MissCountSum,
           AccessCountSum, p / 100, p % 100);
    if (flags()->print_mem_info_cache_miss_rate_details)
      for (int i = 0; i < flags()->mem_info_cache_entries; i++)
        Sets[i].PrintMissRate(i);
  }

  CacheSet *Sets;
  // Flag set once the Sets have been allocated, in case a deallocation is
  // called very early, before the static init of the Allocator (and therefore
  // of this table) has completed.
  bool Constructed = false;
};

// Accumulates the access count from the shadow for the given pointer and size.
u64 GetShadowCount(uptr p, u32 size) {
  u64 *shadow = (u64 *)MEM_TO_SHADOW(p);
  u64 *shadow_end = (u64 *)MEM_TO_SHADOW(p + size);
  u64 count = 0;
  for (; shadow <= shadow_end; shadow++)
    count += *shadow;
  return count;
}

// Clears the shadow counters (when memory is allocated).
void ClearShadow(uptr addr, uptr size) {
  CHECK(AddrIsAlignedByGranularity(addr));
  CHECK(AddrIsInMem(addr));
  CHECK(AddrIsAlignedByGranularity(addr + size));
  CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY));
  CHECK(REAL(memset));
  uptr shadow_beg = MEM_TO_SHADOW(addr);
  uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
  if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
    REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
  } else {
    uptr page_size = GetPageSizeCached();
    uptr page_beg = RoundUpTo(shadow_beg, page_size);
    uptr page_end = RoundDownTo(shadow_end, page_size);

    if (page_beg >= page_end) {
      REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
    } else {
      if (page_beg != shadow_beg) {
        REAL(memset)((void *)shadow_beg, 0, page_beg - shadow_beg);
      }
      if (page_end != shadow_end) {
        REAL(memset)((void *)page_end, 0, shadow_end - page_end);
      }
      ReserveShadowMemoryRange(page_beg, page_end - 1, nullptr);
    }
  }
}

struct Allocator {
  static const uptr kMaxAllowedMallocSize = 1ULL << kMaxAllowedMallocBits;

  MemprofAllocator allocator;
  StaticSpinMutex fallback_mutex;
  AllocatorCache fallback_allocator_cache;

  uptr max_user_defined_malloc_size;
  atomic_uint8_t rss_limit_exceeded;

  MemInfoBlockCache MemInfoBlockTable;
  bool destructing;

  // ------------------- Initialization ------------------------
  explicit Allocator(LinkerInitialized) : destructing(false) {}

  ~Allocator() { FinishAndPrint(); }

  void FinishAndPrint() {
    if (!flags()->print_terse)
      Printf("Live on exit:\n");
    allocator.ForceLock();
    allocator.ForEachChunk(
        [](uptr chunk, void *alloc) {
          u64 user_requested_size;
          MemprofChunk *m =
              ((Allocator *)alloc)
                  ->GetMemprofChunk((void *)chunk, user_requested_size);
          if (!m)
            return;
          uptr user_beg = ((uptr)m) + kChunkHeaderSize;
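          // The chunk is still live at exit; record it as if it were
          // deallocated now, using the current time and CPU.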
          u64 c = GetShadowCount(user_beg, user_requested_size);
          long curtime = GetTimestamp();
          MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
                              m->cpu_id, GetCpuId());
          ((Allocator *)alloc)
              ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
        },
        this);
    allocator.ForceUnlock();

    destructing = true;
    MemInfoBlockTable.PrintMissRate();
    MemInfoBlockTable.PrintAll();
    StackDepotPrintAll();
  }

  void InitLinkerInitialized() {
    SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
    allocator.InitLinkerInitialized(
        common_flags()->allocator_release_to_os_interval_ms);
    max_user_defined_malloc_size = common_flags()->max_allocation_size_mb
                                       ? common_flags()->max_allocation_size_mb
                                             << 20
                                       : kMaxAllowedMallocSize;
  }

  bool RssLimitExceeded() {
    return atomic_load(&rss_limit_exceeded, memory_order_relaxed);
  }

  void SetRssLimitExceeded(bool limit_exceeded) {
    atomic_store(&rss_limit_exceeded, limit_exceeded, memory_order_relaxed);
  }

  // -------------------- Allocation/Deallocation routines ---------------
  void *Allocate(uptr size, uptr alignment, BufferedStackTrace *stack,
                 AllocType alloc_type) {
    if (UNLIKELY(!memprof_inited))
      MemprofInitFromRtl();
    if (RssLimitExceeded()) {
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportRssLimitExceeded(stack);
    }
    CHECK(stack);
    const uptr min_alignment = MEMPROF_ALIGNMENT;
    if (alignment < min_alignment)
      alignment = min_alignment;
    if (size == 0) {
      // We'd be happy to avoid allocating memory for zero-size requests, but
      // some programs/tests depend on this behavior and assume that malloc
      // would not return NULL even for zero-size allocations. Moreover, it
      // looks like operator new should never return NULL, and results of
      // consecutive "new" calls must be different even if the allocated size
      // is zero.
      size = 1;
    }
    CHECK(IsPowerOfTwo(alignment));
    uptr rounded_size = RoundUpTo(size, alignment);
    uptr needed_size = rounded_size + kChunkHeaderSize;
    if (alignment > min_alignment)
      needed_size += alignment;
    CHECK(IsAligned(needed_size, min_alignment));
    if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize ||
        size > max_user_defined_malloc_size) {
      if (AllocatorMayReturnNull()) {
        Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n", size);
        return nullptr;
      }
      uptr malloc_limit =
          Min(kMaxAllowedMallocSize, max_user_defined_malloc_size);
      ReportAllocationSizeTooBig(size, malloc_limit, stack);
    }

    MemprofThread *t = GetCurrentThread();
    void *allocated;
    if (t) {
      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
      allocated = allocator.Allocate(cache, needed_size, 8);
    } else {
      SpinMutexLock l(&fallback_mutex);
      AllocatorCache *cache = &fallback_allocator_cache;
      allocated = allocator.Allocate(cache, needed_size, 8);
    }
    if (UNLIKELY(!allocated)) {
      SetAllocatorOutOfMemory();
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportOutOfMemory(size, stack);
    }

    uptr alloc_beg = reinterpret_cast<uptr>(allocated);
    uptr alloc_end = alloc_beg + needed_size;
    uptr beg_plus_header = alloc_beg + kChunkHeaderSize;
    uptr user_beg = beg_plus_header;
    if (!IsAligned(user_beg, alignment))
      user_beg = RoundUpTo(user_beg, alignment);
    uptr user_end = user_beg + size;
    CHECK_LE(user_end, alloc_end);
    uptr chunk_beg = user_beg - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
    m->from_memalign = alloc_beg != chunk_beg;
    CHECK(size);

    m->cpu_id = GetCpuId();
    m->timestamp_ms = GetTimestamp();
    m->alloc_context_id = StackDepotPut(*stack);

    uptr size_rounded_down_to_granularity =
        RoundDownTo(size, SHADOW_GRANULARITY);
    if (size_rounded_down_to_granularity)
      ClearShadow(user_beg, size_rounded_down_to_granularity);

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.mallocs++;
    thread_stats.malloced += size;
    thread_stats.malloced_overhead += needed_size - size;
    if (needed_size > SizeClassMap::kMaxSize)
      thread_stats.malloc_large++;
    else
      thread_stats.malloced_by_size[SizeClassMap::ClassID(needed_size)]++;

    void *res = reinterpret_cast<void *>(user_beg);
    atomic_store(&m->user_requested_size, size, memory_order_release);
    if (alloc_beg != chunk_beg) {
      CHECK_LE(alloc_beg + sizeof(LargeChunkHeader), chunk_beg);
      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(m);
    }
    MEMPROF_MALLOC_HOOK(res, size);
    return res;
  }

  void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment,
                  BufferedStackTrace *stack, AllocType alloc_type) {
    uptr p = reinterpret_cast<uptr>(ptr);
    if (p == 0)
      return;

    MEMPROF_FREE_HOOK(ptr);

    uptr chunk_beg = p - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);

    u64 user_requested_size =
        atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
    if (memprof_inited && memprof_init_done && !destructing &&
        MemInfoBlockTable.Constructed) {
      u64 c = GetShadowCount(p, user_requested_size);
      long curtime = GetTimestamp();

      MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
                          m->cpu_id, GetCpuId());
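      // Fold this allocation's profile into the cache, keyed by its
      // allocation stack id.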
      MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
    }

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.frees++;
    thread_stats.freed += user_requested_size;

    void *alloc_beg = m->AllocBeg();
    if (alloc_beg != m) {
      // Clear the magic value, as allocator internals may overwrite the
      // contents of deallocated chunk, confusing GetMemprofChunk lookup.
      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(nullptr);
    }

    MemprofThread *t = GetCurrentThread();
    if (t) {
      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
      allocator.Deallocate(cache, alloc_beg);
    } else {
      SpinMutexLock l(&fallback_mutex);
      AllocatorCache *cache = &fallback_allocator_cache;
      allocator.Deallocate(cache, alloc_beg);
    }
  }

  void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) {
    CHECK(old_ptr && new_size);
    uptr p = reinterpret_cast<uptr>(old_ptr);
    uptr chunk_beg = p - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.reallocs++;
    thread_stats.realloced += new_size;

    void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC);
    if (new_ptr) {
      CHECK_NE(REAL(memcpy), nullptr);
      uptr memcpy_size = Min(new_size, m->UsedSize());
      REAL(memcpy)(new_ptr, old_ptr, memcpy_size);
      Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC);
    }
    return new_ptr;
  }

  void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
    if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportCallocOverflow(nmemb, size, stack);
    }
    void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC);
    // If the memory comes from the secondary allocator, there is no need to
    // clear it as it comes directly from mmap.
    if (ptr && allocator.FromPrimary(ptr))
      REAL(memset)(ptr, 0, nmemb * size);
    return ptr;
  }

  void CommitBack(MemprofThreadLocalMallocStorage *ms,
                  BufferedStackTrace *stack) {
    AllocatorCache *ac = GetAllocatorCache(ms);
    allocator.SwallowCache(ac);
  }

  // -------------------------- Chunk lookup ----------------------

  // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg).
  MemprofChunk *GetMemprofChunk(void *alloc_beg, u64 &user_requested_size) {
    if (!alloc_beg)
      return nullptr;
    MemprofChunk *p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get();
    if (!p) {
      if (!allocator.FromPrimary(alloc_beg))
        return nullptr;
      p = reinterpret_cast<MemprofChunk *>(alloc_beg);
    }
    // The size is reset to 0 on deallocation (and a min of 1 on
    // allocation).
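    // A zero size loaded here therefore means the chunk has already been
    // deallocated (or was never a live memprof chunk).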
    user_requested_size =
        atomic_load(&p->user_requested_size, memory_order_acquire);
    if (user_requested_size)
      return p;
    return nullptr;
  }

  MemprofChunk *GetMemprofChunkByAddr(uptr p, u64 &user_requested_size) {
    void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast<void *>(p));
    return GetMemprofChunk(alloc_beg, user_requested_size);
  }

  uptr AllocationSize(uptr p) {
    u64 user_requested_size;
    MemprofChunk *m = GetMemprofChunkByAddr(p, user_requested_size);
    if (!m)
      return 0;
    if (m->Beg() != p)
      return 0;
    return user_requested_size;
  }

  void Purge(BufferedStackTrace *stack) { allocator.ForceReleaseToOS(); }

  void PrintStats() { allocator.PrintStats(); }

  void ForceLock() NO_THREAD_SAFETY_ANALYSIS {
    allocator.ForceLock();
    fallback_mutex.Lock();
  }

  void ForceUnlock() NO_THREAD_SAFETY_ANALYSIS {
    fallback_mutex.Unlock();
    allocator.ForceUnlock();
  }
};

static Allocator instance(LINKER_INITIALIZED);

static MemprofAllocator &get_allocator() { return instance.allocator; }

void InitializeAllocator() { instance.InitLinkerInitialized(); }

void MemprofThreadLocalMallocStorage::CommitBack() {
  GET_STACK_TRACE_MALLOC;
  instance.CommitBack(this, &stack);
}

void PrintInternalAllocatorStats() { instance.PrintStats(); }

void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) {
  instance.Deallocate(ptr, 0, 0, stack, alloc_type);
}

void memprof_delete(void *ptr, uptr size, uptr alignment,
                    BufferedStackTrace *stack, AllocType alloc_type) {
  instance.Deallocate(ptr, size, alignment, stack, alloc_type);
}

void *memprof_malloc(uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
}

void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(instance.Calloc(nmemb, size, stack));
}

void *memprof_reallocarray(void *p, uptr nmemb, uptr size,
                           BufferedStackTrace *stack) {
  if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
    errno = errno_ENOMEM;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportReallocArrayOverflow(nmemb, size, stack);
  }
  return memprof_realloc(p, nmemb * size, stack);
}

void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack) {
  if (!p)
    return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
  if (size == 0) {
    if (flags()->allocator_frees_and_returns_null_on_realloc_zero) {
      instance.Deallocate(p, 0, 0, stack, FROM_MALLOC);
      return nullptr;
    }
    // Allocate a size of 1 if we shouldn't free() on Realloc to 0
    size = 1;
  }
  return SetErrnoOnNull(instance.Reallocate(p, size, stack));
}

void *memprof_valloc(uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(
      instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC));
}

void *memprof_pvalloc(uptr size, BufferedStackTrace *stack) {
  uptr PageSize = GetPageSizeCached();
  if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
    errno = errno_ENOMEM;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportPvallocOverflow(size, stack);
  }
  // pvalloc(0) should allocate one page.
  size = size ? RoundUpTo(size, PageSize) : PageSize;
  return SetErrnoOnNull(instance.Allocate(size, PageSize, stack, FROM_MALLOC));
}

void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
                       AllocType alloc_type) {
  if (UNLIKELY(!IsPowerOfTwo(alignment))) {
    errno = errno_EINVAL;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportInvalidAllocationAlignment(alignment, stack);
  }
  return SetErrnoOnNull(instance.Allocate(size, alignment, stack, alloc_type));
}

void *memprof_aligned_alloc(uptr alignment, uptr size,
                            BufferedStackTrace *stack) {
  if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
    errno = errno_EINVAL;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportInvalidAlignedAllocAlignment(size, alignment, stack);
  }
  return SetErrnoOnNull(instance.Allocate(size, alignment, stack, FROM_MALLOC));
}

int memprof_posix_memalign(void **memptr, uptr alignment, uptr size,
                           BufferedStackTrace *stack) {
  if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
    if (AllocatorMayReturnNull())
      return errno_EINVAL;
    ReportInvalidPosixMemalignAlignment(alignment, stack);
  }
  void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC);
  if (UNLIKELY(!ptr))
    // OOM error is already taken care of by Allocate.
    return errno_ENOMEM;
  CHECK(IsAligned((uptr)ptr, alignment));
  *memptr = ptr;
  return 0;
}

uptr memprof_malloc_usable_size(const void *ptr, uptr pc, uptr bp) {
  if (!ptr)
    return 0;
  uptr usable_size = instance.AllocationSize(reinterpret_cast<uptr>(ptr));
  return usable_size;
}

void MemprofSoftRssLimitExceededCallback(bool limit_exceeded) {
  instance.SetRssLimitExceeded(limit_exceeded);
}

} // namespace __memprof

// ---------------------- Interface ---------------- {{{1
using namespace __memprof;

#if !SANITIZER_SUPPORTS_WEAK_HOOKS
// Provide default (no-op) implementation of malloc hooks.
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_malloc_hook, void *ptr,
                             uptr size) {
  (void)ptr;
  (void)size;
}

SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_free_hook, void *ptr) {
  (void)ptr;
}
#endif

uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; }

int __sanitizer_get_ownership(const void *p) {
  return memprof_malloc_usable_size(p, 0, 0) != 0;
}

uptr __sanitizer_get_allocated_size(const void *p) {
  return memprof_malloc_usable_size(p, 0, 0);
}

int __memprof_profile_dump() {
  instance.FinishAndPrint();
  // In the future we may want to return non-zero if there are any errors
  // detected during the dumping process.
  return 0;
}