1 //===-- memprof_allocator.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of MemProfiler, a memory profiler.
10 //
11 // Implementation of MemProf's memory allocator, which uses the allocator
12 // from sanitizer_common.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "memprof_allocator.h"
17 #include "memprof_mapping.h"
18 #include "memprof_meminfoblock.h"
19 #include "memprof_stack.h"
20 #include "memprof_thread.h"
21 #include "sanitizer_common/sanitizer_allocator_checks.h"
22 #include "sanitizer_common/sanitizer_allocator_interface.h"
23 #include "sanitizer_common/sanitizer_allocator_report.h"
24 #include "sanitizer_common/sanitizer_errno.h"
25 #include "sanitizer_common/sanitizer_file.h"
26 #include "sanitizer_common/sanitizer_flags.h"
27 #include "sanitizer_common/sanitizer_internal_defs.h"
28 #include "sanitizer_common/sanitizer_list.h"
29 #include "sanitizer_common/sanitizer_stackdepot.h"
30 
31 #include <sched.h>
32 #include <stdlib.h>
33 #include <time.h>
34 
35 namespace __memprof {
36 
37 static int GetCpuId(void) {
  // _memprof_preinit is called via the preinit_array, which subsequently calls
  // malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu
  // would segfault because the address of __vdso_getcpu is still null.
41   if (!memprof_init_done)
42     return -1;
43   return sched_getcpu();
44 }
45 
46 // Compute the timestamp in ms.
47 static int GetTimestamp(void) {
  // Querying the clock will segfault if called from dl_init (the vDSO has not
  // been set up yet).
  if (!memprof_timestamp_inited) {
    // By returning 0, the allocation is effectively treated as being
    // timestamped at memprof init time (when memprof_init_timestamp_s
    // is initialized).
53     return 0;
54   }
55   timespec ts;
56   clock_gettime(CLOCK_REALTIME, &ts);
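  // Elapsed time relative to memprof init, in milliseconds.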
57   return (ts.tv_sec - memprof_init_timestamp_s) * 1000 + ts.tv_nsec / 1000000;
58 }
59 
60 static MemprofAllocator &get_allocator();
61 
62 // The memory chunk allocated from the underlying allocator looks like this:
63 // H H U U U U U U
64 //   H -- ChunkHeader (32 bytes)
65 //   U -- user memory.
66 
67 // If there is left padding before the ChunkHeader (due to use of memalign),
68 // we store a magic value in the first uptr word of the memory block and
69 // store the address of ChunkHeader in the next uptr.
70 // M B L L L L L L L L L  H H U U U U U U
71 //   |                    ^
72 //   ---------------------|
73 //   M -- magic value kAllocBegMagic
74 //   B -- address of ChunkHeader pointing to the first 'H'
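// The LargeChunkHeader class below implements this magic/pointer prefix.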
75 
76 constexpr uptr kMaxAllowedMallocBits = 40;
77 
// Should be no more than 32 bytes.
79 struct ChunkHeader {
  // 1st 4 bytes.
  u32 alloc_context_id;
  // 2nd 4 bytes.
  u32 cpu_id;
  // 3rd 4 bytes.
  u32 timestamp_ms;
  // 4th 4 bytes.
  // Note: only 1 bit is needed for this flag if we need space in the future
  // for more fields.
  u32 from_memalign;
  // 5th and 6th 4 bytes.
  // The max size of an allocation is 2^40 (kMaxAllowedMallocSize), so this
  // could be shrunk to kMaxAllowedMallocBits if we need space in the future
  // for more fields.
  atomic_uint64_t user_requested_size;
  // (23 bits would remain in this word if the size were shrunk to 40 bits and
  // the from_memalign flag packed into a single bit.)
  // 7th and 8th 4 bytes.
  u64 data_type_id; // TODO: hash of type name
98 };
99 
100 static const uptr kChunkHeaderSize = sizeof(ChunkHeader);
101 COMPILER_CHECK(kChunkHeaderSize == 32);
102 
103 struct MemprofChunk : ChunkHeader {
104   uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; }
105   uptr UsedSize() {
106     return atomic_load(&user_requested_size, memory_order_relaxed);
107   }
108   void *AllocBeg() {
109     if (from_memalign)
110       return get_allocator().GetBlockBegin(reinterpret_cast<void *>(this));
111     return reinterpret_cast<void *>(this);
112   }
113 };
114 
115 class LargeChunkHeader {
116   static constexpr uptr kAllocBegMagic =
117       FIRST_32_SECOND_64(0xCC6E96B9, 0xCC6E96B9CC6E96B9ULL);
118   atomic_uintptr_t magic;
119   MemprofChunk *chunk_header;
120 
121 public:
122   MemprofChunk *Get() const {
123     return atomic_load(&magic, memory_order_acquire) == kAllocBegMagic
124                ? chunk_header
125                : nullptr;
126   }
127 
128   void Set(MemprofChunk *p) {
129     if (p) {
130       chunk_header = p;
131       atomic_store(&magic, kAllocBegMagic, memory_order_release);
132       return;
133     }
134 
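    // Clearing: the magic value is expected to still be present; the CHECK
    // below fires if it was already cleared or overwritten.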
135     uptr old = kAllocBegMagic;
136     if (!atomic_compare_exchange_strong(&magic, &old, 0,
137                                         memory_order_release)) {
138       CHECK_EQ(old, kAllocBegMagic);
139     }
140   }
141 };
142 
143 void FlushUnneededMemProfShadowMemory(uptr p, uptr size) {
  // Since memprof's mapping is compacting, the shadow chunk may not be
  // page-aligned, so we only flush the page-aligned portion.
146   ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size));
147 }
148 
149 void MemprofMapUnmapCallback::OnMap(uptr p, uptr size) const {
150   // Statistics.
151   MemprofStats &thread_stats = GetCurrentThreadStats();
152   thread_stats.mmaps++;
153   thread_stats.mmaped += size;
154 }
155 void MemprofMapUnmapCallback::OnUnmap(uptr p, uptr size) const {
156   // We are about to unmap a chunk of user memory.
157   // Mark the corresponding shadow memory as not needed.
158   FlushUnneededMemProfShadowMemory(p, size);
159   // Statistics.
160   MemprofStats &thread_stats = GetCurrentThreadStats();
161   thread_stats.munmaps++;
162   thread_stats.munmaped += size;
163 }
164 
165 AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) {
166   CHECK(ms);
167   return &ms->allocator_cache;
168 }
169 
170 struct SetEntry {
171   SetEntry() : id(0), MIB() {}
172   bool Empty() { return id == 0; }
173   void Print() {
174     CHECK(!Empty());
175     MIB.Print(id, flags()->print_terse);
176   }
177   // The stack id
178   u64 id;
179   MemInfoBlock MIB;
180 };
181 
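// A set in the MIB cache, kSetSize-way associative. The most recently hit
// entry is kept in slot 0; on a miss, the entry with the lowest alloc count
// is evicted (after printing its profile information).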
182 struct CacheSet {
183   enum { kSetSize = 4 };
184 
185   void PrintAll() {
186     for (int i = 0; i < kSetSize; i++) {
187       if (Entries[i].Empty())
188         continue;
189       Entries[i].Print();
190     }
191   }
192   void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
193     SpinMutexLock l(&SetMutex);
194     AccessCount++;
195 
196     for (int i = 0; i < kSetSize; i++) {
197       auto id = Entries[i].id;
      // Check if this is a hit or an empty entry. Since we always move any
      // filled locations to the front of the array (see below), we don't need
      // to look further once we find the first empty entry.
201       if (id == new_id || !id) {
202         if (id == 0) {
203           Entries[i].id = new_id;
204           Entries[i].MIB = newMIB;
205         } else {
206           Entries[i].MIB.Merge(newMIB);
207         }
208         // Assuming some id locality, we try to swap the matching entry
209         // into the first set position.
210         if (i != 0) {
211           auto tmp = Entries[0];
212           Entries[0] = Entries[i];
213           Entries[i] = tmp;
214         }
215         return;
216       }
217     }
218 
219     // Miss
220     MissCount++;
221 
    // Find the entry with the lowest alloc count to evict:
223     int min_idx = 0;
224     u64 min_count = Entries[0].MIB.alloc_count;
225     for (int i = 1; i < kSetSize; i++) {
226       CHECK(!Entries[i].Empty());
227       if (Entries[i].MIB.alloc_count < min_count) {
228         min_idx = i;
229         min_count = Entries[i].MIB.alloc_count;
230       }
231     }
232 
    // Print the evicted entry's profile information.
    if (!flags()->print_terse)
      Printf("Evicted:\n");
    Entries[min_idx].Print();

    // Similar to the hit case, put the new MIB in the first set position.
239     if (min_idx != 0)
240       Entries[min_idx] = Entries[0];
241     Entries[0].id = new_id;
242     Entries[0].MIB = newMIB;
243   }
244 
245   void PrintMissRate(int i) {
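    // The miss rate is computed in hundredths of a percent using integer
    // arithmetic, e.g. 37 misses out of 1000 accesses gives p = 370, which is
    // printed as 3.70%.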
246     u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0;
247     Printf("Set %d miss rate: %d / %d = %5llu.%02llu%%\n", i, MissCount,
248            AccessCount, p / 100, p % 100);
249   }
250 
251   SetEntry Entries[kSetSize];
252   u32 AccessCount = 0;
253   u32 MissCount = 0;
254   SpinMutex SetMutex;
255 };
256 
257 struct MemInfoBlockCache {
258   MemInfoBlockCache() {
259     if (common_flags()->print_module_map)
260       DumpProcessMap();
261     if (flags()->print_terse)
262       MemInfoBlock::printHeader();
263     Sets =
264         (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries);
265     Constructed = true;
266   }
267 
268   ~MemInfoBlockCache() { free(Sets); }
269 
270   void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
271     u64 hv = new_id;
272 
    // Use the mod method; the number of entries should be a prime close to a
    // power of 2.
275     hv %= flags()->mem_info_cache_entries;
276 
277     return Sets[hv].insertOrMerge(new_id, newMIB);
278   }
279 
280   void PrintAll() {
281     for (int i = 0; i < flags()->mem_info_cache_entries; i++) {
282       Sets[i].PrintAll();
283     }
284   }
285 
286   void PrintMissRate() {
287     if (!flags()->print_mem_info_cache_miss_rate)
288       return;
289     u64 MissCountSum = 0;
290     u64 AccessCountSum = 0;
291     for (int i = 0; i < flags()->mem_info_cache_entries; i++) {
292       MissCountSum += Sets[i].MissCount;
293       AccessCountSum += Sets[i].AccessCount;
294     }
295     u64 p = AccessCountSum ? MissCountSum * 10000ULL / AccessCountSum : 0;
296     Printf("Overall miss rate: %llu / %llu = %5llu.%02llu%%\n", MissCountSum,
297            AccessCountSum, p / 100, p % 100);
298     if (flags()->print_mem_info_cache_miss_rate_details)
299       for (int i = 0; i < flags()->mem_info_cache_entries; i++)
300         Sets[i].PrintMissRate(i);
301   }
302 
303   CacheSet *Sets;
  // Flag indicating that the Sets have been allocated, in case a deallocation
  // is called very early, before the static init of the Allocator (and
  // therefore of this table) has completed.
307   bool Constructed = false;
308 };
309 
310 // Accumulates the access count from the shadow for the given pointer and size.
311 u64 GetShadowCount(uptr p, u32 size) {
312   u64 *shadow = (u64 *)MEM_TO_SHADOW(p);
313   u64 *shadow_end = (u64 *)MEM_TO_SHADOW(p + size);
314   u64 count = 0;
315   for (; shadow <= shadow_end; shadow++)
316     count += *shadow;
317   return count;
318 }
319 
320 // Clears the shadow counters (when memory is allocated).
321 void ClearShadow(uptr addr, uptr size) {
322   CHECK(AddrIsAlignedByGranularity(addr));
323   CHECK(AddrIsInMem(addr));
324   CHECK(AddrIsAlignedByGranularity(addr + size));
325   CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY));
326   CHECK(REAL(memset));
327   uptr shadow_beg = MEM_TO_SHADOW(addr);
328   uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
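  // For small ranges, memset the entire shadow region. For larger ranges,
  // memset only the unaligned edges and re-reserve (remap) the fully covered
  // pages so they need not all be written.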
329   if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
330     REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
331   } else {
332     uptr page_size = GetPageSizeCached();
333     uptr page_beg = RoundUpTo(shadow_beg, page_size);
334     uptr page_end = RoundDownTo(shadow_end, page_size);
335 
336     if (page_beg >= page_end) {
337       REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
338     } else {
339       if (page_beg != shadow_beg) {
340         REAL(memset)((void *)shadow_beg, 0, page_beg - shadow_beg);
341       }
342       if (page_end != shadow_end) {
343         REAL(memset)((void *)page_end, 0, shadow_end - page_end);
344       }
345       ReserveShadowMemoryRange(page_beg, page_end - 1, nullptr);
346     }
347   }
348 }
349 
350 struct Allocator {
351   static const uptr kMaxAllowedMallocSize = 1ULL << kMaxAllowedMallocBits;
352 
353   MemprofAllocator allocator;
354   StaticSpinMutex fallback_mutex;
355   AllocatorCache fallback_allocator_cache;
356 
357   uptr max_user_defined_malloc_size;
358   atomic_uint8_t rss_limit_exceeded;
359 
360   MemInfoBlockCache MemInfoBlockTable;
361   bool destructing;
362 
363   // ------------------- Initialization ------------------------
364   explicit Allocator(LinkerInitialized) : destructing(false) {}
365 
366   ~Allocator() { FinishAndPrint(); }
367 
368   void FinishAndPrint() {
369     if (!flags()->print_terse)
370       Printf("Live on exit:\n");
371     allocator.ForceLock();
372     allocator.ForEachChunk(
373         [](uptr chunk, void *alloc) {
374           u64 user_requested_size;
375           MemprofChunk *m =
376               ((Allocator *)alloc)
377                   ->GetMemprofChunk((void *)chunk, user_requested_size);
378           if (!m)
379             return;
380           uptr user_beg = ((uptr)m) + kChunkHeaderSize;
381           u64 c = GetShadowCount(user_beg, user_requested_size);
382           long curtime = GetTimestamp();
383           MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
384                               m->cpu_id, GetCpuId());
385           ((Allocator *)alloc)
386               ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
387         },
388         this);
389     allocator.ForceUnlock();
390 
391     destructing = true;
392     MemInfoBlockTable.PrintMissRate();
393     MemInfoBlockTable.PrintAll();
394     StackDepotPrintAll();
395   }
396 
397   void InitLinkerInitialized() {
398     SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
399     allocator.InitLinkerInitialized(
400         common_flags()->allocator_release_to_os_interval_ms);
401     max_user_defined_malloc_size = common_flags()->max_allocation_size_mb
402                                        ? common_flags()->max_allocation_size_mb
403                                              << 20
404                                        : kMaxAllowedMallocSize;
405   }
406 
407   bool RssLimitExceeded() {
408     return atomic_load(&rss_limit_exceeded, memory_order_relaxed);
409   }
410 
411   void SetRssLimitExceeded(bool limit_exceeded) {
412     atomic_store(&rss_limit_exceeded, limit_exceeded, memory_order_relaxed);
413   }
414 
415   // -------------------- Allocation/Deallocation routines ---------------
416   void *Allocate(uptr size, uptr alignment, BufferedStackTrace *stack,
417                  AllocType alloc_type) {
418     if (UNLIKELY(!memprof_inited))
419       MemprofInitFromRtl();
420     if (RssLimitExceeded()) {
421       if (AllocatorMayReturnNull())
422         return nullptr;
423       ReportRssLimitExceeded(stack);
424     }
425     CHECK(stack);
426     const uptr min_alignment = MEMPROF_ALIGNMENT;
427     if (alignment < min_alignment)
428       alignment = min_alignment;
429     if (size == 0) {
430       // We'd be happy to avoid allocating memory for zero-size requests, but
431       // some programs/tests depend on this behavior and assume that malloc
432       // would not return NULL even for zero-size allocations. Moreover, it
433       // looks like operator new should never return NULL, and results of
434       // consecutive "new" calls must be different even if the allocated size
435       // is zero.
436       size = 1;
437     }
438     CHECK(IsPowerOfTwo(alignment));
439     uptr rounded_size = RoundUpTo(size, alignment);
440     uptr needed_size = rounded_size + kChunkHeaderSize;
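    // For alignments stricter than the default, reserve extra bytes so the
    // user pointer can be rounded up to 'alignment' after the chunk header.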
441     if (alignment > min_alignment)
442       needed_size += alignment;
443     CHECK(IsAligned(needed_size, min_alignment));
444     if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize ||
445         size > max_user_defined_malloc_size) {
446       if (AllocatorMayReturnNull()) {
447         Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n", size);
448         return nullptr;
449       }
450       uptr malloc_limit =
451           Min(kMaxAllowedMallocSize, max_user_defined_malloc_size);
452       ReportAllocationSizeTooBig(size, malloc_limit, stack);
453     }
454 
455     MemprofThread *t = GetCurrentThread();
456     void *allocated;
457     if (t) {
458       AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
459       allocated = allocator.Allocate(cache, needed_size, 8);
460     } else {
461       SpinMutexLock l(&fallback_mutex);
462       AllocatorCache *cache = &fallback_allocator_cache;
463       allocated = allocator.Allocate(cache, needed_size, 8);
464     }
465     if (UNLIKELY(!allocated)) {
466       SetAllocatorOutOfMemory();
467       if (AllocatorMayReturnNull())
468         return nullptr;
469       ReportOutOfMemory(size, stack);
470     }
471 
472     uptr alloc_beg = reinterpret_cast<uptr>(allocated);
473     uptr alloc_end = alloc_beg + needed_size;
474     uptr beg_plus_header = alloc_beg + kChunkHeaderSize;
475     uptr user_beg = beg_plus_header;
476     if (!IsAligned(user_beg, alignment))
477       user_beg = RoundUpTo(user_beg, alignment);
478     uptr user_end = user_beg + size;
479     CHECK_LE(user_end, alloc_end);
480     uptr chunk_beg = user_beg - kChunkHeaderSize;
481     MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
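    // If rounding for alignment moved user_beg, the chunk header no longer
    // sits at alloc_beg; record that so AllocBeg() can recover the block
    // start via the allocator.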
482     m->from_memalign = alloc_beg != chunk_beg;
483     CHECK(size);
484 
485     m->cpu_id = GetCpuId();
486     m->timestamp_ms = GetTimestamp();
487     m->alloc_context_id = StackDepotPut(*stack);
488 
489     uptr size_rounded_down_to_granularity =
490         RoundDownTo(size, SHADOW_GRANULARITY);
491     if (size_rounded_down_to_granularity)
492       ClearShadow(user_beg, size_rounded_down_to_granularity);
493 
494     MemprofStats &thread_stats = GetCurrentThreadStats();
495     thread_stats.mallocs++;
496     thread_stats.malloced += size;
497     thread_stats.malloced_overhead += needed_size - size;
498     if (needed_size > SizeClassMap::kMaxSize)
499       thread_stats.malloc_large++;
500     else
501       thread_stats.malloced_by_size[SizeClassMap::ClassID(needed_size)]++;
502 
503     void *res = reinterpret_cast<void *>(user_beg);
504     atomic_store(&m->user_requested_size, size, memory_order_release);
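    // For memalign'd chunks with left padding, also record the chunk header
    // address behind the kAllocBegMagic prefix at the start of the block.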
505     if (alloc_beg != chunk_beg) {
506       CHECK_LE(alloc_beg + sizeof(LargeChunkHeader), chunk_beg);
507       reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(m);
508     }
509     MEMPROF_MALLOC_HOOK(res, size);
510     return res;
511   }
512 
513   void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment,
514                   BufferedStackTrace *stack, AllocType alloc_type) {
515     uptr p = reinterpret_cast<uptr>(ptr);
516     if (p == 0)
517       return;
518 
519     MEMPROF_FREE_HOOK(ptr);
520 
521     uptr chunk_beg = p - kChunkHeaderSize;
522     MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
523 
524     u64 user_requested_size =
525         atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
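    // Resetting the size to 0 marks the chunk as freed, so GetMemprofChunk
    // will no longer report it as live.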
526     if (memprof_inited && memprof_init_done && !destructing &&
527         MemInfoBlockTable.Constructed) {
528       u64 c = GetShadowCount(p, user_requested_size);
529       long curtime = GetTimestamp();
530 
531       MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
532                           m->cpu_id, GetCpuId());
      MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
534     }
535 
536     MemprofStats &thread_stats = GetCurrentThreadStats();
537     thread_stats.frees++;
538     thread_stats.freed += user_requested_size;
539 
540     void *alloc_beg = m->AllocBeg();
541     if (alloc_beg != m) {
      // Clear the magic value, as allocator internals may overwrite the
      // contents of the deallocated chunk, confusing GetMemprofChunk lookup.
544       reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(nullptr);
545     }
546 
547     MemprofThread *t = GetCurrentThread();
548     if (t) {
549       AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
550       allocator.Deallocate(cache, alloc_beg);
551     } else {
552       SpinMutexLock l(&fallback_mutex);
553       AllocatorCache *cache = &fallback_allocator_cache;
554       allocator.Deallocate(cache, alloc_beg);
555     }
556   }
557 
558   void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) {
559     CHECK(old_ptr && new_size);
560     uptr p = reinterpret_cast<uptr>(old_ptr);
561     uptr chunk_beg = p - kChunkHeaderSize;
562     MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
563 
564     MemprofStats &thread_stats = GetCurrentThreadStats();
565     thread_stats.reallocs++;
566     thread_stats.realloced += new_size;
567 
568     void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC);
569     if (new_ptr) {
570       CHECK_NE(REAL(memcpy), nullptr);
571       uptr memcpy_size = Min(new_size, m->UsedSize());
572       REAL(memcpy)(new_ptr, old_ptr, memcpy_size);
573       Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC);
574     }
575     return new_ptr;
576   }
577 
578   void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
579     if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
580       if (AllocatorMayReturnNull())
581         return nullptr;
582       ReportCallocOverflow(nmemb, size, stack);
583     }
584     void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC);
    // If the memory comes from the secondary allocator, there is no need to
    // clear it since it comes directly from mmap.
587     if (ptr && allocator.FromPrimary(ptr))
588       REAL(memset)(ptr, 0, nmemb * size);
589     return ptr;
590   }
591 
592   void CommitBack(MemprofThreadLocalMallocStorage *ms,
593                   BufferedStackTrace *stack) {
594     AllocatorCache *ac = GetAllocatorCache(ms);
595     allocator.SwallowCache(ac);
596   }
597 
598   // -------------------------- Chunk lookup ----------------------
599 
600   // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg).
601   MemprofChunk *GetMemprofChunk(void *alloc_beg, u64 &user_requested_size) {
602     if (!alloc_beg)
603       return nullptr;
604     MemprofChunk *p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get();
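    // No magic prefix: secondary-allocator chunks are then not live MemProf
    // chunks; for primary chunks, assume the header is at alloc_beg (the size
    // check below filters out freed chunks).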
605     if (!p) {
606       if (!allocator.FromPrimary(alloc_beg))
607         return nullptr;
608       p = reinterpret_cast<MemprofChunk *>(alloc_beg);
609     }
    // The size is reset to 0 on deallocation (and set to a minimum of 1 on
    // allocation).
612     user_requested_size =
613         atomic_load(&p->user_requested_size, memory_order_acquire);
614     if (user_requested_size)
615       return p;
616     return nullptr;
617   }
618 
619   MemprofChunk *GetMemprofChunkByAddr(uptr p, u64 &user_requested_size) {
620     void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast<void *>(p));
621     return GetMemprofChunk(alloc_beg, user_requested_size);
622   }
623 
624   uptr AllocationSize(uptr p) {
625     u64 user_requested_size;
626     MemprofChunk *m = GetMemprofChunkByAddr(p, user_requested_size);
627     if (!m)
628       return 0;
629     if (m->Beg() != p)
630       return 0;
631     return user_requested_size;
632   }
633 
634   void Purge(BufferedStackTrace *stack) { allocator.ForceReleaseToOS(); }
635 
636   void PrintStats() { allocator.PrintStats(); }
637 
638   void ForceLock() NO_THREAD_SAFETY_ANALYSIS {
639     allocator.ForceLock();
640     fallback_mutex.Lock();
641   }
642 
643   void ForceUnlock() NO_THREAD_SAFETY_ANALYSIS {
644     fallback_mutex.Unlock();
645     allocator.ForceUnlock();
646   }
647 };
648 
649 static Allocator instance(LINKER_INITIALIZED);
650 
651 static MemprofAllocator &get_allocator() { return instance.allocator; }
652 
653 void InitializeAllocator() { instance.InitLinkerInitialized(); }
654 
655 void MemprofThreadLocalMallocStorage::CommitBack() {
656   GET_STACK_TRACE_MALLOC;
657   instance.CommitBack(this, &stack);
658 }
659 
660 void PrintInternalAllocatorStats() { instance.PrintStats(); }
661 
662 void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) {
663   instance.Deallocate(ptr, 0, 0, stack, alloc_type);
664 }
665 
666 void memprof_delete(void *ptr, uptr size, uptr alignment,
667                     BufferedStackTrace *stack, AllocType alloc_type) {
668   instance.Deallocate(ptr, size, alignment, stack, alloc_type);
669 }
670 
671 void *memprof_malloc(uptr size, BufferedStackTrace *stack) {
672   return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
673 }
674 
675 void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
676   return SetErrnoOnNull(instance.Calloc(nmemb, size, stack));
677 }
678 
679 void *memprof_reallocarray(void *p, uptr nmemb, uptr size,
680                            BufferedStackTrace *stack) {
681   if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
682     errno = errno_ENOMEM;
683     if (AllocatorMayReturnNull())
684       return nullptr;
685     ReportReallocArrayOverflow(nmemb, size, stack);
686   }
687   return memprof_realloc(p, nmemb * size, stack);
688 }
689 
690 void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack) {
691   if (!p)
692     return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
693   if (size == 0) {
694     if (flags()->allocator_frees_and_returns_null_on_realloc_zero) {
695       instance.Deallocate(p, 0, 0, stack, FROM_MALLOC);
696       return nullptr;
697     }
698     // Allocate a size of 1 if we shouldn't free() on Realloc to 0
699     size = 1;
700   }
701   return SetErrnoOnNull(instance.Reallocate(p, size, stack));
702 }
703 
704 void *memprof_valloc(uptr size, BufferedStackTrace *stack) {
705   return SetErrnoOnNull(
706       instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC));
707 }
708 
709 void *memprof_pvalloc(uptr size, BufferedStackTrace *stack) {
710   uptr PageSize = GetPageSizeCached();
711   if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
712     errno = errno_ENOMEM;
713     if (AllocatorMayReturnNull())
714       return nullptr;
715     ReportPvallocOverflow(size, stack);
716   }
717   // pvalloc(0) should allocate one page.
718   size = size ? RoundUpTo(size, PageSize) : PageSize;
719   return SetErrnoOnNull(instance.Allocate(size, PageSize, stack, FROM_MALLOC));
720 }
721 
722 void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
723                        AllocType alloc_type) {
724   if (UNLIKELY(!IsPowerOfTwo(alignment))) {
725     errno = errno_EINVAL;
726     if (AllocatorMayReturnNull())
727       return nullptr;
728     ReportInvalidAllocationAlignment(alignment, stack);
729   }
730   return SetErrnoOnNull(instance.Allocate(size, alignment, stack, alloc_type));
731 }
732 
733 void *memprof_aligned_alloc(uptr alignment, uptr size,
734                             BufferedStackTrace *stack) {
735   if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
736     errno = errno_EINVAL;
737     if (AllocatorMayReturnNull())
738       return nullptr;
739     ReportInvalidAlignedAllocAlignment(size, alignment, stack);
740   }
741   return SetErrnoOnNull(instance.Allocate(size, alignment, stack, FROM_MALLOC));
742 }
743 
744 int memprof_posix_memalign(void **memptr, uptr alignment, uptr size,
745                            BufferedStackTrace *stack) {
746   if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
747     if (AllocatorMayReturnNull())
748       return errno_EINVAL;
749     ReportInvalidPosixMemalignAlignment(alignment, stack);
750   }
751   void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC);
752   if (UNLIKELY(!ptr))
753     // OOM error is already taken care of by Allocate.
754     return errno_ENOMEM;
755   CHECK(IsAligned((uptr)ptr, alignment));
756   *memptr = ptr;
757   return 0;
758 }
759 
760 uptr memprof_malloc_usable_size(const void *ptr, uptr pc, uptr bp) {
761   if (!ptr)
762     return 0;
763   uptr usable_size = instance.AllocationSize(reinterpret_cast<uptr>(ptr));
764   return usable_size;
765 }
766 
767 void MemprofSoftRssLimitExceededCallback(bool limit_exceeded) {
768   instance.SetRssLimitExceeded(limit_exceeded);
769 }
770 
771 } // namespace __memprof
772 
773 // ---------------------- Interface ---------------- {{{1
774 using namespace __memprof;
775 
776 #if !SANITIZER_SUPPORTS_WEAK_HOOKS
777 // Provide default (no-op) implementation of malloc hooks.
778 SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_malloc_hook, void *ptr,
779                              uptr size) {
780   (void)ptr;
781   (void)size;
782 }
783 
784 SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_free_hook, void *ptr) {
785   (void)ptr;
786 }
787 #endif
788 
789 uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; }
790 
791 int __sanitizer_get_ownership(const void *p) {
792   return memprof_malloc_usable_size(p, 0, 0) != 0;
793 }
794 
795 uptr __sanitizer_get_allocated_size(const void *p) {
796   return memprof_malloc_usable_size(p, 0, 0);
797 }
798 
799 int __memprof_profile_dump() {
800   instance.FinishAndPrint();
801   // In the future we may want to return non-zero if there are any errors
802   // detected during the dumping process.
803   return 0;
804 }
805