1*d6d569fcSNico Weber #include <stdio.h>
2*d6d569fcSNico Weber #include <vector>
3*d6d569fcSNico Weber #include <pthread.h>
4*d6d569fcSNico Weber #include <malloc.h>
5*d6d569fcSNico Weber #include <algorithm>
6*d6d569fcSNico Weber
7*d6d569fcSNico Weber using namespace std;
8*d6d569fcSNico Weber
// Number of worker threads created (and later joined) by
// standalone_malloc_test().
static const size_t kNumThreds = 16;
// Exclusive upper bound of each worker's main loop in MallocThread(), and the
// capacity it reserves up front for its live-block table: 2^23.
static const size_t kNumIters = static_cast<size_t>(1) << 23;
11*d6d569fcSNico Weber
// Optimization barrier.
//
// Issues an inline-asm statement whose template is empty, so no instructions
// are emitted. `arg` is listed as a register input and "memory" as a clobber,
// which makes the compiler treat the pointer as used and assume memory may
// have been touched across this call.
inline void break_optimization(void *arg) {
  __asm__ __volatile__(
      ""            // empty template: nothing is executed
      :             // no output operands
      : "r"(arg)    // input operand: arg, held in a register
      : "memory");  // clobber list
}
15*d6d569fcSNico Weber
16*d6d569fcSNico Weber __attribute__((noinline))
MallocThread(void * t)17*d6d569fcSNico Weber static void *MallocThread(void *t) {
18*d6d569fcSNico Weber size_t total_malloced = 0, total_freed = 0;
19*d6d569fcSNico Weber size_t max_in_use = 0;
20*d6d569fcSNico Weber size_t tid = reinterpret_cast<size_t>(t);
21*d6d569fcSNico Weber vector<pair<char *, size_t> > allocated;
22*d6d569fcSNico Weber allocated.reserve(kNumIters);
23*d6d569fcSNico Weber for (size_t i = 1; i < kNumIters; i++) {
24*d6d569fcSNico Weber if ((i % (kNumIters / 4)) == 0 && tid == 0)
25*d6d569fcSNico Weber fprintf(stderr, " T[%ld] iter %ld\n", tid, i);
26*d6d569fcSNico Weber bool allocate = (i % 5) <= 2; // 60% malloc, 40% free
27*d6d569fcSNico Weber if (i > kNumIters / 4)
28*d6d569fcSNico Weber allocate = i % 2; // then switch to 50% malloc, 50% free
29*d6d569fcSNico Weber if (allocate) {
30*d6d569fcSNico Weber size_t size = 1 + (i % 200);
31*d6d569fcSNico Weber if ((i % 10001) == 0)
32*d6d569fcSNico Weber size *= 4096;
33*d6d569fcSNico Weber total_malloced += size;
34*d6d569fcSNico Weber char *x = new char[size];
35*d6d569fcSNico Weber x[0] = x[size - 1] = x[size / 2] = 0;
36*d6d569fcSNico Weber allocated.push_back(make_pair(x, size));
37*d6d569fcSNico Weber max_in_use = max(max_in_use, total_malloced - total_freed);
38*d6d569fcSNico Weber } else {
39*d6d569fcSNico Weber if (allocated.empty()) continue;
40*d6d569fcSNico Weber size_t slot = i % allocated.size();
41*d6d569fcSNico Weber char *p = allocated[slot].first;
42*d6d569fcSNico Weber p[0] = 0; // emulate last user touch of the block
43*d6d569fcSNico Weber size_t size = allocated[slot].second;
44*d6d569fcSNico Weber total_freed += size;
45*d6d569fcSNico Weber swap(allocated[slot], allocated.back());
46*d6d569fcSNico Weber allocated.pop_back();
47*d6d569fcSNico Weber delete [] p;
48*d6d569fcSNico Weber }
49*d6d569fcSNico Weber }
50*d6d569fcSNico Weber if (tid == 0)
51*d6d569fcSNico Weber fprintf(stderr, " T[%ld] total_malloced: %ldM in use %ldM max %ldM\n",
52*d6d569fcSNico Weber tid, total_malloced >> 20, (total_malloced - total_freed) >> 20,
53*d6d569fcSNico Weber max_in_use >> 20);
54*d6d569fcSNico Weber for (size_t i = 0; i < allocated.size(); i++)
55*d6d569fcSNico Weber delete [] allocated[i].first;
56*d6d569fcSNico Weber return 0;
57*d6d569fcSNico Weber }
58*d6d569fcSNico Weber
59*d6d569fcSNico Weber template <int depth>
60*d6d569fcSNico Weber struct DeepStack {
61*d6d569fcSNico Weber __attribute__((noinline))
runDeepStack62*d6d569fcSNico Weber static void *run(void *t) {
63*d6d569fcSNico Weber break_optimization(0);
64*d6d569fcSNico Weber DeepStack<depth - 1>::run(t);
65*d6d569fcSNico Weber break_optimization(0);
66*d6d569fcSNico Weber return 0;
67*d6d569fcSNico Weber }
68*d6d569fcSNico Weber };
69*d6d569fcSNico Weber
70*d6d569fcSNico Weber template<>
71*d6d569fcSNico Weber struct DeepStack<0> {
runDeepStack72*d6d569fcSNico Weber static void *run(void *t) {
73*d6d569fcSNico Weber MallocThread(t);
74*d6d569fcSNico Weber return 0;
75*d6d569fcSNico Weber }
76*d6d569fcSNico Weber };
77*d6d569fcSNico Weber
78*d6d569fcSNico Weber // Build with -Dstandalone_malloc_test=main to make it a separate program.
standalone_malloc_test()79*d6d569fcSNico Weber int standalone_malloc_test() {
80*d6d569fcSNico Weber pthread_t t[kNumThreds];
81*d6d569fcSNico Weber for (size_t i = 0; i < kNumThreds; i++)
82*d6d569fcSNico Weber pthread_create(&t[i], 0, DeepStack<200>::run, reinterpret_cast<void *>(i));
83*d6d569fcSNico Weber for (size_t i = 0; i < kNumThreds; i++)
84*d6d569fcSNico Weber pthread_join(t[i], 0);
85*d6d569fcSNico Weber malloc_stats();
86*d6d569fcSNico Weber return 0;
87*d6d569fcSNico Weber }
88