/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <new>        /* for placement new */

namespace rml {
namespace internal {


/********* backreferences ***********************/
/* Each slab block and each large memory object header contains a BackRefIdx
 * that identifies a slot in some BackRefBlock; that slot points back to this
 * block or header.
 */
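/* A minimal sketch of the lookup, mirroring getBackRef() below (illustration
 * only; `idx` stands for a hypothetical valid BackRefIdx):
 *
 *     BackRefBlock *leaf = backRefMaster.load()->backRefBl[idx.getMaster()];
 *     void *object = *(void**)((uintptr_t)leaf + sizeof(BackRefBlock)
 *                              + idx.getOffset()*sizeof(void*));
 *
 * Each leaf is a slab-sized block whose payload after the header is a flat
 * array of void* slots; a BackRefIdx is just (leaf number, slot offset).
 */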
struct BackRefBlock : public BlockI {
    BackRefBlock *nextForUse;     // the next in the chain of blocks with free items
    FreeObject   *bumpPtr;        // bump pointer moves from the end to the beginning of the block
    FreeObject   *freeList;
    // list of all blocks that were allocated from raw mem (i.e., not from backend)
    BackRefBlock *nextRawMemBlock;
    int           allocatedCount; // the number of objects allocated
    BackRefIdx::master_t myNum;   // the index in the master
    MallocMutex   blockMutex;
    // true if this block has been added to the listForUse chain,
    // modifications protected by masterMutex
    bool          addedToForUse;

    BackRefBlock(const BackRefBlock *blockToUse, intptr_t num) :
        nextForUse(NULL), bumpPtr((FreeObject*)((uintptr_t)blockToUse + slabSize - sizeof(void*))),
        freeList(NULL), nextRawMemBlock(NULL), allocatedCount(0), myNum(num),
        addedToForUse(false) {
        memset(&blockMutex, 0, sizeof(MallocMutex));

        MALLOC_ASSERT(!(num >> CHAR_BIT*sizeof(BackRefIdx::master_t)),
                      "index in BackRefMaster must fit into BackRefIdx::master");
    }
    // clean all but the header
    void zeroSet() { memset(this+1, 0, BackRefBlock::bytes-sizeof(BackRefBlock)); }
    static const int bytes = slabSize;
};

// max number of backreference pointers in a slab block
static const int BR_MAX_CNT = (BackRefBlock::bytes-sizeof(BackRefBlock))/sizeof(void*);

struct BackRefMaster {
/* On 64-bit systems a slab block can hold up to ~2K back pointers to slab blocks
 * or large objects, so it can address at least 32MB. The master array of 256KB
 * holds 32K pointers to such blocks, addressing ~1 TB.
 * On 32-bit systems there are ~4K back pointers in a slab block, so ~64MB can be addressed.
 * The master array of 8KB holds 2K pointers to leaves, so ~128 GB can be addressed.
 */
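/* Worked out explicitly, as an illustration only, assuming the 16 KB slabSize
 * used by tbbmalloc and ignoring the small BackRefBlock header:
 *   64-bit: ~16K/8 = ~2K slots per leaf   -> ~2K * 16 KB  = ~32 MB per leaf
 *           256 KB / 8 = 32K leaf entries -> 32K * ~32 MB = ~1 TB addressable
 *   32-bit: ~16K/4 = ~4K slots per leaf   -> ~4K * 16 KB  = ~64 MB per leaf
 *           8 KB / 4 = 2K leaf entries    -> 2K * ~64 MB  = ~128 GB addressable
 */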
    static const size_t bytes = sizeof(uintptr_t)>4? 256*1024 : 8*1024;
    static const int dataSz;
/* Space is reserved for the master table and 4 leaves,
   taking VirtualAlloc allocation granularity into account. */
    static const int leaves = 4;
    static const size_t masterSize = BackRefMaster::bytes+leaves*BackRefBlock::bytes;
    // The size of a memory request for a few more leaf blocks;
    // selected to match VirtualAlloc granularity
    static const size_t blockSpaceSize = 64*1024;

    Backend       *backend;
    BackRefBlock  *active;         // if defined, use it for allocations
    BackRefBlock  *listForUse;     // the chain of data blocks with free items
    BackRefBlock  *allRawMemBlocks;
    std::atomic <intptr_t> lastUsed; // index of the last used block
    bool           rawMemUsed;
    MallocMutex    requestNewSpaceMutex;
    BackRefBlock  *backRefBl[1];   // the real size of the array is dataSz

    BackRefBlock *findFreeBlock();
    void          addToForUseList(BackRefBlock *bl);
    void          initEmptyBackRefBlock(BackRefBlock *newBl);
    bool          requestNewSpace();
};

const int BackRefMaster::dataSz
    = 1+(BackRefMaster::bytes-sizeof(BackRefMaster))/sizeof(BackRefBlock*);

static MallocMutex masterMutex;
static std::atomic<BackRefMaster*> backRefMaster;

bool initBackRefMaster(Backend *backend)
{
    bool rawMemUsed;
    BackRefMaster *master =
        (BackRefMaster*)backend->getBackRefSpace(BackRefMaster::masterSize,
                                                 &rawMemUsed);
    if (! master)
        return false;
    master->backend = backend;
    master->listForUse = master->allRawMemBlocks = NULL;
    master->rawMemUsed = rawMemUsed;
    master->lastUsed = -1;
    memset(&master->requestNewSpaceMutex, 0, sizeof(MallocMutex));
    for (int i=0; i<BackRefMaster::leaves; i++) {
        BackRefBlock *bl = (BackRefBlock*)((uintptr_t)master + BackRefMaster::bytes + i*BackRefBlock::bytes);
        bl->zeroSet();
        master->initEmptyBackRefBlock(bl);
        if (i)
            master->addToForUseList(bl);
        else // active leaf is not needed in listForUse
            master->active = bl;
    }
    // backRefMaster is read in getBackRef, so publish it in a consistent state
    backRefMaster.store(master, std::memory_order_release);
    return true;
}

void destroyBackRefMaster(Backend *backend)
{
    if (backRefMaster.load(std::memory_order_acquire)) { // Was initBackRefMaster() called?
        for (BackRefBlock *curr = backRefMaster.load(std::memory_order_relaxed)->allRawMemBlocks; curr; ) {
            BackRefBlock *next = curr->nextRawMemBlock;
            // the allRawMemBlocks list contains only raw mem blocks
            backend->putBackRefSpace(curr, BackRefMaster::blockSpaceSize,
                                     /*rawMemUsed=*/true);
            curr = next;
        }
        backend->putBackRefSpace(backRefMaster.load(std::memory_order_relaxed), BackRefMaster::masterSize,
                                 backRefMaster.load(std::memory_order_relaxed)->rawMemUsed);
    }
}

void BackRefMaster::addToForUseList(BackRefBlock *bl)
{
    bl->nextForUse = listForUse;
    listForUse = bl;
    bl->addedToForUse = true;
}

void BackRefMaster::initEmptyBackRefBlock(BackRefBlock *newBl)
{
    intptr_t nextLU = lastUsed+1;
    new (newBl) BackRefBlock(newBl, nextLU);
    MALLOC_ASSERT(nextLU < dataSz, NULL);
    backRefBl[nextLU] = newBl;
    // lastUsed is read in getBackRef, and access to backRefBl[lastUsed]
    // is possible only after checking backref against current lastUsed
    lastUsed.store(nextLU, std::memory_order_release);
}

bool BackRefMaster::requestNewSpace()
{
    bool isRawMemUsed;
    static_assert(!(blockSpaceSize % BackRefBlock::bytes),
                         "Must request space for a whole number of blocks.");

    if (backRefMaster.load(std::memory_order_relaxed)->dataSz <= lastUsed + 1) // no space in master
        return false;

    // only one thread at a time may add blocks
    MallocMutex::scoped_lock newSpaceLock(requestNewSpaceMutex);

    if (listForUse) // double check: another thread may have added blocks already
        return true;
    BackRefBlock *newBl = (BackRefBlock*)backend->getBackRefSpace(blockSpaceSize, &isRawMemUsed);
    if (!newBl) return false;

    // touch the new pages for the 1st time without taking masterMutex ...
    for (BackRefBlock *bl = newBl; (uintptr_t)bl < (uintptr_t)newBl + blockSpaceSize;
         bl = (BackRefBlock*)((uintptr_t)bl + BackRefBlock::bytes)) {
        bl->zeroSet();
    }

    MallocMutex::scoped_lock lock(masterMutex); // ... and share under the lock

    const size_t numOfUnusedIdxs = backRefMaster.load(std::memory_order_relaxed)->dataSz - lastUsed - 1;
    if (!numOfUnusedIdxs) { // no space in master under the lock, roll back
        backend->putBackRefSpace(newBl, blockSpaceSize, isRawMemUsed);
        return false;
    }
    // It's possible that only part of newBl is used, due to a lack of indices in the master.
    // This is OK, as such underutilization can happen only once for the backreferences table.
    int blocksToUse = min(numOfUnusedIdxs, blockSpaceSize / BackRefBlock::bytes);

    // use the first block in the batch to maintain the list of "raw" memory
    // to be released at shutdown
    if (isRawMemUsed) {
        newBl->nextRawMemBlock = backRefMaster.load(std::memory_order_relaxed)->allRawMemBlocks;
        backRefMaster.load(std::memory_order_relaxed)->allRawMemBlocks = newBl;
    }
    for (BackRefBlock *bl = newBl; blocksToUse>0; bl = (BackRefBlock*)((uintptr_t)bl + BackRefBlock::bytes), blocksToUse--) {
        initEmptyBackRefBlock(bl);
        if (active->allocatedCount == BR_MAX_CNT) {
            active = bl; // active leaf is not needed in listForUse
        } else {
            addToForUseList(bl);
        }
    }
    return true;
}

BackRefBlock *BackRefMaster::findFreeBlock()
{
    if (active->allocatedCount < BR_MAX_CNT)
        return active;

    if (listForUse) {                                   // use released list
        MallocMutex::scoped_lock lock(masterMutex);

        if (active->allocatedCount == BR_MAX_CNT && listForUse) {
            active = listForUse;
            listForUse = listForUse->nextForUse;
            MALLOC_ASSERT(active->addedToForUse, ASSERT_TEXT);
            active->addedToForUse = false;
        }
    } else // allocate new data node
        if (!requestNewSpace())
            return NULL;
    return active;
}

void *getBackRef(BackRefIdx backRefIdx)
{
    // !backRefMaster means no initialization was done, so it can't be valid memory
    // see initEmptyBackRefBlock for the fences around lastUsed
    if (!(backRefMaster.load(std::memory_order_acquire))
        || backRefIdx.getMaster() > (backRefMaster.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_acquire))
        || backRefIdx.getOffset() >= BR_MAX_CNT)
        return NULL;
    return *(void**)((uintptr_t)backRefMaster.load(std::memory_order_relaxed)->backRefBl[backRefIdx.getMaster()]
                     + sizeof(BackRefBlock)+backRefIdx.getOffset()*sizeof(void*));
}

void setBackRef(BackRefIdx backRefIdx, void *newPtr)
{
    MALLOC_ASSERT(backRefIdx.getMaster()<=backRefMaster.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed)
                                 && backRefIdx.getOffset()<BR_MAX_CNT, ASSERT_TEXT);
    *(void**)((uintptr_t)backRefMaster.load(std::memory_order_relaxed)->backRefBl[backRefIdx.getMaster()]
              + sizeof(BackRefBlock) + backRefIdx.getOffset()*sizeof(void*)) = newPtr;
}

BackRefIdx BackRefIdx::newBackRef(bool largeObj)
{
    BackRefBlock *blockToUse;
    void **toUse;
    BackRefIdx res;
    bool lastBlockFirstUsed = false;

    do {
        MALLOC_ASSERT(backRefMaster.load(std::memory_order_relaxed), ASSERT_TEXT);
        blockToUse = backRefMaster.load(std::memory_order_relaxed)->findFreeBlock();
        if (!blockToUse)
            return BackRefIdx();
        toUse = NULL;
        { // the block is locked to find a reference
            MallocMutex::scoped_lock lock(blockToUse->blockMutex);

            if (blockToUse->freeList) {
                toUse = (void**)blockToUse->freeList;
                blockToUse->freeList = blockToUse->freeList->next;
                MALLOC_ASSERT(!blockToUse->freeList ||
                              ((uintptr_t)blockToUse->freeList>=(uintptr_t)blockToUse
                               && (uintptr_t)blockToUse->freeList <
                               (uintptr_t)blockToUse + slabSize), ASSERT_TEXT);
            } else if (blockToUse->allocatedCount < BR_MAX_CNT) {
                toUse = (void**)blockToUse->bumpPtr;
                blockToUse->bumpPtr =
                    (FreeObject*)((uintptr_t)blockToUse->bumpPtr - sizeof(void*));
                if (blockToUse->allocatedCount == BR_MAX_CNT-1) {
                    MALLOC_ASSERT((uintptr_t)blockToUse->bumpPtr
                                  < (uintptr_t)blockToUse+sizeof(BackRefBlock),
                                  ASSERT_TEXT);
                    blockToUse->bumpPtr = NULL;
                }
            }
            if (toUse) {
                if (!blockToUse->allocatedCount && !backRefMaster.load(std::memory_order_relaxed)->listForUse)
                    lastBlockFirstUsed = true;
                blockToUse->allocatedCount++;
            }
        } // end of lock scope
    } while (!toUse);
    // The first thread that uses the last block requests new space in advance;
    // possible failures are ignored.
    if (lastBlockFirstUsed)
        backRefMaster.load(std::memory_order_relaxed)->requestNewSpace();

    res.master = blockToUse->myNum;
    uintptr_t offset =
        ((uintptr_t)toUse - ((uintptr_t)blockToUse + sizeof(BackRefBlock)))/sizeof(void*);
    // Is offset too big?
    MALLOC_ASSERT(!(offset >> 15), ASSERT_TEXT);
    res.offset = offset;
    if (largeObj) res.largeObj = largeObj;

    return res;
}

void removeBackRef(BackRefIdx backRefIdx)
{
    MALLOC_ASSERT(!backRefIdx.isInvalid(), ASSERT_TEXT);
    MALLOC_ASSERT(backRefIdx.getMaster()<=backRefMaster.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed)
                  && backRefIdx.getOffset()<BR_MAX_CNT, ASSERT_TEXT);
    BackRefBlock *currBlock = backRefMaster.load(std::memory_order_relaxed)->backRefBl[backRefIdx.getMaster()];
    FreeObject *freeObj = (FreeObject*)((uintptr_t)currBlock + sizeof(BackRefBlock)
                                        + backRefIdx.getOffset()*sizeof(void*));
    MALLOC_ASSERT(((uintptr_t)freeObj>(uintptr_t)currBlock &&
                   (uintptr_t)freeObj<(uintptr_t)currBlock + slabSize), ASSERT_TEXT);
    {
        MallocMutex::scoped_lock lock(currBlock->blockMutex);

        freeObj->next = currBlock->freeList;
        MALLOC_ASSERT(!freeObj->next ||
                      ((uintptr_t)freeObj->next > (uintptr_t)currBlock
                       && (uintptr_t)freeObj->next <
                       (uintptr_t)currBlock + slabSize), ASSERT_TEXT);
        currBlock->freeList = freeObj;
        currBlock->allocatedCount--;
    }
    // TODO: do we need double-check here?
    if (!currBlock->addedToForUse && currBlock!=backRefMaster.load(std::memory_order_relaxed)->active) {
        MallocMutex::scoped_lock lock(masterMutex);

        if (!currBlock->addedToForUse && currBlock!=backRefMaster.load(std::memory_order_relaxed)->active)
            backRefMaster.load(std::memory_order_relaxed)->addToForUseList(currBlock);
    }
}
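
// Illustrative usage sketch only (not part of the allocator): the life cycle of
// a back reference for a hypothetical block/header pointer `obj`.
//
//     BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false);
//     if (!idx.isInvalid()) {
//         setBackRef(idx, obj);                       // the slot now points at obj
//         MALLOC_ASSERT(getBackRef(idx) == obj, ASSERT_TEXT);
//         removeBackRef(idx);                         // the slot returns to the leaf's free list
//     }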

/********* End of backreferences ***********************/

} // namespace internal
} // namespace rml