xref: /oneTBB/src/tbbmalloc/backref.cpp (revision 1ecde27f)
/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <new>        /* for placement new */

namespace rml {
namespace internal {


/********* backreferences ***********************/
/* Each slab block and each large memory object header contains a BackRefIdx
 * that identifies an entry in some BackRefBlock; that entry points back to the
 * slab block or large object header.
 */
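/* Conceptually, resolving a BackRefIdx is a two-level lookup. The sketch below
 * uses only names defined in this file and is illustrative, not an extra API:
 *
 *   BackRefBlock *leaf  = backRefMain->backRefBl[idx.getMain()];     // leaf block
 *   void        **entry = (void**)((uintptr_t)leaf + sizeof(BackRefBlock))
 *                         + idx.getOffset();                         // slot in the leaf
 *   void         *obj   = *entry;   // the slab block or large object header
 *
 * getBackRef() and setBackRef() below perform this address arithmetic with
 * atomic loads/stores.
 */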
struct BackRefBlock : public BlockI {
    BackRefBlock *nextForUse;     // the next in the chain of blocks with free items
    FreeObject   *bumpPtr;        // bump pointer moves from the end to the beginning of the block
    FreeObject   *freeList;
    // list of all blocks that were allocated from raw mem (i.e., not from backend)
    BackRefBlock *nextRawMemBlock;
    std::atomic<int> allocatedCount; // the number of objects allocated
    BackRefIdx::main_t myNum;   // the index in the main table (BackRefMain::backRefBl)
    MallocMutex   blockMutex;
    // true if this block has been added to the listForUse chain,
    // modifications protected by mainMutex
    std::atomic<bool> addedToForUse;

    BackRefBlock(const BackRefBlock *blockToUse, intptr_t num) :
        nextForUse(NULL), bumpPtr((FreeObject*)((uintptr_t)blockToUse + slabSize - sizeof(void*))),
        freeList(NULL), nextRawMemBlock(NULL), allocatedCount(0), myNum(num),
        addedToForUse(false) {
        memset(&blockMutex, 0, sizeof(MallocMutex));

        MALLOC_ASSERT(!(num >> CHAR_BIT*sizeof(BackRefIdx::main_t)),
                      "index in BackRefMain must fit into BackRefIdx::main");
    }
    // clean all but the header
    void zeroSet() { memset(this+1, 0, BackRefBlock::bytes-sizeof(BackRefBlock)); }
    static const int bytes = slabSize;
};

// max number of backreference pointers in a slab block
static const int BR_MAX_CNT = (BackRefBlock::bytes-sizeof(BackRefBlock))/sizeof(void*);

struct BackRefMain {
/* On 64-bit systems a slab block can hold up to ~2K back pointers to slab blocks
 * or large objects, so it can address at least 32MB. The main array of 256KB
 * holds 32K pointers to such blocks, addressing ~1 TB.
 * On 32-bit systems there are ~4K back pointers in a slab block, so ~64MB can be addressed.
 * The main array of 8KB holds 2K pointers to leaves, so ~128 GB can be addressed.
 */
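/* Worked example behind the numbers above (a sketch assuming slabSize == 16KB,
 * as configured elsewhere in tbbmalloc, and 8-byte pointers on 64-bit):
 *   BR_MAX_CNT ~= (16K - sizeof(BackRefBlock)) / 8 ~= 2K entries per leaf
 *   one leaf covers ~2K slabs of 16KB each ~= 32MB
 *   main array: 256KB / 8 = 32K leaf pointers -> 32K * 32MB ~= 1TB
 * On 32-bit (4-byte pointers): ~4K entries per leaf -> ~64MB per leaf,
 * and 8KB / 4 = 2K leaf pointers -> ~128GB total.
 */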
    static const size_t bytes = sizeof(uintptr_t)>4? 256*1024 : 8*1024;
    static const int dataSz;
/* Space is reserved for the main table and 4 leaves,
   taking the VirtualAlloc allocation granularity into account. */
    static const int leaves = 4;
    static const size_t mainSize = BackRefMain::bytes+leaves*BackRefBlock::bytes;
    // The size of a memory request for a few more leaf blocks;
    // selected to match VirtualAlloc granularity
    static const size_t blockSpaceSize = 64*1024;

    Backend       *backend;
    std::atomic<BackRefBlock*> active;         // if defined, use it for allocations
    std::atomic<BackRefBlock*> listForUse;     // the chain of data blocks with free items
    BackRefBlock  *allRawMemBlocks;
    std::atomic <intptr_t> lastUsed; // index of the last used block
    bool           rawMemUsed;
    MallocMutex    requestNewSpaceMutex;
    BackRefBlock  *backRefBl[1];   // the real size of the array is dataSz

    BackRefBlock *findFreeBlock();
    void          addToForUseList(BackRefBlock *bl);
    void          initEmptyBackRefBlock(BackRefBlock *newBl);
    bool          requestNewSpace();
};

const int BackRefMain::dataSz
    = 1+(BackRefMain::bytes-sizeof(BackRefMain))/sizeof(BackRefBlock*);
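// dataSz is the number of leaf-pointer slots that fit into the 'bytes' budget:
// backRefBl[] is declared with one element but extends past sizeof(BackRefMain)
// into the rest of that area, and the leading '1 +' accounts for the element
// already included in sizeof(BackRefMain).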

static MallocMutex mainMutex;
static std::atomic<BackRefMain*> backRefMain;

bool initBackRefMain(Backend *backend)
{
    bool rawMemUsed;
    BackRefMain *main =
        (BackRefMain*)backend->getBackRefSpace(BackRefMain::mainSize,
                                                 &rawMemUsed);
    if (! main)
        return false;
    main->backend = backend;
    main->listForUse.store(nullptr, std::memory_order_relaxed);
    main->allRawMemBlocks = nullptr;
    main->rawMemUsed = rawMemUsed;
    main->lastUsed = -1;
    memset(&main->requestNewSpaceMutex, 0, sizeof(MallocMutex));
    for (int i=0; i<BackRefMain::leaves; i++) {
        BackRefBlock *bl = (BackRefBlock*)((uintptr_t)main + BackRefMain::bytes + i*BackRefBlock::bytes);
        bl->zeroSet();
        main->initEmptyBackRefBlock(bl);
        if (i)
            main->addToForUseList(bl);
        else // the active leaf is not needed in listForUse
            main->active.store(bl, std::memory_order_relaxed);
    }
    // backRefMain is read in getBackRef, so publish it in a consistent state
    backRefMain.store(main, std::memory_order_release);
    return true;
}
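/* Layout of the single getBackRefSpace(mainSize) allocation created above
 * (illustrative only):
 *
 *   | BackRefMain header + backRefBl[] (BackRefMain::bytes) | leaf 0 | leaf 1 | leaf 2 | leaf 3 |
 *
 * Leaf 0 becomes the initial 'active' block; leaves 1-3 go to listForUse.
 */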

#if __TBB_SOURCE_DIRECTLY_INCLUDED
void destroyBackRefMain(Backend *backend)
{
    if (backRefMain.load(std::memory_order_acquire)) { // Has initBackRefMain() been called?
        for (BackRefBlock *curr = backRefMain.load(std::memory_order_relaxed)->allRawMemBlocks; curr; ) {
            BackRefBlock *next = curr->nextRawMemBlock;
            // the allRawMemBlocks list holds raw memory blocks only
            backend->putBackRefSpace(curr, BackRefMain::blockSpaceSize,
                                     /*rawMemUsed=*/true);
            curr = next;
        }
        backend->putBackRefSpace(backRefMain.load(std::memory_order_relaxed), BackRefMain::mainSize,
                                 backRefMain.load(std::memory_order_relaxed)->rawMemUsed);
    }
}
#endif

void BackRefMain::addToForUseList(BackRefBlock *bl)
{
    bl->nextForUse = listForUse.load(std::memory_order_relaxed);
    listForUse.store(bl, std::memory_order_relaxed);
    bl->addedToForUse.store(true, std::memory_order_relaxed);
}

void BackRefMain::initEmptyBackRefBlock(BackRefBlock *newBl)
{
    intptr_t nextLU = lastUsed+1;
    new (newBl) BackRefBlock(newBl, nextLU);
    MALLOC_ASSERT(nextLU < dataSz, NULL);
    backRefBl[nextLU] = newBl;
    // lastUsed is read in getBackRef, and access to backRefBl[lastUsed]
    // is possible only after checking backref against current lastUsed
    lastUsed.store(nextLU, std::memory_order_release);
}

bool BackRefMain::requestNewSpace()
{
    bool isRawMemUsed;
    static_assert(!(blockSpaceSize % BackRefBlock::bytes),
                         "Must request space for a whole number of blocks.");

    if (backRefMain.load(std::memory_order_relaxed)->dataSz <= lastUsed + 1) // no space left in the main table
        return false;

    // only one thread at a time may add blocks
    MallocMutex::scoped_lock newSpaceLock(requestNewSpaceMutex);

    if (listForUse.load(std::memory_order_relaxed)) // double check: another thread may have added blocks while we waited for the lock
        return true;
    BackRefBlock *newBl = (BackRefBlock*)backend->getBackRefSpace(blockSpaceSize, &isRawMemUsed);
    if (!newBl) return false;

    // touch the pages for the 1st time without taking mainMutex ...
    for (BackRefBlock *bl = newBl; (uintptr_t)bl < (uintptr_t)newBl + blockSpaceSize;
         bl = (BackRefBlock*)((uintptr_t)bl + BackRefBlock::bytes)) {
        bl->zeroSet();
    }

    MallocMutex::scoped_lock lock(mainMutex); // ... and share under the lock

    const size_t numOfUnusedIdxs = backRefMain.load(std::memory_order_relaxed)->dataSz - lastUsed - 1;
    if (numOfUnusedIdxs <= 0) { // no space left in the main table under the lock, roll back
        backend->putBackRefSpace(newBl, blockSpaceSize, isRawMemUsed);
        return false;
    }
    // It's possible that only part of newBl is used, due to the lack of indices in the main table.
    // This is OK, as such underutilization is possible only once for the backreferences table.
    int blocksToUse = min(numOfUnusedIdxs, blockSpaceSize / BackRefBlock::bytes);

    // use the first block in the batch to maintain the list of "raw" memory
    // to be released at shutdown
    if (isRawMemUsed) {
        newBl->nextRawMemBlock = backRefMain.load(std::memory_order_relaxed)->allRawMemBlocks;
        backRefMain.load(std::memory_order_relaxed)->allRawMemBlocks = newBl;
    }
    for (BackRefBlock *bl = newBl; blocksToUse>0; bl = (BackRefBlock*)((uintptr_t)bl + BackRefBlock::bytes), blocksToUse--) {
        initEmptyBackRefBlock(bl);
        if (active.load(std::memory_order_relaxed)->allocatedCount.load(std::memory_order_relaxed) == BR_MAX_CNT) {
            active.store(bl, std::memory_order_release); // the active leaf is not needed in listForUse
        } else {
            addToForUseList(bl);
        }
    }
    return true;
}
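// Note on locking in requestNewSpace(): requestNewSpaceMutex serializes
// concurrent space requests so that the page touching above happens outside
// mainMutex; mainMutex is then taken only briefly to publish the new leaves
// via initEmptyBackRefBlock() and addToForUseList().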

BackRefBlock *BackRefMain::findFreeBlock()
{
    BackRefBlock* active_block = active.load(std::memory_order_acquire);
    MALLOC_ASSERT(active_block, ASSERT_TEXT);

    if (active_block->allocatedCount.load(std::memory_order_relaxed) < BR_MAX_CNT)
        return active_block;

    if (listForUse.load(std::memory_order_relaxed)) { // use released list
        MallocMutex::scoped_lock lock(mainMutex);

        if (active_block->allocatedCount.load(std::memory_order_relaxed) == BR_MAX_CNT) {
            active_block = listForUse.load(std::memory_order_relaxed);
            if (active_block) {
                active.store(active_block, std::memory_order_release);
                listForUse.store(active_block->nextForUse, std::memory_order_relaxed);
                MALLOC_ASSERT(active_block->addedToForUse.load(std::memory_order_relaxed), ASSERT_TEXT);
                active_block->addedToForUse.store(false, std::memory_order_relaxed);
            }
        }
    } else // allocate new data node
        if (!requestNewSpace())
            return NULL;
    return active.load(std::memory_order_acquire); // reread because of requestNewSpace
}

void *getBackRef(BackRefIdx backRefIdx)
{
    // !backRefMain means initialization has not been done, so it can't be valid memory
    // see initEmptyBackRefBlock for the fences around lastUsed
    if (!(backRefMain.load(std::memory_order_acquire))
        || backRefIdx.getMain() > (backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_acquire))
        || backRefIdx.getOffset() >= BR_MAX_CNT)
    {
        return NULL;
    }
    std::atomic<void*>& backRefEntry = *(std::atomic<void*>*)(
            (uintptr_t)backRefMain.load(std::memory_order_relaxed)->backRefBl[backRefIdx.getMain()]
            + sizeof(BackRefBlock) + backRefIdx.getOffset() * sizeof(std::atomic<void*>)
        );
    return backRefEntry.load(std::memory_order_relaxed);
}
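/* Elsewhere in tbbmalloc (outside this file), a stored BackRefIdx is typically
 * validated by checking that the back reference resolves to the object that
 * claims to own it; roughly (an illustrative sketch, not code from this file):
 *
 *   bool looksOwned = !idx.isInvalid() && getBackRef(idx) == headerPtr;
 *
 * A NULL result from getBackRef() therefore safely rejects indices that were
 * never issued or are outside the currently published range.
 */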

void setBackRef(BackRefIdx backRefIdx, void *newPtr)
{
    MALLOC_ASSERT(backRefIdx.getMain()<=backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed)
                                 && backRefIdx.getOffset()<BR_MAX_CNT, ASSERT_TEXT);
    ((std::atomic<void*>*)((uintptr_t)backRefMain.load(std::memory_order_relaxed)->backRefBl[backRefIdx.getMain()]
        + sizeof(BackRefBlock) + backRefIdx.getOffset() * sizeof(void*)))->store(newPtr, std::memory_order_relaxed);
}

BackRefIdx BackRefIdx::newBackRef(bool largeObj)
{
    BackRefBlock *blockToUse;
    void **toUse;
    BackRefIdx res;
    bool lastBlockFirstUsed = false;

    do {
        MALLOC_ASSERT(backRefMain.load(std::memory_order_relaxed), ASSERT_TEXT);
        blockToUse = backRefMain.load(std::memory_order_relaxed)->findFreeBlock();
        if (!blockToUse)
            return BackRefIdx();
        toUse = NULL;
        { // the block is locked to find a reference
            MallocMutex::scoped_lock lock(blockToUse->blockMutex);

            if (blockToUse->freeList) {
                toUse = (void**)blockToUse->freeList;
                blockToUse->freeList = blockToUse->freeList->next;
                MALLOC_ASSERT(!blockToUse->freeList ||
                              ((uintptr_t)blockToUse->freeList>=(uintptr_t)blockToUse
                               && (uintptr_t)blockToUse->freeList <
                               (uintptr_t)blockToUse + slabSize), ASSERT_TEXT);
            } else if (blockToUse->allocatedCount.load(std::memory_order_relaxed) < BR_MAX_CNT) {
                toUse = (void**)blockToUse->bumpPtr;
                blockToUse->bumpPtr =
                    (FreeObject*)((uintptr_t)blockToUse->bumpPtr - sizeof(void*));
                if (blockToUse->allocatedCount.load(std::memory_order_relaxed) == BR_MAX_CNT-1) {
                    MALLOC_ASSERT((uintptr_t)blockToUse->bumpPtr
                                  < (uintptr_t)blockToUse+sizeof(BackRefBlock),
                                  ASSERT_TEXT);
                    blockToUse->bumpPtr = NULL;
                }
            }
            if (toUse) {
                if (!blockToUse->allocatedCount.load(std::memory_order_relaxed) &&
                    !backRefMain.load(std::memory_order_relaxed)->listForUse.load(std::memory_order_relaxed)) {
                    lastBlockFirstUsed = true;
                }
                blockToUse->allocatedCount.store(blockToUse->allocatedCount.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
            }
        } // end of lock scope
    } while (!toUse);
    // The first thread that uses the last block requests new space in advance;
    // possible failures are ignored.
    if (lastBlockFirstUsed)
        backRefMain.load(std::memory_order_relaxed)->requestNewSpace();

    res.main = blockToUse->myNum;
    uintptr_t offset =
        ((uintptr_t)toUse - ((uintptr_t)blockToUse + sizeof(BackRefBlock)))/sizeof(void*);
    // the offset must fit into the offset bit-field of BackRefIdx (15 bits)
    MALLOC_ASSERT(!(offset >> 15), ASSERT_TEXT);
    res.offset = offset;
    if (largeObj) res.largeObj = largeObj;

    return res;
}

void removeBackRef(BackRefIdx backRefIdx)
{
    MALLOC_ASSERT(!backRefIdx.isInvalid(), ASSERT_TEXT);
    MALLOC_ASSERT(backRefIdx.getMain()<=backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed)
                  && backRefIdx.getOffset()<BR_MAX_CNT, ASSERT_TEXT);
    BackRefBlock *currBlock = backRefMain.load(std::memory_order_relaxed)->backRefBl[backRefIdx.getMain()];
    std::atomic<void*>& backRefEntry = *(std::atomic<void*>*)((uintptr_t)currBlock + sizeof(BackRefBlock)
                                        + backRefIdx.getOffset()*sizeof(std::atomic<void*>));
    MALLOC_ASSERT(((uintptr_t)&backRefEntry >(uintptr_t)currBlock &&
                   (uintptr_t)&backRefEntry <(uintptr_t)currBlock + slabSize), ASSERT_TEXT);
    {
        MallocMutex::scoped_lock lock(currBlock->blockMutex);

        backRefEntry.store(currBlock->freeList, std::memory_order_relaxed);
#if MALLOC_DEBUG
        uintptr_t backRefEntryValue = (uintptr_t)backRefEntry.load(std::memory_order_relaxed);
        MALLOC_ASSERT(!backRefEntryValue ||
                      (backRefEntryValue > (uintptr_t)currBlock
                       && backRefEntryValue < (uintptr_t)currBlock + slabSize), ASSERT_TEXT);
#endif
        currBlock->freeList = (FreeObject*)&backRefEntry;
        currBlock->allocatedCount.store(currBlock->allocatedCount.load(std::memory_order_relaxed)-1, std::memory_order_relaxed);
    }
    // TODO: do we need double-check here?
    if (!currBlock->addedToForUse.load(std::memory_order_relaxed) &&
        currBlock!=backRefMain.load(std::memory_order_relaxed)->active.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock lock(mainMutex);

        if (!currBlock->addedToForUse.load(std::memory_order_relaxed) &&
            currBlock!=backRefMain.load(std::memory_order_relaxed)->active.load(std::memory_order_relaxed))
            backRefMain.load(std::memory_order_relaxed)->addToForUseList(currBlock);
    }
}
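
// Typical lifecycle of a back reference, using only the functions defined in
// this file (an illustrative sketch; the real callers live elsewhere in
// tbbmalloc, and 'myHeader' stands for a hypothetical owner object):
//
//   BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false);
//   if (!idx.isInvalid()) {
//       setBackRef(idx, myHeader);         // the entry now points back to the owner
//       MALLOC_ASSERT(getBackRef(idx) == myHeader, ASSERT_TEXT);
//       // ...
//       removeBackRef(idx);                // return the entry to the block's free list
//   }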

/********* End of backreferences ***********************/

} // namespace internal
} // namespace rml
