/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <new>        /* for placement new */

namespace rml {
namespace internal {


/********* backreferences ***********************/
/* Each slab block and each large memory object header contains a BackRefIdx
 * that identifies an entry in some BackRefBlock; that entry points back to the
 * block or header it belongs to.
 */
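/* Resolving a BackRefIdx is a two-level translation (a sketch of what
 * getBackRef()/setBackRef() below actually do):
 *
 *     BackRefBlock *leaf  = backRefMaster.load()->backRefBl[idx.getMaster()];
 *     void        **entry = (void**)((uintptr_t)leaf + sizeof(BackRefBlock)
 *                                    + idx.getOffset()*sizeof(void*));
 *     // *entry is the slab block or large object header that owns idx
 */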
struct BackRefBlock : public BlockI {
    BackRefBlock *nextForUse;     // the next in the chain of blocks with free items
    FreeObject   *bumpPtr;        // bump pointer moves from the end to the beginning of the block
    FreeObject   *freeList;
    // list of all blocks that were allocated from raw mem (i.e., not from backend)
    BackRefBlock *nextRawMemBlock;
    std::atomic<int> allocatedCount; // the number of objects allocated
    BackRefIdx::master_t myNum;   // the index in the master
    MallocMutex   blockMutex;
    // true if this block has been added to the listForUse chain,
    // modifications protected by masterMutex
    std::atomic<bool> addedToForUse;

    BackRefBlock(const BackRefBlock *blockToUse, intptr_t num) :
        nextForUse(NULL), bumpPtr((FreeObject*)((uintptr_t)blockToUse + slabSize - sizeof(void*))),
        freeList(NULL), nextRawMemBlock(NULL), allocatedCount(0), myNum(num),
        addedToForUse(false) {
        memset(&blockMutex, 0, sizeof(MallocMutex));

        MALLOC_ASSERT(!(num >> CHAR_BIT*sizeof(BackRefIdx::master_t)),
                      "index in BackRefMaster must fit to BackRefIdx::master");
    }
    // clean all but header
    void zeroSet() { memset(this+1, 0, BackRefBlock::bytes-sizeof(BackRefBlock)); }
    static const int bytes = slabSize;
};

// max number of backreference pointers in slab block
static const int BR_MAX_CNT = (BackRefBlock::bytes-sizeof(BackRefBlock))/sizeof(void*);

struct BackRefMaster {
/* On 64-bit systems a slab block can hold up to ~2K back pointers to slab blocks
 * or large objects, so it can address at least 32MB. The master array of 256KB
 * holds 32K pointers to such blocks, addressing ~1 TB.
 * On 32-bit systems there are ~4K back pointers in a slab block, so ~64MB can be addressed.
 * The master array of 8KB holds 2K pointers to leaves, so ~128 GB can be addressed.
 */
    static const size_t bytes = sizeof(uintptr_t)>4? 256*1024 : 8*1024;
    static const int dataSz;
/* space is reserved for master table and 4 leaves
   taking into account VirtualAlloc allocation granularity */
    static const int leaves = 4;
    static const size_t masterSize = BackRefMaster::bytes+leaves*BackRefBlock::bytes;
    // The size of memory request for a few more leaf blocks;
    // selected to match VirtualAlloc granularity
    static const size_t blockSpaceSize = 64*1024;

    Backend       *backend;
    std::atomic<BackRefBlock*> active;         // if defined, use it for allocations
    std::atomic<BackRefBlock*> listForUse;     // the chain of data blocks with free items
    BackRefBlock  *allRawMemBlocks;
    std::atomic <intptr_t> lastUsed; // index of the last used block
    bool           rawMemUsed;
    MallocMutex    requestNewSpaceMutex;
    BackRefBlock  *backRefBl[1];   // the real size of the array is dataSz

    BackRefBlock *findFreeBlock();
    void          addToForUseList(BackRefBlock *bl);
    void          initEmptyBackRefBlock(BackRefBlock *newBl);
    bool          requestNewSpace();
};

const int BackRefMaster::dataSz
    = 1+(BackRefMaster::bytes-sizeof(BackRefMaster))/sizeof(BackRefBlock*);
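
/* Worked figures behind the estimates in the BackRefMaster comment above
 * (assuming the usual 16KB slab, which the ~2K-pointers-per-leaf figure implies):
 *   64-bit: BR_MAX_CNT ~ (16KB - sizeof(BackRefBlock))/8 ~ 2K entries per leaf,
 *           each leaf covers ~2K * 16KB = 32MB; dataSz ~ 256KB/8 = 32K leaves,
 *           so the table addresses ~32K * 32MB = 1TB.
 *   32-bit: ~4K entries per leaf -> ~64MB per leaf; dataSz ~ 8KB/4 = 2K leaves,
 *           so ~2K * 64MB = 128GB.
 */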

static MallocMutex masterMutex;
static std::atomic<BackRefMaster*> backRefMaster;

bool initBackRefMaster(Backend *backend)
{
    bool rawMemUsed;
    BackRefMaster *master =
        (BackRefMaster*)backend->getBackRefSpace(BackRefMaster::masterSize,
                                                 &rawMemUsed);
    if (! master)
        return false;
    master->backend = backend;
    master->listForUse.store(nullptr, std::memory_order_relaxed);
    master->allRawMemBlocks = nullptr;
    master->rawMemUsed = rawMemUsed;
    master->lastUsed = -1;
    memset(&master->requestNewSpaceMutex, 0, sizeof(MallocMutex));
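    // The space returned by getBackRefSpace() holds the master table
    // (BackRefMaster::bytes) immediately followed by `leaves` leaf blocks of
    // BackRefBlock::bytes each (see masterSize); carve out and set up those
    // initial leaves now.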
    for (int i=0; i<BackRefMaster::leaves; i++) {
        BackRefBlock *bl = (BackRefBlock*)((uintptr_t)master + BackRefMaster::bytes + i*BackRefBlock::bytes);
        bl->zeroSet();
        master->initEmptyBackRefBlock(bl);
        if (i)
            master->addToForUseList(bl);
        else // active leaf is not needed in listForUse
            master->active.store(bl, std::memory_order_relaxed);
    }
    // backRefMaster is read in getBackRef, so publish it in a consistent state
    backRefMaster.store(master, std::memory_order_release);
    return true;
}

#if __TBB_SOURCE_DIRECTLY_INCLUDED
void destroyBackRefMaster(Backend *backend)
{
    if (backRefMaster.load(std::memory_order_acquire)) { // Was initBackRefMaster() called?
        for (BackRefBlock *curr = backRefMaster.load(std::memory_order_relaxed)->allRawMemBlocks; curr; ) {
            BackRefBlock *next = curr->nextRawMemBlock;
            // allRawMemBlocks list is only for raw mem blocks
            backend->putBackRefSpace(curr, BackRefMaster::blockSpaceSize,
                                     /*rawMemUsed=*/true);
            curr = next;
        }
        backend->putBackRefSpace(backRefMaster.load(std::memory_order_relaxed), BackRefMaster::masterSize,
                                 backRefMaster.load(std::memory_order_relaxed)->rawMemUsed);
    }
}
#endif

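// Push bl onto the LIFO listForUse chain. Callers either hold masterMutex or
// run before the master is published (initBackRefMaster), so relaxed stores
// are sufficient here.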
void BackRefMaster::addToForUseList(BackRefBlock *bl)
{
    bl->nextForUse = listForUse.load(std::memory_order_relaxed);
    listForUse.store(bl, std::memory_order_relaxed);
    bl->addedToForUse.store(true, std::memory_order_relaxed);
}

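// Construct a BackRefBlock in place at newBl and publish it as
// backRefBl[lastUsed+1]; the release store to lastUsed is what makes the new
// leaf visible to the lock-free readers in getBackRef().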
void BackRefMaster::initEmptyBackRefBlock(BackRefBlock *newBl)
{
    intptr_t nextLU = lastUsed+1;
    new (newBl) BackRefBlock(newBl, nextLU);
    MALLOC_ASSERT(nextLU < dataSz, NULL);
    backRefBl[nextLU] = newBl;
    // lastUsed is read in getBackRef, and access to backRefBl[lastUsed]
    // is possible only after checking backref against current lastUsed
    lastUsed.store(nextLU, std::memory_order_release);
}

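// Request a blockSpaceSize batch from the backend, carve it into leaf blocks,
// and register as many of them as free master indices allow. Returns false if
// the master table is full or the backend cannot supply memory.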
bool BackRefMaster::requestNewSpace()
{
    bool isRawMemUsed;
    static_assert(!(blockSpaceSize % BackRefBlock::bytes),
                  "Must request space for a whole number of blocks.");

    if (backRefMaster.load(std::memory_order_relaxed)->dataSz <= lastUsed + 1) // no space in master
        return false;

    // only one thread at a time may add blocks
    MallocMutex::scoped_lock newSpaceLock(requestNewSpaceMutex);

    if (listForUse.load(std::memory_order_relaxed)) // re-check under the lock: another thread may have already added blocks
        return true;
    BackRefBlock *newBl = (BackRefBlock*)backend->getBackRefSpace(blockSpaceSize, &isRawMemUsed);
    if (!newBl) return false;

    // touch the pages for the first time without taking masterMutex ...
    for (BackRefBlock *bl = newBl; (uintptr_t)bl < (uintptr_t)newBl + blockSpaceSize;
         bl = (BackRefBlock*)((uintptr_t)bl + BackRefBlock::bytes)) {
        bl->zeroSet();
    }

    MallocMutex::scoped_lock lock(masterMutex); // ... and share under lock

    const size_t numOfUnusedIdxs = backRefMaster.load(std::memory_order_relaxed)->dataSz - lastUsed - 1;
    if (numOfUnusedIdxs <= 0) { // no space in master under lock, roll back
        backend->putBackRefSpace(newBl, blockSpaceSize, isRawMemUsed);
        return false;
    }
    // It's possible that only part of newBl is used, due to lack of indices in master.
    // This is OK as such underutilization is possible only once for the backreferences table.
    int blocksToUse = min(numOfUnusedIdxs, blockSpaceSize / BackRefBlock::bytes);

    // use the first block in the batch to maintain the list of "raw" memory
    // to be released at shutdown
    if (isRawMemUsed) {
        newBl->nextRawMemBlock = backRefMaster.load(std::memory_order_relaxed)->allRawMemBlocks;
        backRefMaster.load(std::memory_order_relaxed)->allRawMemBlocks = newBl;
    }
    for (BackRefBlock *bl = newBl; blocksToUse>0; bl = (BackRefBlock*)((uintptr_t)bl + BackRefBlock::bytes), blocksToUse--) {
        initEmptyBackRefBlock(bl);
        if (active.load(std::memory_order_relaxed)->allocatedCount.load(std::memory_order_relaxed) == BR_MAX_CNT) {
            active.store(bl, std::memory_order_release); // active leaf is not needed in listForUse
        } else {
            addToForUseList(bl);
        }
    }
    return true;
}

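// Return a leaf block with at least one free slot: the current active block if
// it is not full, otherwise the head of listForUse (re-checked under
// masterMutex), otherwise a block from a freshly requested batch. Returns NULL
// only if new space cannot be obtained.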
BackRefBlock *BackRefMaster::findFreeBlock()
{
    BackRefBlock* active_block = active.load(std::memory_order_acquire);
    MALLOC_ASSERT(active_block, ASSERT_TEXT);

    if (active_block->allocatedCount.load(std::memory_order_relaxed) < BR_MAX_CNT)
        return active_block;

    if (listForUse.load(std::memory_order_relaxed)) { // use released list
        MallocMutex::scoped_lock lock(masterMutex);

        if (active_block->allocatedCount.load(std::memory_order_relaxed) == BR_MAX_CNT) {
            active_block = listForUse.load(std::memory_order_relaxed);
            if (active_block) {
                active.store(active_block, std::memory_order_release);
                listForUse.store(active_block->nextForUse, std::memory_order_relaxed);
                MALLOC_ASSERT(active_block->addedToForUse.load(std::memory_order_relaxed), ASSERT_TEXT);
                active_block->addedToForUse.store(false, std::memory_order_relaxed);
            }
        }
    } else // allocate new data node
        if (!requestNewSpace())
            return NULL;
    return active.load(std::memory_order_acquire); // reread because of requestNewSpace
}

void *getBackRef(BackRefIdx backRefIdx)
{
    // !backRefMaster means no initialization done, so it can't be valid memory
    // see initEmptyBackRefBlock for fences around lastUsed
    if (!(backRefMaster.load(std::memory_order_acquire))
        || backRefIdx.getMaster() > (backRefMaster.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_acquire))
        || backRefIdx.getOffset() >= BR_MAX_CNT)
        return NULL;
    return *(void**)((uintptr_t)backRefMaster.load(std::memory_order_relaxed)->backRefBl[backRefIdx.getMaster()]
                     + sizeof(BackRefBlock)+backRefIdx.getOffset()*sizeof(void*));
}

void setBackRef(BackRefIdx backRefIdx, void *newPtr)
{
    MALLOC_ASSERT(backRefIdx.getMaster()<=backRefMaster.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed)
                                 && backRefIdx.getOffset()<BR_MAX_CNT, ASSERT_TEXT);
    *(void**)((uintptr_t)backRefMaster.load(std::memory_order_relaxed)->backRefBl[backRefIdx.getMaster()]
              + sizeof(BackRefBlock) + backRefIdx.getOffset()*sizeof(void*)) = newPtr;
}

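// Allocate one back-reference slot: from the block's free list if possible,
// otherwise from its bump pointer. The first thread to allocate from the last
// free block requests more space in advance so later calls do not stall.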
BackRefIdx BackRefIdx::newBackRef(bool largeObj)
{
    BackRefBlock *blockToUse;
    void **toUse;
    BackRefIdx res;
    bool lastBlockFirstUsed = false;

    do {
        MALLOC_ASSERT(backRefMaster.load(std::memory_order_relaxed), ASSERT_TEXT);
        blockToUse = backRefMaster.load(std::memory_order_relaxed)->findFreeBlock();
        if (!blockToUse)
            return BackRefIdx();
        toUse = NULL;
        { // the block is locked to find a reference
            MallocMutex::scoped_lock lock(blockToUse->blockMutex);

            if (blockToUse->freeList) {
                toUse = (void**)blockToUse->freeList;
                blockToUse->freeList = blockToUse->freeList->next;
                MALLOC_ASSERT(!blockToUse->freeList ||
                              ((uintptr_t)blockToUse->freeList>=(uintptr_t)blockToUse
                               && (uintptr_t)blockToUse->freeList <
                               (uintptr_t)blockToUse + slabSize), ASSERT_TEXT);
            } else if (blockToUse->allocatedCount.load(std::memory_order_relaxed) < BR_MAX_CNT) {
                toUse = (void**)blockToUse->bumpPtr;
                blockToUse->bumpPtr =
                    (FreeObject*)((uintptr_t)blockToUse->bumpPtr - sizeof(void*));
                if (blockToUse->allocatedCount.load(std::memory_order_relaxed) == BR_MAX_CNT-1) {
                    MALLOC_ASSERT((uintptr_t)blockToUse->bumpPtr
                                  < (uintptr_t)blockToUse+sizeof(BackRefBlock),
                                  ASSERT_TEXT);
                    blockToUse->bumpPtr = NULL;
                }
            }
            if (toUse) {
                if (!blockToUse->allocatedCount.load(std::memory_order_relaxed) &&
                    !backRefMaster.load(std::memory_order_relaxed)->listForUse.load(std::memory_order_relaxed)) {
                    lastBlockFirstUsed = true;
                }
                blockToUse->allocatedCount.store(blockToUse->allocatedCount.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
            }
        } // end of lock scope
    } while (!toUse);
    // The first thread that uses the last block requests new space in advance;
    // possible failures are ignored.
    if (lastBlockFirstUsed)
        backRefMaster.load(std::memory_order_relaxed)->requestNewSpace();

    res.master = blockToUse->myNum;
    uintptr_t offset =
        ((uintptr_t)toUse - ((uintptr_t)blockToUse + sizeof(BackRefBlock)))/sizeof(void*);
    // Is offset too big?
    MALLOC_ASSERT(!(offset >> 15), ASSERT_TEXT);
    res.offset = offset;
    if (largeObj) res.largeObj = largeObj;

    return res;
}

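// Return the slot at backRefIdx to its block's free list; if the block is now
// neither the active block nor already on listForUse, put it back on
// listForUse so the freed slot can be reused.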
void removeBackRef(BackRefIdx backRefIdx)
{
    MALLOC_ASSERT(!backRefIdx.isInvalid(), ASSERT_TEXT);
    MALLOC_ASSERT(backRefIdx.getMaster()<=backRefMaster.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed)
                  && backRefIdx.getOffset()<BR_MAX_CNT, ASSERT_TEXT);
    BackRefBlock *currBlock = backRefMaster.load(std::memory_order_relaxed)->backRefBl[backRefIdx.getMaster()];
    FreeObject *freeObj = (FreeObject*)((uintptr_t)currBlock + sizeof(BackRefBlock)
                                        + backRefIdx.getOffset()*sizeof(void*));
    MALLOC_ASSERT(((uintptr_t)freeObj>(uintptr_t)currBlock &&
                   (uintptr_t)freeObj<(uintptr_t)currBlock + slabSize), ASSERT_TEXT);
    {
        MallocMutex::scoped_lock lock(currBlock->blockMutex);

        freeObj->next = currBlock->freeList;
        MALLOC_ASSERT(!freeObj->next ||
                      ((uintptr_t)freeObj->next > (uintptr_t)currBlock
                       && (uintptr_t)freeObj->next <
                       (uintptr_t)currBlock + slabSize), ASSERT_TEXT);
        currBlock->freeList = freeObj;
        currBlock->allocatedCount.store(currBlock->allocatedCount.load(std::memory_order_relaxed)-1, std::memory_order_relaxed);
    }
    // TODO: do we need double-check here?
    if (!currBlock->addedToForUse.load(std::memory_order_relaxed) &&
        currBlock!=backRefMaster.load(std::memory_order_relaxed)->active.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock lock(masterMutex);

        if (!currBlock->addedToForUse.load(std::memory_order_relaxed) &&
            currBlock!=backRefMaster.load(std::memory_order_relaxed)->active.load(std::memory_order_relaxed))
            backRefMaster.load(std::memory_order_relaxed)->addToForUseList(currBlock);
    }
}
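
// Illustrative lifecycle of a back reference (a minimal sketch, not code from
// this file; `myBlock` stands for a hypothetical slab block pointer):
//
//     BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false);
//     if (!idx.isInvalid()) {
//         setBackRef(idx, myBlock);               // record the owner
//         MALLOC_ASSERT(getBackRef(idx) == myBlock, ASSERT_TEXT);
//         // ... later, when myBlock is released:
//         removeBackRef(idx);
//     }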

/********* End of backreferences ***********************/

} // namespace internal
} // namespace rml