/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include <new> /* for placement new */

namespace rml {
namespace internal {


/********* backreferences ***********************/
/* Each slab block and each large memory object header contains a BackRefIdx
 * that refers to an entry in some BackRefBlock; that entry points back to the
 * owning slab block or large object header.
 */
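/* Roughly (see getBackRef/setBackRef below): getMain() selects a leaf block via
 * backRefMain->backRefBl[], and getOffset() selects a pointer slot located right
 * after that leaf's BackRefBlock header.
 */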
struct BackRefBlock : public BlockI {
    BackRefBlock *nextForUse;     // the next in the chain of blocks with free items
    FreeObject   *bumpPtr;        // bump pointer moves from the end to the beginning of the block
    FreeObject   *freeList;
    // list of all blocks that were allocated from raw mem (i.e., not from backend)
    BackRefBlock *nextRawMemBlock;
    std::atomic<int> allocatedCount; // the number of objects allocated
    BackRefIdx::main_t myNum;     // the index in the main
    MallocMutex   blockMutex;
    // true if this block has been added to the listForUse chain,
    // modifications protected by mainMutex
    std::atomic<bool> addedToForUse;

    BackRefBlock(const BackRefBlock *blockToUse, intptr_t num) :
        nextForUse(nullptr), bumpPtr((FreeObject*)((uintptr_t)blockToUse + slabSize - sizeof(void*))),
        freeList(nullptr), nextRawMemBlock(nullptr), allocatedCount(0), myNum(num),
        addedToForUse(false) {
        memset(static_cast<void*>(&blockMutex), 0, sizeof(MallocMutex));

        MALLOC_ASSERT(!(num >> CHAR_BIT*sizeof(BackRefIdx::main_t)),
                      "index in BackRefMain must fit into BackRefIdx::main");
    }
    // clean all but header
    void zeroSet() { memset(static_cast<void*>(this+1), 0, BackRefBlock::bytes-sizeof(BackRefBlock)); }
    static const int bytes = slabSize;
};

// max number of backreference pointers in slab block
static const int BR_MAX_CNT = (BackRefBlock::bytes-sizeof(BackRefBlock))/sizeof(void*);

struct BackRefMain {
/* On 64-bit systems a slab block can hold up to ~2K back pointers to slab blocks
 * or large objects, so it can address at least 32MB. The main array of 256KB
 * holds 32K pointers to such blocks, addressing ~1 TB.
 * On 32-bit systems there are ~4K back pointers in a slab block, so ~64MB can be addressed.
 * The main array of 8KB holds 2K pointers to leaves, so ~128 GB can be addressed.
 */
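/* A rough check of that arithmetic, assuming the usual 16 KB slabSize:
 * 64-bit: (16 KB - sizeof(BackRefBlock))/8 ~ 2K entries per leaf; 2K * 16 KB = 32 MB per leaf;
 *         256 KB / 8 = 32K leaf pointers, so 32K * 32 MB ~ 1 TB in total.
 * 32-bit: (16 KB - sizeof(BackRefBlock))/4 ~ 4K entries per leaf; 4K * 16 KB = 64 MB per leaf;
 *         8 KB / 4 = 2K leaf pointers, so 2K * 64 MB ~ 128 GB in total.
 */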
    static const size_t bytes = sizeof(uintptr_t)>4? 256*1024 : 8*1024;
    static const int dataSz;
/* space is reserved for main table and 4 leaves
   taking into account VirtualAlloc allocation granularity */
    static const int leaves = 4;
    static const size_t mainSize = BackRefMain::bytes+leaves*BackRefBlock::bytes;
    // The size of memory request for a few more leaf blocks;
    // selected to match VirtualAlloc granularity
    static const size_t blockSpaceSize = 64*1024;

    Backend       *backend;
    std::atomic<BackRefBlock*> active;     // if defined, use it for allocations
    std::atomic<BackRefBlock*> listForUse; // the chain of data blocks with free items
    BackRefBlock  *allRawMemBlocks;
    std::atomic<intptr_t> lastUsed;        // index of the last used block
    bool           rawMemUsed;
    MallocMutex    requestNewSpaceMutex;
    BackRefBlock  *backRefBl[1];           // the real size of the array is dataSz

    BackRefBlock *findFreeBlock();
    void addToForUseList(BackRefBlock *bl);
    void initEmptyBackRefBlock(BackRefBlock *newBl);
    bool requestNewSpace();
};

const int BackRefMain::dataSz
    = 1+(BackRefMain::bytes-sizeof(BackRefMain))/sizeof(BackRefBlock*);

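// mainMutex serializes updates to active, listForUse and the backRefBl[] leaf table;
// getBackRef avoids taking it by validating indices against lastUsed, which is
// published with a release store in initEmptyBackRefBlock.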
static MallocMutex mainMutex;
static std::atomic<BackRefMain*> backRefMain;

bool initBackRefMain(Backend *backend)
{
    bool rawMemUsed;
    BackRefMain *main =
        (BackRefMain*)backend->getBackRefSpace(BackRefMain::mainSize,
                                               &rawMemUsed);
    if (! main)
        return false;
    main->backend = backend;
    main->listForUse.store(nullptr, std::memory_order_relaxed);
    main->allRawMemBlocks = nullptr;
    main->rawMemUsed = rawMemUsed;
    main->lastUsed = -1;
    memset(static_cast<void*>(&main->requestNewSpaceMutex), 0, sizeof(MallocMutex));
    for (int i=0; i<BackRefMain::leaves; i++) {
        BackRefBlock *bl = (BackRefBlock*)((uintptr_t)main + BackRefMain::bytes + i*BackRefBlock::bytes);
        bl->zeroSet();
        main->initEmptyBackRefBlock(bl);
        if (i)
            main->addToForUseList(bl);
        else // active leaf is not needed in listForUse
            main->active.store(bl, std::memory_order_relaxed);
    }
    // backRefMain is read in getBackRef, so publish it in a consistent state
    backRefMain.store(main, std::memory_order_release);
    return true;
}

#if __TBB_SOURCE_DIRECTLY_INCLUDED
void destroyBackRefMain(Backend *backend)
{
    if (backRefMain.load(std::memory_order_acquire)) { // Is initBackRefMain() called?
        for (BackRefBlock *curr = backRefMain.load(std::memory_order_relaxed)->allRawMemBlocks; curr; ) {
            BackRefBlock *next = curr->nextRawMemBlock;
            // allRawMemBlocks list is only for raw mem blocks
            backend->putBackRefSpace(curr, BackRefMain::blockSpaceSize,
                                     /*rawMemUsed=*/true);
            curr = next;
        }
        backend->putBackRefSpace(backRefMain.load(std::memory_order_relaxed), BackRefMain::mainSize,
                                 backRefMain.load(std::memory_order_relaxed)->rawMemUsed);
    }
}
#endif

void BackRefMain::addToForUseList(BackRefBlock *bl)
{
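    // callers either hold mainMutex or run before backRefMain is published
    // (see the comment on addedToForUse in BackRefBlock)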
    bl->nextForUse = listForUse.load(std::memory_order_relaxed);
    listForUse.store(bl, std::memory_order_relaxed);
    bl->addedToForUse.store(true, std::memory_order_relaxed);
}

void BackRefMain::initEmptyBackRefBlock(BackRefBlock *newBl)
{
    intptr_t nextLU = lastUsed+1;
    new (newBl) BackRefBlock(newBl, nextLU);
    MALLOC_ASSERT(nextLU < dataSz, nullptr);
    backRefBl[nextLU] = newBl;
    // lastUsed is read in getBackRef, and access to backRefBl[lastUsed]
    // is possible only after checking backref against current lastUsed
    lastUsed.store(nextLU, std::memory_order_release);
}

bool BackRefMain::requestNewSpace()
{
    bool isRawMemUsed;
    static_assert(!(blockSpaceSize % BackRefBlock::bytes),
                  "Must request space for whole number of blocks.");

    if (BackRefMain::dataSz <= lastUsed + 1) // no space in main
        return false;

    // only one thread at a time may add blocks
    MallocMutex::scoped_lock newSpaceLock(requestNewSpaceMutex);

    if (listForUse.load(std::memory_order_relaxed)) // re-check under the lock: another thread may have added blocks already
        return true;
    BackRefBlock *newBl = (BackRefBlock*)backend->getBackRefSpace(blockSpaceSize, &isRawMemUsed);
    if (!newBl) return false;

    // touch a page for the 1st time without taking mainMutex ...
    for (BackRefBlock *bl = newBl; (uintptr_t)bl < (uintptr_t)newBl + blockSpaceSize;
         bl = (BackRefBlock*)((uintptr_t)bl + BackRefBlock::bytes)) {
        bl->zeroSet();
    }

    MallocMutex::scoped_lock lock(mainMutex); // ... and share under lock

    const size_t numOfUnusedIdxs = BackRefMain::dataSz - lastUsed - 1;
    if (numOfUnusedIdxs <= 0) { // no space in main under lock, roll back
        backend->putBackRefSpace(newBl, blockSpaceSize, isRawMemUsed);
        return false;
    }
    // It's possible that only part of newBl is used, due to lack of indices in main.
    // This is OK, as such underutilization can happen only once for the backreferences table.
    int blocksToUse = min(numOfUnusedIdxs, blockSpaceSize / BackRefBlock::bytes);

    // use the first block in the batch to maintain the list of "raw" memory
    // to be released at shutdown
    if (isRawMemUsed) {
        newBl->nextRawMemBlock = backRefMain.load(std::memory_order_relaxed)->allRawMemBlocks;
        backRefMain.load(std::memory_order_relaxed)->allRawMemBlocks = newBl;
    }
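    // register each new leaf: if the current active block turns out to be full,
    // install the new leaf as active, otherwise publish it on listForUse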
    for (BackRefBlock *bl = newBl; blocksToUse>0; bl = (BackRefBlock*)((uintptr_t)bl + BackRefBlock::bytes), blocksToUse--) {
        initEmptyBackRefBlock(bl);
        if (active.load(std::memory_order_relaxed)->allocatedCount.load(std::memory_order_relaxed) == BR_MAX_CNT) {
            active.store(bl, std::memory_order_release); // active leaf is not needed in listForUse
        } else {
            addToForUseList(bl);
        }
    }
    return true;
}

BackRefBlock *BackRefMain::findFreeBlock()
{
    BackRefBlock* active_block = active.load(std::memory_order_acquire);
    MALLOC_ASSERT(active_block, ASSERT_TEXT);

    if (active_block->allocatedCount.load(std::memory_order_relaxed) < BR_MAX_CNT)
        return active_block;

    if (listForUse.load(std::memory_order_relaxed)) { // use released list
        MallocMutex::scoped_lock lock(mainMutex);

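        // replace the active block only if it is still full: removeBackRef may
        // have returned some of its entries since the unlocked check above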
        if (active_block->allocatedCount.load(std::memory_order_relaxed) == BR_MAX_CNT) {
            active_block = listForUse.load(std::memory_order_relaxed);
            if (active_block) {
                active.store(active_block, std::memory_order_release);
                listForUse.store(active_block->nextForUse, std::memory_order_relaxed);
                MALLOC_ASSERT(active_block->addedToForUse.load(std::memory_order_relaxed), ASSERT_TEXT);
                active_block->addedToForUse.store(false, std::memory_order_relaxed);
            }
        }
    } else // allocate new data node
        if (!requestNewSpace())
            return nullptr;
    return active.load(std::memory_order_acquire); // reread because of requestNewSpace
}

void *getBackRef(BackRefIdx backRefIdx)
{
    // !backRefMain means no initialization done, so it can't be valid memory
    // see initEmptyBackRefBlock for fences around lastUsed
    if (!(backRefMain.load(std::memory_order_acquire))
        || backRefIdx.getMain() > (backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_acquire))
        || backRefIdx.getOffset() >= BR_MAX_CNT)
    {
        return nullptr;
    }
    std::atomic<void*>& backRefEntry = *(std::atomic<void*>*)(
        (uintptr_t)backRefMain.load(std::memory_order_relaxed)->backRefBl[backRefIdx.getMain()]
        + sizeof(BackRefBlock) + backRefIdx.getOffset() * sizeof(std::atomic<void*>)
    );
    return backRefEntry.load(std::memory_order_relaxed);
}

void setBackRef(BackRefIdx backRefIdx, void *newPtr)
{
    MALLOC_ASSERT(backRefIdx.getMain()<=backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed)
                  && backRefIdx.getOffset()<BR_MAX_CNT, ASSERT_TEXT);
    ((std::atomic<void*>*)((uintptr_t)backRefMain.load(std::memory_order_relaxed)->backRefBl[backRefIdx.getMain()]
        + sizeof(BackRefBlock) + backRefIdx.getOffset() * sizeof(void*)))->store(newPtr, std::memory_order_relaxed);
}

BackRefIdx BackRefIdx::newBackRef(bool largeObj)
{
    BackRefBlock *blockToUse;
    void **toUse;
    BackRefIdx res;
    bool lastBlockFirstUsed = false;

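    // Each attempt locks a block with free room and takes a slot either from its
    // free list or from the bump pointer that moves down from the end of the block;
    // if the block filled up concurrently, retry with another block.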
    do {
        MALLOC_ASSERT(backRefMain.load(std::memory_order_relaxed), ASSERT_TEXT);
        blockToUse = backRefMain.load(std::memory_order_relaxed)->findFreeBlock();
        if (!blockToUse)
            return BackRefIdx();
        toUse = nullptr;
        { // the block is locked to find a reference
            MallocMutex::scoped_lock lock(blockToUse->blockMutex);

            if (blockToUse->freeList) {
                toUse = (void**)blockToUse->freeList;
                blockToUse->freeList = blockToUse->freeList->next;
                MALLOC_ASSERT(!blockToUse->freeList ||
                              ((uintptr_t)blockToUse->freeList>=(uintptr_t)blockToUse
                               && (uintptr_t)blockToUse->freeList <
                               (uintptr_t)blockToUse + slabSize), ASSERT_TEXT);
            } else if (blockToUse->allocatedCount.load(std::memory_order_relaxed) < BR_MAX_CNT) {
                toUse = (void**)blockToUse->bumpPtr;
                blockToUse->bumpPtr =
                    (FreeObject*)((uintptr_t)blockToUse->bumpPtr - sizeof(void*));
                if (blockToUse->allocatedCount.load(std::memory_order_relaxed) == BR_MAX_CNT-1) {
                    MALLOC_ASSERT((uintptr_t)blockToUse->bumpPtr
                                  < (uintptr_t)blockToUse+sizeof(BackRefBlock),
                                  ASSERT_TEXT);
                    blockToUse->bumpPtr = nullptr;
                }
            }
            if (toUse) {
                if (!blockToUse->allocatedCount.load(std::memory_order_relaxed) &&
                    !backRefMain.load(std::memory_order_relaxed)->listForUse.load(std::memory_order_relaxed)) {
                    lastBlockFirstUsed = true;
                }
                blockToUse->allocatedCount.store(blockToUse->allocatedCount.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
            }
        } // end of lock scope
    } while (!toUse);
    // The first thread that uses the last block requests new space in advance;
    // possible failures are ignored.
    if (lastBlockFirstUsed)
        backRefMain.load(std::memory_order_relaxed)->requestNewSpace();

    res.main = blockToUse->myNum;
    uintptr_t offset =
        ((uintptr_t)toUse - ((uintptr_t)blockToUse + sizeof(BackRefBlock)))/sizeof(void*);
    // Is offset too big?
    MALLOC_ASSERT(!(offset >> 15), ASSERT_TEXT);
    res.offset = offset;
    if (largeObj) res.largeObj = largeObj;

    return res;
}

void removeBackRef(BackRefIdx backRefIdx)
{
    MALLOC_ASSERT(!backRefIdx.isInvalid(), ASSERT_TEXT);
    MALLOC_ASSERT(backRefIdx.getMain()<=backRefMain.load(std::memory_order_relaxed)->lastUsed.load(std::memory_order_relaxed)
                  && backRefIdx.getOffset()<BR_MAX_CNT, ASSERT_TEXT);
    BackRefBlock *currBlock = backRefMain.load(std::memory_order_relaxed)->backRefBl[backRefIdx.getMain()];
    std::atomic<void*>& backRefEntry = *(std::atomic<void*>*)((uintptr_t)currBlock + sizeof(BackRefBlock)
                                                              + backRefIdx.getOffset()*sizeof(std::atomic<void*>));
    MALLOC_ASSERT(((uintptr_t)&backRefEntry > (uintptr_t)currBlock &&
                   (uintptr_t)&backRefEntry < (uintptr_t)currBlock + slabSize), ASSERT_TEXT);
    {
        MallocMutex::scoped_lock lock(currBlock->blockMutex);

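        // the freed slot itself becomes the free-list node: it stores the previous
        // list head and is then linked in as the new head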
        backRefEntry.store(currBlock->freeList, std::memory_order_relaxed);
#if MALLOC_DEBUG
        uintptr_t backRefEntryValue = (uintptr_t)backRefEntry.load(std::memory_order_relaxed);
        MALLOC_ASSERT(!backRefEntryValue ||
                      (backRefEntryValue > (uintptr_t)currBlock
                       && backRefEntryValue < (uintptr_t)currBlock + slabSize), ASSERT_TEXT);
#endif
        currBlock->freeList = (FreeObject*)&backRefEntry;
        currBlock->allocatedCount.store(currBlock->allocatedCount.load(std::memory_order_relaxed)-1, std::memory_order_relaxed);
    }
    // TODO: do we need double-check here?
    if (!currBlock->addedToForUse.load(std::memory_order_relaxed) &&
        currBlock!=backRefMain.load(std::memory_order_relaxed)->active.load(std::memory_order_relaxed)) {
        MallocMutex::scoped_lock lock(mainMutex);

        if (!currBlock->addedToForUse.load(std::memory_order_relaxed) &&
            currBlock!=backRefMain.load(std::memory_order_relaxed)->active.load(std::memory_order_relaxed))
            backRefMain.load(std::memory_order_relaxed)->addToForUseList(currBlock);
    }
}


/********* End of backreferences ***********************/

} // namespace internal
} // namespace rml