1 //===---- ManagedMemoryRewrite.cpp - Rewrite global & malloc'd memory -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Take a module and rewrite: 11 // 1. `malloc` -> `polly_mallocManaged` 12 // 2. `free` -> `polly_freeManaged` 13 // 3. global arrays with initializers -> global arrays that are initialized 14 // with a constructor call to 15 // `polly_mallocManaged`. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "polly/CodeGen/CodeGeneration.h" 20 #include "polly/CodeGen/IslAst.h" 21 #include "polly/CodeGen/IslNodeBuilder.h" 22 #include "polly/CodeGen/PPCGCodeGeneration.h" 23 #include "polly/CodeGen/Utils.h" 24 #include "polly/DependenceInfo.h" 25 #include "polly/LinkAllPasses.h" 26 #include "polly/Options.h" 27 #include "polly/ScopDetection.h" 28 #include "polly/ScopInfo.h" 29 #include "polly/Support/SCEVValidator.h" 30 #include "llvm/Analysis/AliasAnalysis.h" 31 #include "llvm/Analysis/BasicAliasAnalysis.h" 32 #include "llvm/Analysis/CaptureTracking.h" 33 #include "llvm/Analysis/GlobalsModRef.h" 34 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 35 #include "llvm/Analysis/TargetLibraryInfo.h" 36 #include "llvm/Analysis/TargetTransformInfo.h" 37 #include "llvm/IR/LegacyPassManager.h" 38 #include "llvm/IR/Verifier.h" 39 #include "llvm/IRReader/IRReader.h" 40 #include "llvm/Linker/Linker.h" 41 #include "llvm/Support/TargetRegistry.h" 42 #include "llvm/Support/TargetSelect.h" 43 #include "llvm/Target/TargetMachine.h" 44 #include "llvm/Transforms/IPO/PassManagerBuilder.h" 45 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 46 #include "llvm/Transforms/Utils/ModuleUtils.h" 47 48 static cl::opt<bool> RewriteAllocas( 49 "polly-acc-rewrite-allocas", 50 cl::desc( 51 "Ask the managed memory rewriter to also rewrite alloca instructions"), 52 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 53 54 static cl::opt<bool> IgnoreLinkageForGlobals( 55 "polly-acc-rewrite-ignore-linkage-for-globals", 56 cl::desc( 57 "By default, we only rewrite globals with internal linkage. This flag " 58 "enables rewriting of globals regardless of linkage"), 59 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 60 61 #define DEBUG_TYPE "polly-acc-rewrite-managed-memory" 62 namespace { 63 64 static llvm::Function *getOrCreatePollyMallocManaged(Module &M) { 65 const char *Name = "polly_mallocManaged"; 66 Function *F = M.getFunction(Name); 67 68 // If F is not available, declare it. 69 if (!F) { 70 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 71 PollyIRBuilder Builder(M.getContext()); 72 // TODO: How do I get `size_t`? I assume from DataLayout? 73 FunctionType *Ty = FunctionType::get(Builder.getInt8PtrTy(), 74 {Builder.getInt64Ty()}, false); 75 F = Function::Create(Ty, Linkage, Name, &M); 76 } 77 78 return F; 79 } 80 81 static llvm::Function *getOrCreatePollyFreeManaged(Module &M) { 82 const char *Name = "polly_freeManaged"; 83 Function *F = M.getFunction(Name); 84 85 // If F is not available, declare it. 86 if (!F) { 87 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 88 PollyIRBuilder Builder(M.getContext()); 89 // TODO: How do I get `size_t`? I assume from DataLayout? 90 FunctionType *Ty = 91 FunctionType::get(Builder.getVoidTy(), {Builder.getInt8PtrTy()}, false); 92 F = Function::Create(Ty, Linkage, Name, &M); 93 } 94 95 return F; 96 } 97 98 // Expand a constant expression `Cur`, which is used at instruction `Parent` 99 // at index `index`. 100 // Since a constant expression can expand to multiple instructions, store all 101 // the expands into a set called `Expands`. 102 // Note that this goes inorder on the constant expression tree. 103 // A * ((B * D) + C) 104 // will be processed with first A, then B * D, then B, then D, and then C. 105 // Though ConstantExprs are not treated as "trees" but as DAGs, since you can 106 // have something like this: 107 // * 108 // / \ 109 // \ / 110 // (D) 111 // 112 // For the purposes of this expansion, we expand the two occurences of D 113 // separately. Therefore, we expand the DAG into the tree: 114 // * 115 // / \ 116 // D D 117 // TODO: We don't _have_to do this, but this is the simplest solution. 118 // We can write a solution that keeps track of which constants have been 119 // already expanded. 120 static void expandConstantExpr(ConstantExpr *Cur, PollyIRBuilder &Builder, 121 Instruction *Parent, int index, 122 SmallPtrSet<Instruction *, 4> &Expands) { 123 assert(Cur && "invalid constant expression passed"); 124 Instruction *I = Cur->getAsInstruction(); 125 assert(I && "unable to convert ConstantExpr to Instruction"); 126 127 DEBUG(dbgs() << "Expanding ConstantExpression: " << *Cur 128 << " | in Instruction: " << *I << "\n";); 129 130 // Invalidate `Cur` so that no one after this point uses `Cur`. Rather, 131 // they should mutate `I`. 132 Cur = nullptr; 133 134 Expands.insert(I); 135 Parent->setOperand(index, I); 136 137 // The things that `Parent` uses (its operands) should be created 138 // before `Parent`. 139 Builder.SetInsertPoint(Parent); 140 Builder.Insert(I); 141 142 for (unsigned i = 0; i < I->getNumOperands(); i++) { 143 Value *Op = I->getOperand(i); 144 assert(isa<Constant>(Op) && "constant must have a constant operand"); 145 146 if (ConstantExpr *CExprOp = dyn_cast<ConstantExpr>(Op)) 147 expandConstantExpr(CExprOp, Builder, I, i, Expands); 148 } 149 } 150 151 // Edit all uses of `OldVal` to NewVal` in `Inst`. This will rewrite 152 // `ConstantExpr`s that are used in the `Inst`. 153 // Note that `replaceAllUsesWith` is insufficient for this purpose because it 154 // does not rewrite values in `ConstantExpr`s. 155 static void rewriteOldValToNew(Instruction *Inst, Value *OldVal, Value *NewVal, 156 PollyIRBuilder &Builder) { 157 158 // This contains a set of instructions in which OldVal must be replaced. 159 // We start with `Inst`, and we fill it up with the expanded `ConstantExpr`s 160 // from `Inst`s arguments. 161 // We need to go through this process because `replaceAllUsesWith` does not 162 // actually edit `ConstantExpr`s. 163 SmallPtrSet<Instruction *, 4> InstsToVisit = {Inst}; 164 165 // Expand all `ConstantExpr`s and place it in `InstsToVisit`. 166 for (unsigned i = 0; i < Inst->getNumOperands(); i++) { 167 Value *Operand = Inst->getOperand(i); 168 if (ConstantExpr *ValueConstExpr = dyn_cast<ConstantExpr>(Operand)) 169 expandConstantExpr(ValueConstExpr, Builder, Inst, i, InstsToVisit); 170 } 171 172 // Now visit each instruction and use `replaceUsesOfWith`. We know that 173 // will work because `I` cannot have any `ConstantExpr` within it. 174 for (Instruction *I : InstsToVisit) 175 I->replaceUsesOfWith(OldVal, NewVal); 176 } 177 178 // Given a value `Current`, return all Instructions that may contain `Current` 179 // in an expression. 180 // We need this auxiliary function, because if we have a 181 // `Constant` that is a user of `V`, we need to recurse into the 182 // `Constant`s uses to gather the root instruciton. 183 static void getInstructionUsersOfValue(Value *V, 184 SmallVector<Instruction *, 4> &Owners) { 185 if (auto *I = dyn_cast<Instruction>(V)) { 186 Owners.push_back(I); 187 } else { 188 // Anything that is a `User` must be a constant or an instruction. 189 auto *C = cast<Constant>(V); 190 for (Use &CUse : C->uses()) 191 getInstructionUsersOfValue(CUse.getUser(), Owners); 192 } 193 } 194 195 static void 196 replaceGlobalArray(Module &M, const DataLayout &DL, GlobalVariable &Array, 197 SmallPtrSet<GlobalVariable *, 4> &ReplacedGlobals) { 198 // We only want arrays. 199 ArrayType *ArrayTy = dyn_cast<ArrayType>(Array.getType()->getElementType()); 200 if (!ArrayTy) 201 return; 202 Type *ElemTy = ArrayTy->getElementType(); 203 PointerType *ElemPtrTy = ElemTy->getPointerTo(); 204 205 // We only wish to replace arrays that are visible in the module they 206 // inhabit. Otherwise, our type edit from [T] to T* would be illegal across 207 // modules. 208 const bool OnlyVisibleInsideModule = Array.hasPrivateLinkage() || 209 Array.hasInternalLinkage() || 210 IgnoreLinkageForGlobals; 211 if (!OnlyVisibleInsideModule) 212 return; 213 214 if (!Array.hasInitializer() || 215 !isa<ConstantAggregateZero>(Array.getInitializer())) 216 return; 217 218 // At this point, we have committed to replacing this array. 219 ReplacedGlobals.insert(&Array); 220 221 std::string NewName = Array.getName(); 222 NewName += ".toptr"; 223 GlobalVariable *ReplacementToArr = 224 cast<GlobalVariable>(M.getOrInsertGlobal(NewName, ElemPtrTy)); 225 ReplacementToArr->setInitializer(ConstantPointerNull::get(ElemPtrTy)); 226 227 Function *PollyMallocManaged = getOrCreatePollyMallocManaged(M); 228 std::string FnName = Array.getName(); 229 FnName += ".constructor"; 230 PollyIRBuilder Builder(M.getContext()); 231 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 232 const GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 233 Function *F = Function::Create(Ty, Linkage, FnName, &M); 234 BasicBlock *Start = BasicBlock::Create(M.getContext(), "entry", F); 235 Builder.SetInsertPoint(Start); 236 237 int ArraySizeInt = DL.getTypeAllocSizeInBits(ArrayTy) / 8; 238 Value *ArraySize = Builder.getInt64(ArraySizeInt); 239 ArraySize->setName("array.size"); 240 241 Value *AllocatedMemRaw = 242 Builder.CreateCall(PollyMallocManaged, {ArraySize}, "mem.raw"); 243 Value *AllocatedMemTyped = 244 Builder.CreatePointerCast(AllocatedMemRaw, ElemPtrTy, "mem.typed"); 245 Builder.CreateStore(AllocatedMemTyped, ReplacementToArr); 246 Builder.CreateRetVoid(); 247 248 const int Priority = 0; 249 appendToGlobalCtors(M, F, Priority, ReplacementToArr); 250 251 SmallVector<Instruction *, 4> ArrayUserInstructions; 252 // Get all instructions that use array. We need to do this weird thing 253 // because `Constant`s that contain this array neeed to be expanded into 254 // instructions so that we can replace their parameters. `Constant`s cannot 255 // be edited easily, so we choose to convert all `Constant`s to 256 // `Instruction`s and handle all of the uses of `Array` uniformly. 257 for (Use &ArrayUse : Array.uses()) 258 getInstructionUsersOfValue(ArrayUse.getUser(), ArrayUserInstructions); 259 260 for (Instruction *UserOfArrayInst : ArrayUserInstructions) { 261 262 Builder.SetInsertPoint(UserOfArrayInst); 263 // <ty>** -> <ty>* 264 Value *ArrPtrLoaded = Builder.CreateLoad(ReplacementToArr, "arrptr.load"); 265 // <ty>* -> [ty]* 266 Value *ArrPtrLoadedBitcasted = Builder.CreateBitCast( 267 ArrPtrLoaded, ArrayTy->getPointerTo(), "arrptr.bitcast"); 268 rewriteOldValToNew(UserOfArrayInst, &Array, ArrPtrLoadedBitcasted, Builder); 269 } 270 } 271 272 // We return all `allocas` that may need to be converted to a call to 273 // cudaMallocManaged. 274 static void getAllocasToBeManaged(Function &F, 275 SmallSet<AllocaInst *, 4> &Allocas) { 276 for (BasicBlock &BB : F) { 277 for (Instruction &I : BB) { 278 auto *Alloca = dyn_cast<AllocaInst>(&I); 279 if (!Alloca) 280 continue; 281 DEBUG(dbgs() << "Checking if " << *Alloca << "may be captured: "); 282 283 if (PointerMayBeCaptured(Alloca, /* ReturnCaptures */ false, 284 /* StoreCaptures */ true)) { 285 Allocas.insert(Alloca); 286 DEBUG(dbgs() << "YES (captured)\n"); 287 } else { 288 DEBUG(dbgs() << "NO (not captured)\n"); 289 } 290 } 291 } 292 } 293 294 static void rewriteAllocaAsManagedMemory(AllocaInst *Alloca, 295 const DataLayout &DL) { 296 DEBUG(dbgs() << "rewriting: " << *Alloca << " to managed mem.\n"); 297 Module *M = Alloca->getModule(); 298 assert(M && "Alloca does not have a module"); 299 300 PollyIRBuilder Builder(M->getContext()); 301 Builder.SetInsertPoint(Alloca); 302 303 Value *MallocManagedFn = getOrCreatePollyMallocManaged(*Alloca->getModule()); 304 const int Size = DL.getTypeAllocSize(Alloca->getType()->getElementType()); 305 Value *SizeVal = Builder.getInt64(Size); 306 Value *RawManagedMem = Builder.CreateCall(MallocManagedFn, {SizeVal}); 307 Value *Bitcasted = Builder.CreateBitCast(RawManagedMem, Alloca->getType()); 308 309 Function *F = Alloca->getFunction(); 310 assert(F && "Alloca has invalid function"); 311 312 Bitcasted->takeName(Alloca); 313 Alloca->replaceAllUsesWith(Bitcasted); 314 Alloca->eraseFromParent(); 315 316 for (BasicBlock &BB : *F) { 317 ReturnInst *Return = dyn_cast<ReturnInst>(BB.getTerminator()); 318 if (!Return) 319 continue; 320 Builder.SetInsertPoint(Return); 321 322 Value *FreeManagedFn = getOrCreatePollyFreeManaged(*M); 323 Builder.CreateCall(FreeManagedFn, {RawManagedMem}); 324 } 325 } 326 327 // Replace all uses of `Old` with `New`, even inside `ConstantExpr`. 328 // 329 // `replaceAllUsesWith` does replace values in `ConstantExpr`. This function 330 // actually does replace it in `ConstantExpr`. The caveat is that if there is 331 // a use that is *outside* a function (say, at global declarations), we fail. 332 // So, this is meant to be used on values which we know will only be used 333 // within functions. 334 // 335 // This process works by looking through the uses of `Old`. If it finds a 336 // `ConstantExpr`, it recursively looks for the owning instruction. 337 // Then, it expands all the `ConstantExpr` to instructions and replaces 338 // `Old` with `New` in the expanded instructions. 339 static void replaceAllUsesAndConstantUses(Value *Old, Value *New, 340 PollyIRBuilder &Builder) { 341 SmallVector<Instruction *, 4> UserInstructions; 342 // Get all instructions that use array. We need to do this weird thing 343 // because `Constant`s that contain this array neeed to be expanded into 344 // instructions so that we can replace their parameters. `Constant`s cannot 345 // be edited easily, so we choose to convert all `Constant`s to 346 // `Instruction`s and handle all of the uses of `Array` uniformly. 347 for (Use &ArrayUse : Old->uses()) 348 getInstructionUsersOfValue(ArrayUse.getUser(), UserInstructions); 349 350 for (Instruction *I : UserInstructions) 351 rewriteOldValToNew(I, Old, New, Builder); 352 } 353 354 class ManagedMemoryRewritePass : public ModulePass { 355 public: 356 static char ID; 357 GPUArch Architecture; 358 GPURuntime Runtime; 359 360 ManagedMemoryRewritePass() : ModulePass(ID) {} 361 virtual bool runOnModule(Module &M) { 362 const DataLayout &DL = M.getDataLayout(); 363 364 Function *Malloc = M.getFunction("malloc"); 365 366 if (Malloc) { 367 PollyIRBuilder Builder(M.getContext()); 368 Function *PollyMallocManaged = getOrCreatePollyMallocManaged(M); 369 assert(PollyMallocManaged && "unable to create polly_mallocManaged"); 370 371 replaceAllUsesAndConstantUses(Malloc, PollyMallocManaged, Builder); 372 Malloc->eraseFromParent(); 373 } 374 375 Function *Free = M.getFunction("free"); 376 377 if (Free) { 378 PollyIRBuilder Builder(M.getContext()); 379 Function *PollyFreeManaged = getOrCreatePollyFreeManaged(M); 380 assert(PollyFreeManaged && "unable to create polly_freeManaged"); 381 382 replaceAllUsesAndConstantUses(Free, PollyFreeManaged, Builder); 383 Free->eraseFromParent(); 384 } 385 386 SmallPtrSet<GlobalVariable *, 4> GlobalsToErase; 387 for (GlobalVariable &Global : M.globals()) 388 replaceGlobalArray(M, DL, Global, GlobalsToErase); 389 for (GlobalVariable *G : GlobalsToErase) 390 G->eraseFromParent(); 391 392 // Rewrite allocas to cudaMallocs if we are asked to do so. 393 if (RewriteAllocas) { 394 SmallSet<AllocaInst *, 4> AllocasToBeManaged; 395 for (Function &F : M.functions()) 396 getAllocasToBeManaged(F, AllocasToBeManaged); 397 398 for (AllocaInst *Alloca : AllocasToBeManaged) 399 rewriteAllocaAsManagedMemory(Alloca, DL); 400 } 401 402 return true; 403 } 404 }; 405 406 } // namespace 407 char ManagedMemoryRewritePass::ID = 42; 408 409 Pass *polly::createManagedMemoryRewritePassPass(GPUArch Arch, 410 GPURuntime Runtime) { 411 ManagedMemoryRewritePass *pass = new ManagedMemoryRewritePass(); 412 pass->Runtime = Runtime; 413 pass->Architecture = Arch; 414 return pass; 415 } 416 417 INITIALIZE_PASS_BEGIN( 418 ManagedMemoryRewritePass, "polly-acc-rewrite-managed-memory", 419 "Polly - Rewrite all allocations in heap & data section to managed memory", 420 false, false) 421 INITIALIZE_PASS_DEPENDENCY(PPCGCodeGeneration); 422 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 423 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 424 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 425 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 426 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 427 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass); 428 INITIALIZE_PASS_END( 429 ManagedMemoryRewritePass, "polly-acc-rewrite-managed-memory", 430 "Polly - Rewrite all allocations in heap & data section to managed memory", 431 false, false) 432