1 //===---- ManagedMemoryRewrite.cpp - Rewrite global & malloc'd memory -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Take a module and rewrite: 11 // 1. `malloc` -> `polly_mallocManaged` 12 // 2. `free` -> `polly_freeManaged` 13 // 3. global arrays with initializers -> global arrays that are initialized 14 // with a constructor call to 15 // `polly_mallocManaged`. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "polly/CodeGen/CodeGeneration.h" 20 #include "polly/CodeGen/IslAst.h" 21 #include "polly/CodeGen/IslNodeBuilder.h" 22 #include "polly/CodeGen/PPCGCodeGeneration.h" 23 #include "polly/CodeGen/Utils.h" 24 #include "polly/DependenceInfo.h" 25 #include "polly/LinkAllPasses.h" 26 #include "polly/Options.h" 27 #include "polly/ScopDetection.h" 28 #include "polly/ScopInfo.h" 29 #include "polly/Support/SCEVValidator.h" 30 #include "llvm/Analysis/AliasAnalysis.h" 31 #include "llvm/Analysis/BasicAliasAnalysis.h" 32 #include "llvm/Analysis/CaptureTracking.h" 33 #include "llvm/Analysis/GlobalsModRef.h" 34 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 35 #include "llvm/Analysis/TargetLibraryInfo.h" 36 #include "llvm/Analysis/TargetTransformInfo.h" 37 #include "llvm/IR/LegacyPassManager.h" 38 #include "llvm/IR/Verifier.h" 39 #include "llvm/IRReader/IRReader.h" 40 #include "llvm/Linker/Linker.h" 41 #include "llvm/Support/TargetRegistry.h" 42 #include "llvm/Support/TargetSelect.h" 43 #include "llvm/Target/TargetMachine.h" 44 #include "llvm/Transforms/IPO/PassManagerBuilder.h" 45 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 46 #include "llvm/Transforms/Utils/ModuleUtils.h" 47 48 static cl::opt<bool> RewriteAllocas( 49 "polly-acc-rewrite-allocas", 50 cl::desc( 51 "Ask the managed memory rewriter to also rewrite alloca instructions"), 52 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 53 54 static cl::opt<bool> IgnoreLinkageForGlobals( 55 "polly-acc-rewrite-ignore-linkage-for-globals", 56 cl::desc( 57 "By default, we only rewrite globals with internal linkage. This flag " 58 "enables rewriting of globals regardless of linkage"), 59 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 60 61 #define DEBUG_TYPE "polly-acc-rewrite-managed-memory" 62 namespace { 63 64 static llvm::Function *getOrCreatePollyMallocManaged(Module &M) { 65 const char *Name = "polly_mallocManaged"; 66 Function *F = M.getFunction(Name); 67 68 // If F is not available, declare it. 69 if (!F) { 70 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 71 PollyIRBuilder Builder(M.getContext()); 72 // TODO: How do I get `size_t`? I assume from DataLayout? 73 FunctionType *Ty = FunctionType::get(Builder.getInt8PtrTy(), 74 {Builder.getInt64Ty()}, false); 75 F = Function::Create(Ty, Linkage, Name, &M); 76 } 77 78 return F; 79 } 80 81 static llvm::Function *getOrCreatePollyFreeManaged(Module &M) { 82 const char *Name = "polly_freeManaged"; 83 Function *F = M.getFunction(Name); 84 85 // If F is not available, declare it. 86 if (!F) { 87 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 88 PollyIRBuilder Builder(M.getContext()); 89 // TODO: How do I get `size_t`? I assume from DataLayout? 90 FunctionType *Ty = 91 FunctionType::get(Builder.getVoidTy(), {Builder.getInt8PtrTy()}, false); 92 F = Function::Create(Ty, Linkage, Name, &M); 93 } 94 95 return F; 96 } 97 98 // Expand a constant expression `Cur`, which is used at instruction `Parent` 99 // at index `index`. 100 // Since a constant expression can expand to multiple instructions, store all 101 // the expands into a set called `Expands`. 102 // Note that this goes inorder on the constant expression tree. 103 // A * ((B * D) + C) 104 // will be processed with first A, then B * D, then B, then D, and then C. 105 // Though ConstantExprs are not treated as "trees" but as DAGs, since you can 106 // have something like this: 107 // * 108 // / \ 109 // \ / 110 // (D) 111 // 112 // For the purposes of this expansion, we expand the two occurences of D 113 // separately. Therefore, we expand the DAG into the tree: 114 // * 115 // / \ 116 // D D 117 // TODO: We don't _have_to do this, but this is the simplest solution. 118 // We can write a solution that keeps track of which constants have been 119 // already expanded. 120 static void expandConstantExpr(ConstantExpr *Cur, PollyIRBuilder &Builder, 121 Instruction *Parent, int index, 122 SmallPtrSet<Instruction *, 4> &Expands) { 123 assert(Cur && "invalid constant expression passed"); 124 Instruction *I = Cur->getAsInstruction(); 125 assert(I && "unable to convert ConstantExpr to Instruction"); 126 127 DEBUG(dbgs() << "Expanding ConstantExpression: (" << *Cur 128 << ") in Instruction: (" << *I << ")\n";); 129 130 // Invalidate `Cur` so that no one after this point uses `Cur`. Rather, 131 // they should mutate `I`. 132 Cur = nullptr; 133 134 Expands.insert(I); 135 Parent->setOperand(index, I); 136 137 // The things that `Parent` uses (its operands) should be created 138 // before `Parent`. 139 Builder.SetInsertPoint(Parent); 140 Builder.Insert(I); 141 142 for (unsigned i = 0; i < I->getNumOperands(); i++) { 143 Value *Op = I->getOperand(i); 144 assert(isa<Constant>(Op) && "constant must have a constant operand"); 145 146 if (ConstantExpr *CExprOp = dyn_cast<ConstantExpr>(Op)) 147 expandConstantExpr(CExprOp, Builder, I, i, Expands); 148 } 149 } 150 151 // Edit all uses of `OldVal` to NewVal` in `Inst`. This will rewrite 152 // `ConstantExpr`s that are used in the `Inst`. 153 // Note that `replaceAllUsesWith` is insufficient for this purpose because it 154 // does not rewrite values in `ConstantExpr`s. 155 static void rewriteOldValToNew(Instruction *Inst, Value *OldVal, Value *NewVal, 156 PollyIRBuilder &Builder) { 157 158 // This contains a set of instructions in which OldVal must be replaced. 159 // We start with `Inst`, and we fill it up with the expanded `ConstantExpr`s 160 // from `Inst`s arguments. 161 // We need to go through this process because `replaceAllUsesWith` does not 162 // actually edit `ConstantExpr`s. 163 SmallPtrSet<Instruction *, 4> InstsToVisit = {Inst}; 164 165 // Expand all `ConstantExpr`s and place it in `InstsToVisit`. 166 for (unsigned i = 0; i < Inst->getNumOperands(); i++) { 167 Value *Operand = Inst->getOperand(i); 168 if (ConstantExpr *ValueConstExpr = dyn_cast<ConstantExpr>(Operand)) 169 expandConstantExpr(ValueConstExpr, Builder, Inst, i, InstsToVisit); 170 } 171 172 // Now visit each instruction and use `replaceUsesOfWith`. We know that 173 // will work because `I` cannot have any `ConstantExpr` within it. 174 for (Instruction *I : InstsToVisit) 175 I->replaceUsesOfWith(OldVal, NewVal); 176 } 177 178 // Given a value `Current`, return all Instructions that may contain `Current` 179 // in an expression. 180 // We need this auxiliary function, because if we have a 181 // `Constant` that is a user of `V`, we need to recurse into the 182 // `Constant`s uses to gather the root instruciton. 183 static void getInstructionUsersOfValue(Value *V, 184 SmallVector<Instruction *, 4> &Owners) { 185 if (auto *I = dyn_cast<Instruction>(V)) { 186 Owners.push_back(I); 187 } else { 188 // Anything that is a `User` must be a constant or an instruction. 189 auto *C = cast<Constant>(V); 190 for (Use &CUse : C->uses()) 191 getInstructionUsersOfValue(CUse.getUser(), Owners); 192 } 193 } 194 195 static void 196 replaceGlobalArray(Module &M, const DataLayout &DL, GlobalVariable &Array, 197 SmallPtrSet<GlobalVariable *, 4> &ReplacedGlobals) { 198 // We only want arrays. 199 ArrayType *ArrayTy = dyn_cast<ArrayType>(Array.getType()->getElementType()); 200 if (!ArrayTy) 201 return; 202 Type *ElemTy = ArrayTy->getElementType(); 203 PointerType *ElemPtrTy = ElemTy->getPointerTo(); 204 205 // We only wish to replace arrays that are visible in the module they 206 // inhabit. Otherwise, our type edit from [T] to T* would be illegal across 207 // modules. 208 const bool OnlyVisibleInsideModule = Array.hasPrivateLinkage() || 209 Array.hasInternalLinkage() || 210 IgnoreLinkageForGlobals; 211 if (!OnlyVisibleInsideModule) { 212 DEBUG(dbgs() << "Not rewriting (" << Array 213 << ") to managed memory " 214 "because it could be visible externally. To force rewrite, " 215 "use -polly-acc-rewrite-ignore-linkage-for-globals.\n"); 216 return; 217 } 218 219 if (!Array.hasInitializer() || 220 !isa<ConstantAggregateZero>(Array.getInitializer())) { 221 DEBUG(dbgs() << "Not rewriting (" << Array 222 << ") to managed memory " 223 "because it has an initializer which is " 224 "not a zeroinitializer.\n"); 225 return; 226 } 227 228 // At this point, we have committed to replacing this array. 229 ReplacedGlobals.insert(&Array); 230 231 std::string NewName = Array.getName(); 232 NewName += ".toptr"; 233 GlobalVariable *ReplacementToArr = 234 cast<GlobalVariable>(M.getOrInsertGlobal(NewName, ElemPtrTy)); 235 ReplacementToArr->setInitializer(ConstantPointerNull::get(ElemPtrTy)); 236 237 Function *PollyMallocManaged = getOrCreatePollyMallocManaged(M); 238 std::string FnName = Array.getName(); 239 FnName += ".constructor"; 240 PollyIRBuilder Builder(M.getContext()); 241 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 242 const GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 243 Function *F = Function::Create(Ty, Linkage, FnName, &M); 244 BasicBlock *Start = BasicBlock::Create(M.getContext(), "entry", F); 245 Builder.SetInsertPoint(Start); 246 247 const uint64_t ArraySizeInt = DL.getTypeAllocSize(ArrayTy); 248 Value *ArraySize = Builder.getInt64(ArraySizeInt); 249 ArraySize->setName("array.size"); 250 251 Value *AllocatedMemRaw = 252 Builder.CreateCall(PollyMallocManaged, {ArraySize}, "mem.raw"); 253 Value *AllocatedMemTyped = 254 Builder.CreatePointerCast(AllocatedMemRaw, ElemPtrTy, "mem.typed"); 255 Builder.CreateStore(AllocatedMemTyped, ReplacementToArr); 256 Builder.CreateRetVoid(); 257 258 const int Priority = 0; 259 appendToGlobalCtors(M, F, Priority, ReplacementToArr); 260 261 SmallVector<Instruction *, 4> ArrayUserInstructions; 262 // Get all instructions that use array. We need to do this weird thing 263 // because `Constant`s that contain this array neeed to be expanded into 264 // instructions so that we can replace their parameters. `Constant`s cannot 265 // be edited easily, so we choose to convert all `Constant`s to 266 // `Instruction`s and handle all of the uses of `Array` uniformly. 267 for (Use &ArrayUse : Array.uses()) 268 getInstructionUsersOfValue(ArrayUse.getUser(), ArrayUserInstructions); 269 270 for (Instruction *UserOfArrayInst : ArrayUserInstructions) { 271 272 Builder.SetInsertPoint(UserOfArrayInst); 273 // <ty>** -> <ty>* 274 Value *ArrPtrLoaded = Builder.CreateLoad(ReplacementToArr, "arrptr.load"); 275 // <ty>* -> [ty]* 276 Value *ArrPtrLoadedBitcasted = Builder.CreateBitCast( 277 ArrPtrLoaded, ArrayTy->getPointerTo(), "arrptr.bitcast"); 278 rewriteOldValToNew(UserOfArrayInst, &Array, ArrPtrLoadedBitcasted, Builder); 279 } 280 } 281 282 // We return all `allocas` that may need to be converted to a call to 283 // cudaMallocManaged. 284 static void getAllocasToBeManaged(Function &F, 285 SmallSet<AllocaInst *, 4> &Allocas) { 286 for (BasicBlock &BB : F) { 287 for (Instruction &I : BB) { 288 auto *Alloca = dyn_cast<AllocaInst>(&I); 289 if (!Alloca) 290 continue; 291 DEBUG(dbgs() << "Checking if (" << *Alloca << ") may be captured: "); 292 293 if (PointerMayBeCaptured(Alloca, /* ReturnCaptures */ false, 294 /* StoreCaptures */ true)) { 295 Allocas.insert(Alloca); 296 DEBUG(dbgs() << "YES (captured).\n"); 297 } else { 298 DEBUG(dbgs() << "NO (not captured).\n"); 299 } 300 } 301 } 302 } 303 304 static void rewriteAllocaAsManagedMemory(AllocaInst *Alloca, 305 const DataLayout &DL) { 306 DEBUG(dbgs() << "rewriting: (" << *Alloca << ") to managed mem.\n"); 307 Module *M = Alloca->getModule(); 308 assert(M && "Alloca does not have a module"); 309 310 PollyIRBuilder Builder(M->getContext()); 311 Builder.SetInsertPoint(Alloca); 312 313 Value *MallocManagedFn = getOrCreatePollyMallocManaged(*Alloca->getModule()); 314 const uint64_t Size = 315 DL.getTypeAllocSize(Alloca->getType()->getElementType()); 316 Value *SizeVal = Builder.getInt64(Size); 317 Value *RawManagedMem = Builder.CreateCall(MallocManagedFn, {SizeVal}); 318 Value *Bitcasted = Builder.CreateBitCast(RawManagedMem, Alloca->getType()); 319 320 Function *F = Alloca->getFunction(); 321 assert(F && "Alloca has invalid function"); 322 323 Bitcasted->takeName(Alloca); 324 Alloca->replaceAllUsesWith(Bitcasted); 325 Alloca->eraseFromParent(); 326 327 for (BasicBlock &BB : *F) { 328 ReturnInst *Return = dyn_cast<ReturnInst>(BB.getTerminator()); 329 if (!Return) 330 continue; 331 Builder.SetInsertPoint(Return); 332 333 Value *FreeManagedFn = getOrCreatePollyFreeManaged(*M); 334 Builder.CreateCall(FreeManagedFn, {RawManagedMem}); 335 } 336 } 337 338 // Replace all uses of `Old` with `New`, even inside `ConstantExpr`. 339 // 340 // `replaceAllUsesWith` does replace values in `ConstantExpr`. This function 341 // actually does replace it in `ConstantExpr`. The caveat is that if there is 342 // a use that is *outside* a function (say, at global declarations), we fail. 343 // So, this is meant to be used on values which we know will only be used 344 // within functions. 345 // 346 // This process works by looking through the uses of `Old`. If it finds a 347 // `ConstantExpr`, it recursively looks for the owning instruction. 348 // Then, it expands all the `ConstantExpr` to instructions and replaces 349 // `Old` with `New` in the expanded instructions. 350 static void replaceAllUsesAndConstantUses(Value *Old, Value *New, 351 PollyIRBuilder &Builder) { 352 SmallVector<Instruction *, 4> UserInstructions; 353 // Get all instructions that use array. We need to do this weird thing 354 // because `Constant`s that contain this array neeed to be expanded into 355 // instructions so that we can replace their parameters. `Constant`s cannot 356 // be edited easily, so we choose to convert all `Constant`s to 357 // `Instruction`s and handle all of the uses of `Array` uniformly. 358 for (Use &ArrayUse : Old->uses()) 359 getInstructionUsersOfValue(ArrayUse.getUser(), UserInstructions); 360 361 for (Instruction *I : UserInstructions) 362 rewriteOldValToNew(I, Old, New, Builder); 363 } 364 365 class ManagedMemoryRewritePass : public ModulePass { 366 public: 367 static char ID; 368 GPUArch Architecture; 369 GPURuntime Runtime; 370 371 ManagedMemoryRewritePass() : ModulePass(ID) {} 372 virtual bool runOnModule(Module &M) { 373 const DataLayout &DL = M.getDataLayout(); 374 375 Function *Malloc = M.getFunction("malloc"); 376 377 if (Malloc) { 378 PollyIRBuilder Builder(M.getContext()); 379 Function *PollyMallocManaged = getOrCreatePollyMallocManaged(M); 380 assert(PollyMallocManaged && "unable to create polly_mallocManaged"); 381 382 replaceAllUsesAndConstantUses(Malloc, PollyMallocManaged, Builder); 383 Malloc->eraseFromParent(); 384 } 385 386 Function *Free = M.getFunction("free"); 387 388 if (Free) { 389 PollyIRBuilder Builder(M.getContext()); 390 Function *PollyFreeManaged = getOrCreatePollyFreeManaged(M); 391 assert(PollyFreeManaged && "unable to create polly_freeManaged"); 392 393 replaceAllUsesAndConstantUses(Free, PollyFreeManaged, Builder); 394 Free->eraseFromParent(); 395 } 396 397 SmallPtrSet<GlobalVariable *, 4> GlobalsToErase; 398 for (GlobalVariable &Global : M.globals()) 399 replaceGlobalArray(M, DL, Global, GlobalsToErase); 400 for (GlobalVariable *G : GlobalsToErase) 401 G->eraseFromParent(); 402 403 // Rewrite allocas to cudaMallocs if we are asked to do so. 404 if (RewriteAllocas) { 405 SmallSet<AllocaInst *, 4> AllocasToBeManaged; 406 for (Function &F : M.functions()) 407 getAllocasToBeManaged(F, AllocasToBeManaged); 408 409 for (AllocaInst *Alloca : AllocasToBeManaged) 410 rewriteAllocaAsManagedMemory(Alloca, DL); 411 } 412 413 return true; 414 } 415 }; 416 417 } // namespace 418 char ManagedMemoryRewritePass::ID = 42; 419 420 Pass *polly::createManagedMemoryRewritePassPass(GPUArch Arch, 421 GPURuntime Runtime) { 422 ManagedMemoryRewritePass *pass = new ManagedMemoryRewritePass(); 423 pass->Runtime = Runtime; 424 pass->Architecture = Arch; 425 return pass; 426 } 427 428 INITIALIZE_PASS_BEGIN( 429 ManagedMemoryRewritePass, "polly-acc-rewrite-managed-memory", 430 "Polly - Rewrite all allocations in heap & data section to managed memory", 431 false, false) 432 INITIALIZE_PASS_DEPENDENCY(PPCGCodeGeneration); 433 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 434 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 435 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 436 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 437 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 438 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass); 439 INITIALIZE_PASS_END( 440 ManagedMemoryRewritePass, "polly-acc-rewrite-managed-memory", 441 "Polly - Rewrite all allocations in heap & data section to managed memory", 442 false, false) 443