1 //===- GlobalOpt.cpp - Optimize Global Variables --------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass transforms simple global variables that never have their address 10 // taken. If obviously true, it marks read/write globals as constant, deletes 11 // variables only stored to, etc. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/GlobalOpt.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/SmallPtrSet.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/ADT/Twine.h" 22 #include "llvm/ADT/iterator_range.h" 23 #include "llvm/Analysis/BlockFrequencyInfo.h" 24 #include "llvm/Analysis/ConstantFolding.h" 25 #include "llvm/Analysis/MemoryBuiltins.h" 26 #include "llvm/Analysis/TargetLibraryInfo.h" 27 #include "llvm/Analysis/TargetTransformInfo.h" 28 #include "llvm/BinaryFormat/Dwarf.h" 29 #include "llvm/IR/Attributes.h" 30 #include "llvm/IR/BasicBlock.h" 31 #include "llvm/IR/CallSite.h" 32 #include "llvm/IR/CallingConv.h" 33 #include "llvm/IR/Constant.h" 34 #include "llvm/IR/Constants.h" 35 #include "llvm/IR/DataLayout.h" 36 #include "llvm/IR/DebugInfoMetadata.h" 37 #include "llvm/IR/DerivedTypes.h" 38 #include "llvm/IR/Dominators.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/IR/GetElementPtrTypeIterator.h" 41 #include "llvm/IR/GlobalAlias.h" 42 #include "llvm/IR/GlobalValue.h" 43 #include "llvm/IR/GlobalVariable.h" 44 #include "llvm/IR/InstrTypes.h" 45 #include "llvm/IR/Instruction.h" 46 #include "llvm/IR/Instructions.h" 47 #include "llvm/IR/IntrinsicInst.h" 48 #include "llvm/IR/Module.h" 49 #include "llvm/IR/Operator.h" 50 #include "llvm/IR/Type.h" 51 #include "llvm/IR/Use.h" 52 #include "llvm/IR/User.h" 53 #include "llvm/IR/Value.h" 54 #include "llvm/IR/ValueHandle.h" 55 #include "llvm/InitializePasses.h" 56 #include "llvm/Pass.h" 57 #include "llvm/Support/AtomicOrdering.h" 58 #include "llvm/Support/Casting.h" 59 #include "llvm/Support/CommandLine.h" 60 #include "llvm/Support/Debug.h" 61 #include "llvm/Support/ErrorHandling.h" 62 #include "llvm/Support/MathExtras.h" 63 #include "llvm/Support/raw_ostream.h" 64 #include "llvm/Transforms/IPO.h" 65 #include "llvm/Transforms/Utils/CtorUtils.h" 66 #include "llvm/Transforms/Utils/Evaluator.h" 67 #include "llvm/Transforms/Utils/GlobalStatus.h" 68 #include "llvm/Transforms/Utils/Local.h" 69 #include <cassert> 70 #include <cstdint> 71 #include <utility> 72 #include <vector> 73 74 using namespace llvm; 75 76 #define DEBUG_TYPE "globalopt" 77 78 STATISTIC(NumMarked , "Number of globals marked constant"); 79 STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); 80 STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); 81 STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); 82 STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); 83 STATISTIC(NumDeleted , "Number of globals deleted"); 84 STATISTIC(NumGlobUses , "Number of global uses devirtualized"); 85 STATISTIC(NumLocalized , "Number of globals localized"); 86 STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); 87 STATISTIC(NumFastCallFns , "Number of functions 
converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
STATISTIC(NumInternalFunc, "Number of internal functions");
STATISTIC(NumColdCC, "Number of functions marked coldcc");

static cl::opt<bool>
    EnableColdCCStressTest("enable-coldcc-stress-test",
                           cl::desc("Enable stress test of coldcc by adding "
                                    "calling conv to all internal functions."),
                           cl::init(false), cl::Hidden);

static cl::opt<int> ColdCCRelFreq(
    "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc(
        "Maximum block frequency, expressed as a percentage of caller's "
        "entry frequency, for a call site to be considered cold for enabling "
        "coldcc"));

/// Is this global variable possibly used by a leak checker as a root?  If so,
/// we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
  // A global variable is a root if it is a pointer, or could plausibly contain
  // a pointer.  There are two challenges: one is that we could have a struct
  // that has an inner member which is a pointer.  We recurse through the type
  // to detect these (up to a point).  The other is that the global may really
  // be a union of a pointer and another type, so its LLVM type is an integer
  // which gets converted into a pointer, or an [i8 x #] array with a pointer
  // potentially contained in it.

  if (GV->hasPrivateLinkage())
    return false;

  SmallVector<Type *, 4> Types;
  Types.push_back(GV->getValueType());

  unsigned Limit = 20;
  do {
    Type *Ty = Types.pop_back_val();
    switch (Ty->getTypeID()) {
    default: break;
    case Type::PointerTyID: return true;
    case Type::ArrayTyID:
    case Type::VectorTyID: {
      SequentialType *STy = cast<SequentialType>(Ty);
      Types.push_back(STy->getElementType());
      break;
    }
    case Type::StructTyID: {
      StructType *STy = cast<StructType>(Ty);
      if (STy->isOpaque()) return true;
      for (StructType::element_iterator I = STy->element_begin(),
           E = STy->element_end(); I != E; ++I) {
        Type *InnerTy = *I;
        if (isa<PointerType>(InnerTy)) return true;
        if (isa<CompositeType>(InnerTy))
          Types.push_back(InnerTy);
      }
      break;
    }
    }
    if (--Limit == 0) return true;
  } while (!Types.empty());
  return false;
}

/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
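/// For example, if the only user of "%p = call i8* @malloc(i64 8)" is a store
/// of %p into such a global, then the store, the malloc, and any side-effect-
/// free instructions feeding the store can all be deleted (illustrative IR,
/// not taken from a particular test case).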
159 static bool IsSafeComputationToRemove( 160 Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { 161 do { 162 if (isa<Constant>(V)) 163 return true; 164 if (!V->hasOneUse()) 165 return false; 166 if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || 167 isa<GlobalValue>(V)) 168 return false; 169 if (isAllocationFn(V, GetTLI)) 170 return true; 171 172 Instruction *I = cast<Instruction>(V); 173 if (I->mayHaveSideEffects()) 174 return false; 175 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { 176 if (!GEP->hasAllConstantIndices()) 177 return false; 178 } else if (I->getNumOperands() != 1) { 179 return false; 180 } 181 182 V = I->getOperand(0); 183 } while (true); 184 } 185 186 /// This GV is a pointer root. Loop over all users of the global and clean up 187 /// any that obviously don't assign the global a value that isn't dynamically 188 /// allocated. 189 static bool 190 CleanupPointerRootUsers(GlobalVariable *GV, 191 function_ref<TargetLibraryInfo &(Function &)> GetTLI) { 192 // A brief explanation of leak checkers. The goal is to find bugs where 193 // pointers are forgotten, causing an accumulating growth in memory 194 // usage over time. The common strategy for leak checkers is to whitelist the 195 // memory pointed to by globals at exit. This is popular because it also 196 // solves another problem where the main thread of a C++ program may shut down 197 // before other threads that are still expecting to use those globals. To 198 // handle that case, we expect the program may create a singleton and never 199 // destroy it. 200 201 bool Changed = false; 202 203 // If Dead[n].first is the only use of a malloc result, we can delete its 204 // chain of computation and the store to the global in Dead[n].second. 205 SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead; 206 207 // Constants can't be pointers to dynamically allocated memory. 208 for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end(); 209 UI != E;) { 210 User *U = *UI++; 211 if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 212 Value *V = SI->getValueOperand(); 213 if (isa<Constant>(V)) { 214 Changed = true; 215 SI->eraseFromParent(); 216 } else if (Instruction *I = dyn_cast<Instruction>(V)) { 217 if (I->hasOneUse()) 218 Dead.push_back(std::make_pair(I, SI)); 219 } 220 } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) { 221 if (isa<Constant>(MSI->getValue())) { 222 Changed = true; 223 MSI->eraseFromParent(); 224 } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) { 225 if (I->hasOneUse()) 226 Dead.push_back(std::make_pair(I, MSI)); 227 } 228 } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) { 229 GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource()); 230 if (MemSrc && MemSrc->isConstant()) { 231 Changed = true; 232 MTI->eraseFromParent(); 233 } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) { 234 if (I->hasOneUse()) 235 Dead.push_back(std::make_pair(I, MTI)); 236 } 237 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { 238 if (CE->use_empty()) { 239 CE->destroyConstant(); 240 Changed = true; 241 } 242 } else if (Constant *C = dyn_cast<Constant>(U)) { 243 if (isSafeToDestroyConstant(C)) { 244 C->destroyConstant(); 245 // This could have invalidated UI, start over from scratch. 
246 Dead.clear(); 247 CleanupPointerRootUsers(GV, GetTLI); 248 return true; 249 } 250 } 251 } 252 253 for (int i = 0, e = Dead.size(); i != e; ++i) { 254 if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) { 255 Dead[i].second->eraseFromParent(); 256 Instruction *I = Dead[i].first; 257 do { 258 if (isAllocationFn(I, GetTLI)) 259 break; 260 Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); 261 if (!J) 262 break; 263 I->eraseFromParent(); 264 I = J; 265 } while (true); 266 I->eraseFromParent(); 267 } 268 } 269 270 return Changed; 271 } 272 273 /// We just marked GV constant. Loop over all users of the global, cleaning up 274 /// the obvious ones. This is largely just a quick scan over the use list to 275 /// clean up the easy and obvious cruft. This returns true if it made a change. 276 static bool CleanupConstantGlobalUsers( 277 Value *V, Constant *Init, const DataLayout &DL, 278 function_ref<TargetLibraryInfo &(Function &)> GetTLI) { 279 bool Changed = false; 280 // Note that we need to use a weak value handle for the worklist items. When 281 // we delete a constant array, we may also be holding pointer to one of its 282 // elements (or an element of one of its elements if we're dealing with an 283 // array of arrays) in the worklist. 284 SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end()); 285 while (!WorkList.empty()) { 286 Value *UV = WorkList.pop_back_val(); 287 if (!UV) 288 continue; 289 290 User *U = cast<User>(UV); 291 292 if (LoadInst *LI = dyn_cast<LoadInst>(U)) { 293 if (Init) { 294 // Replace the load with the initializer. 295 LI->replaceAllUsesWith(Init); 296 LI->eraseFromParent(); 297 Changed = true; 298 } 299 } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 300 // Store must be unreachable or storing Init into the global. 301 SI->eraseFromParent(); 302 Changed = true; 303 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { 304 if (CE->getOpcode() == Instruction::GetElementPtr) { 305 Constant *SubInit = nullptr; 306 if (Init) 307 SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); 308 Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI); 309 } else if ((CE->getOpcode() == Instruction::BitCast && 310 CE->getType()->isPointerTy()) || 311 CE->getOpcode() == Instruction::AddrSpaceCast) { 312 // Pointer cast, delete any stores and memsets to the global. 313 Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI); 314 } 315 316 if (CE->use_empty()) { 317 CE->destroyConstant(); 318 Changed = true; 319 } 320 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { 321 // Do not transform "gepinst (gep constexpr (GV))" here, because forming 322 // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold 323 // and will invalidate our notion of what Init is. 324 Constant *SubInit = nullptr; 325 if (!isa<ConstantExpr>(GEP->getOperand(0))) { 326 ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>( 327 ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction()))); 328 if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) 329 SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); 330 331 // If the initializer is an all-null value and we have an inbounds GEP, 332 // we already know what the result of any load from that GEP is. 333 // TODO: Handle splats. 
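      // E.g. with "@g = internal constant [8 x i32] zeroinitializer", a load
      // through "getelementptr inbounds [8 x i32], [8 x i32]* @g, i64 0, i64 3"
      // must be i32 0 (hypothetical example).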
334 if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds()) 335 SubInit = Constant::getNullValue(GEP->getResultElementType()); 336 } 337 Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI); 338 339 if (GEP->use_empty()) { 340 GEP->eraseFromParent(); 341 Changed = true; 342 } 343 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv 344 if (MI->getRawDest() == V) { 345 MI->eraseFromParent(); 346 Changed = true; 347 } 348 349 } else if (Constant *C = dyn_cast<Constant>(U)) { 350 // If we have a chain of dead constantexprs or other things dangling from 351 // us, and if they are all dead, nuke them without remorse. 352 if (isSafeToDestroyConstant(C)) { 353 C->destroyConstant(); 354 CleanupConstantGlobalUsers(V, Init, DL, GetTLI); 355 return true; 356 } 357 } 358 } 359 return Changed; 360 } 361 362 static bool isSafeSROAElementUse(Value *V); 363 364 /// Return true if the specified GEP is a safe user of a derived 365 /// expression from a global that we want to SROA. 366 static bool isSafeSROAGEP(User *U) { 367 // Check to see if this ConstantExpr GEP is SRA'able. In particular, we 368 // don't like < 3 operand CE's, and we don't like non-constant integer 369 // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some 370 // value of C. 371 if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) || 372 !cast<Constant>(U->getOperand(1))->isNullValue()) 373 return false; 374 375 gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); 376 ++GEPI; // Skip over the pointer index. 377 378 // For all other level we require that the indices are constant and inrange. 379 // In particular, consider: A[0][i]. We cannot know that the user isn't doing 380 // invalid things like allowing i to index an out-of-range subscript that 381 // accesses A[1]. This can also happen between different members of a struct 382 // in llvm IR. 383 for (; GEPI != E; ++GEPI) { 384 if (GEPI.isStruct()) 385 continue; 386 387 ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); 388 if (!IdxVal || (GEPI.isBoundedSequential() && 389 IdxVal->getZExtValue() >= GEPI.getSequentialNumElements())) 390 return false; 391 } 392 393 return llvm::all_of(U->users(), 394 [](User *UU) { return isSafeSROAElementUse(UU); }); 395 } 396 397 /// Return true if the specified instruction is a safe user of a derived 398 /// expression from a global that we want to SROA. 399 static bool isSafeSROAElementUse(Value *V) { 400 // We might have a dead and dangling constant hanging off of here. 401 if (Constant *C = dyn_cast<Constant>(V)) 402 return isSafeToDestroyConstant(C); 403 404 Instruction *I = dyn_cast<Instruction>(V); 405 if (!I) return false; 406 407 // Loads are ok. 408 if (isa<LoadInst>(I)) return true; 409 410 // Stores *to* the pointer are ok. 411 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 412 return SI->getOperand(0) != V; 413 414 // Otherwise, it must be a GEP. Check it and its users are safe to SRA. 415 return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I); 416 } 417 418 /// Look at all uses of the global and decide whether it is safe for us to 419 /// perform this transformation. 420 static bool GlobalUsersSafeToSRA(GlobalValue *GV) { 421 for (User *U : GV->users()) { 422 // The user of the global must be a GEP Inst or a ConstantExpr GEP. 
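    // Any other user (a load or store of the aggregate itself, or anything
    // that lets the address escape) could not be rewritten in terms of the
    // split globals, so it would defeat the transformation.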
423 if (!isa<GetElementPtrInst>(U) && 424 (!isa<ConstantExpr>(U) || 425 cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) 426 return false; 427 428 // Check the gep and it's users are safe to SRA 429 if (!isSafeSROAGEP(U)) 430 return false; 431 } 432 433 return true; 434 } 435 436 /// Copy over the debug info for a variable to its SRA replacements. 437 static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, 438 uint64_t FragmentOffsetInBits, 439 uint64_t FragmentSizeInBits, 440 unsigned NumElements) { 441 SmallVector<DIGlobalVariableExpression *, 1> GVs; 442 GV->getDebugInfo(GVs); 443 for (auto *GVE : GVs) { 444 DIVariable *Var = GVE->getVariable(); 445 DIExpression *Expr = GVE->getExpression(); 446 if (NumElements > 1) { 447 if (auto E = DIExpression::createFragmentExpression( 448 Expr, FragmentOffsetInBits, FragmentSizeInBits)) 449 Expr = *E; 450 else 451 return; 452 } 453 auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr); 454 NGV->addDebugInfo(NGVE); 455 } 456 } 457 458 /// Perform scalar replacement of aggregates on the specified global variable. 459 /// This opens the door for other optimizations by exposing the behavior of the 460 /// program in a more fine-grained way. We have determined that this 461 /// transformation is safe already. We return the first global variable we 462 /// insert so that the caller can reprocess it. 463 static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { 464 // Make sure this global only has simple uses that we can SRA. 465 if (!GlobalUsersSafeToSRA(GV)) 466 return nullptr; 467 468 assert(GV->hasLocalLinkage()); 469 Constant *Init = GV->getInitializer(); 470 Type *Ty = Init->getType(); 471 472 std::vector<GlobalVariable *> NewGlobals; 473 Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); 474 475 // Get the alignment of the global, either explicit or target-specific. 476 unsigned StartAlignment = GV->getAlignment(); 477 if (StartAlignment == 0) 478 StartAlignment = DL.getABITypeAlignment(GV->getType()); 479 480 if (StructType *STy = dyn_cast<StructType>(Ty)) { 481 unsigned NumElements = STy->getNumElements(); 482 NewGlobals.reserve(NumElements); 483 const StructLayout &Layout = *DL.getStructLayout(STy); 484 for (unsigned i = 0, e = NumElements; i != e; ++i) { 485 Constant *In = Init->getAggregateElement(i); 486 assert(In && "Couldn't get element of initializer?"); 487 GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, 488 GlobalVariable::InternalLinkage, 489 In, GV->getName()+"."+Twine(i), 490 GV->getThreadLocalMode(), 491 GV->getType()->getAddressSpace()); 492 NGV->setExternallyInitialized(GV->isExternallyInitialized()); 493 NGV->copyAttributesFrom(GV); 494 Globals.push_back(NGV); 495 NewGlobals.push_back(NGV); 496 497 // Calculate the known alignment of the field. If the original aggregate 498 // had 256 byte alignment for example, something might depend on that: 499 // propagate info to each field. 500 uint64_t FieldOffset = Layout.getElementOffset(i); 501 Align NewAlign(MinAlign(StartAlignment, FieldOffset)); 502 if (NewAlign > Align(DL.getABITypeAlignment(STy->getElementType(i)))) 503 NGV->setAlignment(NewAlign); 504 505 // Copy over the debug info for the variable. 
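      // Each replacement global keeps the original debug info, narrowed to a
      // DW_OP_LLVM_fragment covering this field's bit offset and size (only
      // when the aggregate has more than one element).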
506 uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType()); 507 uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(i); 508 transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, NumElements); 509 } 510 } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) { 511 unsigned NumElements = STy->getNumElements(); 512 if (NumElements > 16 && GV->hasNUsesOrMore(16)) 513 return nullptr; // It's not worth it. 514 NewGlobals.reserve(NumElements); 515 auto ElTy = STy->getElementType(); 516 uint64_t EltSize = DL.getTypeAllocSize(ElTy); 517 Align EltAlign(DL.getABITypeAlignment(ElTy)); 518 uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy); 519 for (unsigned i = 0, e = NumElements; i != e; ++i) { 520 Constant *In = Init->getAggregateElement(i); 521 assert(In && "Couldn't get element of initializer?"); 522 523 GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, 524 GlobalVariable::InternalLinkage, 525 In, GV->getName()+"."+Twine(i), 526 GV->getThreadLocalMode(), 527 GV->getType()->getAddressSpace()); 528 NGV->setExternallyInitialized(GV->isExternallyInitialized()); 529 NGV->copyAttributesFrom(GV); 530 Globals.push_back(NGV); 531 NewGlobals.push_back(NGV); 532 533 // Calculate the known alignment of the field. If the original aggregate 534 // had 256 byte alignment for example, something might depend on that: 535 // propagate info to each field. 536 Align NewAlign(MinAlign(StartAlignment, EltSize * i)); 537 if (NewAlign > EltAlign) 538 NGV->setAlignment(NewAlign); 539 transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits, 540 NumElements); 541 } 542 } 543 544 if (NewGlobals.empty()) 545 return nullptr; 546 547 LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); 548 549 Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); 550 551 // Loop over all of the uses of the global, replacing the constantexpr geps, 552 // with smaller constantexpr geps or direct references. 553 while (!GV->use_empty()) { 554 User *GEP = GV->user_back(); 555 assert(((isa<ConstantExpr>(GEP) && 556 cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| 557 isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); 558 559 // Ignore the 1th operand, which has to be zero or else the program is quite 560 // broken (undefined). Get the 2nd operand, which is the structure or array 561 // index. 562 unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); 563 if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. 564 565 Value *NewPtr = NewGlobals[Val]; 566 Type *NewTy = NewGlobals[Val]->getValueType(); 567 568 // Form a shorter GEP if needed. 569 if (GEP->getNumOperands() > 3) { 570 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) { 571 SmallVector<Constant*, 8> Idxs; 572 Idxs.push_back(NullInt); 573 for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) 574 Idxs.push_back(CE->getOperand(i)); 575 NewPtr = 576 ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs); 577 } else { 578 GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); 579 SmallVector<Value*, 8> Idxs; 580 Idxs.push_back(NullInt); 581 for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) 582 Idxs.push_back(GEPI->getOperand(i)); 583 NewPtr = GetElementPtrInst::Create( 584 NewTy, NewPtr, Idxs, GEPI->getName() + "." 
+ Twine(Val), GEPI); 585 } 586 } 587 GEP->replaceAllUsesWith(NewPtr); 588 589 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) 590 GEPI->eraseFromParent(); 591 else 592 cast<ConstantExpr>(GEP)->destroyConstant(); 593 } 594 595 // Delete the old global, now that it is dead. 596 Globals.erase(GV); 597 ++NumSRA; 598 599 // Loop over the new globals array deleting any globals that are obviously 600 // dead. This can arise due to scalarization of a structure or an array that 601 // has elements that are dead. 602 unsigned FirstGlobal = 0; 603 for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i) 604 if (NewGlobals[i]->use_empty()) { 605 Globals.erase(NewGlobals[i]); 606 if (FirstGlobal == i) ++FirstGlobal; 607 } 608 609 return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr; 610 } 611 612 /// Return true if all users of the specified value will trap if the value is 613 /// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid 614 /// reprocessing them. 615 static bool AllUsesOfValueWillTrapIfNull(const Value *V, 616 SmallPtrSetImpl<const PHINode*> &PHIs) { 617 for (const User *U : V->users()) { 618 if (const Instruction *I = dyn_cast<Instruction>(U)) { 619 // If null pointer is considered valid, then all uses are non-trapping. 620 // Non address-space 0 globals have already been pruned by the caller. 621 if (NullPointerIsDefined(I->getFunction())) 622 return false; 623 } 624 if (isa<LoadInst>(U)) { 625 // Will trap. 626 } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { 627 if (SI->getOperand(0) == V) { 628 //cerr << "NONTRAPPING USE: " << *U; 629 return false; // Storing the value. 630 } 631 } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { 632 if (CI->getCalledValue() != V) { 633 //cerr << "NONTRAPPING USE: " << *U; 634 return false; // Not calling the ptr 635 } 636 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { 637 if (II->getCalledValue() != V) { 638 //cerr << "NONTRAPPING USE: " << *U; 639 return false; // Not calling the ptr 640 } 641 } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) { 642 if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; 643 } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { 644 if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; 645 } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { 646 // If we've already seen this phi node, ignore it, it has already been 647 // checked. 648 if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) 649 return false; 650 } else if (isa<ICmpInst>(U) && 651 isa<ConstantPointerNull>(U->getOperand(1))) { 652 // Ignore icmp X, null 653 } else { 654 //cerr << "NONTRAPPING USE: " << *U; 655 return false; 656 } 657 } 658 return true; 659 } 660 661 /// Return true if all uses of any loads from GV will trap if the loaded value 662 /// is null. Note that this also permits comparisons of the loaded value 663 /// against null, as a special case. 664 static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { 665 for (const User *U : GV->users()) 666 if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { 667 SmallPtrSet<const PHINode*, 8> PHIs; 668 if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) 669 return false; 670 } else if (isa<StoreInst>(U)) { 671 // Ignore stores to the global. 672 } else { 673 // We don't know or understand this user, bail out. 
      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
      return false;
    }
  return true;
}

static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
  bool Changed = false;
  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
    Instruction *I = cast<Instruction>(*UI++);
    // Uses are non-trapping if null pointer is considered valid.
    // Non address-space 0 globals are already pruned by the caller.
    if (NullPointerIsDefined(I->getFunction()))
      return false;
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      CallSite CS(I);
      if (CS.getCalledValue() == V) {
        // Calling through the pointer!  Turn into a direct call, but be
        // careful that the pointer is not also being passed as an argument.
        CS.setCalledFunction(NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
          if (CS.getArgument(i) == V) {
            PassedAsArg = true;
            CS.setArgument(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also.  Be careful to not invalidate
          // UI!
          UI = V->user_begin();
        }
      }
    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
                     ConstantExpr::getCast(CI->getOpcode(),
                                           NewV, CI->getType()));
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // Should handle GEP here.
      SmallVector<Constant*, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands()-1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands()-1)
        Changed |= OptimizeAwayTrappingUsesOfValue(
            GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
                                                 NewV, Idxs));
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}

/// The specified global has only one non-null value stored into it.  If there
/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know such uses can only execute after the one
/// non-null store, so the loaded value must be the stored value and we can
/// optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(
    GlobalVariable *GV, Constant *LV, const DataLayout &DL,
    function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of loads with uses of the stored value.
  for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){
    User *GlobalUser = *GUI++;
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
774 assert(GlobalUser->getOperand(1) == GV && 775 "Must be storing *to* the global"); 776 } else { 777 AllNonStoreUsesGone = false; 778 779 // If we get here we could have other crazy uses that are transitively 780 // loaded. 781 assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || 782 isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || 783 isa<BitCastInst>(GlobalUser) || 784 isa<GetElementPtrInst>(GlobalUser)) && 785 "Only expect load and stores!"); 786 } 787 } 788 789 if (Changed) { 790 LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV 791 << "\n"); 792 ++NumGlobUses; 793 } 794 795 // If we nuked all of the loads, then none of the stores are needed either, 796 // nor is the global. 797 if (AllNonStoreUsesGone) { 798 if (isLeakCheckerRoot(GV)) { 799 Changed |= CleanupPointerRootUsers(GV, GetTLI); 800 } else { 801 Changed = true; 802 CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI); 803 } 804 if (GV->use_empty()) { 805 LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); 806 Changed = true; 807 GV->eraseFromParent(); 808 ++NumDeleted; 809 } 810 } 811 return Changed; 812 } 813 814 /// Walk the use list of V, constant folding all of the instructions that are 815 /// foldable. 816 static void ConstantPropUsersOf(Value *V, const DataLayout &DL, 817 TargetLibraryInfo *TLI) { 818 for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; ) 819 if (Instruction *I = dyn_cast<Instruction>(*UI++)) 820 if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) { 821 I->replaceAllUsesWith(NewC); 822 823 // Advance UI to the next non-I use to avoid invalidating it! 824 // Instructions could multiply use V. 825 while (UI != E && *UI == I) 826 ++UI; 827 if (isInstructionTriviallyDead(I, TLI)) 828 I->eraseFromParent(); 829 } 830 } 831 832 /// This function takes the specified global variable, and transforms the 833 /// program as if it always contained the result of the specified malloc. 834 /// Because it is always the result of the specified malloc, there is no reason 835 /// to actually DO the malloc. Instead, turn the malloc into a global, and any 836 /// loads of GV as uses of the new global. 837 static GlobalVariable * 838 OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, 839 ConstantInt *NElements, const DataLayout &DL, 840 TargetLibraryInfo *TLI) { 841 LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI 842 << '\n'); 843 844 Type *GlobalType; 845 if (NElements->getZExtValue() == 1) 846 GlobalType = AllocTy; 847 else 848 // If we have an array allocation, the global variable is of an array. 849 GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue()); 850 851 // Create the new global variable. The contents of the malloc'd memory is 852 // undefined, so initialize with an undef value. 853 GlobalVariable *NewGV = new GlobalVariable( 854 *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, 855 UndefValue::get(GlobalType), GV->getName() + ".body", nullptr, 856 GV->getThreadLocalMode()); 857 858 // If there are bitcast users of the malloc (which is typical, usually we have 859 // a malloc + bitcast) then replace them with uses of the new global. Update 860 // other users to use the global as well. 
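  // E.g. for "%m = call i8* @malloc(i64 4)" followed by
  // "%p = bitcast i8* %m to i32*", %p is simply replaced by the new global,
  // and any other users of %m go through a single bitcast of it
  // (illustrative IR).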
861 BitCastInst *TheBC = nullptr; 862 while (!CI->use_empty()) { 863 Instruction *User = cast<Instruction>(CI->user_back()); 864 if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { 865 if (BCI->getType() == NewGV->getType()) { 866 BCI->replaceAllUsesWith(NewGV); 867 BCI->eraseFromParent(); 868 } else { 869 BCI->setOperand(0, NewGV); 870 } 871 } else { 872 if (!TheBC) 873 TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); 874 User->replaceUsesOfWith(CI, TheBC); 875 } 876 } 877 878 Constant *RepValue = NewGV; 879 if (NewGV->getType() != GV->getValueType()) 880 RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType()); 881 882 // If there is a comparison against null, we will insert a global bool to 883 // keep track of whether the global was initialized yet or not. 884 GlobalVariable *InitBool = 885 new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, 886 GlobalValue::InternalLinkage, 887 ConstantInt::getFalse(GV->getContext()), 888 GV->getName()+".init", GV->getThreadLocalMode()); 889 bool InitBoolUsed = false; 890 891 // Loop over all uses of GV, processing them in turn. 892 while (!GV->use_empty()) { 893 if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) { 894 // The global is initialized when the store to it occurs. 895 new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 896 None, SI->getOrdering(), SI->getSyncScopeID(), SI); 897 SI->eraseFromParent(); 898 continue; 899 } 900 901 LoadInst *LI = cast<LoadInst>(GV->user_back()); 902 while (!LI->use_empty()) { 903 Use &LoadUse = *LI->use_begin(); 904 ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser()); 905 if (!ICI) { 906 LoadUse = RepValue; 907 continue; 908 } 909 910 // Replace the cmp X, 0 with a use of the bool value. 911 // Sink the load to where the compare was, if atomic rules allow us to. 912 Value *LV = new LoadInst(InitBool->getValueType(), InitBool, 913 InitBool->getName() + ".val", false, None, 914 LI->getOrdering(), LI->getSyncScopeID(), 915 LI->isUnordered() ? (Instruction *)ICI : LI); 916 InitBoolUsed = true; 917 switch (ICI->getPredicate()) { 918 default: llvm_unreachable("Unknown ICmp Predicate!"); 919 case ICmpInst::ICMP_ULT: 920 case ICmpInst::ICMP_SLT: // X < null -> always false 921 LV = ConstantInt::getFalse(GV->getContext()); 922 break; 923 case ICmpInst::ICMP_ULE: 924 case ICmpInst::ICMP_SLE: 925 case ICmpInst::ICMP_EQ: 926 LV = BinaryOperator::CreateNot(LV, "notinit", ICI); 927 break; 928 case ICmpInst::ICMP_NE: 929 case ICmpInst::ICMP_UGE: 930 case ICmpInst::ICMP_SGE: 931 case ICmpInst::ICMP_UGT: 932 case ICmpInst::ICMP_SGT: 933 break; // no change. 934 } 935 ICI->replaceAllUsesWith(LV); 936 ICI->eraseFromParent(); 937 } 938 LI->eraseFromParent(); 939 } 940 941 // If the initialization boolean was used, insert it, otherwise delete it. 942 if (!InitBoolUsed) { 943 while (!InitBool->use_empty()) // Delete initializations 944 cast<StoreInst>(InitBool->user_back())->eraseFromParent(); 945 delete InitBool; 946 } else 947 GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool); 948 949 // Now the GV is dead, nuke it and the malloc.. 950 GV->eraseFromParent(); 951 CI->eraseFromParent(); 952 953 // To further other optimizations, loop over all users of NewGV and try to 954 // constant prop them. This will promote GEP instructions with constant 955 // indices into GEP constant-exprs, which will allow global-opt to hack on it. 
956 ConstantPropUsersOf(NewGV, DL, TLI); 957 if (RepValue != NewGV) 958 ConstantPropUsersOf(RepValue, DL, TLI); 959 960 return NewGV; 961 } 962 963 /// Scan the use-list of V checking to make sure that there are no complex uses 964 /// of V. We permit simple things like dereferencing the pointer, but not 965 /// storing through the address, unless it is to the specified global. 966 static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, 967 const GlobalVariable *GV, 968 SmallPtrSetImpl<const PHINode*> &PHIs) { 969 for (const User *U : V->users()) { 970 const Instruction *Inst = cast<Instruction>(U); 971 972 if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { 973 continue; // Fine, ignore. 974 } 975 976 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 977 if (SI->getOperand(0) == V && SI->getOperand(1) != GV) 978 return false; // Storing the pointer itself... bad. 979 continue; // Otherwise, storing through it, or storing into GV... fine. 980 } 981 982 // Must index into the array and into the struct. 983 if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) { 984 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) 985 return false; 986 continue; 987 } 988 989 if (const PHINode *PN = dyn_cast<PHINode>(Inst)) { 990 // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI 991 // cycles. 992 if (PHIs.insert(PN).second) 993 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs)) 994 return false; 995 continue; 996 } 997 998 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { 999 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) 1000 return false; 1001 continue; 1002 } 1003 1004 return false; 1005 } 1006 return true; 1007 } 1008 1009 /// The Alloc pointer is stored into GV somewhere. Transform all uses of the 1010 /// allocation into loads from the global and uses of the resultant pointer. 1011 /// Further, delete the store into GV. This assumes that these value pass the 1012 /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. 1013 static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, 1014 GlobalVariable *GV) { 1015 while (!Alloc->use_empty()) { 1016 Instruction *U = cast<Instruction>(*Alloc->user_begin()); 1017 Instruction *InsertPt = U; 1018 if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 1019 // If this is the store of the allocation into the global, remove it. 1020 if (SI->getOperand(1) == GV) { 1021 SI->eraseFromParent(); 1022 continue; 1023 } 1024 } else if (PHINode *PN = dyn_cast<PHINode>(U)) { 1025 // Insert the load in the corresponding predecessor, not right before the 1026 // PHI. 1027 InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator(); 1028 } else if (isa<BitCastInst>(U)) { 1029 // Must be bitcast between the malloc and store to initialize the global. 1030 ReplaceUsesOfMallocWithGlobal(U, GV); 1031 U->eraseFromParent(); 1032 continue; 1033 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { 1034 // If this is a "GEP bitcast" and the user is a store to the global, then 1035 // just process it as a bitcast. 1036 if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse()) 1037 if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back())) 1038 if (SI->getOperand(1) == GV) { 1039 // Must be bitcast GEP between the malloc and store to initialize 1040 // the global. 1041 ReplaceUsesOfMallocWithGlobal(GEPI, GV); 1042 GEPI->eraseFromParent(); 1043 continue; 1044 } 1045 } 1046 1047 // Insert a load from the global, and use it instead of the malloc. 
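    // The load is created at InsertPt: at the user itself, or at the incoming
    // block's terminator when the user is a PHI node (see above).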
1048 Value *NL = 1049 new LoadInst(GV->getValueType(), GV, GV->getName() + ".val", InsertPt); 1050 U->replaceUsesOfWith(Alloc, NL); 1051 } 1052 } 1053 1054 /// Verify that all uses of V (a load, or a phi of a load) are simple enough to 1055 /// perform heap SRA on. This permits GEP's that index through the array and 1056 /// struct field, icmps of null, and PHIs. 1057 static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, 1058 SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs, 1059 SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) { 1060 // We permit two users of the load: setcc comparing against the null 1061 // pointer, and a getelementptr of a specific form. 1062 for (const User *U : V->users()) { 1063 const Instruction *UI = cast<Instruction>(U); 1064 1065 // Comparison against null is ok. 1066 if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) { 1067 if (!isa<ConstantPointerNull>(ICI->getOperand(1))) 1068 return false; 1069 continue; 1070 } 1071 1072 // getelementptr is also ok, but only a simple form. 1073 if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) { 1074 // Must index into the array and into the struct. 1075 if (GEPI->getNumOperands() < 3) 1076 return false; 1077 1078 // Otherwise the GEP is ok. 1079 continue; 1080 } 1081 1082 if (const PHINode *PN = dyn_cast<PHINode>(UI)) { 1083 if (!LoadUsingPHIsPerLoad.insert(PN).second) 1084 // This means some phi nodes are dependent on each other. 1085 // Avoid infinite looping! 1086 return false; 1087 if (!LoadUsingPHIs.insert(PN).second) 1088 // If we have already analyzed this PHI, then it is safe. 1089 continue; 1090 1091 // Make sure all uses of the PHI are simple enough to transform. 1092 if (!LoadUsesSimpleEnoughForHeapSRA(PN, 1093 LoadUsingPHIs, LoadUsingPHIsPerLoad)) 1094 return false; 1095 1096 continue; 1097 } 1098 1099 // Otherwise we don't know what this is, not ok. 1100 return false; 1101 } 1102 1103 return true; 1104 } 1105 1106 /// If all users of values loaded from GV are simple enough to perform HeapSRA, 1107 /// return true. 1108 static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, 1109 Instruction *StoredVal) { 1110 SmallPtrSet<const PHINode*, 32> LoadUsingPHIs; 1111 SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad; 1112 for (const User *U : GV->users()) 1113 if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { 1114 if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, 1115 LoadUsingPHIsPerLoad)) 1116 return false; 1117 LoadUsingPHIsPerLoad.clear(); 1118 } 1119 1120 // If we reach here, we know that all uses of the loads and transitive uses 1121 // (through PHI nodes) are simple enough to transform. However, we don't know 1122 // that all inputs the to the PHI nodes are in the same equivalence sets. 1123 // Check to verify that all operands of the PHIs are either PHIS that can be 1124 // transformed, loads from GV, or MI itself. 1125 for (const PHINode *PN : LoadUsingPHIs) { 1126 for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { 1127 Value *InVal = PN->getIncomingValue(op); 1128 1129 // PHI of the stored value itself is ok. 1130 if (InVal == StoredVal) continue; 1131 1132 if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) { 1133 // One of the PHIs in our set is (optimistically) ok. 1134 if (LoadUsingPHIs.count(InPN)) 1135 continue; 1136 return false; 1137 } 1138 1139 // Load from GV is ok. 1140 if (const LoadInst *LI = dyn_cast<LoadInst>(InVal)) 1141 if (LI->getOperand(0) == GV) 1142 continue; 1143 1144 // UNDEF? NULL? 
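      // (In particular, undef and null incoming values currently fall through
      // to the rejection below, even though they could plausibly be handled.)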
1145 1146 // Anything else is rejected. 1147 return false; 1148 } 1149 } 1150 1151 return true; 1152 } 1153 1154 static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, 1155 DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues, 1156 std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) { 1157 std::vector<Value *> &FieldVals = InsertedScalarizedValues[V]; 1158 1159 if (FieldNo >= FieldVals.size()) 1160 FieldVals.resize(FieldNo+1); 1161 1162 // If we already have this value, just reuse the previously scalarized 1163 // version. 1164 if (Value *FieldVal = FieldVals[FieldNo]) 1165 return FieldVal; 1166 1167 // Depending on what instruction this is, we have several cases. 1168 Value *Result; 1169 if (LoadInst *LI = dyn_cast<LoadInst>(V)) { 1170 // This is a scalarized version of the load from the global. Just create 1171 // a new Load of the scalarized global. 1172 Value *V = GetHeapSROAValue(LI->getOperand(0), FieldNo, 1173 InsertedScalarizedValues, PHIsToRewrite); 1174 Result = new LoadInst(V->getType()->getPointerElementType(), V, 1175 LI->getName() + ".f" + Twine(FieldNo), LI); 1176 } else { 1177 PHINode *PN = cast<PHINode>(V); 1178 // PN's type is pointer to struct. Make a new PHI of pointer to struct 1179 // field. 1180 1181 PointerType *PTy = cast<PointerType>(PN->getType()); 1182 StructType *ST = cast<StructType>(PTy->getElementType()); 1183 1184 unsigned AS = PTy->getAddressSpace(); 1185 PHINode *NewPN = 1186 PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS), 1187 PN->getNumIncomingValues(), 1188 PN->getName()+".f"+Twine(FieldNo), PN); 1189 Result = NewPN; 1190 PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); 1191 } 1192 1193 return FieldVals[FieldNo] = Result; 1194 } 1195 1196 /// Given a load instruction and a value derived from the load, rewrite the 1197 /// derived value to use the HeapSRoA'd load. 1198 static void RewriteHeapSROALoadUser(Instruction *LoadUser, 1199 DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues, 1200 std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) { 1201 // If this is a comparison against null, handle it. 1202 if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) { 1203 assert(isa<ConstantPointerNull>(SCI->getOperand(1))); 1204 // If we have a setcc of the loaded pointer, we can use a setcc of any 1205 // field. 1206 Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, 1207 InsertedScalarizedValues, PHIsToRewrite); 1208 1209 Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, 1210 Constant::getNullValue(NPtr->getType()), 1211 SCI->getName()); 1212 SCI->replaceAllUsesWith(New); 1213 SCI->eraseFromParent(); 1214 return; 1215 } 1216 1217 // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' 1218 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) { 1219 assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) 1220 && "Unexpected GEPI!"); 1221 1222 // Load the pointer for this field. 1223 unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); 1224 Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, 1225 InsertedScalarizedValues, PHIsToRewrite); 1226 1227 // Create the new GEP idx vector. 
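    // Keep the original array index (operand 1) but drop the struct field
    // index (operand 2): NewPtr already addresses that field's own allocation.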
1228 SmallVector<Value*, 8> GEPIdx; 1229 GEPIdx.push_back(GEPI->getOperand(1)); 1230 GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); 1231 1232 Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx, 1233 GEPI->getName(), GEPI); 1234 GEPI->replaceAllUsesWith(NGEPI); 1235 GEPI->eraseFromParent(); 1236 return; 1237 } 1238 1239 // Recursively transform the users of PHI nodes. This will lazily create the 1240 // PHIs that are needed for individual elements. Keep track of what PHIs we 1241 // see in InsertedScalarizedValues so that we don't get infinite loops (very 1242 // antisocial). If the PHI is already in InsertedScalarizedValues, it has 1243 // already been seen first by another load, so its uses have already been 1244 // processed. 1245 PHINode *PN = cast<PHINode>(LoadUser); 1246 if (!InsertedScalarizedValues.insert(std::make_pair(PN, 1247 std::vector<Value *>())).second) 1248 return; 1249 1250 // If this is the first time we've seen this PHI, recursively process all 1251 // users. 1252 for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { 1253 Instruction *User = cast<Instruction>(*UI++); 1254 RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); 1255 } 1256 } 1257 1258 /// We are performing Heap SRoA on a global. Ptr is a value loaded from the 1259 /// global. Eliminate all uses of Ptr, making them use FieldGlobals instead. 1260 /// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA. 1261 static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, 1262 DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues, 1263 std::vector<std::pair<PHINode *, unsigned> > &PHIsToRewrite) { 1264 for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) { 1265 Instruction *User = cast<Instruction>(*UI++); 1266 RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); 1267 } 1268 1269 if (Load->use_empty()) { 1270 Load->eraseFromParent(); 1271 InsertedScalarizedValues.erase(Load); 1272 } 1273 } 1274 1275 /// CI is an allocation of an array of structures. Break it up into multiple 1276 /// allocations of arrays of the fields. 1277 static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, 1278 Value *NElems, const DataLayout &DL, 1279 const TargetLibraryInfo *TLI) { 1280 LLVM_DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI 1281 << '\n'); 1282 Type *MAT = getMallocAllocatedType(CI, TLI); 1283 StructType *STy = cast<StructType>(MAT); 1284 1285 // There is guaranteed to be at least one use of the malloc (storing 1286 // it into GV). If there are other uses, change them to be uses of 1287 // the global to simplify later code. This also deletes the store 1288 // into GV. 1289 ReplaceUsesOfMallocWithGlobal(CI, GV); 1290 1291 // Okay, at this point, there are no users of the malloc. Insert N 1292 // new mallocs at the same place as CI, and N globals. 
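  // E.g. a malloc of "%n x { i32, i32* }" stored into @g becomes two globals,
  // @g.f0 and @g.f1, each initialized from a field-sized malloc of %n elements
  // (names here are illustrative; real names derive from GV and CI).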
1293 std::vector<Value *> FieldGlobals; 1294 std::vector<Value *> FieldMallocs; 1295 1296 SmallVector<OperandBundleDef, 1> OpBundles; 1297 CI->getOperandBundlesAsDefs(OpBundles); 1298 1299 unsigned AS = GV->getType()->getPointerAddressSpace(); 1300 for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ 1301 Type *FieldTy = STy->getElementType(FieldNo); 1302 PointerType *PFieldTy = PointerType::get(FieldTy, AS); 1303 1304 GlobalVariable *NGV = new GlobalVariable( 1305 *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, 1306 Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo), 1307 nullptr, GV->getThreadLocalMode()); 1308 NGV->copyAttributesFrom(GV); 1309 FieldGlobals.push_back(NGV); 1310 1311 unsigned TypeSize = DL.getTypeAllocSize(FieldTy); 1312 if (StructType *ST = dyn_cast<StructType>(FieldTy)) 1313 TypeSize = DL.getStructLayout(ST)->getSizeInBytes(); 1314 Type *IntPtrTy = DL.getIntPtrType(CI->getType()); 1315 Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, 1316 ConstantInt::get(IntPtrTy, TypeSize), 1317 NElems, OpBundles, nullptr, 1318 CI->getName() + ".f" + Twine(FieldNo)); 1319 FieldMallocs.push_back(NMI); 1320 new StoreInst(NMI, NGV, CI); 1321 } 1322 1323 // The tricky aspect of this transformation is handling the case when malloc 1324 // fails. In the original code, malloc failing would set the result pointer 1325 // of malloc to null. In this case, some mallocs could succeed and others 1326 // could fail. As such, we emit code that looks like this: 1327 // F0 = malloc(field0) 1328 // F1 = malloc(field1) 1329 // F2 = malloc(field2) 1330 // if (F0 == 0 || F1 == 0 || F2 == 0) { 1331 // if (F0) { free(F0); F0 = 0; } 1332 // if (F1) { free(F1); F1 = 0; } 1333 // if (F2) { free(F2); F2 = 0; } 1334 // } 1335 // The malloc can also fail if its argument is too large. 1336 Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); 1337 Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), 1338 ConstantZero, "isneg"); 1339 for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { 1340 Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], 1341 Constant::getNullValue(FieldMallocs[i]->getType()), 1342 "isnull"); 1343 RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); 1344 } 1345 1346 // Split the basic block at the old malloc. 1347 BasicBlock *OrigBB = CI->getParent(); 1348 BasicBlock *ContBB = 1349 OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont"); 1350 1351 // Create the block to check the first condition. Put all these blocks at the 1352 // end of the function as they are unlikely to be executed. 1353 BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), 1354 "malloc_ret_null", 1355 OrigBB->getParent()); 1356 1357 // Remove the uncond branch from OrigBB to ContBB, turning it into a cond 1358 // branch on RunningOr. 1359 OrigBB->getTerminator()->eraseFromParent(); 1360 BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); 1361 1362 // Within the NullPtrBlock, we need to emit a comparison and branch for each 1363 // pointer, because some may be null while others are not. 
1364 for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { 1365 Value *GVVal = 1366 new LoadInst(cast<GlobalVariable>(FieldGlobals[i])->getValueType(), 1367 FieldGlobals[i], "tmp", NullPtrBlock); 1368 Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, 1369 Constant::getNullValue(GVVal->getType())); 1370 BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", 1371 OrigBB->getParent()); 1372 BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", 1373 OrigBB->getParent()); 1374 Instruction *BI = BranchInst::Create(FreeBlock, NextBlock, 1375 Cmp, NullPtrBlock); 1376 1377 // Fill in FreeBlock. 1378 CallInst::CreateFree(GVVal, OpBundles, BI); 1379 new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], 1380 FreeBlock); 1381 BranchInst::Create(NextBlock, FreeBlock); 1382 1383 NullPtrBlock = NextBlock; 1384 } 1385 1386 BranchInst::Create(ContBB, NullPtrBlock); 1387 1388 // CI is no longer needed, remove it. 1389 CI->eraseFromParent(); 1390 1391 /// As we process loads, if we can't immediately update all uses of the load, 1392 /// keep track of what scalarized loads are inserted for a given load. 1393 DenseMap<Value *, std::vector<Value *>> InsertedScalarizedValues; 1394 InsertedScalarizedValues[GV] = FieldGlobals; 1395 1396 std::vector<std::pair<PHINode *, unsigned>> PHIsToRewrite; 1397 1398 // Okay, the malloc site is completely handled. All of the uses of GV are now 1399 // loads, and all uses of those loads are simple. Rewrite them to use loads 1400 // of the per-field globals instead. 1401 for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) { 1402 Instruction *User = cast<Instruction>(*UI++); 1403 1404 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1405 RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); 1406 continue; 1407 } 1408 1409 // Must be a store of null. 1410 StoreInst *SI = cast<StoreInst>(User); 1411 assert(isa<ConstantPointerNull>(SI->getOperand(0)) && 1412 "Unexpected heap-sra user!"); 1413 1414 // Insert a store of null into each global. 1415 for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { 1416 Type *ValTy = cast<GlobalValue>(FieldGlobals[i])->getValueType(); 1417 Constant *Null = Constant::getNullValue(ValTy); 1418 new StoreInst(Null, FieldGlobals[i], SI); 1419 } 1420 // Erase the original store. 1421 SI->eraseFromParent(); 1422 } 1423 1424 // While we have PHIs that are interesting to rewrite, do it. 1425 while (!PHIsToRewrite.empty()) { 1426 PHINode *PN = PHIsToRewrite.back().first; 1427 unsigned FieldNo = PHIsToRewrite.back().second; 1428 PHIsToRewrite.pop_back(); 1429 PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); 1430 assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); 1431 1432 // Add all the incoming values. This can materialize more phis. 1433 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1434 Value *InVal = PN->getIncomingValue(i); 1435 InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, 1436 PHIsToRewrite); 1437 FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); 1438 } 1439 } 1440 1441 // Drop all inter-phi links and any loads that made it this far. 
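  // Dropping the references first lets the dead phis and loads be erased below
  // in any order, without tripping over uses they still have of one another.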
1442 for (DenseMap<Value *, std::vector<Value *>>::iterator 1443 I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); 1444 I != E; ++I) { 1445 if (PHINode *PN = dyn_cast<PHINode>(I->first)) 1446 PN->dropAllReferences(); 1447 else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) 1448 LI->dropAllReferences(); 1449 } 1450 1451 // Delete all the phis and loads now that inter-references are dead. 1452 for (DenseMap<Value *, std::vector<Value *>>::iterator 1453 I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); 1454 I != E; ++I) { 1455 if (PHINode *PN = dyn_cast<PHINode>(I->first)) 1456 PN->eraseFromParent(); 1457 else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) 1458 LI->eraseFromParent(); 1459 } 1460 1461 // The old global is now dead, remove it. 1462 GV->eraseFromParent(); 1463 1464 ++NumHeapSRA; 1465 return cast<GlobalVariable>(FieldGlobals[0]); 1466 } 1467 1468 /// This function is called when we see a pointer global variable with a single 1469 /// value stored it that is a malloc or cast of malloc. 1470 static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, 1471 Type *AllocTy, 1472 AtomicOrdering Ordering, 1473 const DataLayout &DL, 1474 TargetLibraryInfo *TLI) { 1475 // If this is a malloc of an abstract type, don't touch it. 1476 if (!AllocTy->isSized()) 1477 return false; 1478 1479 // We can't optimize this global unless all uses of it are *known* to be 1480 // of the malloc value, not of the null initializer value (consider a use 1481 // that compares the global's value against zero to see if the malloc has 1482 // been reached). To do this, we check to see if all uses of the global 1483 // would trap if the global were null: this proves that they must all 1484 // happen after the malloc. 1485 if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) 1486 return false; 1487 1488 // We can't optimize this if the malloc itself is used in a complex way, 1489 // for example, being stored into multiple globals. This allows the 1490 // malloc to be stored into the specified global, loaded icmp'd, and 1491 // GEP'd. These are all things we could transform to using the global 1492 // for. 1493 SmallPtrSet<const PHINode*, 8> PHIs; 1494 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) 1495 return false; 1496 1497 // If we have a global that is only initialized with a fixed size malloc, 1498 // transform the program to use global memory instead of malloc'd memory. 1499 // This eliminates dynamic allocation, avoids an indirection accessing the 1500 // data, and exposes the resultant global to further GlobalOpt. 1501 // We cannot optimize the malloc if we cannot determine malloc array size. 1502 Value *NElems = getMallocArraySize(CI, DL, TLI, true); 1503 if (!NElems) 1504 return false; 1505 1506 if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) 1507 // Restrict this transformation to only working on small allocations 1508 // (2048 bytes currently), as we don't want to introduce a 16M global or 1509 // something. 1510 if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) { 1511 OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); 1512 return true; 1513 } 1514 1515 // If the allocation is an array of structures, consider transforming this 1516 // into multiple malloc'd arrays, one for each field. This is basically 1517 // SRoA for malloc'd memory. 
1518
1519 if (Ordering != AtomicOrdering::NotAtomic)
1520 return false;
1521
1522 // If this is an allocation of a fixed size array of structs, analyze as a
1523 // variable size array. malloc [100 x struct],1 -> malloc struct, 100
1524 if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
1525 if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
1526 AllocTy = AT->getElementType();
1527
1528 StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
1529 if (!AllocSTy)
1530 return false;
1531
1532 // If the structure has an unreasonable number of fields, leave it
1533 // alone.
1534 if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
1535 AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
1536
1537 // If this is a fixed size array, transform the Malloc to be an alloc of
1538 // structs. malloc [100 x struct],1 -> malloc struct, 100
1539 if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
1540 Type *IntPtrTy = DL.getIntPtrType(CI->getType());
1541 unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
1542 Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
1543 Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
1544 SmallVector<OperandBundleDef, 1> OpBundles;
1545 CI->getOperandBundlesAsDefs(OpBundles);
1546 Instruction *Malloc =
1547 CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements,
1548 OpBundles, nullptr, CI->getName());
1549 Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
1550 CI->replaceAllUsesWith(Cast);
1551 CI->eraseFromParent();
1552 if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
1553 CI = cast<CallInst>(BCI->getOperand(0));
1554 else
1555 CI = cast<CallInst>(Malloc);
1556 }
1557
1558 PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
1559 TLI);
1560 return true;
1561 }
1562
1563 return false;
1564 }
1565
1566 // Try to optimize globals based on the knowledge that only one value (besides
1567 // its initializer) is ever stored to the global.
1568 static bool
1569 optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
1570 AtomicOrdering Ordering, const DataLayout &DL,
1571 function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
1572 // Ignore no-op GEPs and bitcasts.
1573 StoredOnceVal = StoredOnceVal->stripPointerCasts();
1574
1575 // If we are dealing with a pointer global that is initialized to null and
1576 // only has one (non-null) value stored into it, then we can optimize any
1577 // users of the loaded value (often calls and loads) that would trap if the
1578 // value was null.
1579 if (GV->getInitializer()->getType()->isPointerTy() &&
1580 GV->getInitializer()->isNullValue() &&
1581 !NullPointerIsDefined(
1582 nullptr /* F */,
1583 GV->getInitializer()->getType()->getPointerAddressSpace())) {
1584 if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
1585 if (GV->getInitializer()->getType() != SOVC->getType())
1586 SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
1587
1588 // Optimize away any trapping uses of the loaded value.
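// Illustrative sketch (hypothetical IR): with
//   @fp = internal global void ()* null
// and a single store of @impl to @fp, an indirect call such as
//   %f = load void ()*, void ()** @fp
//   call void %f()
// would trap if @fp still held null, so the loaded value can be assumed to
// be @impl and the call devirtualized.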
1589 if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI)) 1590 return true; 1591 } else if (CallInst *CI = extractMallocCall(StoredOnceVal, GetTLI)) { 1592 auto *TLI = &GetTLI(*CI->getFunction()); 1593 Type *MallocType = getMallocAllocatedType(CI, TLI); 1594 if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, 1595 Ordering, DL, TLI)) 1596 return true; 1597 } 1598 } 1599 1600 return false; 1601 } 1602 1603 /// At this point, we have learned that the only two values ever stored into GV 1604 /// are its initializer and OtherVal. See if we can shrink the global into a 1605 /// boolean and select between the two values whenever it is used. This exposes 1606 /// the values to other scalar optimizations. 1607 static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { 1608 Type *GVElType = GV->getValueType(); 1609 1610 // If GVElType is already i1, it is already shrunk. If the type of the GV is 1611 // an FP value, pointer or vector, don't do this optimization because a select 1612 // between them is very expensive and unlikely to lead to later 1613 // simplification. In these cases, we typically end up with "cond ? v1 : v2" 1614 // where v1 and v2 both require constant pool loads, a big loss. 1615 if (GVElType == Type::getInt1Ty(GV->getContext()) || 1616 GVElType->isFloatingPointTy() || 1617 GVElType->isPointerTy() || GVElType->isVectorTy()) 1618 return false; 1619 1620 // Walk the use list of the global seeing if all the uses are load or store. 1621 // If there is anything else, bail out. 1622 for (User *U : GV->users()) 1623 if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) 1624 return false; 1625 1626 LLVM_DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n"); 1627 1628 // Create the new global, initializing it to false. 1629 GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), 1630 false, 1631 GlobalValue::InternalLinkage, 1632 ConstantInt::getFalse(GV->getContext()), 1633 GV->getName()+".b", 1634 GV->getThreadLocalMode(), 1635 GV->getType()->getAddressSpace()); 1636 NewGV->copyAttributesFrom(GV); 1637 GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV); 1638 1639 Constant *InitVal = GV->getInitializer(); 1640 assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && 1641 "No reason to shrink to bool!"); 1642 1643 SmallVector<DIGlobalVariableExpression *, 1> GVs; 1644 GV->getDebugInfo(GVs); 1645 1646 // If initialized to zero and storing one into the global, we can use a cast 1647 // instead of a select to synthesize the desired value. 1648 bool IsOneZero = false; 1649 bool EmitOneOrZero = true; 1650 auto *CI = dyn_cast<ConstantInt>(OtherVal); 1651 if (CI && CI->getValue().getActiveBits() <= 64) { 1652 IsOneZero = InitVal->isNullValue() && CI->isOne(); 1653 1654 auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer()); 1655 if (CIInit && CIInit->getValue().getActiveBits() <= 64) { 1656 uint64_t ValInit = CIInit->getZExtValue(); 1657 uint64_t ValOther = CI->getZExtValue(); 1658 uint64_t ValMinus = ValOther - ValInit; 1659 1660 for(auto *GVe : GVs){ 1661 DIGlobalVariable *DGV = GVe->getVariable(); 1662 DIExpression *E = GVe->getExpression(); 1663 const DataLayout &DL = GV->getParent()->getDataLayout(); 1664 unsigned SizeInOctets = 1665 DL.getTypeAllocSizeInBits(NewGV->getType()->getElementType()) / 8; 1666 1667 // It is expected that the address of global optimized variable is on 1668 // top of the stack. 
After optimization, value of that variable will 1669 // be ether 0 for initial value or 1 for other value. The following 1670 // expression should return constant integer value depending on the 1671 // value at global object address: 1672 // val * (ValOther - ValInit) + ValInit: 1673 // DW_OP_deref DW_OP_constu <ValMinus> 1674 // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value 1675 SmallVector<uint64_t, 12> Ops = { 1676 dwarf::DW_OP_deref_size, SizeInOctets, 1677 dwarf::DW_OP_constu, ValMinus, 1678 dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit, 1679 dwarf::DW_OP_plus}; 1680 bool WithStackValue = true; 1681 E = DIExpression::prependOpcodes(E, Ops, WithStackValue); 1682 DIGlobalVariableExpression *DGVE = 1683 DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E); 1684 NewGV->addDebugInfo(DGVE); 1685 } 1686 EmitOneOrZero = false; 1687 } 1688 } 1689 1690 if (EmitOneOrZero) { 1691 // FIXME: This will only emit address for debugger on which will 1692 // be written only 0 or 1. 1693 for(auto *GV : GVs) 1694 NewGV->addDebugInfo(GV); 1695 } 1696 1697 while (!GV->use_empty()) { 1698 Instruction *UI = cast<Instruction>(GV->user_back()); 1699 if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { 1700 // Change the store into a boolean store. 1701 bool StoringOther = SI->getOperand(0) == OtherVal; 1702 // Only do this if we weren't storing a loaded value. 1703 Value *StoreVal; 1704 if (StoringOther || SI->getOperand(0) == InitVal) { 1705 StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), 1706 StoringOther); 1707 } else { 1708 // Otherwise, we are storing a previously loaded copy. To do this, 1709 // change the copy from copying the original value to just copying the 1710 // bool. 1711 Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); 1712 1713 // If we've already replaced the input, StoredVal will be a cast or 1714 // select instruction. If not, it will be a load of the original 1715 // global. 1716 if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { 1717 assert(LI->getOperand(0) == GV && "Not a copy!"); 1718 // Insert a new load, to preserve the saved value. 1719 StoreVal = new LoadInst(NewGV->getValueType(), NewGV, 1720 LI->getName() + ".b", false, None, 1721 LI->getOrdering(), LI->getSyncScopeID(), LI); 1722 } else { 1723 assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && 1724 "This is not a form that we understand!"); 1725 StoreVal = StoredVal->getOperand(0); 1726 assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); 1727 } 1728 } 1729 StoreInst *NSI = 1730 new StoreInst(StoreVal, NewGV, false, None, SI->getOrdering(), 1731 SI->getSyncScopeID(), SI); 1732 NSI->setDebugLoc(SI->getDebugLoc()); 1733 } else { 1734 // Change the load into a load of bool then a select. 1735 LoadInst *LI = cast<LoadInst>(UI); 1736 LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV, 1737 LI->getName() + ".b", false, None, 1738 LI->getOrdering(), LI->getSyncScopeID(), LI); 1739 Instruction *NSI; 1740 if (IsOneZero) 1741 NSI = new ZExtInst(NLI, LI->getType(), "", LI); 1742 else 1743 NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI); 1744 NSI->takeName(LI); 1745 // Since LI is split into two instructions, NLI and NSI both inherit the 1746 // same DebugLoc 1747 NLI->setDebugLoc(LI->getDebugLoc()); 1748 NSI->setDebugLoc(LI->getDebugLoc()); 1749 LI->replaceAllUsesWith(NSI); 1750 } 1751 UI->eraseFromParent(); 1752 } 1753 1754 // Retain the name of the old global variable. 
People who are debugging their 1755 // programs may expect these variables to be named the same. 1756 NewGV->takeName(GV); 1757 GV->eraseFromParent(); 1758 return true; 1759 } 1760 1761 static bool deleteIfDead( 1762 GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { 1763 GV.removeDeadConstantUsers(); 1764 1765 if (!GV.isDiscardableIfUnused() && !GV.isDeclaration()) 1766 return false; 1767 1768 if (const Comdat *C = GV.getComdat()) 1769 if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C)) 1770 return false; 1771 1772 bool Dead; 1773 if (auto *F = dyn_cast<Function>(&GV)) 1774 Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead(); 1775 else 1776 Dead = GV.use_empty(); 1777 if (!Dead) 1778 return false; 1779 1780 LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n"); 1781 GV.eraseFromParent(); 1782 ++NumDeleted; 1783 return true; 1784 } 1785 1786 static bool isPointerValueDeadOnEntryToFunction( 1787 const Function *F, GlobalValue *GV, 1788 function_ref<DominatorTree &(Function &)> LookupDomTree) { 1789 // Find all uses of GV. We expect them all to be in F, and if we can't 1790 // identify any of the uses we bail out. 1791 // 1792 // On each of these uses, identify if the memory that GV points to is 1793 // used/required/live at the start of the function. If it is not, for example 1794 // if the first thing the function does is store to the GV, the GV can 1795 // possibly be demoted. 1796 // 1797 // We don't do an exhaustive search for memory operations - simply look 1798 // through bitcasts as they're quite common and benign. 1799 const DataLayout &DL = GV->getParent()->getDataLayout(); 1800 SmallVector<LoadInst *, 4> Loads; 1801 SmallVector<StoreInst *, 4> Stores; 1802 for (auto *U : GV->users()) { 1803 if (Operator::getOpcode(U) == Instruction::BitCast) { 1804 for (auto *UU : U->users()) { 1805 if (auto *LI = dyn_cast<LoadInst>(UU)) 1806 Loads.push_back(LI); 1807 else if (auto *SI = dyn_cast<StoreInst>(UU)) 1808 Stores.push_back(SI); 1809 else 1810 return false; 1811 } 1812 continue; 1813 } 1814 1815 Instruction *I = dyn_cast<Instruction>(U); 1816 if (!I) 1817 return false; 1818 assert(I->getParent()->getParent() == F); 1819 1820 if (auto *LI = dyn_cast<LoadInst>(I)) 1821 Loads.push_back(LI); 1822 else if (auto *SI = dyn_cast<StoreInst>(I)) 1823 Stores.push_back(SI); 1824 else 1825 return false; 1826 } 1827 1828 // We have identified all uses of GV into loads and stores. Now check if all 1829 // of them are known not to depend on the value of the global at the function 1830 // entry point. We do this by ensuring that every load is dominated by at 1831 // least one store. 1832 auto &DT = LookupDomTree(*const_cast<Function *>(F)); 1833 1834 // The below check is quadratic. Check we're not going to do too many tests. 1835 // FIXME: Even though this will always have worst-case quadratic time, we 1836 // could put effort into minimizing the average time by putting stores that 1837 // have been shown to dominate at least one load at the beginning of the 1838 // Stores array, making subsequent dominance checks more likely to succeed 1839 // early. 1840 // 1841 // The threshold here is fairly large because global->local demotion is a 1842 // very powerful optimization should it fire. 
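// For a sense of scale (illustrative): with the threshold of 100 used below,
// a global with 10 loads and 10 stores (100 dominance queries) is still
// analyzed, while one with 20 loads and 20 stores (400 queries) is not.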
1843 const unsigned Threshold = 100; 1844 if (Loads.size() * Stores.size() > Threshold) 1845 return false; 1846 1847 for (auto *L : Loads) { 1848 auto *LTy = L->getType(); 1849 if (none_of(Stores, [&](const StoreInst *S) { 1850 auto *STy = S->getValueOperand()->getType(); 1851 // The load is only dominated by the store if DomTree says so 1852 // and the number of bits loaded in L is less than or equal to 1853 // the number of bits stored in S. 1854 return DT.dominates(S, L) && 1855 DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy); 1856 })) 1857 return false; 1858 } 1859 // All loads have known dependences inside F, so the global can be localized. 1860 return true; 1861 } 1862 1863 /// C may have non-instruction users. Can all of those users be turned into 1864 /// instructions? 1865 static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) { 1866 // We don't do this exhaustively. The most common pattern that we really need 1867 // to care about is a constant GEP or constant bitcast - so just looking 1868 // through one single ConstantExpr. 1869 // 1870 // The set of constants that this function returns true for must be able to be 1871 // handled by makeAllConstantUsesInstructions. 1872 for (auto *U : C->users()) { 1873 if (isa<Instruction>(U)) 1874 continue; 1875 if (!isa<ConstantExpr>(U)) 1876 // Non instruction, non-constantexpr user; cannot convert this. 1877 return false; 1878 for (auto *UU : U->users()) 1879 if (!isa<Instruction>(UU)) 1880 // A constantexpr used by another constant. We don't try and recurse any 1881 // further but just bail out at this point. 1882 return false; 1883 } 1884 1885 return true; 1886 } 1887 1888 /// C may have non-instruction users, and 1889 /// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the 1890 /// non-instruction users to instructions. 1891 static void makeAllConstantUsesInstructions(Constant *C) { 1892 SmallVector<ConstantExpr*,4> Users; 1893 for (auto *U : C->users()) { 1894 if (isa<ConstantExpr>(U)) 1895 Users.push_back(cast<ConstantExpr>(U)); 1896 else 1897 // We should never get here; allNonInstructionUsersCanBeMadeInstructions 1898 // should not have returned true for C. 1899 assert( 1900 isa<Instruction>(U) && 1901 "Can't transform non-constantexpr non-instruction to instruction!"); 1902 } 1903 1904 SmallVector<Value*,4> UUsers; 1905 for (auto *U : Users) { 1906 UUsers.clear(); 1907 for (auto *UU : U->users()) 1908 UUsers.push_back(UU); 1909 for (auto *UU : UUsers) { 1910 Instruction *UI = cast<Instruction>(UU); 1911 Instruction *NewU = U->getAsInstruction(); 1912 NewU->insertBefore(UI); 1913 UI->replaceUsesOfWith(U, NewU); 1914 } 1915 // We've replaced all the uses, so destroy the constant. (destroyConstant 1916 // will update value handles and metadata.) 1917 U->destroyConstant(); 1918 } 1919 } 1920 1921 /// Analyze the specified global variable and optimize 1922 /// it if possible. If we make a change, return true. 1923 static bool 1924 processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, 1925 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 1926 function_ref<DominatorTree &(Function &)> LookupDomTree) { 1927 auto &DL = GV->getParent()->getDataLayout(); 1928 // If this is a first class global and has only one accessing function and 1929 // this function is non-recursive, we replace the global with a local alloca 1930 // in this function. 1931 // 1932 // NOTE: It doesn't make sense to promote non-single-value types since we 1933 // are just replacing static memory to stack memory. 
1934 // 1935 // If the global is in different address space, don't bring it to stack. 1936 if (!GS.HasMultipleAccessingFunctions && 1937 GS.AccessingFunction && 1938 GV->getValueType()->isSingleValueType() && 1939 GV->getType()->getAddressSpace() == 0 && 1940 !GV->isExternallyInitialized() && 1941 allNonInstructionUsersCanBeMadeInstructions(GV) && 1942 GS.AccessingFunction->doesNotRecurse() && 1943 isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV, 1944 LookupDomTree)) { 1945 const DataLayout &DL = GV->getParent()->getDataLayout(); 1946 1947 LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); 1948 Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction 1949 ->getEntryBlock().begin()); 1950 Type *ElemTy = GV->getValueType(); 1951 // FIXME: Pass Global's alignment when globals have alignment 1952 AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr, 1953 GV->getName(), &FirstI); 1954 if (!isa<UndefValue>(GV->getInitializer())) 1955 new StoreInst(GV->getInitializer(), Alloca, &FirstI); 1956 1957 makeAllConstantUsesInstructions(GV); 1958 1959 GV->replaceAllUsesWith(Alloca); 1960 GV->eraseFromParent(); 1961 ++NumLocalized; 1962 return true; 1963 } 1964 1965 // If the global is never loaded (but may be stored to), it is dead. 1966 // Delete it now. 1967 if (!GS.IsLoaded) { 1968 LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); 1969 1970 bool Changed; 1971 if (isLeakCheckerRoot(GV)) { 1972 // Delete any constant stores to the global. 1973 Changed = CleanupPointerRootUsers(GV, GetTLI); 1974 } else { 1975 // Delete any stores we can find to the global. We may not be able to 1976 // make it completely dead though. 1977 Changed = 1978 CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); 1979 } 1980 1981 // If the global is dead now, delete it. 1982 if (GV->use_empty()) { 1983 GV->eraseFromParent(); 1984 ++NumDeleted; 1985 Changed = true; 1986 } 1987 return Changed; 1988 1989 } 1990 if (GS.StoredType <= GlobalStatus::InitializerStored) { 1991 LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n"); 1992 1993 // Don't actually mark a global constant if it's atomic because atomic loads 1994 // are implemented by a trivial cmpxchg in some edge-cases and that usually 1995 // requires write access to the variable even if it's not actually changed. 1996 if (GS.Ordering == AtomicOrdering::NotAtomic) 1997 GV->setConstant(true); 1998 1999 // Clean up any obviously simplifiable users now. 2000 CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); 2001 2002 // If the global is dead now, just nuke it. 2003 if (GV->use_empty()) { 2004 LLVM_DEBUG(dbgs() << " *** Marking constant allowed us to simplify " 2005 << "all users and delete global!\n"); 2006 GV->eraseFromParent(); 2007 ++NumDeleted; 2008 return true; 2009 } 2010 2011 // Fall through to the next check; see if we can optimize further. 2012 ++NumMarked; 2013 } 2014 if (!GV->getInitializer()->getType()->isSingleValueType()) { 2015 const DataLayout &DL = GV->getParent()->getDataLayout(); 2016 if (SRAGlobal(GV, DL)) 2017 return true; 2018 } 2019 if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) { 2020 // If the initial value for the global was an undef value, and if only 2021 // one other value was stored into it, we can just change the 2022 // initializer to be the stored value, then delete all stores to the 2023 // global. This allows us to mark it constant. 
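// A minimal sketch of that case (hypothetical IR):
//   @g = internal global i32 undef
//   ...
//   store i32 42, i32* @g        ; the only store
// becomes
//   @g = internal global i32 42
// after which the store can be deleted and @g treated as constant.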
2024 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) 2025 if (isa<UndefValue>(GV->getInitializer())) { 2026 // Change the initial value here. 2027 GV->setInitializer(SOVConstant); 2028 2029 // Clean up any obviously simplifiable users now. 2030 CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); 2031 2032 if (GV->use_empty()) { 2033 LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to " 2034 << "simplify all users and delete global!\n"); 2035 GV->eraseFromParent(); 2036 ++NumDeleted; 2037 } 2038 ++NumSubstitute; 2039 return true; 2040 } 2041 2042 // Try to optimize globals based on the knowledge that only one value 2043 // (besides its initializer) is ever stored to the global. 2044 if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, 2045 GetTLI)) 2046 return true; 2047 2048 // Otherwise, if the global was not a boolean, we can shrink it to be a 2049 // boolean. 2050 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) { 2051 if (GS.Ordering == AtomicOrdering::NotAtomic) { 2052 if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { 2053 ++NumShrunkToBool; 2054 return true; 2055 } 2056 } 2057 } 2058 } 2059 2060 return false; 2061 } 2062 2063 /// Analyze the specified global variable and optimize it if possible. If we 2064 /// make a change, return true. 2065 static bool 2066 processGlobal(GlobalValue &GV, 2067 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 2068 function_ref<DominatorTree &(Function &)> LookupDomTree) { 2069 if (GV.getName().startswith("llvm.")) 2070 return false; 2071 2072 GlobalStatus GS; 2073 2074 if (GlobalStatus::analyzeGlobal(&GV, GS)) 2075 return false; 2076 2077 bool Changed = false; 2078 if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) { 2079 auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global 2080 : GlobalValue::UnnamedAddr::Local; 2081 if (NewUnnamedAddr != GV.getUnnamedAddr()) { 2082 GV.setUnnamedAddr(NewUnnamedAddr); 2083 NumUnnamed++; 2084 Changed = true; 2085 } 2086 } 2087 2088 // Do more involved optimizations if the global is internal. 2089 if (!GV.hasLocalLinkage()) 2090 return Changed; 2091 2092 auto *GVar = dyn_cast<GlobalVariable>(&GV); 2093 if (!GVar) 2094 return Changed; 2095 2096 if (GVar->isConstant() || !GVar->hasInitializer()) 2097 return Changed; 2098 2099 return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed; 2100 } 2101 2102 /// Walk all of the direct calls of the specified function, changing them to 2103 /// FastCC. 2104 static void ChangeCalleesToFastCall(Function *F) { 2105 for (User *U : F->users()) { 2106 if (isa<BlockAddress>(U)) 2107 continue; 2108 CallSite CS(cast<Instruction>(U)); 2109 CS.setCallingConv(CallingConv::Fast); 2110 } 2111 } 2112 2113 static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs, 2114 Attribute::AttrKind A) { 2115 unsigned AttrIndex; 2116 if (Attrs.hasAttrSomewhere(A, &AttrIndex)) 2117 return Attrs.removeAttribute(C, AttrIndex, A); 2118 return Attrs; 2119 } 2120 2121 static void RemoveAttribute(Function *F, Attribute::AttrKind A) { 2122 F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A)); 2123 for (User *U : F->users()) { 2124 if (isa<BlockAddress>(U)) 2125 continue; 2126 CallSite CS(cast<Instruction>(U)); 2127 CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A)); 2128 } 2129 } 2130 2131 /// Return true if this is a calling convention that we'd like to change. 
The
2132 /// idea here is that we don't want to mess with the convention if the user
2133 /// explicitly requested something with performance implications like coldcc,
2134 /// GHC, or anyregcc.
2135 static bool hasChangeableCC(Function *F) {
2136 CallingConv::ID CC = F->getCallingConv();
2137
2138 // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
2139 if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
2140 return false;
2141
2142 // FIXME: Change CC for the whole chain of musttail calls when possible.
2143 //
2144 // Can't change CC of the function that either has musttail calls, or is a
2145 // musttail callee itself.
2146 for (User *U : F->users()) {
2147 if (isa<BlockAddress>(U))
2148 continue;
2149 CallInst* CI = dyn_cast<CallInst>(U);
2150 if (!CI)
2151 continue;
2152
2153 if (CI->isMustTailCall())
2154 return false;
2155 }
2156
2157 for (BasicBlock &BB : *F)
2158 if (BB.getTerminatingMustTailCall())
2159 return false;
2160
2161 return true;
2162 }
2163
2164 /// Return true if the block containing the call site has a BlockFrequency of
2165 /// less than ColdCCRelFreq% of the entry block.
2166 static bool isColdCallSite(CallSite CS, BlockFrequencyInfo &CallerBFI) {
2167 const BranchProbability ColdProb(ColdCCRelFreq, 100);
2168 auto CallSiteBB = CS.getInstruction()->getParent();
2169 auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
2170 auto CallerEntryFreq =
2171 CallerBFI.getBlockFreq(&(CS.getCaller()->getEntryBlock()));
2172 return CallSiteFreq < CallerEntryFreq * ColdProb;
2173 }
2174
2175 // This function checks if the input function F is cold at all call sites. It
2176 // also looks at each call site's containing function, returning false if the
2177 // caller function contains other non-cold calls. The input vector AllCallsCold
2178 // contains a list of functions that only have call sites in cold blocks.
2179 static bool
2180 isValidCandidateForColdCC(Function &F,
2181 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2182 const std::vector<Function *> &AllCallsCold) {
2183
2184 if (F.user_empty())
2185 return false;
2186
2187 for (User *U : F.users()) {
2188 if (isa<BlockAddress>(U))
2189 continue;
2190
2191 CallSite CS(cast<Instruction>(U));
2192 Function *CallerFunc = CS.getInstruction()->getParent()->getParent();
2193 BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
2194 if (!isColdCallSite(CS, CallerBFI))
2195 return false;
2196 auto It = std::find(AllCallsCold.begin(), AllCallsCold.end(), CallerFunc);
2197 if (It == AllCallsCold.end())
2198 return false;
2199 }
2200 return true;
2201 }
2202
2203 static void changeCallSitesToColdCC(Function *F) {
2204 for (User *U : F->users()) {
2205 if (isa<BlockAddress>(U))
2206 continue;
2207 CallSite CS(cast<Instruction>(U));
2208 CS.setCallingConv(CallingConv::Cold);
2209 }
2210 }
2211
2212 // This function iterates over all the call instructions in the input Function
2213 // and checks that all call sites are in cold blocks and are allowed to use the
2214 // coldcc calling convention.
2215 static bool
2216 hasOnlyColdCalls(Function &F,
2217 function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
2218 for (BasicBlock &BB : F) {
2219 for (Instruction &I : BB) {
2220 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2221 CallSite CS(cast<Instruction>(CI));
2222 // Skip over inline asm instructions since they aren't function calls.
2223 if (CI->isInlineAsm())
2224 continue;
2225 Function *CalledFn = CI->getCalledFunction();
2226 if (!CalledFn)
2227 return false;
2228 if (!CalledFn->hasLocalLinkage())
2229 return false;
2230 // Skip over intrinsics since they won't remain as function calls.
2231 if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
2232 continue;
2233 // Check if it's valid to use coldcc calling convention.
2234 if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
2235 CalledFn->hasAddressTaken())
2236 return false;
2237 BlockFrequencyInfo &CallerBFI = GetBFI(F);
2238 if (!isColdCallSite(CS, CallerBFI))
2239 return false;
2240 }
2241 }
2242 }
2243 return true;
2244 }
2245
2246 static bool
2247 OptimizeFunctions(Module &M,
2248 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2249 function_ref<TargetTransformInfo &(Function &)> GetTTI,
2250 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2251 function_ref<DominatorTree &(Function &)> LookupDomTree,
2252 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2253
2254 bool Changed = false;
2255
2256 std::vector<Function *> AllCallsCold;
2257 for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) {
2258 Function *F = &*FI++;
2259 if (hasOnlyColdCalls(*F, GetBFI))
2260 AllCallsCold.push_back(F);
2261 }
2262
2263 // Optimize functions.
2264 for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
2265 Function *F = &*FI++;
2266
2267 // Don't perform global opt pass on naked functions; we don't want fast
2268 // calling conventions for naked functions.
2269 if (F->hasFnAttribute(Attribute::Naked))
2270 continue;
2271
2272 // Functions without names cannot be referenced outside this module.
2273 if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
2274 F->setLinkage(GlobalValue::InternalLinkage);
2275
2276 if (deleteIfDead(*F, NotDiscardableComdats)) {
2277 Changed = true;
2278 continue;
2279 }
2280
2281 // LLVM's definition of dominance allows instructions that are cyclic
2282 // in unreachable blocks, e.g.:
2283 // %pat = select i1 %condition, @global, i16* %pat
2284 // because any instruction dominates an instruction in a block that's
2285 // not reachable from entry.
2286 // So, remove unreachable blocks from the function, because a) there's
2287 // no point in analyzing them and b) GlobalOpt should otherwise grow
2288 // some more complicated logic to break these cycles.
2289 if (!F->isDeclaration()) {
2290 auto &DT = LookupDomTree(*F);
2291 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
2292 Changed |= removeUnreachableBlocks(*F, &DTU);
2293 }
2294
2295 Changed |= processGlobal(*F, GetTLI, LookupDomTree);
2296
2297 if (!F->hasLocalLinkage())
2298 continue;
2299
2300 // If we have an inalloca parameter that we can safely remove the
2301 // inalloca attribute from, do so. This unlocks optimizations that
2302 // wouldn't be safe in the presence of inalloca.
2303 // FIXME: We should also hoist allocas affected by this to the entry
2304 // block if possible.
2305 if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) && 2306 !F->hasAddressTaken()) { 2307 RemoveAttribute(F, Attribute::InAlloca); 2308 Changed = true; 2309 } 2310 2311 if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) { 2312 NumInternalFunc++; 2313 TargetTransformInfo &TTI = GetTTI(*F); 2314 // Change the calling convention to coldcc if either stress testing is 2315 // enabled or the target would like to use coldcc on functions which are 2316 // cold at all call sites and the callers contain no other non coldcc 2317 // calls. 2318 if (EnableColdCCStressTest || 2319 (TTI.useColdCCForColdCall(*F) && 2320 isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) { 2321 F->setCallingConv(CallingConv::Cold); 2322 changeCallSitesToColdCC(F); 2323 Changed = true; 2324 NumColdCC++; 2325 } 2326 } 2327 2328 if (hasChangeableCC(F) && !F->isVarArg() && 2329 !F->hasAddressTaken()) { 2330 // If this function has a calling convention worth changing, is not a 2331 // varargs function, and is only called directly, promote it to use the 2332 // Fast calling convention. 2333 F->setCallingConv(CallingConv::Fast); 2334 ChangeCalleesToFastCall(F); 2335 ++NumFastCallFns; 2336 Changed = true; 2337 } 2338 2339 if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && 2340 !F->hasAddressTaken()) { 2341 // The function is not used by a trampoline intrinsic, so it is safe 2342 // to remove the 'nest' attribute. 2343 RemoveAttribute(F, Attribute::Nest); 2344 ++NumNestRemoved; 2345 Changed = true; 2346 } 2347 } 2348 return Changed; 2349 } 2350 2351 static bool 2352 OptimizeGlobalVars(Module &M, 2353 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 2354 function_ref<DominatorTree &(Function &)> LookupDomTree, 2355 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { 2356 bool Changed = false; 2357 2358 for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); 2359 GVI != E; ) { 2360 GlobalVariable *GV = &*GVI++; 2361 // Global variables without names cannot be referenced outside this module. 2362 if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage()) 2363 GV->setLinkage(GlobalValue::InternalLinkage); 2364 // Simplify the initializer. 2365 if (GV->hasInitializer()) 2366 if (auto *C = dyn_cast<Constant>(GV->getInitializer())) { 2367 auto &DL = M.getDataLayout(); 2368 // TLI is not used in the case of a Constant, so use default nullptr 2369 // for that optional parameter, since we don't have a Function to 2370 // provide GetTLI anyway. 2371 Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr); 2372 if (New && New != C) 2373 GV->setInitializer(New); 2374 } 2375 2376 if (deleteIfDead(*GV, NotDiscardableComdats)) { 2377 Changed = true; 2378 continue; 2379 } 2380 2381 Changed |= processGlobal(*GV, GetTLI, LookupDomTree); 2382 } 2383 return Changed; 2384 } 2385 2386 /// Evaluate a piece of a constantexpr store into a global initializer. This 2387 /// returns 'Init' modified to reflect 'Val' stored into it. At this point, the 2388 /// GEP operands of Addr [0, OpNo) have been stepped into. 2389 static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, 2390 ConstantExpr *Addr, unsigned OpNo) { 2391 // Base case of the recursion. 2392 if (OpNo == Addr->getNumOperands()) { 2393 assert(Val->getType() == Init->getType() && "Type mismatch!"); 2394 return Val; 2395 } 2396 2397 SmallVector<Constant*, 32> Elts; 2398 if (StructType *STy = dyn_cast<StructType>(Init->getType())) { 2399 // Break up the constant into its elements. 
2400 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
2401 Elts.push_back(Init->getAggregateElement(i));
2402
2403 // Replace the element that we are supposed to.
2404 ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
2405 unsigned Idx = CU->getZExtValue();
2406 assert(Idx < STy->getNumElements() && "Struct index out of range!");
2407 Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
2408
2409 // Return the modified struct.
2410 return ConstantStruct::get(STy, Elts);
2411 }
2412
2413 ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
2414 SequentialType *InitTy = cast<SequentialType>(Init->getType());
2415 uint64_t NumElts = InitTy->getNumElements();
2416
2417 // Break up the array into elements.
2418 for (uint64_t i = 0, e = NumElts; i != e; ++i)
2419 Elts.push_back(Init->getAggregateElement(i));
2420
2421 assert(CI->getZExtValue() < NumElts);
2422 Elts[CI->getZExtValue()] =
2423 EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
2424
2425 if (Init->getType()->isArrayTy())
2426 return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
2427 return ConstantVector::get(Elts);
2428 }
2429
2430 /// We have decided that Addr (which satisfies the predicate
2431 /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
2432 static void CommitValueTo(Constant *Val, Constant *Addr) {
2433 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
2434 assert(GV->hasInitializer());
2435 GV->setInitializer(Val);
2436 return;
2437 }
2438
2439 ConstantExpr *CE = cast<ConstantExpr>(Addr);
2440 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2441 GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
2442 }
2443
2444 /// Given a map of address -> value, where addresses are expected to be some form
2445 /// of either a global or a constant GEP, set the initializer for the address to
2446 /// be the value. This performs mostly the same function as CommitValueTo()
2447 /// and EvaluateStoreInto() but is optimized to be more efficient for the common
2448 /// case where the set of addresses are GEPs sharing the same underlying global,
2449 /// processing the GEPs in batches rather than individually.
2450 ///
2451 /// To give an example, consider the following C++ code adapted from the clang
2452 /// regression tests:
2453 /// struct S {
2454 /// int n = 10;
2455 /// int m = 2 * n;
2456 /// S(int a) : n(a) {}
2457 /// };
2458 ///
2459 /// template<typename T>
2460 /// struct U {
2461 /// T *r = &q;
2462 /// T q = 42;
2463 /// U *p = this;
2464 /// };
2465 ///
2466 /// U<S> e;
2467 ///
2468 /// The global static constructor for 'e' will need to initialize 'r' and 'p' of
2469 /// the outer struct, while also initializing the inner 'q' struct's 'n' and 'm'
2470 /// members. This batch algorithm will simply use the general CommitValueTo()
2471 /// method to handle the complex nested S struct initialization of 'q', before
2472 /// processing the outermost members in a single batch. Using CommitValueTo() to
2473 /// handle members in the outer struct is inefficient when the struct/array is
2474 /// very large, as we end up creating and destroying constant arrays for each
2475 /// initialization.
2476 /// For the above case, we expect the following IR to be generated: 2477 /// 2478 /// %struct.U = type { %struct.S*, %struct.S, %struct.U* } 2479 /// %struct.S = type { i32, i32 } 2480 /// @e = global %struct.U { %struct.S* gep inbounds (%struct.U, %struct.U* @e, 2481 /// i64 0, i32 1), 2482 /// %struct.S { i32 42, i32 84 }, %struct.U* @e } 2483 /// The %struct.S { i32 42, i32 84 } inner initializer is treated as a complex 2484 /// constant expression, while the other two elements of @e are "simple". 2485 static void BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) { 2486 SmallVector<std::pair<GlobalVariable*, Constant*>, 32> GVs; 2487 SmallVector<std::pair<ConstantExpr*, Constant*>, 32> ComplexCEs; 2488 SmallVector<std::pair<ConstantExpr*, Constant*>, 32> SimpleCEs; 2489 SimpleCEs.reserve(Mem.size()); 2490 2491 for (const auto &I : Mem) { 2492 if (auto *GV = dyn_cast<GlobalVariable>(I.first)) { 2493 GVs.push_back(std::make_pair(GV, I.second)); 2494 } else { 2495 ConstantExpr *GEP = cast<ConstantExpr>(I.first); 2496 // We don't handle the deeply recursive case using the batch method. 2497 if (GEP->getNumOperands() > 3) 2498 ComplexCEs.push_back(std::make_pair(GEP, I.second)); 2499 else 2500 SimpleCEs.push_back(std::make_pair(GEP, I.second)); 2501 } 2502 } 2503 2504 // The algorithm below doesn't handle cases like nested structs, so use the 2505 // slower fully general method if we have to. 2506 for (auto ComplexCE : ComplexCEs) 2507 CommitValueTo(ComplexCE.second, ComplexCE.first); 2508 2509 for (auto GVPair : GVs) { 2510 assert(GVPair.first->hasInitializer()); 2511 GVPair.first->setInitializer(GVPair.second); 2512 } 2513 2514 if (SimpleCEs.empty()) 2515 return; 2516 2517 // We cache a single global's initializer elements in the case where the 2518 // subsequent address/val pair uses the same one. This avoids throwing away and 2519 // rebuilding the constant struct/vector/array just because one element is 2520 // modified at a time. 2521 SmallVector<Constant *, 32> Elts; 2522 Elts.reserve(SimpleCEs.size()); 2523 GlobalVariable *CurrentGV = nullptr; 2524 2525 auto commitAndSetupCache = [&](GlobalVariable *GV, bool Update) { 2526 Constant *Init = GV->getInitializer(); 2527 Type *Ty = Init->getType(); 2528 if (Update) { 2529 if (CurrentGV) { 2530 assert(CurrentGV && "Expected a GV to commit to!"); 2531 Type *CurrentInitTy = CurrentGV->getInitializer()->getType(); 2532 // We have a valid cache that needs to be committed. 2533 if (StructType *STy = dyn_cast<StructType>(CurrentInitTy)) 2534 CurrentGV->setInitializer(ConstantStruct::get(STy, Elts)); 2535 else if (ArrayType *ArrTy = dyn_cast<ArrayType>(CurrentInitTy)) 2536 CurrentGV->setInitializer(ConstantArray::get(ArrTy, Elts)); 2537 else 2538 CurrentGV->setInitializer(ConstantVector::get(Elts)); 2539 } 2540 if (CurrentGV == GV) 2541 return; 2542 // Need to clear and set up cache for new initializer. 
2543 CurrentGV = GV; 2544 Elts.clear(); 2545 unsigned NumElts; 2546 if (auto *STy = dyn_cast<StructType>(Ty)) 2547 NumElts = STy->getNumElements(); 2548 else 2549 NumElts = cast<SequentialType>(Ty)->getNumElements(); 2550 for (unsigned i = 0, e = NumElts; i != e; ++i) 2551 Elts.push_back(Init->getAggregateElement(i)); 2552 } 2553 }; 2554 2555 for (auto CEPair : SimpleCEs) { 2556 ConstantExpr *GEP = CEPair.first; 2557 Constant *Val = CEPair.second; 2558 2559 GlobalVariable *GV = cast<GlobalVariable>(GEP->getOperand(0)); 2560 commitAndSetupCache(GV, GV != CurrentGV); 2561 ConstantInt *CI = cast<ConstantInt>(GEP->getOperand(2)); 2562 Elts[CI->getZExtValue()] = Val; 2563 } 2564 // The last initializer in the list needs to be committed, others 2565 // will be committed on a new initializer being processed. 2566 commitAndSetupCache(CurrentGV, true); 2567 } 2568 2569 /// Evaluate static constructors in the function, if we can. Return true if we 2570 /// can, false otherwise. 2571 static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, 2572 TargetLibraryInfo *TLI) { 2573 // Call the function. 2574 Evaluator Eval(DL, TLI); 2575 Constant *RetValDummy; 2576 bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy, 2577 SmallVector<Constant*, 0>()); 2578 2579 if (EvalSuccess) { 2580 ++NumCtorsEvaluated; 2581 2582 // We succeeded at evaluation: commit the result. 2583 LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" 2584 << F->getName() << "' to " 2585 << Eval.getMutatedMemory().size() << " stores.\n"); 2586 BatchCommitValueTo(Eval.getMutatedMemory()); 2587 for (GlobalVariable *GV : Eval.getInvariants()) 2588 GV->setConstant(true); 2589 } 2590 2591 return EvalSuccess; 2592 } 2593 2594 static int compareNames(Constant *const *A, Constant *const *B) { 2595 Value *AStripped = (*A)->stripPointerCasts(); 2596 Value *BStripped = (*B)->stripPointerCasts(); 2597 return AStripped->getName().compare(BStripped->getName()); 2598 } 2599 2600 static void setUsedInitializer(GlobalVariable &V, 2601 const SmallPtrSetImpl<GlobalValue *> &Init) { 2602 if (Init.empty()) { 2603 V.eraseFromParent(); 2604 return; 2605 } 2606 2607 // Type of pointer to the array of pointers. 2608 PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0); 2609 2610 SmallVector<Constant *, 8> UsedArray; 2611 for (GlobalValue *GV : Init) { 2612 Constant *Cast 2613 = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy); 2614 UsedArray.push_back(Cast); 2615 } 2616 // Sort to get deterministic order. 2617 array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames); 2618 ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size()); 2619 2620 Module *M = V.getParent(); 2621 V.removeFromParent(); 2622 GlobalVariable *NV = 2623 new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage, 2624 ConstantArray::get(ATy, UsedArray), ""); 2625 NV->takeName(&V); 2626 NV->setSection("llvm.metadata"); 2627 delete &V; 2628 } 2629 2630 namespace { 2631 2632 /// An easy to access representation of llvm.used and llvm.compiler.used. 
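/// For reference, these arrays look roughly like the following in IR
/// (illustrative):
///   @llvm.used = appending global [1 x i8*]
///       [i8* bitcast (void ()* @f to i8*)], section "llvm.metadata"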
2633 class LLVMUsed { 2634 SmallPtrSet<GlobalValue *, 8> Used; 2635 SmallPtrSet<GlobalValue *, 8> CompilerUsed; 2636 GlobalVariable *UsedV; 2637 GlobalVariable *CompilerUsedV; 2638 2639 public: 2640 LLVMUsed(Module &M) { 2641 UsedV = collectUsedGlobalVariables(M, Used, false); 2642 CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true); 2643 } 2644 2645 using iterator = SmallPtrSet<GlobalValue *, 8>::iterator; 2646 using used_iterator_range = iterator_range<iterator>; 2647 2648 iterator usedBegin() { return Used.begin(); } 2649 iterator usedEnd() { return Used.end(); } 2650 2651 used_iterator_range used() { 2652 return used_iterator_range(usedBegin(), usedEnd()); 2653 } 2654 2655 iterator compilerUsedBegin() { return CompilerUsed.begin(); } 2656 iterator compilerUsedEnd() { return CompilerUsed.end(); } 2657 2658 used_iterator_range compilerUsed() { 2659 return used_iterator_range(compilerUsedBegin(), compilerUsedEnd()); 2660 } 2661 2662 bool usedCount(GlobalValue *GV) const { return Used.count(GV); } 2663 2664 bool compilerUsedCount(GlobalValue *GV) const { 2665 return CompilerUsed.count(GV); 2666 } 2667 2668 bool usedErase(GlobalValue *GV) { return Used.erase(GV); } 2669 bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); } 2670 bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; } 2671 2672 bool compilerUsedInsert(GlobalValue *GV) { 2673 return CompilerUsed.insert(GV).second; 2674 } 2675 2676 void syncVariablesAndSets() { 2677 if (UsedV) 2678 setUsedInitializer(*UsedV, Used); 2679 if (CompilerUsedV) 2680 setUsedInitializer(*CompilerUsedV, CompilerUsed); 2681 } 2682 }; 2683 2684 } // end anonymous namespace 2685 2686 static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) { 2687 if (GA.use_empty()) // No use at all. 2688 return false; 2689 2690 assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) && 2691 "We should have removed the duplicated " 2692 "element from llvm.compiler.used"); 2693 if (!GA.hasOneUse()) 2694 // Strictly more than one use. So at least one is not in llvm.used and 2695 // llvm.compiler.used. 2696 return true; 2697 2698 // Exactly one use. Check if it is in llvm.used or llvm.compiler.used. 2699 return !U.usedCount(&GA) && !U.compilerUsedCount(&GA); 2700 } 2701 2702 static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V, 2703 const LLVMUsed &U) { 2704 unsigned N = 2; 2705 assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) && 2706 "We should have removed the duplicated " 2707 "element from llvm.compiler.used"); 2708 if (U.usedCount(&V) || U.compilerUsedCount(&V)) 2709 ++N; 2710 return V.hasNUsesOrMore(N); 2711 } 2712 2713 static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) { 2714 if (!GA.hasLocalLinkage()) 2715 return true; 2716 2717 return U.usedCount(&GA) || U.compilerUsedCount(&GA); 2718 } 2719 2720 static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U, 2721 bool &RenameTarget) { 2722 RenameTarget = false; 2723 bool Ret = false; 2724 if (hasUseOtherThanLLVMUsed(GA, U)) 2725 Ret = true; 2726 2727 // If the alias is externally visible, we may still be able to simplify it. 2728 if (!mayHaveOtherReferences(GA, U)) 2729 return Ret; 2730 2731 // If the aliasee has internal linkage, give it the name and linkage 2732 // of the alias, and delete the alias. This turns: 2733 // define internal ... @f(...) 2734 // @a = alias ... @f 2735 // into: 2736 // define ... @a(...) 
2737 Constant *Aliasee = GA.getAliasee(); 2738 GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); 2739 if (!Target->hasLocalLinkage()) 2740 return Ret; 2741 2742 // Do not perform the transform if multiple aliases potentially target the 2743 // aliasee. This check also ensures that it is safe to replace the section 2744 // and other attributes of the aliasee with those of the alias. 2745 if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U)) 2746 return Ret; 2747 2748 RenameTarget = true; 2749 return true; 2750 } 2751 2752 static bool 2753 OptimizeGlobalAliases(Module &M, 2754 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { 2755 bool Changed = false; 2756 LLVMUsed Used(M); 2757 2758 for (GlobalValue *GV : Used.used()) 2759 Used.compilerUsedErase(GV); 2760 2761 for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); 2762 I != E;) { 2763 GlobalAlias *J = &*I++; 2764 2765 // Aliases without names cannot be referenced outside this module. 2766 if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage()) 2767 J->setLinkage(GlobalValue::InternalLinkage); 2768 2769 if (deleteIfDead(*J, NotDiscardableComdats)) { 2770 Changed = true; 2771 continue; 2772 } 2773 2774 // If the alias can change at link time, nothing can be done - bail out. 2775 if (J->isInterposable()) 2776 continue; 2777 2778 Constant *Aliasee = J->getAliasee(); 2779 GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts()); 2780 // We can't trivially replace the alias with the aliasee if the aliasee is 2781 // non-trivial in some way. 2782 // TODO: Try to handle non-zero GEPs of local aliasees. 2783 if (!Target) 2784 continue; 2785 Target->removeDeadConstantUsers(); 2786 2787 // Make all users of the alias use the aliasee instead. 2788 bool RenameTarget; 2789 if (!hasUsesToReplace(*J, Used, RenameTarget)) 2790 continue; 2791 2792 J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType())); 2793 ++NumAliasesResolved; 2794 Changed = true; 2795 2796 if (RenameTarget) { 2797 // Give the aliasee the name, linkage and other attributes of the alias. 2798 Target->takeName(&*J); 2799 Target->setLinkage(J->getLinkage()); 2800 Target->setDSOLocal(J->isDSOLocal()); 2801 Target->setVisibility(J->getVisibility()); 2802 Target->setDLLStorageClass(J->getDLLStorageClass()); 2803 2804 if (Used.usedErase(&*J)) 2805 Used.usedInsert(Target); 2806 2807 if (Used.compilerUsedErase(&*J)) 2808 Used.compilerUsedInsert(Target); 2809 } else if (mayHaveOtherReferences(*J, Used)) 2810 continue; 2811 2812 // Delete the alias. 2813 M.getAliasList().erase(J); 2814 ++NumAliasesRemoved; 2815 Changed = true; 2816 } 2817 2818 Used.syncVariablesAndSets(); 2819 2820 return Changed; 2821 } 2822 2823 static Function * 2824 FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { 2825 // Hack to get a default TLI before we have actual Function. 2826 auto FuncIter = M.begin(); 2827 if (FuncIter == M.end()) 2828 return nullptr; 2829 auto *TLI = &GetTLI(*FuncIter); 2830 2831 LibFunc F = LibFunc_cxa_atexit; 2832 if (!TLI->has(F)) 2833 return nullptr; 2834 2835 Function *Fn = M.getFunction(TLI->getName(F)); 2836 if (!Fn) 2837 return nullptr; 2838 2839 // Now get the actual TLI for Fn. 2840 TLI = &GetTLI(*Fn); 2841 2842 // Make sure that the function has the correct prototype. 
2843 if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit) 2844 return nullptr; 2845 2846 return Fn; 2847 } 2848 2849 /// Returns whether the given function is an empty C++ destructor and can 2850 /// therefore be eliminated. 2851 /// Note that we assume that other optimization passes have already simplified 2852 /// the code so we simply check for 'ret'. 2853 static bool cxxDtorIsEmpty(const Function &Fn) { 2854 // FIXME: We could eliminate C++ destructors if they're readonly/readnone and 2855 // nounwind, but that doesn't seem worth doing. 2856 if (Fn.isDeclaration()) 2857 return false; 2858 2859 for (auto &I : Fn.getEntryBlock()) { 2860 if (isa<DbgInfoIntrinsic>(I)) 2861 continue; 2862 if (isa<ReturnInst>(I)) 2863 return true; 2864 break; 2865 } 2866 return false; 2867 } 2868 2869 static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { 2870 /// Itanium C++ ABI p3.3.5: 2871 /// 2872 /// After constructing a global (or local static) object, that will require 2873 /// destruction on exit, a termination function is registered as follows: 2874 /// 2875 /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); 2876 /// 2877 /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the 2878 /// call f(p) when DSO d is unloaded, before all such termination calls 2879 /// registered before this one. It returns zero if registration is 2880 /// successful, nonzero on failure. 2881 2882 // This pass will look for calls to __cxa_atexit where the function is trivial 2883 // and remove them. 2884 bool Changed = false; 2885 2886 for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end(); 2887 I != E;) { 2888 // We're only interested in calls. Theoretically, we could handle invoke 2889 // instructions as well, but neither llvm-gcc nor clang generate invokes 2890 // to __cxa_atexit. 2891 CallInst *CI = dyn_cast<CallInst>(*I++); 2892 if (!CI) 2893 continue; 2894 2895 Function *DtorFn = 2896 dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts()); 2897 if (!DtorFn || !cxxDtorIsEmpty(*DtorFn)) 2898 continue; 2899 2900 // Just remove the call. 
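// Any use of __cxa_atexit's return value is replaced with zero, i.e. the
// registration is reported as successful.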
2901 CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); 2902 CI->eraseFromParent(); 2903 2904 ++NumCXXDtorsRemoved; 2905 2906 Changed |= true; 2907 } 2908 2909 return Changed; 2910 } 2911 2912 static bool optimizeGlobalsInModule( 2913 Module &M, const DataLayout &DL, 2914 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 2915 function_ref<TargetTransformInfo &(Function &)> GetTTI, 2916 function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 2917 function_ref<DominatorTree &(Function &)> LookupDomTree) { 2918 SmallPtrSet<const Comdat *, 8> NotDiscardableComdats; 2919 bool Changed = false; 2920 bool LocalChange = true; 2921 while (LocalChange) { 2922 LocalChange = false; 2923 2924 NotDiscardableComdats.clear(); 2925 for (const GlobalVariable &GV : M.globals()) 2926 if (const Comdat *C = GV.getComdat()) 2927 if (!GV.isDiscardableIfUnused() || !GV.use_empty()) 2928 NotDiscardableComdats.insert(C); 2929 for (Function &F : M) 2930 if (const Comdat *C = F.getComdat()) 2931 if (!F.isDefTriviallyDead()) 2932 NotDiscardableComdats.insert(C); 2933 for (GlobalAlias &GA : M.aliases()) 2934 if (const Comdat *C = GA.getComdat()) 2935 if (!GA.isDiscardableIfUnused() || !GA.use_empty()) 2936 NotDiscardableComdats.insert(C); 2937 2938 // Delete functions that are trivially dead, ccc -> fastcc 2939 LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree, 2940 NotDiscardableComdats); 2941 2942 // Optimize global_ctors list. 2943 LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { 2944 return EvaluateStaticConstructor(F, DL, &GetTLI(*F)); 2945 }); 2946 2947 // Optimize non-address-taken globals. 2948 LocalChange |= 2949 OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats); 2950 2951 // Resolve aliases, when possible. 2952 LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats); 2953 2954 // Try to remove trivial global destructors if they are not removed 2955 // already. 2956 Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI); 2957 if (CXAAtExitFn) 2958 LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); 2959 2960 Changed |= LocalChange; 2961 } 2962 2963 // TODO: Move all global ctors functions to the end of the module for code 2964 // layout. 
2965 2966 return Changed; 2967 } 2968 2969 PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { 2970 auto &DL = M.getDataLayout(); 2971 auto &FAM = 2972 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2973 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{ 2974 return FAM.getResult<DominatorTreeAnalysis>(F); 2975 }; 2976 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 2977 return FAM.getResult<TargetLibraryAnalysis>(F); 2978 }; 2979 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { 2980 return FAM.getResult<TargetIRAnalysis>(F); 2981 }; 2982 2983 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { 2984 return FAM.getResult<BlockFrequencyAnalysis>(F); 2985 }; 2986 2987 if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree)) 2988 return PreservedAnalyses::all(); 2989 return PreservedAnalyses::none(); 2990 } 2991 2992 namespace { 2993 2994 struct GlobalOptLegacyPass : public ModulePass { 2995 static char ID; // Pass identification, replacement for typeid 2996 2997 GlobalOptLegacyPass() : ModulePass(ID) { 2998 initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry()); 2999 } 3000 3001 bool runOnModule(Module &M) override { 3002 if (skipModule(M)) 3003 return false; 3004 3005 auto &DL = M.getDataLayout(); 3006 auto LookupDomTree = [this](Function &F) -> DominatorTree & { 3007 return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); 3008 }; 3009 auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { 3010 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 3011 }; 3012 auto GetTTI = [this](Function &F) -> TargetTransformInfo & { 3013 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 3014 }; 3015 3016 auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & { 3017 return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 3018 }; 3019 3020 return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, 3021 LookupDomTree); 3022 } 3023 3024 void getAnalysisUsage(AnalysisUsage &AU) const override { 3025 AU.addRequired<TargetLibraryInfoWrapperPass>(); 3026 AU.addRequired<TargetTransformInfoWrapperPass>(); 3027 AU.addRequired<DominatorTreeWrapperPass>(); 3028 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 3029 } 3030 }; 3031 3032 } // end anonymous namespace 3033 3034 char GlobalOptLegacyPass::ID = 0; 3035 3036 INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt", 3037 "Global Variable Optimizer", false, false) 3038 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 3039 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 3040 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 3041 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 3042 INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt", 3043 "Global Variable Optimizer", false, false) 3044 3045 ModulePass *llvm::createGlobalOptimizerPass() { 3046 return new GlobalOptLegacyPass(); 3047 } 3048
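// Usage sketch (illustrative, not part of the pass logic): the pass can be
// exercised on its own with opt, e.g.
//   opt -passes=globalopt in.ll -S -o out.ll   (new pass manager)
//   opt -globalopt in.ll -S -o out.ll          (legacy pass manager)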