1 //===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass transforms simple global variables that never have their address
10 // taken. When that is obviously the case, it marks read/write globals as
11 // constant, deletes variables that are only stored to, etc.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/Transforms/IPO/GlobalOpt.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/ADT/iterator_range.h"
24 #include "llvm/Analysis/BlockFrequencyInfo.h"
25 #include "llvm/Analysis/ConstantFolding.h"
26 #include "llvm/Analysis/MemoryBuiltins.h"
27 #include "llvm/Analysis/TargetLibraryInfo.h"
28 #include "llvm/Analysis/TargetTransformInfo.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/BinaryFormat/Dwarf.h"
31 #include "llvm/IR/Attributes.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CallingConv.h"
34 #include "llvm/IR/Constant.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DebugInfoMetadata.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Dominators.h"
40 #include "llvm/IR/Function.h"
41 #include "llvm/IR/GlobalAlias.h"
42 #include "llvm/IR/GlobalValue.h"
43 #include "llvm/IR/GlobalVariable.h"
44 #include "llvm/IR/IRBuilder.h"
45 #include "llvm/IR/InstrTypes.h"
46 #include "llvm/IR/Instruction.h"
47 #include "llvm/IR/Instructions.h"
48 #include "llvm/IR/IntrinsicInst.h"
49 #include "llvm/IR/Module.h"
50 #include "llvm/IR/Operator.h"
51 #include "llvm/IR/Type.h"
52 #include "llvm/IR/Use.h"
53 #include "llvm/IR/User.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/IR/ValueHandle.h"
56 #include "llvm/InitializePasses.h"
57 #include "llvm/Pass.h"
58 #include "llvm/Support/AtomicOrdering.h"
59 #include "llvm/Support/Casting.h"
60 #include "llvm/Support/CommandLine.h"
61 #include "llvm/Support/Debug.h"
62 #include "llvm/Support/ErrorHandling.h"
63 #include "llvm/Support/raw_ostream.h"
64 #include "llvm/Transforms/IPO.h"
65 #include "llvm/Transforms/Utils/CtorUtils.h"
66 #include "llvm/Transforms/Utils/Evaluator.h"
67 #include "llvm/Transforms/Utils/GlobalStatus.h"
68 #include "llvm/Transforms/Utils/Local.h"
69 #include <cassert>
70 #include <cstdint>
71 #include <utility>
72 #include <vector>
73
74 using namespace llvm;
75
76 #define DEBUG_TYPE "globalopt"
77
78 STATISTIC(NumMarked , "Number of globals marked constant");
79 STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
80 STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
81 STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
82 STATISTIC(NumDeleted , "Number of globals deleted");
83 STATISTIC(NumGlobUses , "Number of global uses devirtualized");
84 STATISTIC(NumLocalized , "Number of globals localized");
85 STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
86 STATISTIC(NumFastCallFns , "Number of functions converted to fastcc");
87 STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
88 STATISTIC(NumNestRemoved , "Number of nest attributes removed");
89 STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
90 STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
91 STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
92 STATISTIC(NumInternalFunc, "Number of internal functions");
93 STATISTIC(NumColdCC, "Number of functions marked coldcc");
94
95 static cl::opt<bool>
96 EnableColdCCStressTest("enable-coldcc-stress-test",
97 cl::desc("Enable stress test of coldcc by adding "
98 "calling conv to all internal functions."),
99 cl::init(false), cl::Hidden);
100
101 static cl::opt<int> ColdCCRelFreq(
102 "coldcc-rel-freq", cl::Hidden, cl::init(2),
103 cl::desc(
104 "Maximum block frequency, expressed as a percentage of caller's "
105         "entry frequency, for a call site to be considered cold for enabling "
106 "coldcc"));
107
108 /// Is this global variable possibly used by a leak checker as a root? If so,
109 /// we might not really want to eliminate the stores to it.
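/// For example (illustrative IR), both of these globals would be treated as
/// roots, since they either are a pointer or may contain one, and may be the
/// only reference keeping heap memory reachable:
///   @list = internal global ptr null
///   @head = internal global { i32, ptr } zeroinitializer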
110 static bool isLeakCheckerRoot(GlobalVariable *GV) {
111 // A global variable is a root if it is a pointer, or could plausibly contain
112 // a pointer. There are two challenges; one is that we could have a struct
113   // that has an inner member which is a pointer. We recurse through the type to
114 // detect these (up to a point). The other is that we may actually be a union
115 // of a pointer and another type, and so our LLVM type is an integer which
116 // gets converted into a pointer, or our type is an [i8 x #] with a pointer
117 // potentially contained here.
118
119 if (GV->hasPrivateLinkage())
120 return false;
121
122 SmallVector<Type *, 4> Types;
123 Types.push_back(GV->getValueType());
124
125 unsigned Limit = 20;
126 do {
127 Type *Ty = Types.pop_back_val();
128 switch (Ty->getTypeID()) {
129 default: break;
130 case Type::PointerTyID:
131 return true;
132 case Type::FixedVectorTyID:
133 case Type::ScalableVectorTyID:
134 if (cast<VectorType>(Ty)->getElementType()->isPointerTy())
135 return true;
136 break;
137 case Type::ArrayTyID:
138 Types.push_back(cast<ArrayType>(Ty)->getElementType());
139 break;
140 case Type::StructTyID: {
141 StructType *STy = cast<StructType>(Ty);
142 if (STy->isOpaque()) return true;
143 for (StructType::element_iterator I = STy->element_begin(),
144 E = STy->element_end(); I != E; ++I) {
145 Type *InnerTy = *I;
146 if (isa<PointerType>(InnerTy)) return true;
147 if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) ||
148 isa<VectorType>(InnerTy))
149 Types.push_back(InnerTy);
150 }
151 break;
152 }
153 }
154 if (--Limit == 0) return true;
155 } while (!Types.empty());
156 return false;
157 }
158
159 /// Given a value that is stored to a global but never read, determine whether
160 /// it's safe to remove the store and the chain of computation that feeds the
161 /// store.
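/// For example (illustrative IR), in
///   %buf = call ptr @malloc(i64 16)
///   %p   = getelementptr inbounds i8, ptr %buf, i64 8
///   store ptr %p, ptr @g
/// each intermediate value has a single use, has no side effects, and the
/// chain bottoms out in an allocation call, so the store and the whole chain
/// feeding it can be deleted together.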
162 static bool IsSafeComputationToRemove(
163 Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
164 do {
165 if (isa<Constant>(V))
166 return true;
167 if (!V->hasOneUse())
168 return false;
169 if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
170 isa<GlobalValue>(V))
171 return false;
172 if (isAllocationFn(V, GetTLI))
173 return true;
174
175 Instruction *I = cast<Instruction>(V);
176 if (I->mayHaveSideEffects())
177 return false;
178 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
179 if (!GEP->hasAllConstantIndices())
180 return false;
181 } else if (I->getNumOperands() != 1) {
182 return false;
183 }
184
185 V = I->getOperand(0);
186 } while (true);
187 }
188
189 /// This GV is a pointer root. Loop over all users of the global and clean up
190 /// any that can be deleted without leaking dynamically allocated memory, e.g.
191 /// stores of constants and stores of allocations that can themselves be removed.
192 static bool
193 CleanupPointerRootUsers(GlobalVariable *GV,
194 function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
195 // A brief explanation of leak checkers. The goal is to find bugs where
196 // pointers are forgotten, causing an accumulating growth in memory
197 // usage over time. The common strategy for leak checkers is to explicitly
198 // allow the memory pointed to by globals at exit. This is popular because it
199 // also solves another problem where the main thread of a C++ program may shut
200 // down before other threads that are still expecting to use those globals. To
201 // handle that case, we expect the program may create a singleton and never
202 // destroy it.
203
204 bool Changed = false;
205
206 // If Dead[n].first is the only use of a malloc result, we can delete its
207 // chain of computation and the store to the global in Dead[n].second.
208 SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
209
210 // Constants can't be pointers to dynamically allocated memory.
211 for (User *U : llvm::make_early_inc_range(GV->users())) {
212 if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
213 Value *V = SI->getValueOperand();
214 if (isa<Constant>(V)) {
215 Changed = true;
216 SI->eraseFromParent();
217 } else if (Instruction *I = dyn_cast<Instruction>(V)) {
218 if (I->hasOneUse())
219 Dead.push_back(std::make_pair(I, SI));
220 }
221 } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
222 if (isa<Constant>(MSI->getValue())) {
223 Changed = true;
224 MSI->eraseFromParent();
225 } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
226 if (I->hasOneUse())
227 Dead.push_back(std::make_pair(I, MSI));
228 }
229 } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
230 GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
231 if (MemSrc && MemSrc->isConstant()) {
232 Changed = true;
233 MTI->eraseFromParent();
234 } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
235 if (I->hasOneUse())
236 Dead.push_back(std::make_pair(I, MTI));
237 }
238 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
239 if (CE->use_empty()) {
240 CE->destroyConstant();
241 Changed = true;
242 }
243 } else if (Constant *C = dyn_cast<Constant>(U)) {
244 if (isSafeToDestroyConstant(C)) {
245 C->destroyConstant();
246 // This could have invalidated UI, start over from scratch.
247 Dead.clear();
248 CleanupPointerRootUsers(GV, GetTLI);
249 return true;
250 }
251 }
252 }
253
254 for (int i = 0, e = Dead.size(); i != e; ++i) {
255 if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) {
256 Dead[i].second->eraseFromParent();
257 Instruction *I = Dead[i].first;
258 do {
259 if (isAllocationFn(I, GetTLI))
260 break;
261 Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
262 if (!J)
263 break;
264 I->eraseFromParent();
265 I = J;
266 } while (true);
267 I->eraseFromParent();
268 Changed = true;
269 }
270 }
271
272 return Changed;
273 }
274
275 /// We just marked GV constant. Loop over all users of the global, cleaning up
276 /// the obvious ones. This is largely just a quick scan over the use list to
277 /// clean up the easy and obvious cruft. This returns true if it made a change.
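/// For example (illustrative IR), once @g is known to be constant:
///   @g = internal constant i32 42
///   %v = load i32, ptr @g   ; folded to i32 42
///   store i32 0, ptr @g     ; must be unreachable, deleted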
278 static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
279 const DataLayout &DL) {
280 Constant *Init = GV->getInitializer();
281 SmallVector<User *, 8> WorkList(GV->users());
282 SmallPtrSet<User *, 8> Visited;
283 bool Changed = false;
284
285 SmallVector<WeakTrackingVH> MaybeDeadInsts;
286 auto EraseFromParent = [&](Instruction *I) {
287 for (Value *Op : I->operands())
288 if (auto *OpI = dyn_cast<Instruction>(Op))
289 MaybeDeadInsts.push_back(OpI);
290 I->eraseFromParent();
291 Changed = true;
292 };
293 while (!WorkList.empty()) {
294 User *U = WorkList.pop_back_val();
295 if (!Visited.insert(U).second)
296 continue;
297
298 if (auto *BO = dyn_cast<BitCastOperator>(U))
299 append_range(WorkList, BO->users());
300 if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U))
301 append_range(WorkList, ASC->users());
302 else if (auto *GEP = dyn_cast<GEPOperator>(U))
303 append_range(WorkList, GEP->users());
304 else if (auto *LI = dyn_cast<LoadInst>(U)) {
305 // A load from a uniform value is always the same, regardless of any
306 // applied offset.
307 Type *Ty = LI->getType();
308 if (Constant *Res = ConstantFoldLoadFromUniformValue(Init, Ty)) {
309 LI->replaceAllUsesWith(Res);
310 EraseFromParent(LI);
311 continue;
312 }
313
314 Value *PtrOp = LI->getPointerOperand();
315 APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
316 PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
317 DL, Offset, /* AllowNonInbounds */ true);
318 if (PtrOp == GV) {
319 if (auto *Value = ConstantFoldLoadFromConst(Init, Ty, Offset, DL)) {
320 LI->replaceAllUsesWith(Value);
321 EraseFromParent(LI);
322 }
323 }
324 } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
325 // Store must be unreachable or storing Init into the global.
326 EraseFromParent(SI);
327 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
328 if (getUnderlyingObject(MI->getRawDest()) == GV)
329 EraseFromParent(MI);
330 }
331 }
332
333 Changed |=
334 RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts);
335 GV->removeDeadConstantUsers();
336 return Changed;
337 }
338
339 /// Look at all uses of the global and determine which (offset, type) pairs it
340 /// can be split into.
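/// For example, a [16 x i8] global that is only ever accessed as an i32 at
/// offset 0 and as a double at offset 8 yields the map {0 -> i32, 8 -> double}.
/// Mixing access types at one offset, or storing the global's address
/// somewhere, makes collection fail and returns false.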
341 static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV,
342 const DataLayout &DL) {
343 SmallVector<Use *, 16> Worklist;
344 SmallPtrSet<Use *, 16> Visited;
345 auto AppendUses = [&](Value *V) {
346 for (Use &U : V->uses())
347 if (Visited.insert(&U).second)
348 Worklist.push_back(&U);
349 };
350 AppendUses(GV);
351 while (!Worklist.empty()) {
352 Use *U = Worklist.pop_back_val();
353 User *V = U->getUser();
354
355 auto *GEP = dyn_cast<GEPOperator>(V);
356 if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
357 (GEP && GEP->hasAllConstantIndices())) {
358 AppendUses(V);
359 continue;
360 }
361
362 if (Value *Ptr = getLoadStorePointerOperand(V)) {
363 // This is storing the global address into somewhere, not storing into
364 // the global.
365 if (isa<StoreInst>(V) && U->getOperandNo() == 0)
366 return false;
367
368 APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
369 Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
370 /* AllowNonInbounds */ true);
371 if (Ptr != GV || Offset.getActiveBits() >= 64)
372 return false;
373
374 // TODO: We currently require that all accesses at a given offset must
375 // use the same type. This could be relaxed.
376 Type *Ty = getLoadStoreType(V);
377 auto It = Types.try_emplace(Offset.getZExtValue(), Ty).first;
378 if (Ty != It->second)
379 return false;
380 continue;
381 }
382
383 // Ignore dead constant users.
384 if (auto *C = dyn_cast<Constant>(V)) {
385 if (!isSafeToDestroyConstant(C))
386 return false;
387 continue;
388 }
389
390 // Unknown user.
391 return false;
392 }
393
394 return true;
395 }
396
397 /// Copy over the debug info for a variable to its SRA replacements.
398 static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
399 uint64_t FragmentOffsetInBits,
400 uint64_t FragmentSizeInBits,
401 uint64_t VarSize) {
402 SmallVector<DIGlobalVariableExpression *, 1> GVs;
403 GV->getDebugInfo(GVs);
404 for (auto *GVE : GVs) {
405 DIVariable *Var = GVE->getVariable();
406 DIExpression *Expr = GVE->getExpression();
407 int64_t CurVarOffsetInBytes = 0;
408 uint64_t CurVarOffsetInBits = 0;
409
410     // Calculate the offset in bytes; continue if it is unknown.
411 if (!Expr->extractIfOffset(CurVarOffsetInBytes))
412 continue;
413
414 // Ignore negative offset.
415 if (CurVarOffsetInBytes < 0)
416 continue;
417
418 // Convert offset to bits.
419 CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes;
420
421 // Current var starts after the fragment, ignore.
422 if (CurVarOffsetInBits >= (FragmentOffsetInBits + FragmentSizeInBits))
423 continue;
424
425 uint64_t CurVarSize = Var->getType()->getSizeInBits();
426 // Current variable ends before start of fragment, ignore.
427 if (CurVarSize != 0 &&
428 (CurVarOffsetInBits + CurVarSize) <= FragmentOffsetInBits)
429 continue;
430
431 // Current variable fits in the fragment.
432 if (CurVarOffsetInBits == FragmentOffsetInBits &&
433 CurVarSize == FragmentSizeInBits)
434 Expr = DIExpression::get(Expr->getContext(), {});
435 // If the FragmentSize is smaller than the variable,
436 // emit a fragment expression.
437 else if (FragmentSizeInBits < VarSize) {
438 if (auto E = DIExpression::createFragmentExpression(
439 Expr, FragmentOffsetInBits, FragmentSizeInBits))
440 Expr = *E;
441 else
442 return;
443 }
444 auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
445 NGV->addDebugInfo(NGVE);
446 }
447 }
448
449 /// Perform scalar replacement of aggregates on the specified global variable.
450 /// This opens the door for other optimizations by exposing the behavior of the
451 /// program in a more fine-grained way. We have determined that this
452 /// transformation is safe already. We return the first global variable we
453 /// insert so that the caller can reprocess it.
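/// For example (illustrative IR), a struct global accessed only field by field:
///   @g = internal global { i32, double } { i32 1, double 2.0 }
/// is split into one global per accessed offset:
///   @g.0 = internal global i32 1
///   @g.1 = internal global double 2.0
/// and every load/store is redirected to the global covering its offset.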
454 static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
455 assert(GV->hasLocalLinkage());
456
457 // Collect types to split into.
458 DenseMap<uint64_t, Type *> Types;
459 if (!collectSRATypes(Types, GV, DL) || Types.empty())
460 return nullptr;
461
462 // Make sure we don't SRA back to the same type.
463 if (Types.size() == 1 && Types.begin()->second == GV->getValueType())
464 return nullptr;
465
466 // Don't perform SRA if we would have to split into many globals.
467 if (Types.size() > 16)
468 return nullptr;
469
470 // Sort by offset.
471 SmallVector<std::pair<uint64_t, Type *>, 16> TypesVector;
472 append_range(TypesVector, Types);
473 sort(TypesVector, llvm::less_first());
474
475 // Check that the types are non-overlapping.
476 uint64_t Offset = 0;
477 for (const auto &Pair : TypesVector) {
478 // Overlaps with previous type.
479 if (Pair.first < Offset)
480 return nullptr;
481
482 Offset = Pair.first + DL.getTypeAllocSize(Pair.second);
483 }
484
485 // Some accesses go beyond the end of the global, don't bother.
486 if (Offset > DL.getTypeAllocSize(GV->getValueType()))
487 return nullptr;
488
489 // Collect initializers for new globals.
490 Constant *OrigInit = GV->getInitializer();
491 DenseMap<uint64_t, Constant *> Initializers;
492 for (const auto &Pair : Types) {
493 Constant *NewInit = ConstantFoldLoadFromConst(OrigInit, Pair.second,
494 APInt(64, Pair.first), DL);
495 if (!NewInit) {
496 LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of "
497 << *GV << " with type " << *Pair.second << " at offset "
498 << Pair.first << "\n");
499 return nullptr;
500 }
501 Initializers.insert({Pair.first, NewInit});
502 }
503
504 LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
505
506 // Get the alignment of the global, either explicit or target-specific.
507 Align StartAlignment =
508 DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
509 uint64_t VarSize = DL.getTypeSizeInBits(GV->getValueType());
510
511 // Create replacement globals.
512 DenseMap<uint64_t, GlobalVariable *> NewGlobals;
513 unsigned NameSuffix = 0;
514 for (auto &Pair : TypesVector) {
515 uint64_t Offset = Pair.first;
516 Type *Ty = Pair.second;
517 GlobalVariable *NGV = new GlobalVariable(
518 *GV->getParent(), Ty, false, GlobalVariable::InternalLinkage,
519 Initializers[Offset], GV->getName() + "." + Twine(NameSuffix++), GV,
520 GV->getThreadLocalMode(), GV->getAddressSpace());
521 NGV->copyAttributesFrom(GV);
522 NewGlobals.insert({Offset, NGV});
523
524 // Calculate the known alignment of the field. If the original aggregate
525 // had 256 byte alignment for example, something might depend on that:
526 // propagate info to each field.
527 Align NewAlign = commonAlignment(StartAlignment, Offset);
528 if (NewAlign > DL.getABITypeAlign(Ty))
529 NGV->setAlignment(NewAlign);
530
531 // Copy over the debug info for the variable.
532 transferSRADebugInfo(GV, NGV, Offset * 8, DL.getTypeAllocSizeInBits(Ty),
533 VarSize);
534 }
535
536 // Replace uses of the original global with uses of the new global.
537 SmallVector<Value *, 16> Worklist;
538 SmallPtrSet<Value *, 16> Visited;
539 SmallVector<WeakTrackingVH, 16> DeadInsts;
540 auto AppendUsers = [&](Value *V) {
541 for (User *U : V->users())
542 if (Visited.insert(U).second)
543 Worklist.push_back(U);
544 };
545 AppendUsers(GV);
546 while (!Worklist.empty()) {
547 Value *V = Worklist.pop_back_val();
548 if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
549 isa<GEPOperator>(V)) {
550 AppendUsers(V);
551 if (isa<Instruction>(V))
552 DeadInsts.push_back(V);
553 continue;
554 }
555
556 if (Value *Ptr = getLoadStorePointerOperand(V)) {
557 APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
558 Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
559 /* AllowNonInbounds */ true);
560 assert(Ptr == GV && "Load/store must be from/to global");
561 GlobalVariable *NGV = NewGlobals[Offset.getZExtValue()];
562 assert(NGV && "Must have replacement global for this offset");
563
564 // Update the pointer operand and recalculate alignment.
565 Align PrefAlign = DL.getPrefTypeAlign(getLoadStoreType(V));
566 Align NewAlign =
567 getOrEnforceKnownAlignment(NGV, PrefAlign, DL, cast<Instruction>(V));
568
569 if (auto *LI = dyn_cast<LoadInst>(V)) {
570 LI->setOperand(0, NGV);
571 LI->setAlignment(NewAlign);
572 } else {
573 auto *SI = cast<StoreInst>(V);
574 SI->setOperand(1, NGV);
575 SI->setAlignment(NewAlign);
576 }
577 continue;
578 }
579
580 assert(isa<Constant>(V) && isSafeToDestroyConstant(cast<Constant>(V)) &&
581 "Other users can only be dead constants");
582 }
583
584 // Delete old instructions and global.
585 RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
586 GV->removeDeadConstantUsers();
587 GV->eraseFromParent();
588 ++NumSRA;
589
590 assert(NewGlobals.size() > 0);
591 return NewGlobals.begin()->second;
592 }
593
594 /// Return true if all users of the specified value will trap if the value is
595 /// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid
596 /// reprocessing them.
597 static bool AllUsesOfValueWillTrapIfNull(const Value *V,
598 SmallPtrSetImpl<const PHINode*> &PHIs) {
599 for (const User *U : V->users()) {
600 if (const Instruction *I = dyn_cast<Instruction>(U)) {
601 // If null pointer is considered valid, then all uses are non-trapping.
602 // Non address-space 0 globals have already been pruned by the caller.
603 if (NullPointerIsDefined(I->getFunction()))
604 return false;
605 }
606 if (isa<LoadInst>(U)) {
607 // Will trap.
608 } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
609 if (SI->getOperand(0) == V) {
610 return false; // Storing the value.
611 }
612 } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
613 if (CI->getCalledOperand() != V) {
614 return false; // Not calling the ptr
615 }
616 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
617 if (II->getCalledOperand() != V) {
618 return false; // Not calling the ptr
619 }
620 } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
621 if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
622 } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
623 if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
624 } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
625 // If we've already seen this phi node, ignore it, it has already been
626 // checked.
627 if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
628 return false;
629 } else if (isa<ICmpInst>(U) &&
630 !ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) &&
631 isa<LoadInst>(U->getOperand(0)) &&
632 isa<ConstantPointerNull>(U->getOperand(1))) {
633 assert(isa<GlobalValue>(cast<LoadInst>(U->getOperand(0))
634 ->getPointerOperand()
635 ->stripPointerCasts()) &&
636 "Should be GlobalVariable");
637 // This and only this kind of non-signed ICmpInst is to be replaced with
638 // the comparing of the value of the created global init bool later in
639 // optimizeGlobalAddressOfAllocation for the global variable.
640 } else {
641 return false;
642 }
643 }
644 return true;
645 }
646
647 /// Return true if all uses of any loads from GV will trap if the loaded value
648 /// is null. Note that this also permits comparisons of the loaded value
649 /// against null, as a special case.
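/// For example (illustrative IR), a pattern such as
///   %p = load ptr, ptr @gv
///   %v = load i32, ptr %p   ; would trap if %p were null
/// qualifies, so the loaded pointer can be assumed non-null on every path
/// that actually executes these uses.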
650 static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
651 SmallVector<const Value *, 4> Worklist;
652 Worklist.push_back(GV);
653 while (!Worklist.empty()) {
654 const Value *P = Worklist.pop_back_val();
655 for (auto *U : P->users()) {
656 if (auto *LI = dyn_cast<LoadInst>(U)) {
657 SmallPtrSet<const PHINode *, 8> PHIs;
658 if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
659 return false;
660 } else if (auto *SI = dyn_cast<StoreInst>(U)) {
661 // Ignore stores to the global.
662 if (SI->getPointerOperand() != P)
663 return false;
664 } else if (auto *CE = dyn_cast<ConstantExpr>(U)) {
665 if (CE->stripPointerCasts() != GV)
666 return false;
667 // Check further the ConstantExpr.
668 Worklist.push_back(CE);
669 } else {
670 // We don't know or understand this user, bail out.
671 return false;
672 }
673 }
674 }
675
676 return true;
677 }
678
679 /// Get all the loads/store uses for global variable \p GV.
680 static void allUsesOfLoadAndStores(GlobalVariable *GV,
681 SmallVector<Value *, 4> &Uses) {
682 SmallVector<Value *, 4> Worklist;
683 Worklist.push_back(GV);
684 while (!Worklist.empty()) {
685 auto *P = Worklist.pop_back_val();
686 for (auto *U : P->users()) {
687 if (auto *CE = dyn_cast<ConstantExpr>(U)) {
688 Worklist.push_back(CE);
689 continue;
690 }
691
692 assert((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
693 "Expect only load or store instructions");
694 Uses.push_back(U);
695 }
696 }
697 }
698
699 static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
700 bool Changed = false;
701 for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
702 Instruction *I = cast<Instruction>(*UI++);
703 // Uses are non-trapping if null pointer is considered valid.
704 // Non address-space 0 globals are already pruned by the caller.
705 if (NullPointerIsDefined(I->getFunction()))
706 return false;
707 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
708 LI->setOperand(0, NewV);
709 Changed = true;
710 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
711 if (SI->getOperand(1) == V) {
712 SI->setOperand(1, NewV);
713 Changed = true;
714 }
715 } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
716 CallBase *CB = cast<CallBase>(I);
717 if (CB->getCalledOperand() == V) {
718 // Calling through the pointer! Turn into a direct call, but be careful
719 // that the pointer is not also being passed as an argument.
720 CB->setCalledOperand(NewV);
721 Changed = true;
722 bool PassedAsArg = false;
723 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
724 if (CB->getArgOperand(i) == V) {
725 PassedAsArg = true;
726 CB->setArgOperand(i, NewV);
727 }
728
729 if (PassedAsArg) {
730 // Being passed as an argument also. Be careful to not invalidate UI!
731 UI = V->user_begin();
732 }
733 }
734 } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
735 Changed |= OptimizeAwayTrappingUsesOfValue(CI,
736 ConstantExpr::getCast(CI->getOpcode(),
737 NewV, CI->getType()));
738 if (CI->use_empty()) {
739 Changed = true;
740 CI->eraseFromParent();
741 }
742 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
743 // Should handle GEP here.
744 SmallVector<Constant*, 8> Idxs;
745 Idxs.reserve(GEPI->getNumOperands()-1);
746 for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
747 i != e; ++i)
748 if (Constant *C = dyn_cast<Constant>(*i))
749 Idxs.push_back(C);
750 else
751 break;
752 if (Idxs.size() == GEPI->getNumOperands()-1)
753 Changed |= OptimizeAwayTrappingUsesOfValue(
754 GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
755 NewV, Idxs));
756 if (GEPI->use_empty()) {
757 Changed = true;
758 GEPI->eraseFromParent();
759 }
760 }
761 }
762
763 return Changed;
764 }
765
766 /// The specified global has only one non-null value stored into it. If there
767 /// are uses of the loaded value that would trap if the loaded value is
768 /// dynamically null, then we know that they cannot be reached with a null
769 /// value, and we can optimize away the load.
770 static bool OptimizeAwayTrappingUsesOfLoads(
771 GlobalVariable *GV, Constant *LV, const DataLayout &DL,
772 function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
773 bool Changed = false;
774
775 // Keep track of whether we are able to remove all the uses of the global
776 // other than the store that defines it.
777 bool AllNonStoreUsesGone = true;
778
779   // Replace all uses of the loads with uses of the stored value.
780 for (User *GlobalUser : llvm::make_early_inc_range(GV->users())) {
781 if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
782 Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
783 // If we were able to delete all uses of the loads
784 if (LI->use_empty()) {
785 LI->eraseFromParent();
786 Changed = true;
787 } else {
788 AllNonStoreUsesGone = false;
789 }
790 } else if (isa<StoreInst>(GlobalUser)) {
791 // Ignore the store that stores "LV" to the global.
792 assert(GlobalUser->getOperand(1) == GV &&
793 "Must be storing *to* the global");
794 } else {
795 AllNonStoreUsesGone = false;
796
797 // If we get here we could have other crazy uses that are transitively
798 // loaded.
799 assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
800 isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
801 isa<BitCastInst>(GlobalUser) ||
802 isa<GetElementPtrInst>(GlobalUser)) &&
803 "Only expect load and stores!");
804 }
805 }
806
807 if (Changed) {
808 LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
809 << "\n");
810 ++NumGlobUses;
811 }
812
813 // If we nuked all of the loads, then none of the stores are needed either,
814 // nor is the global.
815 if (AllNonStoreUsesGone) {
816 if (isLeakCheckerRoot(GV)) {
817 Changed |= CleanupPointerRootUsers(GV, GetTLI);
818 } else {
819 Changed = true;
820 CleanupConstantGlobalUsers(GV, DL);
821 }
822 if (GV->use_empty()) {
823 LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
824 Changed = true;
825 GV->eraseFromParent();
826 ++NumDeleted;
827 }
828 }
829 return Changed;
830 }
831
832 /// Walk the use list of V, constant folding all of the instructions that are
833 /// foldable.
834 static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
835 TargetLibraryInfo *TLI) {
836 for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
837 if (Instruction *I = dyn_cast<Instruction>(*UI++))
838 if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
839 I->replaceAllUsesWith(NewC);
840
841 // Advance UI to the next non-I use to avoid invalidating it!
842 // Instructions could multiply use V.
843 while (UI != E && *UI == I)
844 ++UI;
845 if (isInstructionTriviallyDead(I, TLI))
846 I->eraseFromParent();
847 }
848 }
849
850 /// This function takes the specified global variable, and transforms the
851 /// program as if it always contained the result of the specified malloc.
852 /// Because it is always the result of the specified malloc, there is no reason
853 /// to actually DO the malloc. Instead, turn the malloc into a global, and
854 /// rewrite any loads of GV as uses of the new global.
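/// For example (illustrative IR, with an assumed 24-byte allocation), a single
///   store ptr %mem, ptr @g   ; where %mem = call ptr @malloc(i64 24)
/// is rewritten so that the allocation becomes
///   @g.body = internal global [24 x i8] undef
/// and, if the old pointer was ever compared against null, a companion flag
///   @g.init = internal global i1 false
/// records whether the program has reached the former allocation point.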
855 static GlobalVariable *
856 OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
857 uint64_t AllocSize, Constant *InitVal,
858 const DataLayout &DL,
859 TargetLibraryInfo *TLI) {
860 LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI
861 << '\n');
862
863 // Create global of type [AllocSize x i8].
864 Type *GlobalType = ArrayType::get(Type::getInt8Ty(GV->getContext()),
865 AllocSize);
866
867   // Create the new global variable. The contents of the allocated memory are
868 // undefined initially, so initialize with an undef value.
869 GlobalVariable *NewGV = new GlobalVariable(
870 *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
871 UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
872 GV->getThreadLocalMode());
873
874 // Initialize the global at the point of the original call. Note that this
875 // is a different point from the initialization referred to below for the
876   // nullability handling. Subtlety: We have not proven the original global was
877   // only initialized once. As such, we cannot fold this into the initializer
878   // of the new global, as we may need to re-init the storage multiple times.
879 if (!isa<UndefValue>(InitVal)) {
880 IRBuilder<> Builder(CI->getNextNode());
881 // TODO: Use alignment above if align!=1
882 Builder.CreateMemSet(NewGV, InitVal, AllocSize, None);
883 }
884
885 // Update users of the allocation to use the new global instead.
886 BitCastInst *TheBC = nullptr;
887 while (!CI->use_empty()) {
888 Instruction *User = cast<Instruction>(CI->user_back());
889 if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
890 if (BCI->getType() == NewGV->getType()) {
891 BCI->replaceAllUsesWith(NewGV);
892 BCI->eraseFromParent();
893 } else {
894 BCI->setOperand(0, NewGV);
895 }
896 } else {
897 if (!TheBC)
898 TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
899 User->replaceUsesOfWith(CI, TheBC);
900 }
901 }
902
903 SmallSetVector<Constant *, 1> RepValues;
904 RepValues.insert(NewGV);
905
906 // If there is a comparison against null, we will insert a global bool to
907 // keep track of whether the global was initialized yet or not.
908 GlobalVariable *InitBool =
909 new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
910 GlobalValue::InternalLinkage,
911 ConstantInt::getFalse(GV->getContext()),
912 GV->getName()+".init", GV->getThreadLocalMode());
913 bool InitBoolUsed = false;
914
915 // Loop over all instruction uses of GV, processing them in turn.
916 SmallVector<Value *, 4> Guses;
917 allUsesOfLoadAndStores(GV, Guses);
918 for (auto *U : Guses) {
919 if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
920 // The global is initialized when the store to it occurs. If the stored
921 // value is null value, the global bool is set to false, otherwise true.
922 new StoreInst(ConstantInt::getBool(
923 GV->getContext(),
924 !isa<ConstantPointerNull>(SI->getValueOperand())),
925 InitBool, false, Align(1), SI->getOrdering(),
926 SI->getSyncScopeID(), SI);
927 SI->eraseFromParent();
928 continue;
929 }
930
931 LoadInst *LI = cast<LoadInst>(U);
932 while (!LI->use_empty()) {
933 Use &LoadUse = *LI->use_begin();
934 ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
935 if (!ICI) {
936 auto *CE = ConstantExpr::getBitCast(NewGV, LI->getType());
937 RepValues.insert(CE);
938 LoadUse.set(CE);
939 continue;
940 }
941
942 // Replace the cmp X, 0 with a use of the bool value.
943 Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
944 InitBool->getName() + ".val", false, Align(1),
945 LI->getOrdering(), LI->getSyncScopeID(), LI);
946 InitBoolUsed = true;
947 switch (ICI->getPredicate()) {
948 default: llvm_unreachable("Unknown ICmp Predicate!");
949 case ICmpInst::ICMP_ULT: // X < null -> always false
950 LV = ConstantInt::getFalse(GV->getContext());
951 break;
952 case ICmpInst::ICMP_UGE: // X >= null -> always true
953 LV = ConstantInt::getTrue(GV->getContext());
954 break;
955 case ICmpInst::ICMP_ULE:
956 case ICmpInst::ICMP_EQ:
957 LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
958 break;
959 case ICmpInst::ICMP_NE:
960 case ICmpInst::ICMP_UGT:
961 break; // no change.
962 }
963 ICI->replaceAllUsesWith(LV);
964 ICI->eraseFromParent();
965 }
966 LI->eraseFromParent();
967 }
968
969 // If the initialization boolean was used, insert it, otherwise delete it.
970 if (!InitBoolUsed) {
971 while (!InitBool->use_empty()) // Delete initializations
972 cast<StoreInst>(InitBool->user_back())->eraseFromParent();
973 delete InitBool;
974 } else
975 GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);
976
977   // Now the GV is dead, nuke it and the allocation.
978 GV->eraseFromParent();
979 CI->eraseFromParent();
980
981 // To further other optimizations, loop over all users of NewGV and try to
982 // constant prop them. This will promote GEP instructions with constant
983 // indices into GEP constant-exprs, which will allow global-opt to hack on it.
984 for (auto *CE : RepValues)
985 ConstantPropUsersOf(CE, DL, TLI);
986
987 return NewGV;
988 }
989
990 /// Scan the use-list of GV checking to make sure that there are no complex uses
991 /// of GV. We permit simple things like dereferencing the pointer, but not
992 /// storing through the address, unless it is to the specified global.
993 static bool
994 valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
995 const GlobalVariable *GV) {
996 SmallPtrSet<const Value *, 4> Visited;
997 SmallVector<const Value *, 4> Worklist;
998 Worklist.push_back(CI);
999
1000 while (!Worklist.empty()) {
1001 const Value *V = Worklist.pop_back_val();
1002 if (!Visited.insert(V).second)
1003 continue;
1004
1005 for (const Use &VUse : V->uses()) {
1006 const User *U = VUse.getUser();
1007 if (isa<LoadInst>(U) || isa<CmpInst>(U))
1008 continue; // Fine, ignore.
1009
1010 if (auto *SI = dyn_cast<StoreInst>(U)) {
1011 if (SI->getValueOperand() == V &&
1012 SI->getPointerOperand()->stripPointerCasts() != GV)
1013 return false; // Storing the pointer not into GV... bad.
1014 continue; // Otherwise, storing through it, or storing into GV... fine.
1015 }
1016
1017 if (auto *BCI = dyn_cast<BitCastInst>(U)) {
1018 Worklist.push_back(BCI);
1019 continue;
1020 }
1021
1022 if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
1023 Worklist.push_back(GEPI);
1024 continue;
1025 }
1026
1027 return false;
1028 }
1029 }
1030
1031 return true;
1032 }
1033
1034 /// If we have a global that is only initialized with a fixed size allocation
1035 /// try to transform the program to use global memory instead of heap
1036 /// allocated memory. This eliminates dynamic allocation, avoids an indirection
1037 /// accessing the data, and exposes the resultant global to further GlobalOpt.
1038 static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
1039 CallInst *CI,
1040 const DataLayout &DL,
1041 TargetLibraryInfo *TLI) {
1042 if (!isRemovableAlloc(CI, TLI))
1043     // Must be able to remove the call when we get done.
1044 return false;
1045
1046 Type *Int8Ty = Type::getInt8Ty(CI->getFunction()->getContext());
1047 Constant *InitVal = getInitialValueOfAllocation(CI, TLI, Int8Ty);
1048 if (!InitVal)
1049 // Must be able to emit a memset for initialization
1050 return false;
1051
1052 uint64_t AllocSize;
1053 if (!getObjectSize(CI, AllocSize, DL, TLI, ObjectSizeOpts()))
1054 return false;
1055
1056 // Restrict this transformation to only working on small allocations
1057 // (2048 bytes currently), as we don't want to introduce a 16M global or
1058 // something.
1059 if (AllocSize >= 2048)
1060 return false;
1061
1062 // We can't optimize this global unless all uses of it are *known* to be
1063 // of the malloc value, not of the null initializer value (consider a use
1064 // that compares the global's value against zero to see if the malloc has
1065 // been reached). To do this, we check to see if all uses of the global
1066 // would trap if the global were null: this proves that they must all
1067 // happen after the malloc.
1068 if (!allUsesOfLoadedValueWillTrapIfNull(GV))
1069 return false;
1070
1071 // We can't optimize this if the malloc itself is used in a complex way,
1072 // for example, being stored into multiple globals. This allows the
1073 // malloc to be stored into the specified global, loaded, gep, icmp'd.
1074   // These are all uses that we can rewrite to use the new global instead.
1075 if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
1076 return false;
1077
1078 OptimizeGlobalAddressOfAllocation(GV, CI, AllocSize, InitVal, DL, TLI);
1079 return true;
1080 }
1081
1082 // Try to optimize globals based on the knowledge that only one value (besides
1083 // its initializer) is ever stored to the global.
1084 static bool
1085 optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
1086 const DataLayout &DL,
1087 function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
1088 // Ignore no-op GEPs and bitcasts.
1089 StoredOnceVal = StoredOnceVal->stripPointerCasts();
1090
1091 // If we are dealing with a pointer global that is initialized to null and
1092 // only has one (non-null) value stored into it, then we can optimize any
1093 // users of the loaded value (often calls and loads) that would trap if the
1094 // value was null.
1095 if (GV->getInitializer()->getType()->isPointerTy() &&
1096 GV->getInitializer()->isNullValue() &&
1097 StoredOnceVal->getType()->isPointerTy() &&
1098 !NullPointerIsDefined(
1099 nullptr /* F */,
1100 GV->getInitializer()->getType()->getPointerAddressSpace())) {
1101 if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
1102 if (GV->getInitializer()->getType() != SOVC->getType())
1103 SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
1104
1105 // Optimize away any trapping uses of the loaded value.
1106 if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
1107 return true;
1108 } else if (isAllocationFn(StoredOnceVal, GetTLI)) {
1109 if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) {
1110 auto *TLI = &GetTLI(*CI->getFunction());
1111 if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, DL, TLI))
1112 return true;
1113 }
1114 }
1115 }
1116
1117 return false;
1118 }
1119
1120 /// At this point, we have learned that the only two values ever stored into GV
1121 /// are its initializer and OtherVal. See if we can shrink the global into a
1122 /// boolean and select between the two values whenever it is used. This exposes
1123 /// the values to other scalar optimizations.
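/// For example (illustrative IR), if only 0 and 5 are ever stored to
///   @g = internal global i32 0
/// it is shrunk to
///   @g.b = internal global i1 false
/// with stores rewritten to store an i1 and loads rebuilt as
///   select i1 %b, i32 5, i32 0
/// (or a simple zext when the two values are exactly 0 and 1).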
1124 static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
1125 Type *GVElType = GV->getValueType();
1126
1127 // If GVElType is already i1, it is already shrunk. If the type of the GV is
1128 // an FP value, pointer or vector, don't do this optimization because a select
1129 // between them is very expensive and unlikely to lead to later
1130 // simplification. In these cases, we typically end up with "cond ? v1 : v2"
1131 // where v1 and v2 both require constant pool loads, a big loss.
1132 if (GVElType == Type::getInt1Ty(GV->getContext()) ||
1133 GVElType->isFloatingPointTy() ||
1134 GVElType->isPointerTy() || GVElType->isVectorTy())
1135 return false;
1136
1137 // Walk the use list of the global seeing if all the uses are load or store.
1138 // If there is anything else, bail out.
1139 for (User *U : GV->users()) {
1140 if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
1141 return false;
1142 if (getLoadStoreType(U) != GVElType)
1143 return false;
1144 }
1145
1146 LLVM_DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n");
1147
1148 // Create the new global, initializing it to false.
1149 GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
1150 false,
1151 GlobalValue::InternalLinkage,
1152 ConstantInt::getFalse(GV->getContext()),
1153 GV->getName()+".b",
1154 GV->getThreadLocalMode(),
1155 GV->getType()->getAddressSpace());
1156 NewGV->copyAttributesFrom(GV);
1157 GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
1158
1159 Constant *InitVal = GV->getInitializer();
1160 assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
1161 "No reason to shrink to bool!");
1162
1163 SmallVector<DIGlobalVariableExpression *, 1> GVs;
1164 GV->getDebugInfo(GVs);
1165
1166 // If initialized to zero and storing one into the global, we can use a cast
1167 // instead of a select to synthesize the desired value.
1168 bool IsOneZero = false;
1169 bool EmitOneOrZero = true;
1170 auto *CI = dyn_cast<ConstantInt>(OtherVal);
1171 if (CI && CI->getValue().getActiveBits() <= 64) {
1172 IsOneZero = InitVal->isNullValue() && CI->isOne();
1173
1174 auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer());
1175 if (CIInit && CIInit->getValue().getActiveBits() <= 64) {
1176 uint64_t ValInit = CIInit->getZExtValue();
1177 uint64_t ValOther = CI->getZExtValue();
1178 uint64_t ValMinus = ValOther - ValInit;
1179
1180 for(auto *GVe : GVs){
1181 DIGlobalVariable *DGV = GVe->getVariable();
1182 DIExpression *E = GVe->getExpression();
1183 const DataLayout &DL = GV->getParent()->getDataLayout();
1184 unsigned SizeInOctets =
1185 DL.getTypeAllocSizeInBits(NewGV->getValueType()) / 8;
1186
1187         // It is expected that the address of the optimized global variable is
1188         // on top of the DWARF expression stack. After the optimization, the
1189         // value of that variable will be either 0 (the initial value) or 1
1190         // (the other value). The following expression should return a constant
1191         // integer value depending on the value at the global object's address:
1192         //   val * (ValOther - ValInit) + ValInit:
1193         //   DW_OP_deref_size <SizeInOctets> DW_OP_constu <ValMinus>
1194         //   DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
1195 SmallVector<uint64_t, 12> Ops = {
1196 dwarf::DW_OP_deref_size, SizeInOctets,
1197 dwarf::DW_OP_constu, ValMinus,
1198 dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
1199 dwarf::DW_OP_plus};
1200 bool WithStackValue = true;
1201 E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
1202 DIGlobalVariableExpression *DGVE =
1203 DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
1204 NewGV->addDebugInfo(DGVE);
1205 }
1206 EmitOneOrZero = false;
1207 }
1208 }
1209
1210 if (EmitOneOrZero) {
1211     // FIXME: This will only emit the address for the debugger; the value
1212     // written there will only ever be 0 or 1.
1213 for(auto *GV : GVs)
1214 NewGV->addDebugInfo(GV);
1215 }
1216
1217 while (!GV->use_empty()) {
1218 Instruction *UI = cast<Instruction>(GV->user_back());
1219 if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
1220 // Change the store into a boolean store.
1221 bool StoringOther = SI->getOperand(0) == OtherVal;
1222 // Only do this if we weren't storing a loaded value.
1223 Value *StoreVal;
1224 if (StoringOther || SI->getOperand(0) == InitVal) {
1225 StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
1226 StoringOther);
1227 } else {
1228 // Otherwise, we are storing a previously loaded copy. To do this,
1229 // change the copy from copying the original value to just copying the
1230 // bool.
1231 Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
1232
1233 // If we've already replaced the input, StoredVal will be a cast or
1234 // select instruction. If not, it will be a load of the original
1235 // global.
1236 if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
1237 assert(LI->getOperand(0) == GV && "Not a copy!");
1238 // Insert a new load, to preserve the saved value.
1239 StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
1240 LI->getName() + ".b", false, Align(1),
1241 LI->getOrdering(), LI->getSyncScopeID(), LI);
1242 } else {
1243 assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
1244 "This is not a form that we understand!");
1245 StoreVal = StoredVal->getOperand(0);
1246 assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
1247 }
1248 }
1249 StoreInst *NSI =
1250 new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(),
1251 SI->getSyncScopeID(), SI);
1252 NSI->setDebugLoc(SI->getDebugLoc());
1253 } else {
1254 // Change the load into a load of bool then a select.
1255 LoadInst *LI = cast<LoadInst>(UI);
1256 LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV,
1257 LI->getName() + ".b", false, Align(1),
1258 LI->getOrdering(), LI->getSyncScopeID(), LI);
1259 Instruction *NSI;
1260 if (IsOneZero)
1261 NSI = new ZExtInst(NLI, LI->getType(), "", LI);
1262 else
1263 NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
1264 NSI->takeName(LI);
1265 // Since LI is split into two instructions, NLI and NSI both inherit the
1266 // same DebugLoc
1267 NLI->setDebugLoc(LI->getDebugLoc());
1268 NSI->setDebugLoc(LI->getDebugLoc());
1269 LI->replaceAllUsesWith(NSI);
1270 }
1271 UI->eraseFromParent();
1272 }
1273
1274 // Retain the name of the old global variable. People who are debugging their
1275 // programs may expect these variables to be named the same.
1276 NewGV->takeName(GV);
1277 GV->eraseFromParent();
1278 return true;
1279 }
1280
1281 static bool
1282 deleteIfDead(GlobalValue &GV,
1283 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
1284 function_ref<void(Function &)> DeleteFnCallback = nullptr) {
1285 GV.removeDeadConstantUsers();
1286
1287 if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
1288 return false;
1289
1290 if (const Comdat *C = GV.getComdat())
1291 if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
1292 return false;
1293
1294 bool Dead;
1295 if (auto *F = dyn_cast<Function>(&GV))
1296 Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
1297 else
1298 Dead = GV.use_empty();
1299 if (!Dead)
1300 return false;
1301
1302 LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
1303 if (auto *F = dyn_cast<Function>(&GV)) {
1304 if (DeleteFnCallback)
1305 DeleteFnCallback(*F);
1306 }
1307 GV.eraseFromParent();
1308 ++NumDeleted;
1309 return true;
1310 }
1311
1312 static bool isPointerValueDeadOnEntryToFunction(
1313 const Function *F, GlobalValue *GV,
1314 function_ref<DominatorTree &(Function &)> LookupDomTree) {
1315 // Find all uses of GV. We expect them all to be in F, and if we can't
1316 // identify any of the uses we bail out.
1317 //
1318 // On each of these uses, identify if the memory that GV points to is
1319 // used/required/live at the start of the function. If it is not, for example
1320 // if the first thing the function does is store to the GV, the GV can
1321 // possibly be demoted.
1322 //
1323 // We don't do an exhaustive search for memory operations - simply look
1324 // through bitcasts as they're quite common and benign.
1325 const DataLayout &DL = GV->getParent()->getDataLayout();
1326 SmallVector<LoadInst *, 4> Loads;
1327 SmallVector<StoreInst *, 4> Stores;
1328 for (auto *U : GV->users()) {
1329 if (Operator::getOpcode(U) == Instruction::BitCast) {
1330 for (auto *UU : U->users()) {
1331 if (auto *LI = dyn_cast<LoadInst>(UU))
1332 Loads.push_back(LI);
1333 else if (auto *SI = dyn_cast<StoreInst>(UU))
1334 Stores.push_back(SI);
1335 else
1336 return false;
1337 }
1338 continue;
1339 }
1340
1341 Instruction *I = dyn_cast<Instruction>(U);
1342 if (!I)
1343 return false;
1344 assert(I->getParent()->getParent() == F);
1345
1346 if (auto *LI = dyn_cast<LoadInst>(I))
1347 Loads.push_back(LI);
1348 else if (auto *SI = dyn_cast<StoreInst>(I))
1349 Stores.push_back(SI);
1350 else
1351 return false;
1352 }
1353
1354 // We have identified all uses of GV into loads and stores. Now check if all
1355 // of them are known not to depend on the value of the global at the function
1356 // entry point. We do this by ensuring that every load is dominated by at
1357 // least one store.
1358 auto &DT = LookupDomTree(*const_cast<Function *>(F));
1359
1360 // The below check is quadratic. Check we're not going to do too many tests.
1361 // FIXME: Even though this will always have worst-case quadratic time, we
1362 // could put effort into minimizing the average time by putting stores that
1363 // have been shown to dominate at least one load at the beginning of the
1364 // Stores array, making subsequent dominance checks more likely to succeed
1365 // early.
1366 //
1367 // The threshold here is fairly large because global->local demotion is a
1368 // very powerful optimization should it fire.
1369 const unsigned Threshold = 100;
1370 if (Loads.size() * Stores.size() > Threshold)
1371 return false;
1372
1373 for (auto *L : Loads) {
1374 auto *LTy = L->getType();
1375 if (none_of(Stores, [&](const StoreInst *S) {
1376 auto *STy = S->getValueOperand()->getType();
1377 // The load is only dominated by the store if DomTree says so
1378 // and the number of bits loaded in L is less than or equal to
1379 // the number of bits stored in S.
1380 return DT.dominates(S, L) &&
1381 DL.getTypeStoreSize(LTy).getFixedSize() <=
1382 DL.getTypeStoreSize(STy).getFixedSize();
1383 }))
1384 return false;
1385 }
1386 // All loads have known dependences inside F, so the global can be localized.
1387 return true;
1388 }
1389
1390 /// C may have non-instruction users. Can all of those users be turned into
1391 /// instructions?
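/// For example (illustrative IR), a constant GEP user such as
///   %v = load i32, ptr getelementptr ([4 x i32], ptr @c, i64 0, i64 1)
/// only has instruction users, so it can later be rematerialized as a GEP
/// instruction; a ConstantExpr that appears inside another constant (e.g. a
/// global initializer) cannot, and we bail out.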
1392 static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
1393 // We don't do this exhaustively. The most common pattern that we really need
1394   // to care about is a constant GEP or constant bitcast - so we just look
1395   // through a single ConstantExpr.
1396 //
1397 // The set of constants that this function returns true for must be able to be
1398 // handled by makeAllConstantUsesInstructions.
1399 for (auto *U : C->users()) {
1400 if (isa<Instruction>(U))
1401 continue;
1402 if (!isa<ConstantExpr>(U))
1403 // Non instruction, non-constantexpr user; cannot convert this.
1404 return false;
1405 for (auto *UU : U->users())
1406 if (!isa<Instruction>(UU))
1407 // A constantexpr used by another constant. We don't try and recurse any
1408 // further but just bail out at this point.
1409 return false;
1410 }
1411
1412 return true;
1413 }
1414
1415 /// C may have non-instruction users, and
1416 /// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
1417 /// non-instruction users to instructions.
1418 static void makeAllConstantUsesInstructions(Constant *C) {
1419 SmallVector<ConstantExpr*,4> Users;
1420 for (auto *U : C->users()) {
1421 if (isa<ConstantExpr>(U))
1422 Users.push_back(cast<ConstantExpr>(U));
1423 else
1424 // We should never get here; allNonInstructionUsersCanBeMadeInstructions
1425 // should not have returned true for C.
1426 assert(
1427 isa<Instruction>(U) &&
1428 "Can't transform non-constantexpr non-instruction to instruction!");
1429 }
1430
1431 SmallVector<Value*,4> UUsers;
1432 for (auto *U : Users) {
1433 UUsers.clear();
1434 append_range(UUsers, U->users());
1435 for (auto *UU : UUsers) {
1436 Instruction *UI = cast<Instruction>(UU);
1437 Instruction *NewU = U->getAsInstruction(UI);
1438 UI->replaceUsesOfWith(U, NewU);
1439 }
1440 // We've replaced all the uses, so destroy the constant. (destroyConstant
1441 // will update value handles and metadata.)
1442 U->destroyConstant();
1443 }
1444 }
1445
1446 // For a global variable with one store, if the store dominates any loads,
1447 // those loads will always load the stored value (as opposed to the
1448 // initializer), even in the presence of recursion.
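// For example, if the only store to @g is `store i32 7, ptr @g` and it
// dominates a `load i32, ptr @g` in the same function, that load is replaced
// with the constant 7 and deleted (names illustrative).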
1449 static bool forwardStoredOnceStore(
1450 GlobalVariable *GV, const StoreInst *StoredOnceStore,
1451 function_ref<DominatorTree &(Function &)> LookupDomTree) {
1452 const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
1453 // We can do this optimization for non-constants in nosync + norecurse
1454   // functions, but globals used in exactly one norecurse function are already
1455 // promoted to an alloca.
1456 if (!isa<Constant>(StoredOnceValue))
1457 return false;
1458 const Function *F = StoredOnceStore->getFunction();
1459 SmallVector<LoadInst *> Loads;
1460 for (User *U : GV->users()) {
1461 if (auto *LI = dyn_cast<LoadInst>(U)) {
1462 if (LI->getFunction() == F &&
1463 LI->getType() == StoredOnceValue->getType() && LI->isSimple())
1464 Loads.push_back(LI);
1465 }
1466 }
1467 // Only compute DT if we have any loads to examine.
1468 bool MadeChange = false;
1469 if (!Loads.empty()) {
1470 auto &DT = LookupDomTree(*const_cast<Function *>(F));
1471 for (auto *LI : Loads) {
1472 if (DT.dominates(StoredOnceStore, LI)) {
1473 LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue));
1474 LI->eraseFromParent();
1475 MadeChange = true;
1476 }
1477 }
1478 }
1479 return MadeChange;
1480 }
1481
1482 /// Analyze the specified global variable and optimize
1483 /// it if possible. If we make a change, return true.
1484 static bool
1485 processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
1486 function_ref<TargetTransformInfo &(Function &)> GetTTI,
1487 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1488 function_ref<DominatorTree &(Function &)> LookupDomTree) {
1489 auto &DL = GV->getParent()->getDataLayout();
1490 // If this is a first class global and has only one accessing function and
1491 // this function is non-recursive, we replace the global with a local alloca
1492 // in this function.
1493 //
1494 // NOTE: It doesn't make sense to promote non-single-value types since we
1495 // are just replacing static memory with stack memory.
1496 //
1497 // If the global is in a different address space, don't bring it onto the stack.
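//
// For example (illustrative), a global such as
//   @counter = internal global i32 0
// that is only accessed from a single non-recursive function becomes an alloca
// in that function's entry block, initialized from the old initializer:
//   %counter = alloca i32
//   store i32 0, i32* %counter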
1498 if (!GS.HasMultipleAccessingFunctions &&
1499 GS.AccessingFunction &&
1500 GV->getValueType()->isSingleValueType() &&
1501 GV->getType()->getAddressSpace() == 0 &&
1502 !GV->isExternallyInitialized() &&
1503 allNonInstructionUsersCanBeMadeInstructions(GV) &&
1504 GS.AccessingFunction->doesNotRecurse() &&
1505 isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
1506 LookupDomTree)) {
1507 const DataLayout &DL = GV->getParent()->getDataLayout();
1508
1509 LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
1510 Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
1511 ->getEntryBlock().begin());
1512 Type *ElemTy = GV->getValueType();
1513 // FIXME: Pass Global's alignment when globals have alignment
1514 AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
1515 GV->getName(), &FirstI);
1516 if (!isa<UndefValue>(GV->getInitializer()))
1517 new StoreInst(GV->getInitializer(), Alloca, &FirstI);
1518
1519 makeAllConstantUsesInstructions(GV);
1520
1521 GV->replaceAllUsesWith(Alloca);
1522 GV->eraseFromParent();
1523 ++NumLocalized;
1524 return true;
1525 }
1526
1527 bool Changed = false;
1528
1529 // If the global is never loaded (but may be stored to), it is dead.
1530 // Delete it now.
1531 if (!GS.IsLoaded) {
1532 LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
1533
1534 if (isLeakCheckerRoot(GV)) {
1535 // Delete any constant stores to the global.
1536 Changed = CleanupPointerRootUsers(GV, GetTLI);
1537 } else {
1538 // Delete any stores we can find to the global. We may not be able to
1539 // make it completely dead though.
1540 Changed = CleanupConstantGlobalUsers(GV, DL);
1541 }
1542
1543 // If the global is dead now, delete it.
1544 if (GV->use_empty()) {
1545 GV->eraseFromParent();
1546 ++NumDeleted;
1547 Changed = true;
1548 }
1549 return Changed;
1550
1551 }
1552 if (GS.StoredType <= GlobalStatus::InitializerStored) {
1553 LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
1554
1555 // Don't actually mark a global constant if it's atomic because atomic loads
1556 // are implemented by a trivial cmpxchg in some edge-cases and that usually
1557 // requires write access to the variable even if it's not actually changed.
1558 if (GS.Ordering == AtomicOrdering::NotAtomic) {
1559 assert(!GV->isConstant() && "Expected a non-constant global");
1560 GV->setConstant(true);
1561 Changed = true;
1562 }
1563
1564 // Clean up any obviously simplifiable users now.
1565 Changed |= CleanupConstantGlobalUsers(GV, DL);
1566
1567 // If the global is dead now, just nuke it.
1568 if (GV->use_empty()) {
1569 LLVM_DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
1570 << "all users and delete global!\n");
1571 GV->eraseFromParent();
1572 ++NumDeleted;
1573 return true;
1574 }
1575
1576 // Fall through to the next check; see if we can optimize further.
1577 ++NumMarked;
1578 }
1579 if (!GV->getInitializer()->getType()->isSingleValueType()) {
1580 const DataLayout &DL = GV->getParent()->getDataLayout();
1581 if (SRAGlobal(GV, DL))
1582 return true;
1583 }
1584 Value *StoredOnceValue = GS.getStoredOnceValue();
1585 if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
1586 Function &StoreFn =
1587 const_cast<Function &>(*GS.StoredOnceStore->getFunction());
1588 bool CanHaveNonUndefGlobalInitializer =
1589 GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
1590 GV->getType()->getAddressSpace());
1591 // If the initial value for the global was an undef value, and if only
1592 // one other value was stored into it, we can just change the
1593 // initializer to be the stored value, then delete all stores to the
1594 // global. This allows us to mark it constant.
1595 // This is restricted to address spaces that allow globals to have
1596 // initializers. NVPTX, for example, does not support initializers for
1597 // shared memory (AS 3).
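// For example (illustrative):
//   @G = internal global i32 undef
//   store i32 42, i32* @G        ; the only store to @G
// becomes
//   @G = internal global i32 42
// after which the store is deletable and @G may be marked constant.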
1598 auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
1599 if (SOVConstant && isa<UndefValue>(GV->getInitializer()) &&
1600 DL.getTypeAllocSize(SOVConstant->getType()) ==
1601 DL.getTypeAllocSize(GV->getValueType()) &&
1602 CanHaveNonUndefGlobalInitializer) {
1603 if (SOVConstant->getType() == GV->getValueType()) {
1604 // Change the initializer in place.
1605 GV->setInitializer(SOVConstant);
1606 } else {
1607 // Create a new global with adjusted type.
1608 auto *NGV = new GlobalVariable(
1609 *GV->getParent(), SOVConstant->getType(), GV->isConstant(),
1610 GV->getLinkage(), SOVConstant, "", GV, GV->getThreadLocalMode(),
1611 GV->getAddressSpace());
1612 NGV->takeName(GV);
1613 NGV->copyAttributesFrom(GV);
1614 GV->replaceAllUsesWith(ConstantExpr::getBitCast(NGV, GV->getType()));
1615 GV->eraseFromParent();
1616 GV = NGV;
1617 }
1618
1619 // Clean up any obviously simplifiable users now.
1620 CleanupConstantGlobalUsers(GV, DL);
1621
1622 if (GV->use_empty()) {
1623 LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
1624 << "simplify all users and delete global!\n");
1625 GV->eraseFromParent();
1626 ++NumDeleted;
1627 }
1628 ++NumSubstitute;
1629 return true;
1630 }
1631
1632 // Try to optimize globals based on the knowledge that only one value
1633 // (besides its initializer) is ever stored to the global.
1634 if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI))
1635 return true;
1636
1637 // Try to forward the store to any loads. If we have more than one store, we
1638 // may have a store of the initializer between StoredOnceStore and a load.
1639 if (GS.NumStores == 1)
1640 if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree))
1641 return true;
1642
1643 // Otherwise, if the global was not a boolean, we can shrink it to be a
1644 // boolean. Skip this optimization for AS that doesn't allow an initializer.
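// For example (illustrative), a flag global that only ever holds its
// initializer or the single stored value, e.g.
//   @flag = internal global i32 0   ; only 0 and 10 are ever stored
// can be represented as an i1, with loads selecting between the two values.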
1645 if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
1646 (!isa<UndefValue>(GV->getInitializer()) ||
1647 CanHaveNonUndefGlobalInitializer)) {
1648 if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
1649 ++NumShrunkToBool;
1650 return true;
1651 }
1652 }
1653 }
1654
1655 return Changed;
1656 }
1657
1658 /// Analyze the specified global variable and optimize it if possible. If we
1659 /// make a change, return true.
1660 static bool
1661 processGlobal(GlobalValue &GV,
1662 function_ref<TargetTransformInfo &(Function &)> GetTTI,
1663 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1664 function_ref<DominatorTree &(Function &)> LookupDomTree) {
1665 if (GV.getName().startswith("llvm."))
1666 return false;
1667
1668 GlobalStatus GS;
1669
1670 if (GlobalStatus::analyzeGlobal(&GV, GS))
1671 return false;
1672
1673 bool Changed = false;
1674 if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
1675 auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
1676 : GlobalValue::UnnamedAddr::Local;
1677 if (NewUnnamedAddr != GV.getUnnamedAddr()) {
1678 GV.setUnnamedAddr(NewUnnamedAddr);
1679 NumUnnamed++;
1680 Changed = true;
1681 }
1682 }
1683
1684 // Do more involved optimizations if the global is internal.
1685 if (!GV.hasLocalLinkage())
1686 return Changed;
1687
1688 auto *GVar = dyn_cast<GlobalVariable>(&GV);
1689 if (!GVar)
1690 return Changed;
1691
1692 if (GVar->isConstant() || !GVar->hasInitializer())
1693 return Changed;
1694
1695 return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) ||
1696 Changed;
1697 }
1698
1699 /// Walk all of the direct calls of the specified function, changing them to
1700 /// FastCC.
1701 static void ChangeCalleesToFastCall(Function *F) {
1702 for (User *U : F->users()) {
1703 if (isa<BlockAddress>(U))
1704 continue;
1705 cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
1706 }
1707 }
1708
1709 static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
1710 Attribute::AttrKind A) {
1711 unsigned AttrIndex;
1712 if (Attrs.hasAttrSomewhere(A, &AttrIndex))
1713 return Attrs.removeAttributeAtIndex(C, AttrIndex, A);
1714 return Attrs;
1715 }
1716
1717 static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
1718 F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
1719 for (User *U : F->users()) {
1720 if (isa<BlockAddress>(U))
1721 continue;
1722 CallBase *CB = cast<CallBase>(U);
1723 CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A));
1724 }
1725 }
1726
1727 /// Return true if this is a calling convention that we'd like to change. The
1728 /// idea here is that we don't want to mess with the convention if the user
1729 /// explicitly requested something with performance implications like coldcc,
1730 /// GHC, or anyregcc.
1731 static bool hasChangeableCC(Function *F) {
1732 CallingConv::ID CC = F->getCallingConv();
1733
1734 // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
1735 if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
1736 return false;
1737
1738 // FIXME: Change CC for the whole chain of musttail calls when possible.
1739 //
1740 // Can't change CC of the function that either has musttail calls, or is a
1741 // musttail callee itself
1742 for (User *U : F->users()) {
1743 if (isa<BlockAddress>(U))
1744 continue;
1745 CallInst* CI = dyn_cast<CallInst>(U);
1746 if (!CI)
1747 continue;
1748
1749 if (CI->isMustTailCall())
1750 return false;
1751 }
1752
1753 for (BasicBlock &BB : *F)
1754 if (BB.getTerminatingMustTailCall())
1755 return false;
1756
1757 return true;
1758 }
1759
1760 /// Return true if the block containing the call site has a BlockFrequency of
1761 /// less than ColdCCRelFreq% of the entry block.
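/// For example (illustrative), if ColdCCRelFreq is 2, a call site is considered
/// cold only when its block frequency is below 2% of the entry block frequency.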
1762 static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) {
1763 const BranchProbability ColdProb(ColdCCRelFreq, 100);
1764 auto *CallSiteBB = CB.getParent();
1765 auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
1766 auto CallerEntryFreq =
1767 CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock()));
1768 return CallSiteFreq < CallerEntryFreq * ColdProb;
1769 }
1770
1771 // This function checks if the input function F is cold at all call sites. It
1772 // also looks at each call site's containing function, returning false if the
1773 // caller function contains other non-cold calls. The input vector AllCallsCold
1774 // contains a list of functions that only have call sites in cold blocks.
1775 static bool
1776 isValidCandidateForColdCC(Function &F,
1777 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1778 const std::vector<Function *> &AllCallsCold) {
1779
1780 if (F.user_empty())
1781 return false;
1782
1783 for (User *U : F.users()) {
1784 if (isa<BlockAddress>(U))
1785 continue;
1786
1787 CallBase &CB = cast<CallBase>(*U);
1788 Function *CallerFunc = CB.getParent()->getParent();
1789 BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
1790 if (!isColdCallSite(CB, CallerBFI))
1791 return false;
1792 if (!llvm::is_contained(AllCallsCold, CallerFunc))
1793 return false;
1794 }
1795 return true;
1796 }
1797
1798 static void changeCallSitesToColdCC(Function *F) {
1799 for (User *U : F->users()) {
1800 if (isa<BlockAddress>(U))
1801 continue;
1802 cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
1803 }
1804 }
1805
1806 // This function iterates over all the call instructions in the input Function
1807 // and checks that all call sites are in cold blocks and are allowed to use the
1808 // coldcc calling convention.
1809 static bool
1810 hasOnlyColdCalls(Function &F,
1811 function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
1812 for (BasicBlock &BB : F) {
1813 for (Instruction &I : BB) {
1814 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1815 // Skip over inline asm instructions since they aren't function calls.
1816 if (CI->isInlineAsm())
1817 continue;
1818 Function *CalledFn = CI->getCalledFunction();
1819 if (!CalledFn)
1820 return false;
1821 if (!CalledFn->hasLocalLinkage())
1822 return false;
1823 // Skip over intrinsics since they won't remain as function calls.
1824 if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
1825 continue;
1826 // Check if it's valid to use coldcc calling convention.
1827 if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
1828 CalledFn->hasAddressTaken())
1829 return false;
1830 BlockFrequencyInfo &CallerBFI = GetBFI(F);
1831 if (!isColdCallSite(*CI, CallerBFI))
1832 return false;
1833 }
1834 }
1835 }
1836 return true;
1837 }
1838
1839 static bool hasMustTailCallers(Function *F) {
1840 for (User *U : F->users()) {
1841 CallBase *CB = dyn_cast<CallBase>(U);
1842 if (!CB) {
1843 assert(isa<BlockAddress>(U) &&
1844 "Expected either CallBase or BlockAddress");
1845 continue;
1846 }
1847 if (CB->isMustTailCall())
1848 return true;
1849 }
1850 return false;
1851 }
1852
1853 static bool hasInvokeCallers(Function *F) {
1854 for (User *U : F->users())
1855 if (isa<InvokeInst>(U))
1856 return true;
1857 return false;
1858 }
1859
1860 static void RemovePreallocated(Function *F) {
1861 RemoveAttribute(F, Attribute::Preallocated);
1862
1863 auto *M = F->getParent();
1864
1865 IRBuilder<> Builder(M->getContext());
1866
1867 // Cannot modify users() while iterating over it, so make a copy.
1868 SmallVector<User *, 4> PreallocatedCalls(F->users());
1869 for (User *U : PreallocatedCalls) {
1870 CallBase *CB = dyn_cast<CallBase>(U);
1871 if (!CB)
1872 continue;
1873
1874 assert(
1875 !CB->isMustTailCall() &&
1876 "Shouldn't call RemotePreallocated() on a musttail preallocated call");
1877 // Create copy of call without "preallocated" operand bundle.
1878 SmallVector<OperandBundleDef, 1> OpBundles;
1879 CB->getOperandBundlesAsDefs(OpBundles);
1880 CallBase *PreallocatedSetup = nullptr;
1881 for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
1882 if (It->getTag() == "preallocated") {
1883 PreallocatedSetup = cast<CallBase>(*It->input_begin());
1884 OpBundles.erase(It);
1885 break;
1886 }
1887 }
1888 assert(PreallocatedSetup && "Did not find preallocated bundle");
1889 uint64_t ArgCount =
1890 cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();
1891
1892 assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
1893 "Unknown indirect call type");
1894 CallBase *NewCB = CallBase::Create(CB, OpBundles, CB);
1895 CB->replaceAllUsesWith(NewCB);
1896 NewCB->takeName(CB);
1897 CB->eraseFromParent();
1898
1899 Builder.SetInsertPoint(PreallocatedSetup);
1900 auto *StackSave =
1901 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave));
1902
1903 Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction());
1904 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
1905 StackSave);
1906
1907 // Replace @llvm.call.preallocated.arg() with alloca.
1908 // Cannot modify users() while iterating over it, so make a copy.
1909 // @llvm.call.preallocated.arg() can be called with the same index multiple
1910 // times. So for each @llvm.call.preallocated.arg(), we see if we have
1911 // already created a Value* for the index, and if not, create an alloca and
1912 // bitcast right after the @llvm.call.preallocated.setup() so that it
1913 // dominates all uses.
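// For example (illustrative), each
//   %arg = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(i32)
// becomes an alloca created right after the setup call:
//   %paarg = alloca i32
//   %arg1 = bitcast i32* %paarg to i8*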
1914 SmallVector<Value *, 2> ArgAllocas(ArgCount);
1915 SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users());
1916 for (auto *User : PreallocatedArgs) {
1917 auto *UseCall = cast<CallBase>(User);
1918 assert(UseCall->getCalledFunction()->getIntrinsicID() ==
1919 Intrinsic::call_preallocated_arg &&
1920 "preallocated token use was not a llvm.call.preallocated.arg");
1921 uint64_t AllocArgIndex =
1922 cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue();
1923 Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
1924 if (!AllocaReplacement) {
1925 auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
1926 auto *ArgType =
1927 UseCall->getFnAttr(Attribute::Preallocated).getValueAsType();
1928 auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
1929 Builder.SetInsertPoint(InsertBefore);
1930 auto *Alloca =
1931 Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg");
1932 auto *BitCast = Builder.CreateBitCast(
1933 Alloca, Type::getInt8PtrTy(M->getContext()), UseCall->getName());
1934 ArgAllocas[AllocArgIndex] = BitCast;
1935 AllocaReplacement = BitCast;
1936 }
1937
1938 UseCall->replaceAllUsesWith(AllocaReplacement);
1939 UseCall->eraseFromParent();
1940 }
1941 // Remove @llvm.call.preallocated.setup().
1942 cast<Instruction>(PreallocatedSetup)->eraseFromParent();
1943 }
1944 }
1945
1946 static bool
1947 OptimizeFunctions(Module &M,
1948 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1949 function_ref<TargetTransformInfo &(Function &)> GetTTI,
1950 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1951 function_ref<DominatorTree &(Function &)> LookupDomTree,
1952 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
1953 function_ref<void(Function &F)> ChangedCFGCallback,
1954 function_ref<void(Function &F)> DeleteFnCallback) {
1955
1956 bool Changed = false;
1957
1958 std::vector<Function *> AllCallsCold;
1959 for (Function &F : llvm::make_early_inc_range(M))
1960 if (hasOnlyColdCalls(F, GetBFI))
1961 AllCallsCold.push_back(&F);
1962
1963 // Optimize functions.
1964 for (Function &F : llvm::make_early_inc_range(M)) {
1965 // Don't perform global opt pass on naked functions; we don't want fast
1966 // calling conventions for naked functions.
1967 if (F.hasFnAttribute(Attribute::Naked))
1968 continue;
1969
1970 // Functions without names cannot be referenced outside this module.
1971 if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage())
1972 F.setLinkage(GlobalValue::InternalLinkage);
1973
1974 if (deleteIfDead(F, NotDiscardableComdats, DeleteFnCallback)) {
1975 Changed = true;
1976 continue;
1977 }
1978
1979 // LLVM's definition of dominance allows instructions that are cyclic
1980 // in unreachable blocks, e.g.:
1981 // %pat = select i1 %condition, @global, i16* %pat
1982 // because any instruction dominates an instruction in a block that's
1983 // not reachable from entry.
1984 // So, remove unreachable blocks from the function, because a) there's
1985 // no point in analyzing them and b) GlobalOpt should otherwise grow
1986 // some more complicated logic to break these cycles.
1987 // Notify the analysis manager that we've modified the function's CFG.
1988 if (!F.isDeclaration()) {
1989 if (removeUnreachableBlocks(F)) {
1990 Changed = true;
1991 ChangedCFGCallback(F);
1992 }
1993 }
1994
1995 Changed |= processGlobal(F, GetTTI, GetTLI, LookupDomTree);
1996
1997 if (!F.hasLocalLinkage())
1998 continue;
1999
2000 // If we have an inalloca parameter that we can safely remove the
2001 // inalloca attribute from, do so. This unlocks optimizations that
2002 // wouldn't be safe in the presence of inalloca.
2003 // FIXME: We should also hoist alloca affected by this to the entry
2004 // block if possible.
2005 if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
2006 !F.hasAddressTaken() && !hasMustTailCallers(&F) && !F.isVarArg()) {
2007 RemoveAttribute(&F, Attribute::InAlloca);
2008 Changed = true;
2009 }
2010
2011 // FIXME: handle invokes
2012 // FIXME: handle musttail
2013 if (F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
2014 if (!F.hasAddressTaken() && !hasMustTailCallers(&F) &&
2015 !hasInvokeCallers(&F)) {
2016 RemovePreallocated(&F);
2017 Changed = true;
2018 }
2019 continue;
2020 }
2021
2022 if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
2023 NumInternalFunc++;
2024 TargetTransformInfo &TTI = GetTTI(F);
2025 // Change the calling convention to coldcc if either stress testing is
2026 // enabled or the target would like to use coldcc on functions which are
2027 // cold at all call sites and the callers contain no other non coldcc
2028 // calls.
2029 if (EnableColdCCStressTest ||
2030 (TTI.useColdCCForColdCall(F) &&
2031 isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
2032 F.setCallingConv(CallingConv::Cold);
2033 changeCallSitesToColdCC(&F);
2034 Changed = true;
2035 NumColdCC++;
2036 }
2037 }
2038
2039 if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
2040 // If this function has a calling convention worth changing, is not a
2041 // varargs function, and is only called directly, promote it to use the
2042 // Fast calling convention.
2043 F.setCallingConv(CallingConv::Fast);
2044 ChangeCalleesToFastCall(&F);
2045 ++NumFastCallFns;
2046 Changed = true;
2047 }
2048
2049 if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
2050 !F.hasAddressTaken()) {
2051 // The function is not used by a trampoline intrinsic, so it is safe
2052 // to remove the 'nest' attribute.
2053 RemoveAttribute(&F, Attribute::Nest);
2054 ++NumNestRemoved;
2055 Changed = true;
2056 }
2057 }
2058 return Changed;
2059 }
2060
2061 static bool
2062 OptimizeGlobalVars(Module &M,
2063 function_ref<TargetTransformInfo &(Function &)> GetTTI,
2064 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2065 function_ref<DominatorTree &(Function &)> LookupDomTree,
2066 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2067 bool Changed = false;
2068
2069 for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
2070 // Global variables without names cannot be referenced outside this module.
2071 if (!GV.hasName() && !GV.isDeclaration() && !GV.hasLocalLinkage())
2072 GV.setLinkage(GlobalValue::InternalLinkage);
2073 // Simplify the initializer.
2074 if (GV.hasInitializer())
2075 if (auto *C = dyn_cast<Constant>(GV.getInitializer())) {
2076 auto &DL = M.getDataLayout();
2077 // TLI is not used in the case of a Constant, so use default nullptr
2078 // for that optional parameter, since we don't have a Function to
2079 // provide GetTLI anyway.
2080 Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
2081 if (New != C)
2082 GV.setInitializer(New);
2083 }
2084
2085 if (deleteIfDead(GV, NotDiscardableComdats)) {
2086 Changed = true;
2087 continue;
2088 }
2089
2090 Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
2091 }
2092 return Changed;
2093 }
2094
2095 /// Evaluate static constructors in the function, if we can. Return true if we
2096 /// can, false otherwise.
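/// For example (illustrative), a constructor whose body is just
///   store i32 42, i32* @g
///   ret void
/// can be fully evaluated; @g's initializer is updated to 42 and the caller can
/// then drop the constructor from llvm.global_ctors.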
2097 static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
2098 TargetLibraryInfo *TLI) {
2099 // Skip external functions.
2100 if (F->isDeclaration())
2101 return false;
2102 // Call the function.
2103 Evaluator Eval(DL, TLI);
2104 Constant *RetValDummy;
2105 bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
2106 SmallVector<Constant*, 0>());
2107
2108 if (EvalSuccess) {
2109 ++NumCtorsEvaluated;
2110
2111 // We succeeded at evaluation: commit the result.
2112 auto NewInitializers = Eval.getMutatedInitializers();
2113 LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
2114 << F->getName() << "' to " << NewInitializers.size()
2115 << " stores.\n");
2116 for (const auto &Pair : NewInitializers)
2117 Pair.first->setInitializer(Pair.second);
2118 for (GlobalVariable *GV : Eval.getInvariants())
2119 GV->setConstant(true);
2120 }
2121
2122 return EvalSuccess;
2123 }
2124
2125 static int compareNames(Constant *const *A, Constant *const *B) {
2126 Value *AStripped = (*A)->stripPointerCasts();
2127 Value *BStripped = (*B)->stripPointerCasts();
2128 return AStripped->getName().compare(BStripped->getName());
2129 }
2130
2131 static void setUsedInitializer(GlobalVariable &V,
2132 const SmallPtrSetImpl<GlobalValue *> &Init) {
2133 if (Init.empty()) {
2134 V.eraseFromParent();
2135 return;
2136 }
2137
2138 // Type of pointer to the array of pointers.
2139 PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
2140
2141 SmallVector<Constant *, 8> UsedArray;
2142 for (GlobalValue *GV : Init) {
2143 Constant *Cast
2144 = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
2145 UsedArray.push_back(Cast);
2146 }
2147 // Sort to get deterministic order.
2148 array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
2149 ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
2150
2151 Module *M = V.getParent();
2152 V.removeFromParent();
2153 GlobalVariable *NV =
2154 new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
2155 ConstantArray::get(ATy, UsedArray), "");
2156 NV->takeName(&V);
2157 NV->setSection("llvm.metadata");
2158 delete &V;
2159 }
2160
2161 namespace {
2162
2163 /// An easy to access representation of llvm.used and llvm.compiler.used.
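/// These arrays typically look like (illustrative):
///   @llvm.used = appending global [1 x i8*]
///       [i8* bitcast (void ()* @f to i8*)], section "llvm.metadata"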
2164 class LLVMUsed {
2165 SmallPtrSet<GlobalValue *, 4> Used;
2166 SmallPtrSet<GlobalValue *, 4> CompilerUsed;
2167 GlobalVariable *UsedV;
2168 GlobalVariable *CompilerUsedV;
2169
2170 public:
2171 LLVMUsed(Module &M) {
2172 SmallVector<GlobalValue *, 4> Vec;
2173 UsedV = collectUsedGlobalVariables(M, Vec, false);
2174 Used = {Vec.begin(), Vec.end()};
2175 Vec.clear();
2176 CompilerUsedV = collectUsedGlobalVariables(M, Vec, true);
2177 CompilerUsed = {Vec.begin(), Vec.end()};
2178 }
2179
2180 using iterator = SmallPtrSet<GlobalValue *, 4>::iterator;
2181 using used_iterator_range = iterator_range<iterator>;
2182
2183 iterator usedBegin() { return Used.begin(); }
2184 iterator usedEnd() { return Used.end(); }
2185
2186 used_iterator_range used() {
2187 return used_iterator_range(usedBegin(), usedEnd());
2188 }
2189
2190 iterator compilerUsedBegin() { return CompilerUsed.begin(); }
2191 iterator compilerUsedEnd() { return CompilerUsed.end(); }
2192
2193 used_iterator_range compilerUsed() {
2194 return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
2195 }
2196
2197 bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
2198
2199 bool compilerUsedCount(GlobalValue *GV) const {
2200 return CompilerUsed.count(GV);
2201 }
2202
2203 bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
2204 bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
2205 bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
2206
2207 bool compilerUsedInsert(GlobalValue *GV) {
2208 return CompilerUsed.insert(GV).second;
2209 }
2210
2211 void syncVariablesAndSets() {
2212 if (UsedV)
2213 setUsedInitializer(*UsedV, Used);
2214 if (CompilerUsedV)
2215 setUsedInitializer(*CompilerUsedV, CompilerUsed);
2216 }
2217 };
2218
2219 } // end anonymous namespace
2220
2221 static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
2222 if (GA.use_empty()) // No use at all.
2223 return false;
2224
2225 assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
2226 "We should have removed the duplicated "
2227 "element from llvm.compiler.used");
2228 if (!GA.hasOneUse())
2229 // Strictly more than one use. So at least one is not in llvm.used and
2230 // llvm.compiler.used.
2231 return true;
2232
2233 // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
2234 return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
2235 }
2236
2237 static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
2238 const LLVMUsed &U) {
2239 unsigned N = 2;
2240 assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
2241 "We should have removed the duplicated "
2242 "element from llvm.compiler.used");
2243 if (U.usedCount(&V) || U.compilerUsedCount(&V))
2244 ++N;
2245 return V.hasNUsesOrMore(N);
2246 }
2247
2248 static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
2249 if (!GA.hasLocalLinkage())
2250 return true;
2251
2252 return U.usedCount(&GA) || U.compilerUsedCount(&GA);
2253 }
2254
2255 static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
2256 bool &RenameTarget) {
2257 RenameTarget = false;
2258 bool Ret = false;
2259 if (hasUseOtherThanLLVMUsed(GA, U))
2260 Ret = true;
2261
2262 // If the alias is externally visible, we may still be able to simplify it.
2263 if (!mayHaveOtherReferences(GA, U))
2264 return Ret;
2265
2266 // If the aliasee has internal linkage, give it the name and linkage
2267 // of the alias, and delete the alias. This turns:
2268 // define internal ... @f(...)
2269 // @a = alias ... @f
2270 // into:
2271 // define ... @a(...)
2272 Constant *Aliasee = GA.getAliasee();
2273 GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
2274 if (!Target->hasLocalLinkage())
2275 return Ret;
2276
2277 // Do not perform the transform if multiple aliases potentially target the
2278 // aliasee. This check also ensures that it is safe to replace the section
2279 // and other attributes of the aliasee with those of the alias.
2280 if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
2281 return Ret;
2282
2283 RenameTarget = true;
2284 return true;
2285 }
2286
2287 static bool
2288 OptimizeGlobalAliases(Module &M,
2289 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2290 bool Changed = false;
2291 LLVMUsed Used(M);
2292
2293 for (GlobalValue *GV : Used.used())
2294 Used.compilerUsedErase(GV);
2295
2296 // Return whether GV is explicitly or implicitly dso_local and not replaceable
2297 // by another definition in the current linkage unit.
2298 auto IsModuleLocal = [](GlobalValue &GV) {
2299 return !GlobalValue::isInterposableLinkage(GV.getLinkage()) &&
2300 (GV.isDSOLocal() || GV.isImplicitDSOLocal());
2301 };
2302
2303 for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
2304 // Aliases without names cannot be referenced outside this module.
2305 if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
2306 J.setLinkage(GlobalValue::InternalLinkage);
2307
2308 if (deleteIfDead(J, NotDiscardableComdats)) {
2309 Changed = true;
2310 continue;
2311 }
2312
2313 // If the alias can change at link time, nothing can be done - bail out.
2314 if (!IsModuleLocal(J))
2315 continue;
2316
2317 Constant *Aliasee = J.getAliasee();
2318 GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
2319 // We can't trivially replace the alias with the aliasee if the aliasee is
2320 // non-trivial in some way. We also can't replace the alias with the aliasee
2321 // if the aliasee may be preemptible at runtime. On ELF, a non-preemptible
2322 // alias can be used to access the definition as if preemption did not
2323 // happen.
2324 // TODO: Try to handle non-zero GEPs of local aliasees.
2325 if (!Target || !IsModuleLocal(*Target))
2326 continue;
2327
2328 Target->removeDeadConstantUsers();
2329
2330 // Make all users of the alias use the aliasee instead.
2331 bool RenameTarget;
2332 if (!hasUsesToReplace(J, Used, RenameTarget))
2333 continue;
2334
2335 J.replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J.getType()));
2336 ++NumAliasesResolved;
2337 Changed = true;
2338
2339 if (RenameTarget) {
2340 // Give the aliasee the name, linkage and other attributes of the alias.
2341 Target->takeName(&J);
2342 Target->setLinkage(J.getLinkage());
2343 Target->setDSOLocal(J.isDSOLocal());
2344 Target->setVisibility(J.getVisibility());
2345 Target->setDLLStorageClass(J.getDLLStorageClass());
2346
2347 if (Used.usedErase(&J))
2348 Used.usedInsert(Target);
2349
2350 if (Used.compilerUsedErase(&J))
2351 Used.compilerUsedInsert(Target);
2352 } else if (mayHaveOtherReferences(J, Used))
2353 continue;
2354
2355 // Delete the alias.
2356 M.getAliasList().erase(&J);
2357 ++NumAliasesRemoved;
2358 Changed = true;
2359 }
2360
2361 Used.syncVariablesAndSets();
2362
2363 return Changed;
2364 }
2365
2366 static Function *
2367 FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
2368 // Hack to get a default TLI before we have an actual Function.
2369 auto FuncIter = M.begin();
2370 if (FuncIter == M.end())
2371 return nullptr;
2372 auto *TLI = &GetTLI(*FuncIter);
2373
2374 LibFunc F = LibFunc_cxa_atexit;
2375 if (!TLI->has(F))
2376 return nullptr;
2377
2378 Function *Fn = M.getFunction(TLI->getName(F));
2379 if (!Fn)
2380 return nullptr;
2381
2382 // Now get the actual TLI for Fn.
2383 TLI = &GetTLI(*Fn);
2384
2385 // Make sure that the function has the correct prototype.
2386 if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
2387 return nullptr;
2388
2389 return Fn;
2390 }
2391
2392 /// Returns whether the given function is an empty C++ destructor and can
2393 /// therefore be eliminated.
2394 /// Note that we assume that other optimization passes have already simplified
2395 /// the code so we simply check for 'ret'.
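/// For example (illustrative), a destructor whose entry block is just
///   entry:
///     ret void
/// (possibly preceded by debug or pseudo instructions) counts as empty.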
2396 static bool cxxDtorIsEmpty(const Function &Fn) {
2397 // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
2398 // nounwind, but that doesn't seem worth doing.
2399 if (Fn.isDeclaration())
2400 return false;
2401
2402 for (auto &I : Fn.getEntryBlock()) {
2403 if (I.isDebugOrPseudoInst())
2404 continue;
2405 if (isa<ReturnInst>(I))
2406 return true;
2407 break;
2408 }
2409 return false;
2410 }
2411
2412 static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
2413 /// Itanium C++ ABI p3.3.5:
2414 ///
2415 /// After constructing a global (or local static) object, that will require
2416 /// destruction on exit, a termination function is registered as follows:
2417 ///
2418 /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
2419 ///
2420 /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
2421 /// call f(p) when DSO d is unloaded, before all such termination calls
2422 /// registered before this one. It returns zero if registration is
2423 /// successful, nonzero on failure.
2424
2425 // This pass will look for calls to __cxa_atexit that register a trivial
2426 // destructor and remove them.
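// For example (illustrative), a registration such as
//   %r = call i32 @__cxa_atexit(void (i8*)* @empty_dtor, i8* %obj, i8* @__dso_handle)
// where @empty_dtor immediately returns, is deleted and %r is replaced with 0.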
2427 bool Changed = false;
2428
2429 for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) {
2430 // We're only interested in calls. Theoretically, we could handle invoke
2431 // instructions as well, but neither llvm-gcc nor clang generates invokes
2432 // to __cxa_atexit.
2433 CallInst *CI = dyn_cast<CallInst>(U);
2434 if (!CI)
2435 continue;
2436
2437 Function *DtorFn =
2438 dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
2439 if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
2440 continue;
2441
2442 // Just remove the call.
2443 CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
2444 CI->eraseFromParent();
2445
2446 ++NumCXXDtorsRemoved;
2447
2448 Changed |= true;
2449 }
2450
2451 return Changed;
2452 }
2453
2454 static bool
2455 optimizeGlobalsInModule(Module &M, const DataLayout &DL,
2456 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2457 function_ref<TargetTransformInfo &(Function &)> GetTTI,
2458 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2459 function_ref<DominatorTree &(Function &)> LookupDomTree,
2460 function_ref<void(Function &F)> ChangedCFGCallback,
2461 function_ref<void(Function &F)> DeleteFnCallback) {
2462 SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
2463 bool Changed = false;
2464 bool LocalChange = true;
2465 Optional<uint32_t> FirstNotFullyEvaluatedPriority;
2466
2467 while (LocalChange) {
2468 LocalChange = false;
2469
2470 NotDiscardableComdats.clear();
2471 for (const GlobalVariable &GV : M.globals())
2472 if (const Comdat *C = GV.getComdat())
2473 if (!GV.isDiscardableIfUnused() || !GV.use_empty())
2474 NotDiscardableComdats.insert(C);
2475 for (Function &F : M)
2476 if (const Comdat *C = F.getComdat())
2477 if (!F.isDefTriviallyDead())
2478 NotDiscardableComdats.insert(C);
2479 for (GlobalAlias &GA : M.aliases())
2480 if (const Comdat *C = GA.getComdat())
2481 if (!GA.isDiscardableIfUnused() || !GA.use_empty())
2482 NotDiscardableComdats.insert(C);
2483
2484 // Delete functions that are trivially dead, ccc -> fastcc
2485 LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
2486 NotDiscardableComdats, ChangedCFGCallback,
2487 DeleteFnCallback);
2488
2489 // Optimize global_ctors list.
2490 LocalChange |=
2491 optimizeGlobalCtorsList(M, [&](uint32_t Priority, Function *F) {
2492 if (FirstNotFullyEvaluatedPriority &&
2493 *FirstNotFullyEvaluatedPriority != Priority)
2494 return false;
2495 bool Evaluated = EvaluateStaticConstructor(F, DL, &GetTLI(*F));
2496 if (!Evaluated)
2497 FirstNotFullyEvaluatedPriority = Priority;
2498 return Evaluated;
2499 });
2500
2501 // Optimize non-address-taken globals.
2502 LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
2503 NotDiscardableComdats);
2504
2505 // Resolve aliases, when possible.
2506 LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
2507
2508 // Try to remove trivial global destructors if they are not removed
2509 // already.
2510 Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI);
2511 if (CXAAtExitFn)
2512 LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
2513
2514 Changed |= LocalChange;
2515 }
2516
2517 // TODO: Move all global ctors functions to the end of the module for code
2518 // layout.
2519
2520 return Changed;
2521 }
2522
2523 PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
2524 auto &DL = M.getDataLayout();
2525 auto &FAM =
2526 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2527 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{
2528 return FAM.getResult<DominatorTreeAnalysis>(F);
2529 };
2530 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2531 return FAM.getResult<TargetLibraryAnalysis>(F);
2532 };
2533 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
2534 return FAM.getResult<TargetIRAnalysis>(F);
2535 };
2536
2537 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
2538 return FAM.getResult<BlockFrequencyAnalysis>(F);
2539 };
2540 auto ChangedCFGCallback = [&FAM](Function &F) {
2541 FAM.invalidate(F, PreservedAnalyses::none());
2542 };
2543 auto DeleteFnCallback = [&FAM](Function &F) { FAM.clear(F, F.getName()); };
2544
2545 if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
2546 ChangedCFGCallback, DeleteFnCallback))
2547 return PreservedAnalyses::all();
2548
2549 PreservedAnalyses PA = PreservedAnalyses::none();
2550 // We made sure to clear analyses for deleted functions.
2551 PA.preserve<FunctionAnalysisManagerModuleProxy>();
2552 // The only place we modify the CFG is when calling
2553 // removeUnreachableBlocks(), but there we make sure to invalidate analyses
2554 // for modified functions.
2555 PA.preserveSet<CFGAnalyses>();
2556 return PA;
2557 }
2558
2559 namespace {
2560
2561 struct GlobalOptLegacyPass : public ModulePass {
2562 static char ID; // Pass identification, replacement for typeid
2563
2564 GlobalOptLegacyPass() : ModulePass(ID) {
2565 initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
2566 }
2567
2568 bool runOnModule(Module &M) override {
2569 if (skipModule(M))
2570 return false;
2571
2572 auto &DL = M.getDataLayout();
2573 auto LookupDomTree = [this](Function &F) -> DominatorTree & {
2574 return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
2575 };
2576 auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
2577 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2578 };
2579 auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
2580 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2581 };
2582
2583 auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
2584 return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
2585 };
2586
2587 auto ChangedCFGCallback = [&LookupDomTree](Function &F) {
2588 auto &DT = LookupDomTree(F);
2589 DT.recalculate(F);
2590 };
2591
2592 return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
2593 ChangedCFGCallback, nullptr);
2594 }
2595
2596 void getAnalysisUsage(AnalysisUsage &AU) const override {
2597 AU.addRequired<TargetLibraryInfoWrapperPass>();
2598 AU.addRequired<TargetTransformInfoWrapperPass>();
2599 AU.addRequired<DominatorTreeWrapperPass>();
2600 AU.addRequired<BlockFrequencyInfoWrapperPass>();
2601 }
2602 };
2603
2604 } // end anonymous namespace
2605
2606 char GlobalOptLegacyPass::ID = 0;
2607
2608 INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
2609 "Global Variable Optimizer", false, false)
2610 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2611 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
2612 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
2613 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2614 INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
2615 "Global Variable Optimizer", false, false)
2616
2617 ModulePass *llvm::createGlobalOptimizerPass() {
2618 return new GlobalOptLegacyPass();
2619 }
2620