1 //===- GlobalSplit.cpp - global variable splitter -------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass uses inrange annotations on GEP indices to split globals where 11 // beneficial. Clang currently attaches these annotations to references to 12 // virtual table globals under the Itanium ABI for the benefit of the 13 // whole-program virtual call optimization and control flow integrity passes. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/IPO.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/IR/Constants.h" 20 #include "llvm/IR/GlobalVariable.h" 21 #include "llvm/IR/Intrinsics.h" 22 #include "llvm/IR/Module.h" 23 #include "llvm/IR/Operator.h" 24 #include "llvm/Pass.h" 25 26 #include <set> 27 28 using namespace llvm; 29 30 namespace { 31 32 bool splitGlobal(GlobalVariable &GV) { 33 // If the address of the global is taken outside of the module, we cannot 34 // apply this transformation. 35 if (!GV.hasLocalLinkage()) 36 return false; 37 38 // We currently only know how to split ConstantStructs. 39 auto *Init = dyn_cast_or_null<ConstantStruct>(GV.getInitializer()); 40 if (!Init) 41 return false; 42 43 // Verify that each user of the global is an inrange getelementptr constant. 44 // From this it follows that any loads from or stores to that global must use 45 // a pointer derived from an inrange getelementptr constant, which is 46 // sufficient to allow us to apply the splitting transform. 47 for (User *U : GV.users()) { 48 if (!isa<Constant>(U)) 49 return false; 50 51 auto *GEP = dyn_cast<GEPOperator>(U); 52 if (!GEP || !GEP->getInRangeIndex() || *GEP->getInRangeIndex() != 1 || 53 !isa<ConstantInt>(GEP->getOperand(1)) || 54 !cast<ConstantInt>(GEP->getOperand(1))->isZero() || 55 !isa<ConstantInt>(GEP->getOperand(2))) 56 return false; 57 } 58 59 SmallVector<MDNode *, 2> Types; 60 GV.getMetadata(LLVMContext::MD_type, Types); 61 62 const DataLayout &DL = GV.getParent()->getDataLayout(); 63 const StructLayout *SL = DL.getStructLayout(Init->getType()); 64 65 IntegerType *Int32Ty = Type::getInt32Ty(GV.getContext()); 66 67 std::vector<GlobalVariable *> SplitGlobals(Init->getNumOperands()); 68 for (unsigned I = 0; I != Init->getNumOperands(); ++I) { 69 // Build a global representing this split piece. 70 auto *SplitGV = 71 new GlobalVariable(*GV.getParent(), Init->getOperand(I)->getType(), 72 GV.isConstant(), GlobalValue::PrivateLinkage, 73 Init->getOperand(I), GV.getName() + "." + utostr(I)); 74 SplitGlobals[I] = SplitGV; 75 76 unsigned SplitBegin = SL->getElementOffset(I); 77 unsigned SplitEnd = (I == Init->getNumOperands() - 1) 78 ? SL->getSizeInBytes() 79 : SL->getElementOffset(I + 1); 80 81 // Rebuild type metadata, adjusting by the split offset. 82 // FIXME: See if we can use DW_OP_piece to preserve debug metadata here. 83 for (MDNode *Type : Types) { 84 uint64_t ByteOffset = cast<ConstantInt>( 85 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) 86 ->getZExtValue(); 87 if (ByteOffset < SplitBegin || ByteOffset >= SplitEnd) 88 continue; 89 SplitGV->addMetadata( 90 LLVMContext::MD_type, 91 *MDNode::get(GV.getContext(), 92 {ConstantAsMetadata::get( 93 ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)), 94 Type->getOperand(1)})); 95 } 96 } 97 98 for (User *U : GV.users()) { 99 auto *GEP = cast<GEPOperator>(U); 100 unsigned I = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); 101 if (I >= SplitGlobals.size()) 102 continue; 103 104 SmallVector<Value *, 4> Ops; 105 Ops.push_back(ConstantInt::get(Int32Ty, 0)); 106 for (unsigned I = 3; I != GEP->getNumOperands(); ++I) 107 Ops.push_back(GEP->getOperand(I)); 108 109 auto *NewGEP = ConstantExpr::getGetElementPtr( 110 SplitGlobals[I]->getInitializer()->getType(), SplitGlobals[I], Ops, 111 GEP->isInBounds()); 112 GEP->replaceAllUsesWith(NewGEP); 113 } 114 115 // Finally, remove the original global. Any remaining uses refer to invalid 116 // elements of the global, so replace with undef. 117 if (!GV.use_empty()) 118 GV.replaceAllUsesWith(UndefValue::get(GV.getType())); 119 GV.eraseFromParent(); 120 return true; 121 } 122 123 bool splitGlobals(Module &M) { 124 // First, see if the module uses either of the llvm.type.test or 125 // llvm.type.checked.load intrinsics, which indicates that splitting globals 126 // may be beneficial. 127 Function *TypeTestFunc = 128 M.getFunction(Intrinsic::getName(Intrinsic::type_test)); 129 Function *TypeCheckedLoadFunc = 130 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load)); 131 if ((!TypeTestFunc || TypeTestFunc->use_empty()) && 132 (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty())) 133 return false; 134 135 bool Changed = false; 136 for (auto I = M.global_begin(); I != M.global_end();) { 137 GlobalVariable &GV = *I; 138 ++I; 139 Changed |= splitGlobal(GV); 140 } 141 return Changed; 142 } 143 144 struct GlobalSplit : public ModulePass { 145 static char ID; 146 GlobalSplit() : ModulePass(ID) { 147 initializeGlobalSplitPass(*PassRegistry::getPassRegistry()); 148 } 149 bool runOnModule(Module &M) { 150 if (skipModule(M)) 151 return false; 152 153 return splitGlobals(M); 154 } 155 }; 156 157 } 158 159 INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false) 160 char GlobalSplit::ID = 0; 161 162 ModulePass *llvm::createGlobalSplitPass() { 163 return new GlobalSplit; 164 } 165