17d523365SDimitry Andric //===- SplitModule.cpp - Split a module into partitions -------------------===// 27d523365SDimitry Andric // 37d523365SDimitry Andric // The LLVM Compiler Infrastructure 47d523365SDimitry Andric // 57d523365SDimitry Andric // This file is distributed under the University of Illinois Open Source 67d523365SDimitry Andric // License. See LICENSE.TXT for details. 77d523365SDimitry Andric // 87d523365SDimitry Andric //===----------------------------------------------------------------------===// 97d523365SDimitry Andric // 107d523365SDimitry Andric // This file defines the function llvm::SplitModule, which splits a module 117d523365SDimitry Andric // into multiple linkable partitions. It can be used to implement parallel code 127d523365SDimitry Andric // generation for link-time optimization. 137d523365SDimitry Andric // 147d523365SDimitry Andric //===----------------------------------------------------------------------===// 157d523365SDimitry Andric 163ca95b02SDimitry Andric #define DEBUG_TYPE "split-module" 173ca95b02SDimitry Andric 187d523365SDimitry Andric #include "llvm/Transforms/Utils/SplitModule.h" 193ca95b02SDimitry Andric #include "llvm/ADT/EquivalenceClasses.h" 207d523365SDimitry Andric #include "llvm/ADT/Hashing.h" 213ca95b02SDimitry Andric #include "llvm/ADT/MapVector.h" 223ca95b02SDimitry Andric #include "llvm/ADT/SetVector.h" 233ca95b02SDimitry Andric #include "llvm/IR/Constants.h" 247d523365SDimitry Andric #include "llvm/IR/Function.h" 257d523365SDimitry Andric #include "llvm/IR/GlobalAlias.h" 267d523365SDimitry Andric #include "llvm/IR/GlobalObject.h" 277d523365SDimitry Andric #include "llvm/IR/GlobalValue.h" 287d523365SDimitry Andric #include "llvm/IR/Module.h" 293ca95b02SDimitry Andric #include "llvm/Support/Debug.h" 307d523365SDimitry Andric #include "llvm/Support/MD5.h" 317d523365SDimitry Andric #include "llvm/Support/raw_ostream.h" 327d523365SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 333ca95b02SDimitry Andric #include <queue> 347d523365SDimitry Andric 357d523365SDimitry Andric using namespace llvm; 367d523365SDimitry Andric 373ca95b02SDimitry Andric namespace { 383ca95b02SDimitry Andric typedef EquivalenceClasses<const GlobalValue *> ClusterMapType; 393ca95b02SDimitry Andric typedef DenseMap<const Comdat *, const GlobalValue *> ComdatMembersType; 403ca95b02SDimitry Andric typedef DenseMap<const GlobalValue *, unsigned> ClusterIDMapType; 413ca95b02SDimitry Andric } 423ca95b02SDimitry Andric 433ca95b02SDimitry Andric static void addNonConstUser(ClusterMapType &GVtoClusterMap, 443ca95b02SDimitry Andric const GlobalValue *GV, const User *U) { 453ca95b02SDimitry Andric assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user"); 463ca95b02SDimitry Andric 473ca95b02SDimitry Andric if (const Instruction *I = dyn_cast<Instruction>(U)) { 483ca95b02SDimitry Andric const GlobalValue *F = I->getParent()->getParent(); 493ca95b02SDimitry Andric GVtoClusterMap.unionSets(GV, F); 503ca95b02SDimitry Andric } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) || 513ca95b02SDimitry Andric isa<GlobalVariable>(U)) { 523ca95b02SDimitry Andric GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U)); 533ca95b02SDimitry Andric } else { 543ca95b02SDimitry Andric llvm_unreachable("Underimplemented use case"); 553ca95b02SDimitry Andric } 563ca95b02SDimitry Andric } 573ca95b02SDimitry Andric 583ca95b02SDimitry Andric // Adds all GlobalValue users of V to the same cluster as GV. 593ca95b02SDimitry Andric static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap, 603ca95b02SDimitry Andric const GlobalValue *GV, const Value *V) { 613ca95b02SDimitry Andric for (auto *U : V->users()) { 623ca95b02SDimitry Andric SmallVector<const User *, 4> Worklist; 633ca95b02SDimitry Andric Worklist.push_back(U); 643ca95b02SDimitry Andric while (!Worklist.empty()) { 653ca95b02SDimitry Andric const User *UU = Worklist.pop_back_val(); 663ca95b02SDimitry Andric // For each constant that is not a GV (a pure const) recurse. 673ca95b02SDimitry Andric if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) { 683ca95b02SDimitry Andric Worklist.append(UU->user_begin(), UU->user_end()); 693ca95b02SDimitry Andric continue; 703ca95b02SDimitry Andric } 713ca95b02SDimitry Andric addNonConstUser(GVtoClusterMap, GV, UU); 723ca95b02SDimitry Andric } 733ca95b02SDimitry Andric } 743ca95b02SDimitry Andric } 753ca95b02SDimitry Andric 763ca95b02SDimitry Andric // Find partitions for module in the way that no locals need to be 773ca95b02SDimitry Andric // globalized. 783ca95b02SDimitry Andric // Try to balance pack those partitions into N files since this roughly equals 793ca95b02SDimitry Andric // thread balancing for the backend codegen step. 803ca95b02SDimitry Andric static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, 813ca95b02SDimitry Andric unsigned N) { 823ca95b02SDimitry Andric // At this point module should have the proper mix of globals and locals. 833ca95b02SDimitry Andric // As we attempt to partition this module, we must not change any 843ca95b02SDimitry Andric // locals to globals. 853ca95b02SDimitry Andric DEBUG(dbgs() << "Partition module with (" << M->size() << ")functions\n"); 863ca95b02SDimitry Andric ClusterMapType GVtoClusterMap; 873ca95b02SDimitry Andric ComdatMembersType ComdatMembers; 883ca95b02SDimitry Andric 893ca95b02SDimitry Andric auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) { 903ca95b02SDimitry Andric if (GV.isDeclaration()) 913ca95b02SDimitry Andric return; 923ca95b02SDimitry Andric 933ca95b02SDimitry Andric if (!GV.hasName()) 943ca95b02SDimitry Andric GV.setName("__llvmsplit_unnamed"); 953ca95b02SDimitry Andric 963ca95b02SDimitry Andric // Comdat groups must not be partitioned. For comdat groups that contain 973ca95b02SDimitry Andric // locals, record all their members here so we can keep them together. 983ca95b02SDimitry Andric // Comdat groups that only contain external globals are already handled by 993ca95b02SDimitry Andric // the MD5-based partitioning. 1003ca95b02SDimitry Andric if (const Comdat *C = GV.getComdat()) { 1013ca95b02SDimitry Andric auto &Member = ComdatMembers[C]; 1023ca95b02SDimitry Andric if (Member) 1033ca95b02SDimitry Andric GVtoClusterMap.unionSets(Member, &GV); 1043ca95b02SDimitry Andric else 1053ca95b02SDimitry Andric Member = &GV; 1063ca95b02SDimitry Andric } 1073ca95b02SDimitry Andric 1083ca95b02SDimitry Andric // For aliases we should not separate them from their aliasees regardless 1093ca95b02SDimitry Andric // of linkage. 1103ca95b02SDimitry Andric if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) { 1113ca95b02SDimitry Andric if (const GlobalObject *Base = GIS->getBaseObject()) 1123ca95b02SDimitry Andric GVtoClusterMap.unionSets(&GV, Base); 1133ca95b02SDimitry Andric } 1143ca95b02SDimitry Andric 1153ca95b02SDimitry Andric if (const Function *F = dyn_cast<Function>(&GV)) { 1163ca95b02SDimitry Andric for (const BasicBlock &BB : *F) { 1173ca95b02SDimitry Andric BlockAddress *BA = BlockAddress::lookup(&BB); 1183ca95b02SDimitry Andric if (!BA || !BA->isConstantUsed()) 1193ca95b02SDimitry Andric continue; 1203ca95b02SDimitry Andric addAllGlobalValueUsers(GVtoClusterMap, F, BA); 1213ca95b02SDimitry Andric } 1223ca95b02SDimitry Andric } 1233ca95b02SDimitry Andric 1243ca95b02SDimitry Andric if (GV.hasLocalLinkage()) 1253ca95b02SDimitry Andric addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV); 1263ca95b02SDimitry Andric }; 1273ca95b02SDimitry Andric 1283ca95b02SDimitry Andric std::for_each(M->begin(), M->end(), recordGVSet); 1293ca95b02SDimitry Andric std::for_each(M->global_begin(), M->global_end(), recordGVSet); 1303ca95b02SDimitry Andric std::for_each(M->alias_begin(), M->alias_end(), recordGVSet); 1313ca95b02SDimitry Andric 1323ca95b02SDimitry Andric // Assigned all GVs to merged clusters while balancing number of objects in 1333ca95b02SDimitry Andric // each. 1343ca95b02SDimitry Andric auto CompareClusters = [](const std::pair<unsigned, unsigned> &a, 1353ca95b02SDimitry Andric const std::pair<unsigned, unsigned> &b) { 1363ca95b02SDimitry Andric if (a.second || b.second) 1373ca95b02SDimitry Andric return a.second > b.second; 1383ca95b02SDimitry Andric else 1393ca95b02SDimitry Andric return a.first > b.first; 1403ca95b02SDimitry Andric }; 1413ca95b02SDimitry Andric 1423ca95b02SDimitry Andric std::priority_queue<std::pair<unsigned, unsigned>, 1433ca95b02SDimitry Andric std::vector<std::pair<unsigned, unsigned>>, 1443ca95b02SDimitry Andric decltype(CompareClusters)> 1453ca95b02SDimitry Andric BalancinQueue(CompareClusters); 1463ca95b02SDimitry Andric // Pre-populate priority queue with N slot blanks. 1473ca95b02SDimitry Andric for (unsigned i = 0; i < N; ++i) 1483ca95b02SDimitry Andric BalancinQueue.push(std::make_pair(i, 0)); 1493ca95b02SDimitry Andric 1503ca95b02SDimitry Andric typedef std::pair<unsigned, ClusterMapType::iterator> SortType; 1513ca95b02SDimitry Andric SmallVector<SortType, 64> Sets; 1523ca95b02SDimitry Andric SmallPtrSet<const GlobalValue *, 32> Visited; 1533ca95b02SDimitry Andric 1543ca95b02SDimitry Andric // To guarantee determinism, we have to sort SCC according to size. 1553ca95b02SDimitry Andric // When size is the same, use leader's name. 1563ca95b02SDimitry Andric for (ClusterMapType::iterator I = GVtoClusterMap.begin(), 1573ca95b02SDimitry Andric E = GVtoClusterMap.end(); I != E; ++I) 1583ca95b02SDimitry Andric if (I->isLeader()) 1593ca95b02SDimitry Andric Sets.push_back( 1603ca95b02SDimitry Andric std::make_pair(std::distance(GVtoClusterMap.member_begin(I), 1613ca95b02SDimitry Andric GVtoClusterMap.member_end()), I)); 1623ca95b02SDimitry Andric 1633ca95b02SDimitry Andric std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) { 1643ca95b02SDimitry Andric if (a.first == b.first) 1653ca95b02SDimitry Andric return a.second->getData()->getName() > b.second->getData()->getName(); 1663ca95b02SDimitry Andric else 1673ca95b02SDimitry Andric return a.first > b.first; 1683ca95b02SDimitry Andric }); 1693ca95b02SDimitry Andric 1703ca95b02SDimitry Andric for (auto &I : Sets) { 1713ca95b02SDimitry Andric unsigned CurrentClusterID = BalancinQueue.top().first; 1723ca95b02SDimitry Andric unsigned CurrentClusterSize = BalancinQueue.top().second; 1733ca95b02SDimitry Andric BalancinQueue.pop(); 1743ca95b02SDimitry Andric 1753ca95b02SDimitry Andric DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first 1763ca95b02SDimitry Andric << ") ----> " << I.second->getData()->getName() << "\n"); 1773ca95b02SDimitry Andric 1783ca95b02SDimitry Andric for (ClusterMapType::member_iterator MI = 1793ca95b02SDimitry Andric GVtoClusterMap.findLeader(I.second); 1803ca95b02SDimitry Andric MI != GVtoClusterMap.member_end(); ++MI) { 1813ca95b02SDimitry Andric if (!Visited.insert(*MI).second) 1823ca95b02SDimitry Andric continue; 1833ca95b02SDimitry Andric DEBUG(dbgs() << "----> " << (*MI)->getName() 1843ca95b02SDimitry Andric << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n"); 1853ca95b02SDimitry Andric Visited.insert(*MI); 1863ca95b02SDimitry Andric ClusterIDMap[*MI] = CurrentClusterID; 1873ca95b02SDimitry Andric CurrentClusterSize++; 1883ca95b02SDimitry Andric } 1893ca95b02SDimitry Andric // Add this set size to the number of entries in this cluster. 1903ca95b02SDimitry Andric BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize)); 1913ca95b02SDimitry Andric } 1923ca95b02SDimitry Andric } 1933ca95b02SDimitry Andric 1947d523365SDimitry Andric static void externalize(GlobalValue *GV) { 1957d523365SDimitry Andric if (GV->hasLocalLinkage()) { 1967d523365SDimitry Andric GV->setLinkage(GlobalValue::ExternalLinkage); 1977d523365SDimitry Andric GV->setVisibility(GlobalValue::HiddenVisibility); 1987d523365SDimitry Andric } 1997d523365SDimitry Andric 2007d523365SDimitry Andric // Unnamed entities must be named consistently between modules. setName will 2017d523365SDimitry Andric // give a distinct name to each such entity. 2027d523365SDimitry Andric if (!GV->hasName()) 2037d523365SDimitry Andric GV->setName("__llvmsplit_unnamed"); 2047d523365SDimitry Andric } 2057d523365SDimitry Andric 2067d523365SDimitry Andric // Returns whether GV should be in partition (0-based) I of N. 2077d523365SDimitry Andric static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) { 2083ca95b02SDimitry Andric if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV)) 2093ca95b02SDimitry Andric if (const GlobalObject *Base = GIS->getBaseObject()) 2107d523365SDimitry Andric GV = Base; 2117d523365SDimitry Andric 2127d523365SDimitry Andric StringRef Name; 2137d523365SDimitry Andric if (const Comdat *C = GV->getComdat()) 2147d523365SDimitry Andric Name = C->getName(); 2157d523365SDimitry Andric else 2167d523365SDimitry Andric Name = GV->getName(); 2177d523365SDimitry Andric 2187d523365SDimitry Andric // Partition by MD5 hash. We only need a few bits for evenness as the number 2197d523365SDimitry Andric // of partitions will generally be in the 1-2 figure range; the low 16 bits 2207d523365SDimitry Andric // are enough. 2217d523365SDimitry Andric MD5 H; 2227d523365SDimitry Andric MD5::MD5Result R; 2237d523365SDimitry Andric H.update(Name); 2247d523365SDimitry Andric H.final(R); 2257d523365SDimitry Andric return (R[0] | (R[1] << 8)) % N == I; 2267d523365SDimitry Andric } 2277d523365SDimitry Andric 2287d523365SDimitry Andric void llvm::SplitModule( 2297d523365SDimitry Andric std::unique_ptr<Module> M, unsigned N, 2303ca95b02SDimitry Andric function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback, 2313ca95b02SDimitry Andric bool PreserveLocals) { 2323ca95b02SDimitry Andric if (!PreserveLocals) { 2337d523365SDimitry Andric for (Function &F : *M) 2347d523365SDimitry Andric externalize(&F); 2357d523365SDimitry Andric for (GlobalVariable &GV : M->globals()) 2367d523365SDimitry Andric externalize(&GV); 2377d523365SDimitry Andric for (GlobalAlias &GA : M->aliases()) 2387d523365SDimitry Andric externalize(&GA); 2393ca95b02SDimitry Andric for (GlobalIFunc &GIF : M->ifuncs()) 2403ca95b02SDimitry Andric externalize(&GIF); 2413ca95b02SDimitry Andric } 2423ca95b02SDimitry Andric 2433ca95b02SDimitry Andric // This performs splitting without a need for externalization, which might not 2443ca95b02SDimitry Andric // always be possible. 2453ca95b02SDimitry Andric ClusterIDMapType ClusterIDMap; 2463ca95b02SDimitry Andric findPartitions(M.get(), ClusterIDMap, N); 2477d523365SDimitry Andric 2487d523365SDimitry Andric // FIXME: We should be able to reuse M as the last partition instead of 2497d523365SDimitry Andric // cloning it. 2503ca95b02SDimitry Andric for (unsigned I = 0; I < N; ++I) { 2517d523365SDimitry Andric ValueToValueMapTy VMap; 2527d523365SDimitry Andric std::unique_ptr<Module> MPart( 2533ca95b02SDimitry Andric CloneModule(M.get(), VMap, [&](const GlobalValue *GV) { 2543ca95b02SDimitry Andric if (ClusterIDMap.count(GV)) 2553ca95b02SDimitry Andric return (ClusterIDMap[GV] == I); 2563ca95b02SDimitry Andric else 2577d523365SDimitry Andric return isInPartition(GV, I, N); 2587d523365SDimitry Andric })); 2597d523365SDimitry Andric if (I != 0) 2607d523365SDimitry Andric MPart->setModuleInlineAsm(""); 2617d523365SDimitry Andric ModuleCallback(std::move(MPart)); 2627d523365SDimitry Andric } 2637d523365SDimitry Andric } 264