17d523365SDimitry Andric //===- SplitModule.cpp - Split a module into partitions -------------------===// 27d523365SDimitry Andric // 37d523365SDimitry Andric // The LLVM Compiler Infrastructure 47d523365SDimitry Andric // 57d523365SDimitry Andric // This file is distributed under the University of Illinois Open Source 67d523365SDimitry Andric // License. See LICENSE.TXT for details. 77d523365SDimitry Andric // 87d523365SDimitry Andric //===----------------------------------------------------------------------===// 97d523365SDimitry Andric // 107d523365SDimitry Andric // This file defines the function llvm::SplitModule, which splits a module 117d523365SDimitry Andric // into multiple linkable partitions. It can be used to implement parallel code 127d523365SDimitry Andric // generation for link-time optimization. 137d523365SDimitry Andric // 147d523365SDimitry Andric //===----------------------------------------------------------------------===// 157d523365SDimitry Andric 167d523365SDimitry Andric #include "llvm/Transforms/Utils/SplitModule.h" 172cab237bSDimitry Andric #include "llvm/ADT/DenseMap.h" 183ca95b02SDimitry Andric #include "llvm/ADT/EquivalenceClasses.h" 192cab237bSDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 202cab237bSDimitry Andric #include "llvm/ADT/SmallVector.h" 212cab237bSDimitry Andric #include "llvm/ADT/StringRef.h" 222cab237bSDimitry Andric #include "llvm/IR/Comdat.h" 232cab237bSDimitry Andric #include "llvm/IR/Constant.h" 243ca95b02SDimitry Andric #include "llvm/IR/Constants.h" 257d523365SDimitry Andric #include "llvm/IR/Function.h" 267d523365SDimitry Andric #include "llvm/IR/GlobalAlias.h" 277d523365SDimitry Andric #include "llvm/IR/GlobalObject.h" 282cab237bSDimitry Andric #include "llvm/IR/GlobalIndirectSymbol.h" 297d523365SDimitry Andric #include "llvm/IR/GlobalValue.h" 302cab237bSDimitry Andric #include "llvm/IR/GlobalVariable.h" 312cab237bSDimitry Andric #include "llvm/IR/Instruction.h" 327d523365SDimitry Andric #include "llvm/IR/Module.h" 332cab237bSDimitry Andric #include "llvm/IR/User.h" 342cab237bSDimitry Andric #include "llvm/IR/Value.h" 352cab237bSDimitry Andric #include "llvm/Support/Casting.h" 363ca95b02SDimitry Andric #include "llvm/Support/Debug.h" 372cab237bSDimitry Andric #include "llvm/Support/ErrorHandling.h" 387d523365SDimitry Andric #include "llvm/Support/MD5.h" 397d523365SDimitry Andric #include "llvm/Support/raw_ostream.h" 407d523365SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 412cab237bSDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h" 422cab237bSDimitry Andric #include <algorithm> 432cab237bSDimitry Andric #include <cassert> 442cab237bSDimitry Andric #include <iterator> 452cab237bSDimitry Andric #include <memory> 463ca95b02SDimitry Andric #include <queue> 472cab237bSDimitry Andric #include <utility> 482cab237bSDimitry Andric #include <vector> 497d523365SDimitry Andric 507d523365SDimitry Andric using namespace llvm; 517d523365SDimitry Andric 522cab237bSDimitry Andric #define DEBUG_TYPE "split-module" 532cab237bSDimitry Andric 543ca95b02SDimitry Andric namespace { 552cab237bSDimitry Andric 562cab237bSDimitry Andric using ClusterMapType = EquivalenceClasses<const GlobalValue *>; 572cab237bSDimitry Andric using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>; 582cab237bSDimitry Andric using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>; 592cab237bSDimitry Andric 602cab237bSDimitry Andric } // end anonymous namespace 613ca95b02SDimitry Andric 623ca95b02SDimitry Andric static void addNonConstUser(ClusterMapType &GVtoClusterMap, 633ca95b02SDimitry Andric const GlobalValue *GV, const User *U) { 643ca95b02SDimitry Andric assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user"); 653ca95b02SDimitry Andric 663ca95b02SDimitry Andric if (const Instruction *I = dyn_cast<Instruction>(U)) { 673ca95b02SDimitry Andric const GlobalValue *F = I->getParent()->getParent(); 683ca95b02SDimitry Andric GVtoClusterMap.unionSets(GV, F); 693ca95b02SDimitry Andric } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) || 703ca95b02SDimitry Andric isa<GlobalVariable>(U)) { 713ca95b02SDimitry Andric GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U)); 723ca95b02SDimitry Andric } else { 733ca95b02SDimitry Andric llvm_unreachable("Underimplemented use case"); 743ca95b02SDimitry Andric } 753ca95b02SDimitry Andric } 763ca95b02SDimitry Andric 773ca95b02SDimitry Andric // Adds all GlobalValue users of V to the same cluster as GV. 783ca95b02SDimitry Andric static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap, 793ca95b02SDimitry Andric const GlobalValue *GV, const Value *V) { 803ca95b02SDimitry Andric for (auto *U : V->users()) { 813ca95b02SDimitry Andric SmallVector<const User *, 4> Worklist; 823ca95b02SDimitry Andric Worklist.push_back(U); 833ca95b02SDimitry Andric while (!Worklist.empty()) { 843ca95b02SDimitry Andric const User *UU = Worklist.pop_back_val(); 853ca95b02SDimitry Andric // For each constant that is not a GV (a pure const) recurse. 863ca95b02SDimitry Andric if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) { 873ca95b02SDimitry Andric Worklist.append(UU->user_begin(), UU->user_end()); 883ca95b02SDimitry Andric continue; 893ca95b02SDimitry Andric } 903ca95b02SDimitry Andric addNonConstUser(GVtoClusterMap, GV, UU); 913ca95b02SDimitry Andric } 923ca95b02SDimitry Andric } 933ca95b02SDimitry Andric } 943ca95b02SDimitry Andric 953ca95b02SDimitry Andric // Find partitions for module in the way that no locals need to be 963ca95b02SDimitry Andric // globalized. 973ca95b02SDimitry Andric // Try to balance pack those partitions into N files since this roughly equals 983ca95b02SDimitry Andric // thread balancing for the backend codegen step. 993ca95b02SDimitry Andric static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, 1003ca95b02SDimitry Andric unsigned N) { 1013ca95b02SDimitry Andric // At this point module should have the proper mix of globals and locals. 1023ca95b02SDimitry Andric // As we attempt to partition this module, we must not change any 1033ca95b02SDimitry Andric // locals to globals. 1043ca95b02SDimitry Andric DEBUG(dbgs() << "Partition module with (" << M->size() << ")functions\n"); 1053ca95b02SDimitry Andric ClusterMapType GVtoClusterMap; 1063ca95b02SDimitry Andric ComdatMembersType ComdatMembers; 1073ca95b02SDimitry Andric 1083ca95b02SDimitry Andric auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) { 1093ca95b02SDimitry Andric if (GV.isDeclaration()) 1103ca95b02SDimitry Andric return; 1113ca95b02SDimitry Andric 1123ca95b02SDimitry Andric if (!GV.hasName()) 1133ca95b02SDimitry Andric GV.setName("__llvmsplit_unnamed"); 1143ca95b02SDimitry Andric 1153ca95b02SDimitry Andric // Comdat groups must not be partitioned. For comdat groups that contain 1163ca95b02SDimitry Andric // locals, record all their members here so we can keep them together. 1173ca95b02SDimitry Andric // Comdat groups that only contain external globals are already handled by 1183ca95b02SDimitry Andric // the MD5-based partitioning. 1193ca95b02SDimitry Andric if (const Comdat *C = GV.getComdat()) { 1203ca95b02SDimitry Andric auto &Member = ComdatMembers[C]; 1213ca95b02SDimitry Andric if (Member) 1223ca95b02SDimitry Andric GVtoClusterMap.unionSets(Member, &GV); 1233ca95b02SDimitry Andric else 1243ca95b02SDimitry Andric Member = &GV; 1253ca95b02SDimitry Andric } 1263ca95b02SDimitry Andric 1273ca95b02SDimitry Andric // For aliases we should not separate them from their aliasees regardless 1283ca95b02SDimitry Andric // of linkage. 1293ca95b02SDimitry Andric if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) { 1303ca95b02SDimitry Andric if (const GlobalObject *Base = GIS->getBaseObject()) 1313ca95b02SDimitry Andric GVtoClusterMap.unionSets(&GV, Base); 1323ca95b02SDimitry Andric } 1333ca95b02SDimitry Andric 1343ca95b02SDimitry Andric if (const Function *F = dyn_cast<Function>(&GV)) { 1353ca95b02SDimitry Andric for (const BasicBlock &BB : *F) { 1363ca95b02SDimitry Andric BlockAddress *BA = BlockAddress::lookup(&BB); 1373ca95b02SDimitry Andric if (!BA || !BA->isConstantUsed()) 1383ca95b02SDimitry Andric continue; 1393ca95b02SDimitry Andric addAllGlobalValueUsers(GVtoClusterMap, F, BA); 1403ca95b02SDimitry Andric } 1413ca95b02SDimitry Andric } 1423ca95b02SDimitry Andric 1433ca95b02SDimitry Andric if (GV.hasLocalLinkage()) 1443ca95b02SDimitry Andric addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV); 1453ca95b02SDimitry Andric }; 1463ca95b02SDimitry Andric 1472cab237bSDimitry Andric llvm::for_each(M->functions(), recordGVSet); 1482cab237bSDimitry Andric llvm::for_each(M->globals(), recordGVSet); 1492cab237bSDimitry Andric llvm::for_each(M->aliases(), recordGVSet); 1503ca95b02SDimitry Andric 1513ca95b02SDimitry Andric // Assigned all GVs to merged clusters while balancing number of objects in 1523ca95b02SDimitry Andric // each. 1533ca95b02SDimitry Andric auto CompareClusters = [](const std::pair<unsigned, unsigned> &a, 1543ca95b02SDimitry Andric const std::pair<unsigned, unsigned> &b) { 1553ca95b02SDimitry Andric if (a.second || b.second) 1563ca95b02SDimitry Andric return a.second > b.second; 1573ca95b02SDimitry Andric else 1583ca95b02SDimitry Andric return a.first > b.first; 1593ca95b02SDimitry Andric }; 1603ca95b02SDimitry Andric 1613ca95b02SDimitry Andric std::priority_queue<std::pair<unsigned, unsigned>, 1623ca95b02SDimitry Andric std::vector<std::pair<unsigned, unsigned>>, 1633ca95b02SDimitry Andric decltype(CompareClusters)> 1643ca95b02SDimitry Andric BalancinQueue(CompareClusters); 1653ca95b02SDimitry Andric // Pre-populate priority queue with N slot blanks. 1663ca95b02SDimitry Andric for (unsigned i = 0; i < N; ++i) 1673ca95b02SDimitry Andric BalancinQueue.push(std::make_pair(i, 0)); 1683ca95b02SDimitry Andric 1692cab237bSDimitry Andric using SortType = std::pair<unsigned, ClusterMapType::iterator>; 1702cab237bSDimitry Andric 1713ca95b02SDimitry Andric SmallVector<SortType, 64> Sets; 1723ca95b02SDimitry Andric SmallPtrSet<const GlobalValue *, 32> Visited; 1733ca95b02SDimitry Andric 1743ca95b02SDimitry Andric // To guarantee determinism, we have to sort SCC according to size. 1753ca95b02SDimitry Andric // When size is the same, use leader's name. 1763ca95b02SDimitry Andric for (ClusterMapType::iterator I = GVtoClusterMap.begin(), 1773ca95b02SDimitry Andric E = GVtoClusterMap.end(); I != E; ++I) 1783ca95b02SDimitry Andric if (I->isLeader()) 1793ca95b02SDimitry Andric Sets.push_back( 1803ca95b02SDimitry Andric std::make_pair(std::distance(GVtoClusterMap.member_begin(I), 1813ca95b02SDimitry Andric GVtoClusterMap.member_end()), I)); 1823ca95b02SDimitry Andric 1833ca95b02SDimitry Andric std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) { 1843ca95b02SDimitry Andric if (a.first == b.first) 1853ca95b02SDimitry Andric return a.second->getData()->getName() > b.second->getData()->getName(); 1863ca95b02SDimitry Andric else 1873ca95b02SDimitry Andric return a.first > b.first; 1883ca95b02SDimitry Andric }); 1893ca95b02SDimitry Andric 1903ca95b02SDimitry Andric for (auto &I : Sets) { 1913ca95b02SDimitry Andric unsigned CurrentClusterID = BalancinQueue.top().first; 1923ca95b02SDimitry Andric unsigned CurrentClusterSize = BalancinQueue.top().second; 1933ca95b02SDimitry Andric BalancinQueue.pop(); 1943ca95b02SDimitry Andric 1953ca95b02SDimitry Andric DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first 1963ca95b02SDimitry Andric << ") ----> " << I.second->getData()->getName() << "\n"); 1973ca95b02SDimitry Andric 1983ca95b02SDimitry Andric for (ClusterMapType::member_iterator MI = 1993ca95b02SDimitry Andric GVtoClusterMap.findLeader(I.second); 2003ca95b02SDimitry Andric MI != GVtoClusterMap.member_end(); ++MI) { 2013ca95b02SDimitry Andric if (!Visited.insert(*MI).second) 2023ca95b02SDimitry Andric continue; 2033ca95b02SDimitry Andric DEBUG(dbgs() << "----> " << (*MI)->getName() 2043ca95b02SDimitry Andric << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n"); 2053ca95b02SDimitry Andric Visited.insert(*MI); 2063ca95b02SDimitry Andric ClusterIDMap[*MI] = CurrentClusterID; 2073ca95b02SDimitry Andric CurrentClusterSize++; 2083ca95b02SDimitry Andric } 2093ca95b02SDimitry Andric // Add this set size to the number of entries in this cluster. 2103ca95b02SDimitry Andric BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize)); 2113ca95b02SDimitry Andric } 2123ca95b02SDimitry Andric } 2133ca95b02SDimitry Andric 2147d523365SDimitry Andric static void externalize(GlobalValue *GV) { 2157d523365SDimitry Andric if (GV->hasLocalLinkage()) { 2167d523365SDimitry Andric GV->setLinkage(GlobalValue::ExternalLinkage); 2177d523365SDimitry Andric GV->setVisibility(GlobalValue::HiddenVisibility); 2187d523365SDimitry Andric } 2197d523365SDimitry Andric 2207d523365SDimitry Andric // Unnamed entities must be named consistently between modules. setName will 2217d523365SDimitry Andric // give a distinct name to each such entity. 2227d523365SDimitry Andric if (!GV->hasName()) 2237d523365SDimitry Andric GV->setName("__llvmsplit_unnamed"); 2247d523365SDimitry Andric } 2257d523365SDimitry Andric 2267d523365SDimitry Andric // Returns whether GV should be in partition (0-based) I of N. 2277d523365SDimitry Andric static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) { 2283ca95b02SDimitry Andric if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV)) 2293ca95b02SDimitry Andric if (const GlobalObject *Base = GIS->getBaseObject()) 2307d523365SDimitry Andric GV = Base; 2317d523365SDimitry Andric 2327d523365SDimitry Andric StringRef Name; 2337d523365SDimitry Andric if (const Comdat *C = GV->getComdat()) 2347d523365SDimitry Andric Name = C->getName(); 2357d523365SDimitry Andric else 2367d523365SDimitry Andric Name = GV->getName(); 2377d523365SDimitry Andric 2387d523365SDimitry Andric // Partition by MD5 hash. We only need a few bits for evenness as the number 2397d523365SDimitry Andric // of partitions will generally be in the 1-2 figure range; the low 16 bits 2407d523365SDimitry Andric // are enough. 2417d523365SDimitry Andric MD5 H; 2427d523365SDimitry Andric MD5::MD5Result R; 2437d523365SDimitry Andric H.update(Name); 2447d523365SDimitry Andric H.final(R); 2457d523365SDimitry Andric return (R[0] | (R[1] << 8)) % N == I; 2467d523365SDimitry Andric } 2477d523365SDimitry Andric 2487d523365SDimitry Andric void llvm::SplitModule( 2497d523365SDimitry Andric std::unique_ptr<Module> M, unsigned N, 2503ca95b02SDimitry Andric function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback, 2513ca95b02SDimitry Andric bool PreserveLocals) { 2523ca95b02SDimitry Andric if (!PreserveLocals) { 2537d523365SDimitry Andric for (Function &F : *M) 2547d523365SDimitry Andric externalize(&F); 2557d523365SDimitry Andric for (GlobalVariable &GV : M->globals()) 2567d523365SDimitry Andric externalize(&GV); 2577d523365SDimitry Andric for (GlobalAlias &GA : M->aliases()) 2587d523365SDimitry Andric externalize(&GA); 2593ca95b02SDimitry Andric for (GlobalIFunc &GIF : M->ifuncs()) 2603ca95b02SDimitry Andric externalize(&GIF); 2613ca95b02SDimitry Andric } 2623ca95b02SDimitry Andric 2633ca95b02SDimitry Andric // This performs splitting without a need for externalization, which might not 2643ca95b02SDimitry Andric // always be possible. 2653ca95b02SDimitry Andric ClusterIDMapType ClusterIDMap; 2663ca95b02SDimitry Andric findPartitions(M.get(), ClusterIDMap, N); 2677d523365SDimitry Andric 2687d523365SDimitry Andric // FIXME: We should be able to reuse M as the last partition instead of 2697d523365SDimitry Andric // cloning it. 2703ca95b02SDimitry Andric for (unsigned I = 0; I < N; ++I) { 2717d523365SDimitry Andric ValueToValueMapTy VMap; 2727d523365SDimitry Andric std::unique_ptr<Module> MPart( 2733ca95b02SDimitry Andric CloneModule(M.get(), VMap, [&](const GlobalValue *GV) { 2743ca95b02SDimitry Andric if (ClusterIDMap.count(GV)) 2753ca95b02SDimitry Andric return (ClusterIDMap[GV] == I); 2763ca95b02SDimitry Andric else 2777d523365SDimitry Andric return isInPartition(GV, I, N); 2787d523365SDimitry Andric })); 2797d523365SDimitry Andric if (I != 0) 2807d523365SDimitry Andric MPart->setModuleInlineAsm(""); 2817d523365SDimitry Andric ModuleCallback(std::move(MPart)); 2827d523365SDimitry Andric } 2837d523365SDimitry Andric } 284