19775a620SHiroshi Yamauchi //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 29775a620SHiroshi Yamauchi // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69775a620SHiroshi Yamauchi // 79775a620SHiroshi Yamauchi //===----------------------------------------------------------------------===// 89775a620SHiroshi Yamauchi // 99775a620SHiroshi Yamauchi // This pass merges conditional blocks of code and reduces the number of 109775a620SHiroshi Yamauchi // conditional branches in the hot paths based on profiles. 119775a620SHiroshi Yamauchi // 129775a620SHiroshi Yamauchi //===----------------------------------------------------------------------===// 139775a620SHiroshi Yamauchi 149775a620SHiroshi Yamauchi #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 159775a620SHiroshi Yamauchi #include "llvm/ADT/DenseMap.h" 169775a620SHiroshi Yamauchi #include "llvm/ADT/DenseSet.h" 179775a620SHiroshi Yamauchi #include "llvm/ADT/SmallVector.h" 189775a620SHiroshi Yamauchi #include "llvm/ADT/StringSet.h" 199775a620SHiroshi Yamauchi #include "llvm/Analysis/BlockFrequencyInfo.h" 209abad481SBenjamin Kramer #include "llvm/Analysis/GlobalsModRef.h" 21fd2c699dSHiroshi Yamauchi #include "llvm/Analysis/OptimizationRemarkEmitter.h" 229775a620SHiroshi Yamauchi #include "llvm/Analysis/ProfileSummaryInfo.h" 239775a620SHiroshi Yamauchi #include "llvm/Analysis/RegionInfo.h" 249775a620SHiroshi Yamauchi #include "llvm/Analysis/RegionIterator.h" 259775a620SHiroshi Yamauchi #include "llvm/Analysis/ValueTracking.h" 269775a620SHiroshi Yamauchi #include "llvm/IR/CFG.h" 279775a620SHiroshi Yamauchi #include "llvm/IR/Dominators.h" 289775a620SHiroshi Yamauchi #include "llvm/IR/IRBuilder.h" 2926a0d53bSWei Wang #include "llvm/IR/IntrinsicInst.h" 309775a620SHiroshi Yamauchi #include "llvm/IR/MDBuilder.h" 316b9524a0SArthur Eubanks #include "llvm/IR/PassManager.h" 3205da2fe5SReid Kleckner #include "llvm/InitializePasses.h" 339775a620SHiroshi Yamauchi #include "llvm/Support/BranchProbability.h" 344c1a1d3cSReid Kleckner #include "llvm/Support/CommandLine.h" 359775a620SHiroshi Yamauchi #include "llvm/Support/MemoryBuffer.h" 369abad481SBenjamin Kramer #include "llvm/Transforms/Utils.h" 379abad481SBenjamin Kramer #include "llvm/Transforms/Utils/BasicBlockUtils.h" 389abad481SBenjamin Kramer #include "llvm/Transforms/Utils/Cloning.h" 399abad481SBenjamin Kramer #include "llvm/Transforms/Utils/ValueMapper.h" 409775a620SHiroshi Yamauchi 419775a620SHiroshi Yamauchi #include <set> 429775a620SHiroshi Yamauchi #include <sstream> 439775a620SHiroshi Yamauchi 449775a620SHiroshi Yamauchi using namespace llvm; 459775a620SHiroshi Yamauchi 469775a620SHiroshi Yamauchi #define DEBUG_TYPE "chr" 479775a620SHiroshi Yamauchi 489775a620SHiroshi Yamauchi #define CHR_DEBUG(X) LLVM_DEBUG(X) 499775a620SHiroshi Yamauchi 509775a620SHiroshi Yamauchi static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 519775a620SHiroshi Yamauchi cl::desc("Apply CHR for all functions")); 529775a620SHiroshi Yamauchi 539775a620SHiroshi Yamauchi static cl::opt<double> CHRBiasThreshold( 549775a620SHiroshi Yamauchi "chr-bias-threshold", cl::init(0.99), cl::Hidden, 559775a620SHiroshi Yamauchi cl::desc("CHR considers a branch bias greater than this ratio as biased")); 569775a620SHiroshi Yamauchi 579775a620SHiroshi Yamauchi static cl::opt<unsigned> CHRMergeThreshold( 589775a620SHiroshi Yamauchi "chr-merge-threshold", cl::init(2), cl::Hidden, 599775a620SHiroshi Yamauchi cl::desc("CHR merges a group of N branches/selects where N >= this value")); 609775a620SHiroshi Yamauchi 619775a620SHiroshi Yamauchi static cl::opt<std::string> CHRModuleList( 629775a620SHiroshi Yamauchi "chr-module-list", cl::init(""), cl::Hidden, 639775a620SHiroshi Yamauchi cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 649775a620SHiroshi Yamauchi 659775a620SHiroshi Yamauchi static cl::opt<std::string> CHRFunctionList( 669775a620SHiroshi Yamauchi "chr-function-list", cl::init(""), cl::Hidden, 679775a620SHiroshi Yamauchi cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 689775a620SHiroshi Yamauchi 699775a620SHiroshi Yamauchi static StringSet<> CHRModules; 709775a620SHiroshi Yamauchi static StringSet<> CHRFunctions; 719775a620SHiroshi Yamauchi 72b3b61de0SFangrui Song static void parseCHRFilterFiles() { 739775a620SHiroshi Yamauchi if (!CHRModuleList.empty()) { 749775a620SHiroshi Yamauchi auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 759775a620SHiroshi Yamauchi if (!FileOrErr) { 769775a620SHiroshi Yamauchi errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 779775a620SHiroshi Yamauchi std::exit(1); 789775a620SHiroshi Yamauchi } 799775a620SHiroshi Yamauchi StringRef Buf = FileOrErr->get()->getBuffer(); 809775a620SHiroshi Yamauchi SmallVector<StringRef, 0> Lines; 819775a620SHiroshi Yamauchi Buf.split(Lines, '\n'); 829775a620SHiroshi Yamauchi for (StringRef Line : Lines) { 839775a620SHiroshi Yamauchi Line = Line.trim(); 849775a620SHiroshi Yamauchi if (!Line.empty()) 859775a620SHiroshi Yamauchi CHRModules.insert(Line); 869775a620SHiroshi Yamauchi } 879775a620SHiroshi Yamauchi } 889775a620SHiroshi Yamauchi if (!CHRFunctionList.empty()) { 899775a620SHiroshi Yamauchi auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 909775a620SHiroshi Yamauchi if (!FileOrErr) { 919775a620SHiroshi Yamauchi errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 929775a620SHiroshi Yamauchi std::exit(1); 939775a620SHiroshi Yamauchi } 949775a620SHiroshi Yamauchi StringRef Buf = FileOrErr->get()->getBuffer(); 959775a620SHiroshi Yamauchi SmallVector<StringRef, 0> Lines; 969775a620SHiroshi Yamauchi Buf.split(Lines, '\n'); 979775a620SHiroshi Yamauchi for (StringRef Line : Lines) { 989775a620SHiroshi Yamauchi Line = Line.trim(); 999775a620SHiroshi Yamauchi if (!Line.empty()) 1009775a620SHiroshi Yamauchi CHRFunctions.insert(Line); 1019775a620SHiroshi Yamauchi } 1029775a620SHiroshi Yamauchi } 1039775a620SHiroshi Yamauchi } 1049775a620SHiroshi Yamauchi 1059775a620SHiroshi Yamauchi namespace { 1069775a620SHiroshi Yamauchi class ControlHeightReductionLegacyPass : public FunctionPass { 1079775a620SHiroshi Yamauchi public: 1089775a620SHiroshi Yamauchi static char ID; 1099775a620SHiroshi Yamauchi 1109775a620SHiroshi Yamauchi ControlHeightReductionLegacyPass() : FunctionPass(ID) { 1119775a620SHiroshi Yamauchi initializeControlHeightReductionLegacyPassPass( 1129775a620SHiroshi Yamauchi *PassRegistry::getPassRegistry()); 113b3b61de0SFangrui Song parseCHRFilterFiles(); 1149775a620SHiroshi Yamauchi } 1159775a620SHiroshi Yamauchi 1169775a620SHiroshi Yamauchi bool runOnFunction(Function &F) override; 1179775a620SHiroshi Yamauchi void getAnalysisUsage(AnalysisUsage &AU) const override { 1189775a620SHiroshi Yamauchi AU.addRequired<BlockFrequencyInfoWrapperPass>(); 1199775a620SHiroshi Yamauchi AU.addRequired<DominatorTreeWrapperPass>(); 1209775a620SHiroshi Yamauchi AU.addRequired<ProfileSummaryInfoWrapperPass>(); 1219775a620SHiroshi Yamauchi AU.addRequired<RegionInfoPass>(); 1229775a620SHiroshi Yamauchi AU.addPreserved<GlobalsAAWrapperPass>(); 1239775a620SHiroshi Yamauchi } 1249775a620SHiroshi Yamauchi }; 1259775a620SHiroshi Yamauchi } // end anonymous namespace 1269775a620SHiroshi Yamauchi 1279775a620SHiroshi Yamauchi char ControlHeightReductionLegacyPass::ID = 0; 1289775a620SHiroshi Yamauchi 1299775a620SHiroshi Yamauchi INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 1309775a620SHiroshi Yamauchi "chr", 1319775a620SHiroshi Yamauchi "Reduce control height in the hot paths", 1329775a620SHiroshi Yamauchi false, false) 1339775a620SHiroshi Yamauchi INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 1349775a620SHiroshi Yamauchi INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1359775a620SHiroshi Yamauchi INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 1369775a620SHiroshi Yamauchi INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 1379775a620SHiroshi Yamauchi INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 1389775a620SHiroshi Yamauchi "chr", 1399775a620SHiroshi Yamauchi "Reduce control height in the hot paths", 1409775a620SHiroshi Yamauchi false, false) 1419775a620SHiroshi Yamauchi 1429775a620SHiroshi Yamauchi FunctionPass *llvm::createControlHeightReductionLegacyPass() { 1439775a620SHiroshi Yamauchi return new ControlHeightReductionLegacyPass(); 1449775a620SHiroshi Yamauchi } 1459775a620SHiroshi Yamauchi 1469775a620SHiroshi Yamauchi namespace { 1479775a620SHiroshi Yamauchi 1489775a620SHiroshi Yamauchi struct CHRStats { 149fda6a1adSKazu Hirata CHRStats() = default; 1509775a620SHiroshi Yamauchi void print(raw_ostream &OS) const { 1519775a620SHiroshi Yamauchi OS << "CHRStats: NumBranches " << NumBranches 1529775a620SHiroshi Yamauchi << " NumBranchesDelta " << NumBranchesDelta 1539775a620SHiroshi Yamauchi << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 1549775a620SHiroshi Yamauchi } 155fda6a1adSKazu Hirata // The original number of conditional branches / selects 156fda6a1adSKazu Hirata uint64_t NumBranches = 0; 157fda6a1adSKazu Hirata // The decrease of the number of conditional branches / selects in the hot 158fda6a1adSKazu Hirata // paths due to CHR. 159fda6a1adSKazu Hirata uint64_t NumBranchesDelta = 0; 160fda6a1adSKazu Hirata // NumBranchesDelta weighted by the profile count at the scope entry. 161fda6a1adSKazu Hirata uint64_t WeightedNumBranchesDelta = 0; 1629775a620SHiroshi Yamauchi }; 1639775a620SHiroshi Yamauchi 1649775a620SHiroshi Yamauchi // RegInfo - some properties of a Region. 1659775a620SHiroshi Yamauchi struct RegInfo { 166*df792bcbSKazu Hirata RegInfo() = default; 167*df792bcbSKazu Hirata RegInfo(Region *RegionIn) : R(RegionIn) {} 168*df792bcbSKazu Hirata Region *R = nullptr; 169*df792bcbSKazu Hirata bool HasBranch = false; 1709775a620SHiroshi Yamauchi SmallVector<SelectInst *, 8> Selects; 1719775a620SHiroshi Yamauchi }; 1729775a620SHiroshi Yamauchi 1739775a620SHiroshi Yamauchi typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 1749775a620SHiroshi Yamauchi 1759775a620SHiroshi Yamauchi // CHRScope - a sequence of regions to CHR together. It corresponds to a 1769775a620SHiroshi Yamauchi // sequence of conditional blocks. It can have subscopes which correspond to 1779775a620SHiroshi Yamauchi // nested conditional blocks. Nested CHRScopes form a tree. 1789775a620SHiroshi Yamauchi class CHRScope { 1799775a620SHiroshi Yamauchi public: 1809775a620SHiroshi Yamauchi CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 1819775a620SHiroshi Yamauchi assert(RI.R && "Null RegionIn"); 1829775a620SHiroshi Yamauchi RegInfos.push_back(RI); 1839775a620SHiroshi Yamauchi } 1849775a620SHiroshi Yamauchi 1859775a620SHiroshi Yamauchi Region *getParentRegion() { 1869775a620SHiroshi Yamauchi assert(RegInfos.size() > 0 && "Empty CHRScope"); 1879775a620SHiroshi Yamauchi Region *Parent = RegInfos[0].R->getParent(); 1889775a620SHiroshi Yamauchi assert(Parent && "Unexpected to call this on the top-level region"); 1899775a620SHiroshi Yamauchi return Parent; 1909775a620SHiroshi Yamauchi } 1919775a620SHiroshi Yamauchi 1929775a620SHiroshi Yamauchi BasicBlock *getEntryBlock() { 1939775a620SHiroshi Yamauchi assert(RegInfos.size() > 0 && "Empty CHRScope"); 1949775a620SHiroshi Yamauchi return RegInfos.front().R->getEntry(); 1959775a620SHiroshi Yamauchi } 1969775a620SHiroshi Yamauchi 1979775a620SHiroshi Yamauchi BasicBlock *getExitBlock() { 1989775a620SHiroshi Yamauchi assert(RegInfos.size() > 0 && "Empty CHRScope"); 1999775a620SHiroshi Yamauchi return RegInfos.back().R->getExit(); 2009775a620SHiroshi Yamauchi } 2019775a620SHiroshi Yamauchi 2029775a620SHiroshi Yamauchi bool appendable(CHRScope *Next) { 2039775a620SHiroshi Yamauchi // The next scope is appendable only if this scope is directly connected to 2049775a620SHiroshi Yamauchi // it (which implies it post-dominates this scope) and this scope dominates 2059775a620SHiroshi Yamauchi // it (no edge to the next scope outside this scope). 2069775a620SHiroshi Yamauchi BasicBlock *NextEntry = Next->getEntryBlock(); 2079775a620SHiroshi Yamauchi if (getExitBlock() != NextEntry) 2089775a620SHiroshi Yamauchi // Not directly connected. 2099775a620SHiroshi Yamauchi return false; 2109775a620SHiroshi Yamauchi Region *LastRegion = RegInfos.back().R; 2119775a620SHiroshi Yamauchi for (BasicBlock *Pred : predecessors(NextEntry)) 2129775a620SHiroshi Yamauchi if (!LastRegion->contains(Pred)) 2139775a620SHiroshi Yamauchi // There's an edge going into the entry of the next scope from outside 2149775a620SHiroshi Yamauchi // of this scope. 2159775a620SHiroshi Yamauchi return false; 2169775a620SHiroshi Yamauchi return true; 2179775a620SHiroshi Yamauchi } 2189775a620SHiroshi Yamauchi 2199775a620SHiroshi Yamauchi void append(CHRScope *Next) { 2209775a620SHiroshi Yamauchi assert(RegInfos.size() > 0 && "Empty CHRScope"); 2219775a620SHiroshi Yamauchi assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 2229775a620SHiroshi Yamauchi assert(getParentRegion() == Next->getParentRegion() && 2239775a620SHiroshi Yamauchi "Must be siblings"); 2249775a620SHiroshi Yamauchi assert(getExitBlock() == Next->getEntryBlock() && 2259775a620SHiroshi Yamauchi "Must be adjacent"); 226f1542efdSBenjamin Kramer RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end()); 227f1542efdSBenjamin Kramer Subs.append(Next->Subs.begin(), Next->Subs.end()); 2289775a620SHiroshi Yamauchi } 2299775a620SHiroshi Yamauchi 2309775a620SHiroshi Yamauchi void addSub(CHRScope *SubIn) { 2319775a620SHiroshi Yamauchi #ifndef NDEBUG 232b3b61de0SFangrui Song bool IsChild = false; 2339775a620SHiroshi Yamauchi for (RegInfo &RI : RegInfos) 2349775a620SHiroshi Yamauchi if (RI.R == SubIn->getParentRegion()) { 235b3b61de0SFangrui Song IsChild = true; 2369775a620SHiroshi Yamauchi break; 2379775a620SHiroshi Yamauchi } 238b3b61de0SFangrui Song assert(IsChild && "Must be a child"); 2399775a620SHiroshi Yamauchi #endif 2409775a620SHiroshi Yamauchi Subs.push_back(SubIn); 2419775a620SHiroshi Yamauchi } 2429775a620SHiroshi Yamauchi 2439775a620SHiroshi Yamauchi // Split this scope at the boundary region into two, which will belong to the 2449775a620SHiroshi Yamauchi // tail and returns the tail. 2459775a620SHiroshi Yamauchi CHRScope *split(Region *Boundary) { 2469775a620SHiroshi Yamauchi assert(Boundary && "Boundary null"); 2479775a620SHiroshi Yamauchi assert(RegInfos.begin()->R != Boundary && 2489775a620SHiroshi Yamauchi "Can't be split at beginning"); 249f1542efdSBenjamin Kramer auto BoundaryIt = llvm::find_if( 250f1542efdSBenjamin Kramer RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; }); 2519775a620SHiroshi Yamauchi if (BoundaryIt == RegInfos.end()) 2529775a620SHiroshi Yamauchi return nullptr; 253f1542efdSBenjamin Kramer ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end()); 2549775a620SHiroshi Yamauchi DenseSet<Region *> TailRegionSet; 255f1542efdSBenjamin Kramer for (const RegInfo &RI : TailRegInfos) 2569775a620SHiroshi Yamauchi TailRegionSet.insert(RI.R); 257f1542efdSBenjamin Kramer 258f1542efdSBenjamin Kramer auto TailIt = 259f1542efdSBenjamin Kramer std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) { 2609775a620SHiroshi Yamauchi assert(Sub && "null Sub"); 2619775a620SHiroshi Yamauchi Region *Parent = Sub->getParentRegion(); 262f1542efdSBenjamin Kramer if (TailRegionSet.count(Parent)) 263f1542efdSBenjamin Kramer return false; 264f1542efdSBenjamin Kramer 265df812115SKazu Hirata assert(llvm::any_of( 266df812115SKazu Hirata RegInfos, 267df812115SKazu Hirata [&Parent](const RegInfo &RI) { return Parent == RI.R; }) && 2689775a620SHiroshi Yamauchi "Must be in head"); 269f1542efdSBenjamin Kramer return true; 270f1542efdSBenjamin Kramer }); 271f1542efdSBenjamin Kramer ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end()); 272f1542efdSBenjamin Kramer 2739775a620SHiroshi Yamauchi assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 274f1542efdSBenjamin Kramer auto *Scope = new CHRScope(TailRegInfos, TailSubs); 275f1542efdSBenjamin Kramer RegInfos.erase(BoundaryIt, RegInfos.end()); 276f1542efdSBenjamin Kramer Subs.erase(TailIt, Subs.end()); 277f1542efdSBenjamin Kramer return Scope; 2789775a620SHiroshi Yamauchi } 2799775a620SHiroshi Yamauchi 2809775a620SHiroshi Yamauchi bool contains(Instruction *I) const { 2819775a620SHiroshi Yamauchi BasicBlock *Parent = I->getParent(); 2829775a620SHiroshi Yamauchi for (const RegInfo &RI : RegInfos) 2839775a620SHiroshi Yamauchi if (RI.R->contains(Parent)) 2849775a620SHiroshi Yamauchi return true; 2859775a620SHiroshi Yamauchi return false; 2869775a620SHiroshi Yamauchi } 2879775a620SHiroshi Yamauchi 2889775a620SHiroshi Yamauchi void print(raw_ostream &OS) const; 2899775a620SHiroshi Yamauchi 2909775a620SHiroshi Yamauchi SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 2919775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> Subs; // Subscopes. 2929775a620SHiroshi Yamauchi 2939775a620SHiroshi Yamauchi // The instruction at which to insert the CHR conditional branch (and hoist 2949775a620SHiroshi Yamauchi // the dependent condition values). 2959775a620SHiroshi Yamauchi Instruction *BranchInsertPoint; 2969775a620SHiroshi Yamauchi 2979775a620SHiroshi Yamauchi // True-biased and false-biased regions (conditional blocks), 2989775a620SHiroshi Yamauchi // respectively. Used only for the outermost scope and includes regions in 2999775a620SHiroshi Yamauchi // subscopes. The rest are unbiased. 3009775a620SHiroshi Yamauchi DenseSet<Region *> TrueBiasedRegions; 3019775a620SHiroshi Yamauchi DenseSet<Region *> FalseBiasedRegions; 3029775a620SHiroshi Yamauchi // Among the biased regions, the regions that get CHRed. 3039775a620SHiroshi Yamauchi SmallVector<RegInfo, 8> CHRRegions; 3049775a620SHiroshi Yamauchi 3059775a620SHiroshi Yamauchi // True-biased and false-biased selects, respectively. Used only for the 3069775a620SHiroshi Yamauchi // outermost scope and includes ones in subscopes. 3079775a620SHiroshi Yamauchi DenseSet<SelectInst *> TrueBiasedSelects; 3089775a620SHiroshi Yamauchi DenseSet<SelectInst *> FalseBiasedSelects; 3099775a620SHiroshi Yamauchi 3109775a620SHiroshi Yamauchi // Map from one of the above regions to the instructions to stop 3119775a620SHiroshi Yamauchi // hoisting instructions at through use-def chains. 3129775a620SHiroshi Yamauchi HoistStopMapTy HoistStopMap; 3139775a620SHiroshi Yamauchi 3149775a620SHiroshi Yamauchi private: 315f1542efdSBenjamin Kramer CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn) 316f1542efdSBenjamin Kramer : RegInfos(RegInfosIn.begin(), RegInfosIn.end()), 317f1542efdSBenjamin Kramer Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {} 3189775a620SHiroshi Yamauchi }; 3199775a620SHiroshi Yamauchi 3209775a620SHiroshi Yamauchi class CHR { 3219775a620SHiroshi Yamauchi public: 3229775a620SHiroshi Yamauchi CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 323fd2c699dSHiroshi Yamauchi ProfileSummaryInfo &PSIin, RegionInfo &RIin, 324fd2c699dSHiroshi Yamauchi OptimizationRemarkEmitter &OREin) 325fd2c699dSHiroshi Yamauchi : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 3269775a620SHiroshi Yamauchi 3279775a620SHiroshi Yamauchi ~CHR() { 3289775a620SHiroshi Yamauchi for (CHRScope *Scope : Scopes) { 3299775a620SHiroshi Yamauchi delete Scope; 3309775a620SHiroshi Yamauchi } 3319775a620SHiroshi Yamauchi } 3329775a620SHiroshi Yamauchi 3339775a620SHiroshi Yamauchi bool run(); 3349775a620SHiroshi Yamauchi 3359775a620SHiroshi Yamauchi private: 3369775a620SHiroshi Yamauchi // See the comments in CHR::run() for the high level flow of the algorithm and 3379775a620SHiroshi Yamauchi // what the following functions do. 3389775a620SHiroshi Yamauchi 3399775a620SHiroshi Yamauchi void findScopes(SmallVectorImpl<CHRScope *> &Output) { 3409775a620SHiroshi Yamauchi Region *R = RI.getTopLevelRegion(); 341f1542efdSBenjamin Kramer if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) { 3429775a620SHiroshi Yamauchi Output.push_back(Scope); 3439775a620SHiroshi Yamauchi } 3449775a620SHiroshi Yamauchi } 3459775a620SHiroshi Yamauchi CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 3469775a620SHiroshi Yamauchi SmallVectorImpl<CHRScope *> &Scopes); 3479775a620SHiroshi Yamauchi CHRScope *findScope(Region *R); 3489775a620SHiroshi Yamauchi void checkScopeHoistable(CHRScope *Scope); 3499775a620SHiroshi Yamauchi 3509775a620SHiroshi Yamauchi void splitScopes(SmallVectorImpl<CHRScope *> &Input, 3519775a620SHiroshi Yamauchi SmallVectorImpl<CHRScope *> &Output); 3529775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 3539775a620SHiroshi Yamauchi CHRScope *Outer, 3549775a620SHiroshi Yamauchi DenseSet<Value *> *OuterConditionValues, 3559775a620SHiroshi Yamauchi Instruction *OuterInsertPoint, 3569775a620SHiroshi Yamauchi SmallVectorImpl<CHRScope *> &Output, 3579775a620SHiroshi Yamauchi DenseSet<Instruction *> &Unhoistables); 3589775a620SHiroshi Yamauchi 3599775a620SHiroshi Yamauchi void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 3609775a620SHiroshi Yamauchi void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 3619775a620SHiroshi Yamauchi 3629775a620SHiroshi Yamauchi void filterScopes(SmallVectorImpl<CHRScope *> &Input, 3639775a620SHiroshi Yamauchi SmallVectorImpl<CHRScope *> &Output); 3649775a620SHiroshi Yamauchi 3659775a620SHiroshi Yamauchi void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 3669775a620SHiroshi Yamauchi SmallVectorImpl<CHRScope *> &Output); 3679775a620SHiroshi Yamauchi void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 3689775a620SHiroshi Yamauchi 3699775a620SHiroshi Yamauchi void sortScopes(SmallVectorImpl<CHRScope *> &Input, 3709775a620SHiroshi Yamauchi SmallVectorImpl<CHRScope *> &Output); 3719775a620SHiroshi Yamauchi 3729775a620SHiroshi Yamauchi void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 3739775a620SHiroshi Yamauchi void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 3749775a620SHiroshi Yamauchi void cloneScopeBlocks(CHRScope *Scope, 3759775a620SHiroshi Yamauchi BasicBlock *PreEntryBlock, 3769775a620SHiroshi Yamauchi BasicBlock *ExitBlock, 3779775a620SHiroshi Yamauchi Region *LastRegion, 3789775a620SHiroshi Yamauchi ValueToValueMapTy &VMap); 3799775a620SHiroshi Yamauchi BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 3809775a620SHiroshi Yamauchi BasicBlock *EntryBlock, 3819775a620SHiroshi Yamauchi BasicBlock *NewEntryBlock, 3829775a620SHiroshi Yamauchi ValueToValueMapTy &VMap); 3839775a620SHiroshi Yamauchi void fixupBranchesAndSelects(CHRScope *Scope, 3849775a620SHiroshi Yamauchi BasicBlock *PreEntryBlock, 3859775a620SHiroshi Yamauchi BranchInst *MergedBR, 3869775a620SHiroshi Yamauchi uint64_t ProfileCount); 3879775a620SHiroshi Yamauchi void fixupBranch(Region *R, 3889775a620SHiroshi Yamauchi CHRScope *Scope, 3899775a620SHiroshi Yamauchi IRBuilder<> &IRB, 3909775a620SHiroshi Yamauchi Value *&MergedCondition, BranchProbability &CHRBranchBias); 3919775a620SHiroshi Yamauchi void fixupSelect(SelectInst* SI, 3929775a620SHiroshi Yamauchi CHRScope *Scope, 3939775a620SHiroshi Yamauchi IRBuilder<> &IRB, 3949775a620SHiroshi Yamauchi Value *&MergedCondition, BranchProbability &CHRBranchBias); 3959775a620SHiroshi Yamauchi void addToMergedCondition(bool IsTrueBiased, Value *Cond, 3969775a620SHiroshi Yamauchi Instruction *BranchOrSelect, 3979775a620SHiroshi Yamauchi CHRScope *Scope, 3989775a620SHiroshi Yamauchi IRBuilder<> &IRB, 3999775a620SHiroshi Yamauchi Value *&MergedCondition); 4009775a620SHiroshi Yamauchi 4019775a620SHiroshi Yamauchi Function &F; 4029775a620SHiroshi Yamauchi BlockFrequencyInfo &BFI; 4039775a620SHiroshi Yamauchi DominatorTree &DT; 4049775a620SHiroshi Yamauchi ProfileSummaryInfo &PSI; 4059775a620SHiroshi Yamauchi RegionInfo &RI; 406fd2c699dSHiroshi Yamauchi OptimizationRemarkEmitter &ORE; 4079775a620SHiroshi Yamauchi CHRStats Stats; 4089775a620SHiroshi Yamauchi 4099775a620SHiroshi Yamauchi // All the true-biased regions in the function 4109775a620SHiroshi Yamauchi DenseSet<Region *> TrueBiasedRegionsGlobal; 4119775a620SHiroshi Yamauchi // All the false-biased regions in the function 4129775a620SHiroshi Yamauchi DenseSet<Region *> FalseBiasedRegionsGlobal; 4139775a620SHiroshi Yamauchi // All the true-biased selects in the function 4149775a620SHiroshi Yamauchi DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 4159775a620SHiroshi Yamauchi // All the false-biased selects in the function 4169775a620SHiroshi Yamauchi DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 4179775a620SHiroshi Yamauchi // A map from biased regions to their branch bias 4189775a620SHiroshi Yamauchi DenseMap<Region *, BranchProbability> BranchBiasMap; 4199775a620SHiroshi Yamauchi // A map from biased selects to their branch bias 4209775a620SHiroshi Yamauchi DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 4219775a620SHiroshi Yamauchi // All the scopes. 4229775a620SHiroshi Yamauchi DenseSet<CHRScope *> Scopes; 4239775a620SHiroshi Yamauchi }; 4249775a620SHiroshi Yamauchi 4259775a620SHiroshi Yamauchi } // end anonymous namespace 4269775a620SHiroshi Yamauchi 4275fb509b7SHiroshi Yamauchi static inline 4285fb509b7SHiroshi Yamauchi raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 4295fb509b7SHiroshi Yamauchi const CHRStats &Stats) { 4305fb509b7SHiroshi Yamauchi Stats.print(OS); 4315fb509b7SHiroshi Yamauchi return OS; 4325fb509b7SHiroshi Yamauchi } 4335fb509b7SHiroshi Yamauchi 4345fb509b7SHiroshi Yamauchi static inline 4355fb509b7SHiroshi Yamauchi raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 4365fb509b7SHiroshi Yamauchi Scope.print(OS); 4375fb509b7SHiroshi Yamauchi return OS; 4385fb509b7SHiroshi Yamauchi } 4395fb509b7SHiroshi Yamauchi 4409775a620SHiroshi Yamauchi static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 4419775a620SHiroshi Yamauchi if (ForceCHR) 4429775a620SHiroshi Yamauchi return true; 4439775a620SHiroshi Yamauchi 4449775a620SHiroshi Yamauchi if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 4459775a620SHiroshi Yamauchi if (CHRModules.count(F.getParent()->getName())) 4469775a620SHiroshi Yamauchi return true; 4475fb509b7SHiroshi Yamauchi return CHRFunctions.count(F.getName()); 4489775a620SHiroshi Yamauchi } 4499775a620SHiroshi Yamauchi 4509775a620SHiroshi Yamauchi assert(PSI.hasProfileSummary() && "Empty PSI?"); 4519775a620SHiroshi Yamauchi return PSI.isFunctionEntryHot(&F); 4529775a620SHiroshi Yamauchi } 4539775a620SHiroshi Yamauchi 454c8f348cbSFangrui Song static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 455c8f348cbSFangrui Song CHRStats *Stats) { 4565fb509b7SHiroshi Yamauchi StringRef FuncName = F.getName(); 4575fb509b7SHiroshi Yamauchi StringRef ModuleName = F.getParent()->getName(); 45806650941SHiroshi Yamauchi (void)(FuncName); // Unused in release build. 45906650941SHiroshi Yamauchi (void)(ModuleName); // Unused in release build. 4609775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 4615fb509b7SHiroshi Yamauchi << FuncName); 4629775a620SHiroshi Yamauchi if (Stats) 4639775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << " " << *Stats); 4649775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "\n"); 4659775a620SHiroshi Yamauchi CHR_DEBUG(F.dump()); 4669775a620SHiroshi Yamauchi } 4679775a620SHiroshi Yamauchi 4689775a620SHiroshi Yamauchi void CHRScope::print(raw_ostream &OS) const { 4699775a620SHiroshi Yamauchi assert(RegInfos.size() > 0 && "Empty CHRScope"); 4709775a620SHiroshi Yamauchi OS << "CHRScope["; 4719775a620SHiroshi Yamauchi OS << RegInfos.size() << ", Regions["; 4729775a620SHiroshi Yamauchi for (const RegInfo &RI : RegInfos) { 4739775a620SHiroshi Yamauchi OS << RI.R->getNameStr(); 4749775a620SHiroshi Yamauchi if (RI.HasBranch) 4759775a620SHiroshi Yamauchi OS << " B"; 4769775a620SHiroshi Yamauchi if (RI.Selects.size() > 0) 4779775a620SHiroshi Yamauchi OS << " S" << RI.Selects.size(); 4789775a620SHiroshi Yamauchi OS << ", "; 4799775a620SHiroshi Yamauchi } 4809775a620SHiroshi Yamauchi if (RegInfos[0].R->getParent()) { 4819775a620SHiroshi Yamauchi OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 4829775a620SHiroshi Yamauchi } else { 4839775a620SHiroshi Yamauchi // top level region 4849775a620SHiroshi Yamauchi OS << "]"; 4859775a620SHiroshi Yamauchi } 4869775a620SHiroshi Yamauchi OS << ", Subs["; 4879775a620SHiroshi Yamauchi for (CHRScope *Sub : Subs) { 4889775a620SHiroshi Yamauchi OS << *Sub << ", "; 4899775a620SHiroshi Yamauchi } 4909775a620SHiroshi Yamauchi OS << "]]"; 4919775a620SHiroshi Yamauchi } 4929775a620SHiroshi Yamauchi 4939775a620SHiroshi Yamauchi // Return true if the given instruction type can be hoisted by CHR. 4949775a620SHiroshi Yamauchi static bool isHoistableInstructionType(Instruction *I) { 4959775a620SHiroshi Yamauchi return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 4969775a620SHiroshi Yamauchi isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 4979775a620SHiroshi Yamauchi isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 4989775a620SHiroshi Yamauchi isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 4999775a620SHiroshi Yamauchi isa<InsertValueInst>(I); 5009775a620SHiroshi Yamauchi } 5019775a620SHiroshi Yamauchi 5029775a620SHiroshi Yamauchi // Return true if the given instruction can be hoisted by CHR. 5039775a620SHiroshi Yamauchi static bool isHoistable(Instruction *I, DominatorTree &DT) { 5049775a620SHiroshi Yamauchi if (!isHoistableInstructionType(I)) 5059775a620SHiroshi Yamauchi return false; 5069775a620SHiroshi Yamauchi return isSafeToSpeculativelyExecute(I, nullptr, &DT); 5079775a620SHiroshi Yamauchi } 5089775a620SHiroshi Yamauchi 5099775a620SHiroshi Yamauchi // Recursively traverse the use-def chains of the given value and return a set 5109775a620SHiroshi Yamauchi // of the unhoistable base values defined within the scope (excluding the 5119775a620SHiroshi Yamauchi // first-region entry block) or the (hoistable or unhoistable) base values that 5129775a620SHiroshi Yamauchi // are defined outside (including the first-region entry block) of the 5139775a620SHiroshi Yamauchi // scope. The returned set doesn't include constants. 514f1542efdSBenjamin Kramer static const std::set<Value *> & 515f1542efdSBenjamin Kramer getBaseValues(Value *V, DominatorTree &DT, 516d842f2eeSHiroshi Yamauchi DenseMap<Value *, std::set<Value *>> &Visited) { 517f1542efdSBenjamin Kramer auto It = Visited.find(V); 518f1542efdSBenjamin Kramer if (It != Visited.end()) { 519f1542efdSBenjamin Kramer return It->second; 520d842f2eeSHiroshi Yamauchi } 5219775a620SHiroshi Yamauchi std::set<Value *> Result; 5229775a620SHiroshi Yamauchi if (auto *I = dyn_cast<Instruction>(V)) { 523f1542efdSBenjamin Kramer // We don't stop at a block that's not in the Scope because we would miss 524f1542efdSBenjamin Kramer // some instructions that are based on the same base values if we stop 525f1542efdSBenjamin Kramer // there. 5269775a620SHiroshi Yamauchi if (!isHoistable(I, DT)) { 5279775a620SHiroshi Yamauchi Result.insert(I); 528f1542efdSBenjamin Kramer return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 5299775a620SHiroshi Yamauchi } 5309775a620SHiroshi Yamauchi // I is hoistable above the Scope. 5319775a620SHiroshi Yamauchi for (Value *Op : I->operands()) { 532f1542efdSBenjamin Kramer const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited); 5339775a620SHiroshi Yamauchi Result.insert(OpResult.begin(), OpResult.end()); 5349775a620SHiroshi Yamauchi } 535f1542efdSBenjamin Kramer return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 5369775a620SHiroshi Yamauchi } 5379775a620SHiroshi Yamauchi if (isa<Argument>(V)) { 5389775a620SHiroshi Yamauchi Result.insert(V); 5399775a620SHiroshi Yamauchi } 5409775a620SHiroshi Yamauchi // We don't include others like constants because those won't lead to any 5419775a620SHiroshi Yamauchi // chance of folding of conditions (eg two bit checks merged into one check) 5429775a620SHiroshi Yamauchi // after CHR. 543f1542efdSBenjamin Kramer return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 5449775a620SHiroshi Yamauchi } 5459775a620SHiroshi Yamauchi 5469775a620SHiroshi Yamauchi // Return true if V is already hoisted or can be hoisted (along with its 5479775a620SHiroshi Yamauchi // operands) above the insert point. When it returns true and HoistStops is 5489775a620SHiroshi Yamauchi // non-null, the instructions to stop hoisting at through the use-def chains are 5499775a620SHiroshi Yamauchi // inserted into HoistStops. 5509775a620SHiroshi Yamauchi static bool 5519775a620SHiroshi Yamauchi checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 5529775a620SHiroshi Yamauchi DenseSet<Instruction *> &Unhoistables, 553dfeb7974SHiroshi Yamauchi DenseSet<Instruction *> *HoistStops, 554dfeb7974SHiroshi Yamauchi DenseMap<Instruction *, bool> &Visited) { 5559775a620SHiroshi Yamauchi assert(InsertPoint && "Null InsertPoint"); 5569775a620SHiroshi Yamauchi if (auto *I = dyn_cast<Instruction>(V)) { 557f1542efdSBenjamin Kramer auto It = Visited.find(I); 558f1542efdSBenjamin Kramer if (It != Visited.end()) { 559f1542efdSBenjamin Kramer return It->second; 560dfeb7974SHiroshi Yamauchi } 5619775a620SHiroshi Yamauchi assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 5629775a620SHiroshi Yamauchi assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 5639775a620SHiroshi Yamauchi if (Unhoistables.count(I)) { 5649775a620SHiroshi Yamauchi // Don't hoist if they are not to be hoisted. 565dfeb7974SHiroshi Yamauchi Visited[I] = false; 5669775a620SHiroshi Yamauchi return false; 5679775a620SHiroshi Yamauchi } 5689775a620SHiroshi Yamauchi if (DT.dominates(I, InsertPoint)) { 5699775a620SHiroshi Yamauchi // We are already above the insert point. Stop here. 5709775a620SHiroshi Yamauchi if (HoistStops) 5719775a620SHiroshi Yamauchi HoistStops->insert(I); 572dfeb7974SHiroshi Yamauchi Visited[I] = true; 5739775a620SHiroshi Yamauchi return true; 5749775a620SHiroshi Yamauchi } 5759775a620SHiroshi Yamauchi // We aren't not above the insert point, check if we can hoist it above the 5769775a620SHiroshi Yamauchi // insert point. 5779775a620SHiroshi Yamauchi if (isHoistable(I, DT)) { 5789775a620SHiroshi Yamauchi // Check operands first. 5799775a620SHiroshi Yamauchi DenseSet<Instruction *> OpsHoistStops; 5809775a620SHiroshi Yamauchi bool AllOpsHoisted = true; 5819775a620SHiroshi Yamauchi for (Value *Op : I->operands()) { 582dfeb7974SHiroshi Yamauchi if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, 583dfeb7974SHiroshi Yamauchi Visited)) { 5849775a620SHiroshi Yamauchi AllOpsHoisted = false; 5859775a620SHiroshi Yamauchi break; 5869775a620SHiroshi Yamauchi } 5879775a620SHiroshi Yamauchi } 5889775a620SHiroshi Yamauchi if (AllOpsHoisted) { 5899775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 5909775a620SHiroshi Yamauchi if (HoistStops) 5919775a620SHiroshi Yamauchi HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 592dfeb7974SHiroshi Yamauchi Visited[I] = true; 5939775a620SHiroshi Yamauchi return true; 5949775a620SHiroshi Yamauchi } 5959775a620SHiroshi Yamauchi } 596dfeb7974SHiroshi Yamauchi Visited[I] = false; 5979775a620SHiroshi Yamauchi return false; 5989775a620SHiroshi Yamauchi } 5999775a620SHiroshi Yamauchi // Non-instructions are considered hoistable. 6009775a620SHiroshi Yamauchi return true; 6019775a620SHiroshi Yamauchi } 6029775a620SHiroshi Yamauchi 6039775a620SHiroshi Yamauchi // Returns true and sets the true probability and false probability of an 6049775a620SHiroshi Yamauchi // MD_prof metadata if it's well-formed. 605b3b61de0SFangrui Song static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, 6069775a620SHiroshi Yamauchi BranchProbability &FalseProb) { 6079775a620SHiroshi Yamauchi if (!MD) return false; 6089775a620SHiroshi Yamauchi MDString *MDName = cast<MDString>(MD->getOperand(0)); 6099775a620SHiroshi Yamauchi if (MDName->getString() != "branch_weights" || 6109775a620SHiroshi Yamauchi MD->getNumOperands() != 3) 6119775a620SHiroshi Yamauchi return false; 6129775a620SHiroshi Yamauchi ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 6139775a620SHiroshi Yamauchi ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 6149775a620SHiroshi Yamauchi if (!TrueWeight || !FalseWeight) 6159775a620SHiroshi Yamauchi return false; 61647c2bc58SRichard Trieu uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 61747c2bc58SRichard Trieu uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 61847c2bc58SRichard Trieu uint64_t SumWt = TrueWt + FalseWt; 61947c2bc58SRichard Trieu 62047c2bc58SRichard Trieu assert(SumWt >= TrueWt && SumWt >= FalseWt && 62147c2bc58SRichard Trieu "Overflow calculating branch probabilities."); 62247c2bc58SRichard Trieu 6237b9f8e17SHiroshi Yamauchi // Guard against 0-to-0 branch weights to avoid a division-by-zero crash. 6247b9f8e17SHiroshi Yamauchi if (SumWt == 0) 6257b9f8e17SHiroshi Yamauchi return false; 6267b9f8e17SHiroshi Yamauchi 62747c2bc58SRichard Trieu TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 62847c2bc58SRichard Trieu FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 6299775a620SHiroshi Yamauchi return true; 6309775a620SHiroshi Yamauchi } 6319775a620SHiroshi Yamauchi 6329775a620SHiroshi Yamauchi static BranchProbability getCHRBiasThreshold() { 6339775a620SHiroshi Yamauchi return BranchProbability::getBranchProbability( 6349775a620SHiroshi Yamauchi static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 6359775a620SHiroshi Yamauchi } 6369775a620SHiroshi Yamauchi 6379775a620SHiroshi Yamauchi // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 6389775a620SHiroshi Yamauchi // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 6399775a620SHiroshi Yamauchi // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 6409775a620SHiroshi Yamauchi // false. 6419775a620SHiroshi Yamauchi template <typename K, typename S, typename M> 642c55e9975SBenjamin Kramer static bool checkBias(K *Key, BranchProbability TrueProb, 643c55e9975SBenjamin Kramer BranchProbability FalseProb, S &TrueSet, S &FalseSet, 644c55e9975SBenjamin Kramer M &BiasMap) { 6459775a620SHiroshi Yamauchi BranchProbability Threshold = getCHRBiasThreshold(); 6469775a620SHiroshi Yamauchi if (TrueProb >= Threshold) { 6479775a620SHiroshi Yamauchi TrueSet.insert(Key); 6489775a620SHiroshi Yamauchi BiasMap[Key] = TrueProb; 6499775a620SHiroshi Yamauchi return true; 6509775a620SHiroshi Yamauchi } else if (FalseProb >= Threshold) { 6519775a620SHiroshi Yamauchi FalseSet.insert(Key); 6529775a620SHiroshi Yamauchi BiasMap[Key] = FalseProb; 6539775a620SHiroshi Yamauchi return true; 6549775a620SHiroshi Yamauchi } 6559775a620SHiroshi Yamauchi return false; 6569775a620SHiroshi Yamauchi } 6579775a620SHiroshi Yamauchi 6589775a620SHiroshi Yamauchi // Returns true and insert a region into the right biased set and the map if the 6599775a620SHiroshi Yamauchi // branch of the region is biased. 660b3b61de0SFangrui Song static bool checkBiasedBranch(BranchInst *BI, Region *R, 6619775a620SHiroshi Yamauchi DenseSet<Region *> &TrueBiasedRegionsGlobal, 6629775a620SHiroshi Yamauchi DenseSet<Region *> &FalseBiasedRegionsGlobal, 6639775a620SHiroshi Yamauchi DenseMap<Region *, BranchProbability> &BranchBiasMap) { 6649775a620SHiroshi Yamauchi if (!BI->isConditional()) 6659775a620SHiroshi Yamauchi return false; 6669775a620SHiroshi Yamauchi BranchProbability ThenProb, ElseProb; 667b3b61de0SFangrui Song if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), 6689775a620SHiroshi Yamauchi ThenProb, ElseProb)) 6699775a620SHiroshi Yamauchi return false; 6709775a620SHiroshi Yamauchi BasicBlock *IfThen = BI->getSuccessor(0); 6719775a620SHiroshi Yamauchi BasicBlock *IfElse = BI->getSuccessor(1); 6729775a620SHiroshi Yamauchi assert((IfThen == R->getExit() || IfElse == R->getExit()) && 6739775a620SHiroshi Yamauchi IfThen != IfElse && 6749775a620SHiroshi Yamauchi "Invariant from findScopes"); 6759775a620SHiroshi Yamauchi if (IfThen == R->getExit()) { 6769775a620SHiroshi Yamauchi // Swap them so that IfThen/ThenProb means going into the conditional code 6779775a620SHiroshi Yamauchi // and IfElse/ElseProb means skipping it. 6789775a620SHiroshi Yamauchi std::swap(IfThen, IfElse); 6799775a620SHiroshi Yamauchi std::swap(ThenProb, ElseProb); 6809775a620SHiroshi Yamauchi } 6819775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "BI " << *BI << " "); 6829775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 6839775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 684b3b61de0SFangrui Song return checkBias(R, ThenProb, ElseProb, 6859775a620SHiroshi Yamauchi TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 6869775a620SHiroshi Yamauchi BranchBiasMap); 6879775a620SHiroshi Yamauchi } 6889775a620SHiroshi Yamauchi 6899775a620SHiroshi Yamauchi // Returns true and insert a select into the right biased set and the map if the 6909775a620SHiroshi Yamauchi // select is biased. 691b3b61de0SFangrui Song static bool checkBiasedSelect( 6929775a620SHiroshi Yamauchi SelectInst *SI, Region *R, 6939775a620SHiroshi Yamauchi DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 6949775a620SHiroshi Yamauchi DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 6959775a620SHiroshi Yamauchi DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 6969775a620SHiroshi Yamauchi BranchProbability TrueProb, FalseProb; 697b3b61de0SFangrui Song if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), 6989775a620SHiroshi Yamauchi TrueProb, FalseProb)) 6999775a620SHiroshi Yamauchi return false; 7009775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "SI " << *SI << " "); 7019775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 7029775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 703b3b61de0SFangrui Song return checkBias(SI, TrueProb, FalseProb, 7049775a620SHiroshi Yamauchi TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 7059775a620SHiroshi Yamauchi SelectBiasMap); 7069775a620SHiroshi Yamauchi } 7079775a620SHiroshi Yamauchi 7089775a620SHiroshi Yamauchi // Returns the instruction at which to hoist the dependent condition values and 7099775a620SHiroshi Yamauchi // insert the CHR branch for a region. This is the terminator branch in the 7109775a620SHiroshi Yamauchi // entry block or the first select in the entry block, if any. 7119775a620SHiroshi Yamauchi static Instruction* getBranchInsertPoint(RegInfo &RI) { 7129775a620SHiroshi Yamauchi Region *R = RI.R; 7139775a620SHiroshi Yamauchi BasicBlock *EntryBB = R->getEntry(); 7149775a620SHiroshi Yamauchi // The hoist point is by default the terminator of the entry block, which is 7159775a620SHiroshi Yamauchi // the same as the branch instruction if RI.HasBranch is true. 7169775a620SHiroshi Yamauchi Instruction *HoistPoint = EntryBB->getTerminator(); 7179775a620SHiroshi Yamauchi for (SelectInst *SI : RI.Selects) { 7189775a620SHiroshi Yamauchi if (SI->getParent() == EntryBB) { 7199775a620SHiroshi Yamauchi // Pick the first select in Selects in the entry block. Note Selects is 7209775a620SHiroshi Yamauchi // sorted in the instruction order within a block (asserted below). 7219775a620SHiroshi Yamauchi HoistPoint = SI; 7229775a620SHiroshi Yamauchi break; 7239775a620SHiroshi Yamauchi } 7249775a620SHiroshi Yamauchi } 7259775a620SHiroshi Yamauchi assert(HoistPoint && "Null HoistPoint"); 7269775a620SHiroshi Yamauchi #ifndef NDEBUG 7279775a620SHiroshi Yamauchi // Check that HoistPoint is the first one in Selects in the entry block, 7289775a620SHiroshi Yamauchi // if any. 7299775a620SHiroshi Yamauchi DenseSet<Instruction *> EntryBlockSelectSet; 7309775a620SHiroshi Yamauchi for (SelectInst *SI : RI.Selects) { 7319775a620SHiroshi Yamauchi if (SI->getParent() == EntryBB) { 7329775a620SHiroshi Yamauchi EntryBlockSelectSet.insert(SI); 7339775a620SHiroshi Yamauchi } 7349775a620SHiroshi Yamauchi } 7359775a620SHiroshi Yamauchi for (Instruction &I : *EntryBB) { 73633bf1cadSKazu Hirata if (EntryBlockSelectSet.contains(&I)) { 7379775a620SHiroshi Yamauchi assert(&I == HoistPoint && 7389775a620SHiroshi Yamauchi "HoistPoint must be the first one in Selects"); 7399775a620SHiroshi Yamauchi break; 7409775a620SHiroshi Yamauchi } 7419775a620SHiroshi Yamauchi } 7429775a620SHiroshi Yamauchi #endif 7439775a620SHiroshi Yamauchi return HoistPoint; 7449775a620SHiroshi Yamauchi } 7459775a620SHiroshi Yamauchi 7469775a620SHiroshi Yamauchi // Find a CHR scope in the given region. 7479775a620SHiroshi Yamauchi CHRScope * CHR::findScope(Region *R) { 7489775a620SHiroshi Yamauchi CHRScope *Result = nullptr; 7499775a620SHiroshi Yamauchi BasicBlock *Entry = R->getEntry(); 7509775a620SHiroshi Yamauchi BasicBlock *Exit = R->getExit(); // null if top level. 7519775a620SHiroshi Yamauchi assert(Entry && "Entry must not be null"); 7529775a620SHiroshi Yamauchi assert((Exit == nullptr) == (R->isTopLevelRegion()) && 7539775a620SHiroshi Yamauchi "Only top level region has a null exit"); 7549775a620SHiroshi Yamauchi if (Entry) 7559775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 7569775a620SHiroshi Yamauchi else 7579775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Entry null\n"); 7589775a620SHiroshi Yamauchi if (Exit) 7599775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 7609775a620SHiroshi Yamauchi else 7619775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Exit null\n"); 7629775a620SHiroshi Yamauchi // Exclude cases where Entry is part of a subregion (hence it doesn't belong 7639775a620SHiroshi Yamauchi // to this region). 7649775a620SHiroshi Yamauchi bool EntryInSubregion = RI.getRegionFor(Entry) != R; 7659775a620SHiroshi Yamauchi if (EntryInSubregion) 7669775a620SHiroshi Yamauchi return nullptr; 7679775a620SHiroshi Yamauchi // Exclude loops 7689775a620SHiroshi Yamauchi for (BasicBlock *Pred : predecessors(Entry)) 7699775a620SHiroshi Yamauchi if (R->contains(Pred)) 7709775a620SHiroshi Yamauchi return nullptr; 771fae7debaSXun Li // If any of the basic blocks have address taken, we must skip this region 772fae7debaSXun Li // because we cannot clone basic blocks that have address taken. 77326a0d53bSWei Wang for (BasicBlock *BB : R->blocks()) { 774fae7debaSXun Li if (BB->hasAddressTaken()) 775fae7debaSXun Li return nullptr; 77626a0d53bSWei Wang // If we encounter llvm.coro.id, skip this region because if the basic block 77726a0d53bSWei Wang // is cloned, we end up inserting a token type PHI node to the block with 77826a0d53bSWei Wang // llvm.coro.begin. 77926a0d53bSWei Wang // FIXME: This could lead to less optimal codegen, because the region is 78026a0d53bSWei Wang // excluded, it can prevent CHR from merging adjacent regions into bigger 78126a0d53bSWei Wang // scope and hoisting more branches. 78226a0d53bSWei Wang for (Instruction &I : *BB) 78326a0d53bSWei Wang if (auto *II = dyn_cast<IntrinsicInst>(&I)) 78426a0d53bSWei Wang if (II->getIntrinsicID() == Intrinsic::coro_id) 78526a0d53bSWei Wang return nullptr; 78626a0d53bSWei Wang } 78726a0d53bSWei Wang 7889775a620SHiroshi Yamauchi if (Exit) { 7899775a620SHiroshi Yamauchi // Try to find an if-then block (check if R is an if-then). 7909775a620SHiroshi Yamauchi // if (cond) { 7919775a620SHiroshi Yamauchi // ... 7929775a620SHiroshi Yamauchi // } 7939775a620SHiroshi Yamauchi auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 7949775a620SHiroshi Yamauchi if (BI) 7959775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 7969775a620SHiroshi Yamauchi else 7979775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "BI null\n"); 7989775a620SHiroshi Yamauchi if (BI && BI->isConditional()) { 7999775a620SHiroshi Yamauchi BasicBlock *S0 = BI->getSuccessor(0); 8009775a620SHiroshi Yamauchi BasicBlock *S1 = BI->getSuccessor(1); 8019775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 8029775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 8039775a620SHiroshi Yamauchi if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 8049775a620SHiroshi Yamauchi RegInfo RI(R); 805b3b61de0SFangrui Song RI.HasBranch = checkBiasedBranch( 8069775a620SHiroshi Yamauchi BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 8079775a620SHiroshi Yamauchi BranchBiasMap); 8089775a620SHiroshi Yamauchi Result = new CHRScope(RI); 8099775a620SHiroshi Yamauchi Scopes.insert(Result); 8109775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 8119775a620SHiroshi Yamauchi ++Stats.NumBranches; 812fd2c699dSHiroshi Yamauchi if (!RI.HasBranch) { 813fd2c699dSHiroshi Yamauchi ORE.emit([&]() { 814fd2c699dSHiroshi Yamauchi return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 815fd2c699dSHiroshi Yamauchi << "Branch not biased"; 816fd2c699dSHiroshi Yamauchi }); 817fd2c699dSHiroshi Yamauchi } 8189775a620SHiroshi Yamauchi } 8199775a620SHiroshi Yamauchi } 8209775a620SHiroshi Yamauchi } 8219775a620SHiroshi Yamauchi { 8229775a620SHiroshi Yamauchi // Try to look for selects in the direct child blocks (as opposed to in 8239775a620SHiroshi Yamauchi // subregions) of R. 8249775a620SHiroshi Yamauchi // ... 8259775a620SHiroshi Yamauchi // if (..) { // Some subregion 8269775a620SHiroshi Yamauchi // ... 8279775a620SHiroshi Yamauchi // } 8289775a620SHiroshi Yamauchi // if (..) { // Some subregion 8299775a620SHiroshi Yamauchi // ... 8309775a620SHiroshi Yamauchi // } 8319775a620SHiroshi Yamauchi // ... 8329775a620SHiroshi Yamauchi // a = cond ? b : c; 8339775a620SHiroshi Yamauchi // ... 8349775a620SHiroshi Yamauchi SmallVector<SelectInst *, 8> Selects; 8359775a620SHiroshi Yamauchi for (RegionNode *E : R->elements()) { 8369775a620SHiroshi Yamauchi if (E->isSubRegion()) 8379775a620SHiroshi Yamauchi continue; 8389775a620SHiroshi Yamauchi // This returns the basic block of E if E is a direct child of R (not a 8399775a620SHiroshi Yamauchi // subregion.) 8409775a620SHiroshi Yamauchi BasicBlock *BB = E->getEntry(); 8419775a620SHiroshi Yamauchi // Need to push in the order to make it easier to find the first Select 8429775a620SHiroshi Yamauchi // later. 8439775a620SHiroshi Yamauchi for (Instruction &I : *BB) { 8449775a620SHiroshi Yamauchi if (auto *SI = dyn_cast<SelectInst>(&I)) { 8459775a620SHiroshi Yamauchi Selects.push_back(SI); 8469775a620SHiroshi Yamauchi ++Stats.NumBranches; 8479775a620SHiroshi Yamauchi } 8489775a620SHiroshi Yamauchi } 8499775a620SHiroshi Yamauchi } 8509775a620SHiroshi Yamauchi if (Selects.size() > 0) { 8519775a620SHiroshi Yamauchi auto AddSelects = [&](RegInfo &RI) { 8529775a620SHiroshi Yamauchi for (auto *SI : Selects) 853b3b61de0SFangrui Song if (checkBiasedSelect(SI, RI.R, 8549775a620SHiroshi Yamauchi TrueBiasedSelectsGlobal, 8559775a620SHiroshi Yamauchi FalseBiasedSelectsGlobal, 8569775a620SHiroshi Yamauchi SelectBiasMap)) 8579775a620SHiroshi Yamauchi RI.Selects.push_back(SI); 858fd2c699dSHiroshi Yamauchi else 859fd2c699dSHiroshi Yamauchi ORE.emit([&]() { 860fd2c699dSHiroshi Yamauchi return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 861fd2c699dSHiroshi Yamauchi << "Select not biased"; 862fd2c699dSHiroshi Yamauchi }); 8639775a620SHiroshi Yamauchi }; 8649775a620SHiroshi Yamauchi if (!Result) { 8659775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Found a select-only region\n"); 8669775a620SHiroshi Yamauchi RegInfo RI(R); 8679775a620SHiroshi Yamauchi AddSelects(RI); 8689775a620SHiroshi Yamauchi Result = new CHRScope(RI); 8699775a620SHiroshi Yamauchi Scopes.insert(Result); 8709775a620SHiroshi Yamauchi } else { 8719775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 8729775a620SHiroshi Yamauchi AddSelects(Result->RegInfos[0]); 8739775a620SHiroshi Yamauchi } 8749775a620SHiroshi Yamauchi } 8759775a620SHiroshi Yamauchi } 8769775a620SHiroshi Yamauchi 8779775a620SHiroshi Yamauchi if (Result) { 8789775a620SHiroshi Yamauchi checkScopeHoistable(Result); 8799775a620SHiroshi Yamauchi } 8809775a620SHiroshi Yamauchi return Result; 8819775a620SHiroshi Yamauchi } 8829775a620SHiroshi Yamauchi 8839775a620SHiroshi Yamauchi // Check that any of the branch and the selects in the region could be 8849775a620SHiroshi Yamauchi // hoisted above the the CHR branch insert point (the most dominating of 8859775a620SHiroshi Yamauchi // them, either the branch (at the end of the first block) or the first 8869775a620SHiroshi Yamauchi // select in the first block). If the branch can't be hoisted, drop the 8879775a620SHiroshi Yamauchi // selects in the first blocks. 8889775a620SHiroshi Yamauchi // 8899775a620SHiroshi Yamauchi // For example, for the following scope/region with selects, we want to insert 8909775a620SHiroshi Yamauchi // the merged branch right before the first select in the first/entry block by 8919775a620SHiroshi Yamauchi // hoisting c1, c2, c3, and c4. 8929775a620SHiroshi Yamauchi // 8939775a620SHiroshi Yamauchi // // Branch insert point here. 8949775a620SHiroshi Yamauchi // a = c1 ? b : c; // Select 1 8959775a620SHiroshi Yamauchi // d = c2 ? e : f; // Select 2 8969775a620SHiroshi Yamauchi // if (c3) { // Branch 8979775a620SHiroshi Yamauchi // ... 8989775a620SHiroshi Yamauchi // c4 = foo() // A call. 8999775a620SHiroshi Yamauchi // g = c4 ? h : i; // Select 3 9009775a620SHiroshi Yamauchi // } 9019775a620SHiroshi Yamauchi // 9029775a620SHiroshi Yamauchi // But suppose we can't hoist c4 because it's dependent on the preceding 9039775a620SHiroshi Yamauchi // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 9049775a620SHiroshi Yamauchi // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 9059775a620SHiroshi Yamauchi void CHR::checkScopeHoistable(CHRScope *Scope) { 9069775a620SHiroshi Yamauchi RegInfo &RI = Scope->RegInfos[0]; 9079775a620SHiroshi Yamauchi Region *R = RI.R; 9089775a620SHiroshi Yamauchi BasicBlock *EntryBB = R->getEntry(); 9099775a620SHiroshi Yamauchi auto *Branch = RI.HasBranch ? 9109775a620SHiroshi Yamauchi cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 9119775a620SHiroshi Yamauchi SmallVector<SelectInst *, 8> &Selects = RI.Selects; 9129775a620SHiroshi Yamauchi if (RI.HasBranch || !Selects.empty()) { 9139775a620SHiroshi Yamauchi Instruction *InsertPoint = getBranchInsertPoint(RI); 9149775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 9159775a620SHiroshi Yamauchi // Avoid a data dependence from a select or a branch to a(nother) 9169775a620SHiroshi Yamauchi // select. Note no instruction can't data-depend on a branch (a branch 9179775a620SHiroshi Yamauchi // instruction doesn't produce a value). 9189775a620SHiroshi Yamauchi DenseSet<Instruction *> Unhoistables; 9199775a620SHiroshi Yamauchi // Initialize Unhoistables with the selects. 9209775a620SHiroshi Yamauchi for (SelectInst *SI : Selects) { 9219775a620SHiroshi Yamauchi Unhoistables.insert(SI); 9229775a620SHiroshi Yamauchi } 9239775a620SHiroshi Yamauchi // Remove Selects that can't be hoisted. 9249775a620SHiroshi Yamauchi for (auto it = Selects.begin(); it != Selects.end(); ) { 9259775a620SHiroshi Yamauchi SelectInst *SI = *it; 9269775a620SHiroshi Yamauchi if (SI == InsertPoint) { 9279775a620SHiroshi Yamauchi ++it; 9289775a620SHiroshi Yamauchi continue; 9299775a620SHiroshi Yamauchi } 930dfeb7974SHiroshi Yamauchi DenseMap<Instruction *, bool> Visited; 9319775a620SHiroshi Yamauchi bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 932dfeb7974SHiroshi Yamauchi DT, Unhoistables, nullptr, Visited); 9339775a620SHiroshi Yamauchi if (!IsHoistable) { 9349775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 935fd2c699dSHiroshi Yamauchi ORE.emit([&]() { 936fd2c699dSHiroshi Yamauchi return OptimizationRemarkMissed(DEBUG_TYPE, 937fd2c699dSHiroshi Yamauchi "DropUnhoistableSelect", SI) 938fd2c699dSHiroshi Yamauchi << "Dropped unhoistable select"; 939fd2c699dSHiroshi Yamauchi }); 9409775a620SHiroshi Yamauchi it = Selects.erase(it); 9419775a620SHiroshi Yamauchi // Since we are dropping the select here, we also drop it from 9429775a620SHiroshi Yamauchi // Unhoistables. 9439775a620SHiroshi Yamauchi Unhoistables.erase(SI); 9449775a620SHiroshi Yamauchi } else 9459775a620SHiroshi Yamauchi ++it; 9469775a620SHiroshi Yamauchi } 9479775a620SHiroshi Yamauchi // Update InsertPoint after potentially removing selects. 9489775a620SHiroshi Yamauchi InsertPoint = getBranchInsertPoint(RI); 9499775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 9509775a620SHiroshi Yamauchi if (RI.HasBranch && InsertPoint != Branch) { 951dfeb7974SHiroshi Yamauchi DenseMap<Instruction *, bool> Visited; 9529775a620SHiroshi Yamauchi bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 953dfeb7974SHiroshi Yamauchi DT, Unhoistables, nullptr, Visited); 9549775a620SHiroshi Yamauchi if (!IsHoistable) { 9559775a620SHiroshi Yamauchi // If the branch isn't hoistable, drop the selects in the entry 9569775a620SHiroshi Yamauchi // block, preferring the branch, which makes the branch the hoist 9579775a620SHiroshi Yamauchi // point. 9589775a620SHiroshi Yamauchi assert(InsertPoint != Branch && "Branch must not be the hoist point"); 9599775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 9609775a620SHiroshi Yamauchi CHR_DEBUG( 9619775a620SHiroshi Yamauchi for (SelectInst *SI : Selects) { 9629775a620SHiroshi Yamauchi dbgs() << "SI " << *SI << "\n"; 9639775a620SHiroshi Yamauchi }); 964fd2c699dSHiroshi Yamauchi for (SelectInst *SI : Selects) { 965fd2c699dSHiroshi Yamauchi ORE.emit([&]() { 966fd2c699dSHiroshi Yamauchi return OptimizationRemarkMissed(DEBUG_TYPE, 967fd2c699dSHiroshi Yamauchi "DropSelectUnhoistableBranch", SI) 968fd2c699dSHiroshi Yamauchi << "Dropped select due to unhoistable branch"; 969fd2c699dSHiroshi Yamauchi }); 970fd2c699dSHiroshi Yamauchi } 971b6211167SKazu Hirata llvm::erase_if(Selects, [EntryBB](SelectInst *SI) { 9729775a620SHiroshi Yamauchi return SI->getParent() == EntryBB; 973b6211167SKazu Hirata }); 9749775a620SHiroshi Yamauchi Unhoistables.clear(); 9759775a620SHiroshi Yamauchi InsertPoint = Branch; 9769775a620SHiroshi Yamauchi } 9779775a620SHiroshi Yamauchi } 9789775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 9799775a620SHiroshi Yamauchi #ifndef NDEBUG 9809775a620SHiroshi Yamauchi if (RI.HasBranch) { 9819775a620SHiroshi Yamauchi assert(!DT.dominates(Branch, InsertPoint) && 9829775a620SHiroshi Yamauchi "Branch can't be already above the hoist point"); 983dfeb7974SHiroshi Yamauchi DenseMap<Instruction *, bool> Visited; 9849775a620SHiroshi Yamauchi assert(checkHoistValue(Branch->getCondition(), InsertPoint, 985dfeb7974SHiroshi Yamauchi DT, Unhoistables, nullptr, Visited) && 9869775a620SHiroshi Yamauchi "checkHoistValue for branch"); 9879775a620SHiroshi Yamauchi } 9889775a620SHiroshi Yamauchi for (auto *SI : Selects) { 9899775a620SHiroshi Yamauchi assert(!DT.dominates(SI, InsertPoint) && 9909775a620SHiroshi Yamauchi "SI can't be already above the hoist point"); 991dfeb7974SHiroshi Yamauchi DenseMap<Instruction *, bool> Visited; 9929775a620SHiroshi Yamauchi assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 993dfeb7974SHiroshi Yamauchi Unhoistables, nullptr, Visited) && 9949775a620SHiroshi Yamauchi "checkHoistValue for selects"); 9959775a620SHiroshi Yamauchi } 9969775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Result\n"); 9979775a620SHiroshi Yamauchi if (RI.HasBranch) { 9989775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 9999775a620SHiroshi Yamauchi } 10009775a620SHiroshi Yamauchi for (auto *SI : Selects) { 10019775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 10029775a620SHiroshi Yamauchi } 10039775a620SHiroshi Yamauchi #endif 10049775a620SHiroshi Yamauchi } 10059775a620SHiroshi Yamauchi } 10069775a620SHiroshi Yamauchi 10079775a620SHiroshi Yamauchi // Traverse the region tree, find all nested scopes and merge them if possible. 10089775a620SHiroshi Yamauchi CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 10099775a620SHiroshi Yamauchi SmallVectorImpl<CHRScope *> &Scopes) { 10109775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 10119775a620SHiroshi Yamauchi CHRScope *Result = findScope(R); 10129775a620SHiroshi Yamauchi // Visit subscopes. 10139775a620SHiroshi Yamauchi CHRScope *ConsecutiveSubscope = nullptr; 10149775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> Subscopes; 10159775a620SHiroshi Yamauchi for (auto It = R->begin(); It != R->end(); ++It) { 10169775a620SHiroshi Yamauchi const std::unique_ptr<Region> &SubR = *It; 1017b3b61de0SFangrui Song auto NextIt = std::next(It); 1018b3b61de0SFangrui Song Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 10199775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 10209775a620SHiroshi Yamauchi << "\n"); 10219775a620SHiroshi Yamauchi CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 10229775a620SHiroshi Yamauchi if (SubCHRScope) { 10239775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 10249775a620SHiroshi Yamauchi } else { 10259775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 10269775a620SHiroshi Yamauchi } 10279775a620SHiroshi Yamauchi if (SubCHRScope) { 10289775a620SHiroshi Yamauchi if (!ConsecutiveSubscope) 10299775a620SHiroshi Yamauchi ConsecutiveSubscope = SubCHRScope; 10309775a620SHiroshi Yamauchi else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 10319775a620SHiroshi Yamauchi Subscopes.push_back(ConsecutiveSubscope); 10329775a620SHiroshi Yamauchi ConsecutiveSubscope = SubCHRScope; 10339775a620SHiroshi Yamauchi } else 10349775a620SHiroshi Yamauchi ConsecutiveSubscope->append(SubCHRScope); 10359775a620SHiroshi Yamauchi } else { 10369775a620SHiroshi Yamauchi if (ConsecutiveSubscope) { 10379775a620SHiroshi Yamauchi Subscopes.push_back(ConsecutiveSubscope); 10389775a620SHiroshi Yamauchi } 10399775a620SHiroshi Yamauchi ConsecutiveSubscope = nullptr; 10409775a620SHiroshi Yamauchi } 10419775a620SHiroshi Yamauchi } 10429775a620SHiroshi Yamauchi if (ConsecutiveSubscope) { 10439775a620SHiroshi Yamauchi Subscopes.push_back(ConsecutiveSubscope); 10449775a620SHiroshi Yamauchi } 10459775a620SHiroshi Yamauchi for (CHRScope *Sub : Subscopes) { 10469775a620SHiroshi Yamauchi if (Result) { 10479775a620SHiroshi Yamauchi // Combine it with the parent. 10489775a620SHiroshi Yamauchi Result->addSub(Sub); 10499775a620SHiroshi Yamauchi } else { 10509775a620SHiroshi Yamauchi // Push Subscopes as they won't be combined with the parent. 10519775a620SHiroshi Yamauchi Scopes.push_back(Sub); 10529775a620SHiroshi Yamauchi } 10539775a620SHiroshi Yamauchi } 10549775a620SHiroshi Yamauchi return Result; 10559775a620SHiroshi Yamauchi } 10569775a620SHiroshi Yamauchi 10579775a620SHiroshi Yamauchi static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 10589775a620SHiroshi Yamauchi DenseSet<Value *> ConditionValues; 10599775a620SHiroshi Yamauchi if (RI.HasBranch) { 10609775a620SHiroshi Yamauchi auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 10619775a620SHiroshi Yamauchi ConditionValues.insert(BI->getCondition()); 10629775a620SHiroshi Yamauchi } 10639775a620SHiroshi Yamauchi for (SelectInst *SI : RI.Selects) { 10649775a620SHiroshi Yamauchi ConditionValues.insert(SI->getCondition()); 10659775a620SHiroshi Yamauchi } 10669775a620SHiroshi Yamauchi return ConditionValues; 10679775a620SHiroshi Yamauchi } 10689775a620SHiroshi Yamauchi 10699775a620SHiroshi Yamauchi 10709775a620SHiroshi Yamauchi // Determine whether to split a scope depending on the sets of the branch 10719775a620SHiroshi Yamauchi // condition values of the previous region and the current region. We split 10729775a620SHiroshi Yamauchi // (return true) it if 1) the condition values of the inner/lower scope can't be 10739775a620SHiroshi Yamauchi // hoisted up to the outer/upper scope, or 2) the two sets of the condition 10749775a620SHiroshi Yamauchi // values have an empty intersection (because the combined branch conditions 10759775a620SHiroshi Yamauchi // won't probably lead to a simpler combined condition). 10769775a620SHiroshi Yamauchi static bool shouldSplit(Instruction *InsertPoint, 10779775a620SHiroshi Yamauchi DenseSet<Value *> &PrevConditionValues, 10789775a620SHiroshi Yamauchi DenseSet<Value *> &ConditionValues, 10799775a620SHiroshi Yamauchi DominatorTree &DT, 10809775a620SHiroshi Yamauchi DenseSet<Instruction *> &Unhoistables) { 10818262a5b7SDávid Bolvanský assert(InsertPoint && "Null InsertPoint"); 10829775a620SHiroshi Yamauchi CHR_DEBUG( 10839775a620SHiroshi Yamauchi dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 10849775a620SHiroshi Yamauchi for (Value *V : PrevConditionValues) { 10859775a620SHiroshi Yamauchi dbgs() << *V << ", "; 10869775a620SHiroshi Yamauchi } 10879775a620SHiroshi Yamauchi dbgs() << " ConditionValues "; 10889775a620SHiroshi Yamauchi for (Value *V : ConditionValues) { 10899775a620SHiroshi Yamauchi dbgs() << *V << ", "; 10909775a620SHiroshi Yamauchi } 10919775a620SHiroshi Yamauchi dbgs() << "\n"); 10929775a620SHiroshi Yamauchi // If any of Bases isn't hoistable to the hoist point, split. 10939775a620SHiroshi Yamauchi for (Value *V : ConditionValues) { 1094dfeb7974SHiroshi Yamauchi DenseMap<Instruction *, bool> Visited; 1095dfeb7974SHiroshi Yamauchi if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { 10969775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 10979775a620SHiroshi Yamauchi return true; // Not hoistable, split. 10989775a620SHiroshi Yamauchi } 10999775a620SHiroshi Yamauchi } 11009775a620SHiroshi Yamauchi // If PrevConditionValues or ConditionValues is empty, don't split to avoid 11019775a620SHiroshi Yamauchi // unnecessary splits at scopes with no branch/selects. If 11029775a620SHiroshi Yamauchi // PrevConditionValues and ConditionValues don't intersect at all, split. 11039775a620SHiroshi Yamauchi if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 11049775a620SHiroshi Yamauchi // Use std::set as DenseSet doesn't work with set_intersection. 11059775a620SHiroshi Yamauchi std::set<Value *> PrevBases, Bases; 1106d842f2eeSHiroshi Yamauchi DenseMap<Value *, std::set<Value *>> Visited; 11079775a620SHiroshi Yamauchi for (Value *V : PrevConditionValues) { 1108f1542efdSBenjamin Kramer const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 11099775a620SHiroshi Yamauchi PrevBases.insert(BaseValues.begin(), BaseValues.end()); 11109775a620SHiroshi Yamauchi } 11119775a620SHiroshi Yamauchi for (Value *V : ConditionValues) { 1112f1542efdSBenjamin Kramer const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 11139775a620SHiroshi Yamauchi Bases.insert(BaseValues.begin(), BaseValues.end()); 11149775a620SHiroshi Yamauchi } 11159775a620SHiroshi Yamauchi CHR_DEBUG( 11169775a620SHiroshi Yamauchi dbgs() << "PrevBases "; 11179775a620SHiroshi Yamauchi for (Value *V : PrevBases) { 11189775a620SHiroshi Yamauchi dbgs() << *V << ", "; 11199775a620SHiroshi Yamauchi } 11209775a620SHiroshi Yamauchi dbgs() << " Bases "; 11219775a620SHiroshi Yamauchi for (Value *V : Bases) { 11229775a620SHiroshi Yamauchi dbgs() << *V << ", "; 11239775a620SHiroshi Yamauchi } 11249775a620SHiroshi Yamauchi dbgs() << "\n"); 1125f1542efdSBenjamin Kramer std::vector<Value *> Intersection; 1126f1542efdSBenjamin Kramer std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(), 1127f1542efdSBenjamin Kramer Bases.end(), std::back_inserter(Intersection)); 11289775a620SHiroshi Yamauchi if (Intersection.empty()) { 11299775a620SHiroshi Yamauchi // Empty intersection, split. 11309775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 11319775a620SHiroshi Yamauchi return true; 11329775a620SHiroshi Yamauchi } 11339775a620SHiroshi Yamauchi } 11349775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "No split\n"); 11359775a620SHiroshi Yamauchi return false; // Don't split. 11369775a620SHiroshi Yamauchi } 11379775a620SHiroshi Yamauchi 1138b3b61de0SFangrui Song static void getSelectsInScope(CHRScope *Scope, 11399775a620SHiroshi Yamauchi DenseSet<Instruction *> &Output) { 1140b3b61de0SFangrui Song for (RegInfo &RI : Scope->RegInfos) 1141b3b61de0SFangrui Song for (SelectInst *SI : RI.Selects) 11429775a620SHiroshi Yamauchi Output.insert(SI); 1143b3b61de0SFangrui Song for (CHRScope *Sub : Scope->Subs) 1144b3b61de0SFangrui Song getSelectsInScope(Sub, Output); 11459775a620SHiroshi Yamauchi } 11469775a620SHiroshi Yamauchi 11479775a620SHiroshi Yamauchi void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 11489775a620SHiroshi Yamauchi SmallVectorImpl<CHRScope *> &Output) { 11499775a620SHiroshi Yamauchi for (CHRScope *Scope : Input) { 11509775a620SHiroshi Yamauchi assert(!Scope->BranchInsertPoint && 11519775a620SHiroshi Yamauchi "BranchInsertPoint must not be set"); 11529775a620SHiroshi Yamauchi DenseSet<Instruction *> Unhoistables; 1153b3b61de0SFangrui Song getSelectsInScope(Scope, Unhoistables); 11549775a620SHiroshi Yamauchi splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 11559775a620SHiroshi Yamauchi } 11569775a620SHiroshi Yamauchi #ifndef NDEBUG 11579775a620SHiroshi Yamauchi for (CHRScope *Scope : Output) { 11589775a620SHiroshi Yamauchi assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 11599775a620SHiroshi Yamauchi } 11609775a620SHiroshi Yamauchi #endif 11619775a620SHiroshi Yamauchi } 11629775a620SHiroshi Yamauchi 11639775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> CHR::splitScope( 11649775a620SHiroshi Yamauchi CHRScope *Scope, 11659775a620SHiroshi Yamauchi CHRScope *Outer, 11669775a620SHiroshi Yamauchi DenseSet<Value *> *OuterConditionValues, 11679775a620SHiroshi Yamauchi Instruction *OuterInsertPoint, 11689775a620SHiroshi Yamauchi SmallVectorImpl<CHRScope *> &Output, 11699775a620SHiroshi Yamauchi DenseSet<Instruction *> &Unhoistables) { 11709775a620SHiroshi Yamauchi if (Outer) { 11719775a620SHiroshi Yamauchi assert(OuterConditionValues && "Null OuterConditionValues"); 11729775a620SHiroshi Yamauchi assert(OuterInsertPoint && "Null OuterInsertPoint"); 11739775a620SHiroshi Yamauchi } 11749775a620SHiroshi Yamauchi bool PrevSplitFromOuter = true; 11759775a620SHiroshi Yamauchi DenseSet<Value *> PrevConditionValues; 11769775a620SHiroshi Yamauchi Instruction *PrevInsertPoint = nullptr; 11779775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> Splits; 11789775a620SHiroshi Yamauchi SmallVector<bool, 8> SplitsSplitFromOuter; 11799775a620SHiroshi Yamauchi SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 11809775a620SHiroshi Yamauchi SmallVector<Instruction *, 8> SplitsInsertPoints; 11819775a620SHiroshi Yamauchi SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 11829775a620SHiroshi Yamauchi for (RegInfo &RI : RegInfos) { 11839775a620SHiroshi Yamauchi Instruction *InsertPoint = getBranchInsertPoint(RI); 11849775a620SHiroshi Yamauchi DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 11859775a620SHiroshi Yamauchi CHR_DEBUG( 11869775a620SHiroshi Yamauchi dbgs() << "ConditionValues "; 11879775a620SHiroshi Yamauchi for (Value *V : ConditionValues) { 11889775a620SHiroshi Yamauchi dbgs() << *V << ", "; 11899775a620SHiroshi Yamauchi } 11909775a620SHiroshi Yamauchi dbgs() << "\n"); 11919775a620SHiroshi Yamauchi if (RI.R == RegInfos[0].R) { 11929775a620SHiroshi Yamauchi // First iteration. Check to see if we should split from the outer. 11939775a620SHiroshi Yamauchi if (Outer) { 11949775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 11959775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Should split from outer at " 11969775a620SHiroshi Yamauchi << RI.R->getNameStr() << "\n"); 11979775a620SHiroshi Yamauchi if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 11989775a620SHiroshi Yamauchi ConditionValues, DT, Unhoistables)) { 11999775a620SHiroshi Yamauchi PrevConditionValues = ConditionValues; 12009775a620SHiroshi Yamauchi PrevInsertPoint = InsertPoint; 1201fd2c699dSHiroshi Yamauchi ORE.emit([&]() { 1202fd2c699dSHiroshi Yamauchi return OptimizationRemarkMissed(DEBUG_TYPE, 1203fd2c699dSHiroshi Yamauchi "SplitScopeFromOuter", 1204fd2c699dSHiroshi Yamauchi RI.R->getEntry()->getTerminator()) 1205fd2c699dSHiroshi Yamauchi << "Split scope from outer due to unhoistable branch/select " 1206fd2c699dSHiroshi Yamauchi << "and/or lack of common condition values"; 1207fd2c699dSHiroshi Yamauchi }); 12089775a620SHiroshi Yamauchi } else { 12099775a620SHiroshi Yamauchi // Not splitting from the outer. Use the outer bases and insert 12109775a620SHiroshi Yamauchi // point. Union the bases. 12119775a620SHiroshi Yamauchi PrevSplitFromOuter = false; 12129775a620SHiroshi Yamauchi PrevConditionValues = *OuterConditionValues; 12139775a620SHiroshi Yamauchi PrevConditionValues.insert(ConditionValues.begin(), 12149775a620SHiroshi Yamauchi ConditionValues.end()); 12159775a620SHiroshi Yamauchi PrevInsertPoint = OuterInsertPoint; 12169775a620SHiroshi Yamauchi } 12179775a620SHiroshi Yamauchi } else { 12189775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Outer null\n"); 12199775a620SHiroshi Yamauchi PrevConditionValues = ConditionValues; 12209775a620SHiroshi Yamauchi PrevInsertPoint = InsertPoint; 12219775a620SHiroshi Yamauchi } 12229775a620SHiroshi Yamauchi } else { 12239775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Should split from prev at " 12249775a620SHiroshi Yamauchi << RI.R->getNameStr() << "\n"); 12259775a620SHiroshi Yamauchi if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 12269775a620SHiroshi Yamauchi DT, Unhoistables)) { 12279775a620SHiroshi Yamauchi CHRScope *Tail = Scope->split(RI.R); 12289775a620SHiroshi Yamauchi Scopes.insert(Tail); 12299775a620SHiroshi Yamauchi Splits.push_back(Scope); 12309775a620SHiroshi Yamauchi SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 12319775a620SHiroshi Yamauchi SplitsConditionValues.push_back(PrevConditionValues); 12329775a620SHiroshi Yamauchi SplitsInsertPoints.push_back(PrevInsertPoint); 12339775a620SHiroshi Yamauchi Scope = Tail; 12349775a620SHiroshi Yamauchi PrevConditionValues = ConditionValues; 12359775a620SHiroshi Yamauchi PrevInsertPoint = InsertPoint; 12369775a620SHiroshi Yamauchi PrevSplitFromOuter = true; 1237fd2c699dSHiroshi Yamauchi ORE.emit([&]() { 1238fd2c699dSHiroshi Yamauchi return OptimizationRemarkMissed(DEBUG_TYPE, 1239fd2c699dSHiroshi Yamauchi "SplitScopeFromPrev", 1240fd2c699dSHiroshi Yamauchi RI.R->getEntry()->getTerminator()) 1241fd2c699dSHiroshi Yamauchi << "Split scope from previous due to unhoistable branch/select " 1242fd2c699dSHiroshi Yamauchi << "and/or lack of common condition values"; 1243fd2c699dSHiroshi Yamauchi }); 12449775a620SHiroshi Yamauchi } else { 12459775a620SHiroshi Yamauchi // Not splitting. Union the bases. Keep the hoist point. 12469775a620SHiroshi Yamauchi PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 12479775a620SHiroshi Yamauchi } 12489775a620SHiroshi Yamauchi } 12499775a620SHiroshi Yamauchi } 12509775a620SHiroshi Yamauchi Splits.push_back(Scope); 12519775a620SHiroshi Yamauchi SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 12529775a620SHiroshi Yamauchi SplitsConditionValues.push_back(PrevConditionValues); 12539775a620SHiroshi Yamauchi assert(PrevInsertPoint && "Null PrevInsertPoint"); 12549775a620SHiroshi Yamauchi SplitsInsertPoints.push_back(PrevInsertPoint); 12559775a620SHiroshi Yamauchi assert(Splits.size() == SplitsConditionValues.size() && 12569775a620SHiroshi Yamauchi Splits.size() == SplitsSplitFromOuter.size() && 12579775a620SHiroshi Yamauchi Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 12589775a620SHiroshi Yamauchi for (size_t I = 0; I < Splits.size(); ++I) { 12599775a620SHiroshi Yamauchi CHRScope *Split = Splits[I]; 12609775a620SHiroshi Yamauchi DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 12619775a620SHiroshi Yamauchi Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 12629775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> NewSubs; 12639775a620SHiroshi Yamauchi DenseSet<Instruction *> SplitUnhoistables; 1264b3b61de0SFangrui Song getSelectsInScope(Split, SplitUnhoistables); 12659775a620SHiroshi Yamauchi for (CHRScope *Sub : Split->Subs) { 12669775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> SubSplits = splitScope( 12679775a620SHiroshi Yamauchi Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 12689775a620SHiroshi Yamauchi SplitUnhoistables); 12698299fb8fSKazu Hirata llvm::append_range(NewSubs, SubSplits); 12709775a620SHiroshi Yamauchi } 12719775a620SHiroshi Yamauchi Split->Subs = NewSubs; 12729775a620SHiroshi Yamauchi } 12739775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> Result; 12749775a620SHiroshi Yamauchi for (size_t I = 0; I < Splits.size(); ++I) { 12759775a620SHiroshi Yamauchi CHRScope *Split = Splits[I]; 12769775a620SHiroshi Yamauchi if (SplitsSplitFromOuter[I]) { 12779775a620SHiroshi Yamauchi // Split from the outer. 12789775a620SHiroshi Yamauchi Output.push_back(Split); 12799775a620SHiroshi Yamauchi Split->BranchInsertPoint = SplitsInsertPoints[I]; 12809775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 12819775a620SHiroshi Yamauchi << "\n"); 12829775a620SHiroshi Yamauchi } else { 12839775a620SHiroshi Yamauchi // Connected to the outer. 12849775a620SHiroshi Yamauchi Result.push_back(Split); 12859775a620SHiroshi Yamauchi } 12869775a620SHiroshi Yamauchi } 12879775a620SHiroshi Yamauchi if (!Outer) 12889775a620SHiroshi Yamauchi assert(Result.empty() && 12899775a620SHiroshi Yamauchi "If no outer (top-level), must return no nested ones"); 12909775a620SHiroshi Yamauchi return Result; 12919775a620SHiroshi Yamauchi } 12929775a620SHiroshi Yamauchi 12939775a620SHiroshi Yamauchi void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 12949775a620SHiroshi Yamauchi for (CHRScope *Scope : Scopes) { 12959775a620SHiroshi Yamauchi assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 12969775a620SHiroshi Yamauchi classifyBiasedScopes(Scope, Scope); 12979775a620SHiroshi Yamauchi CHR_DEBUG( 12989775a620SHiroshi Yamauchi dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 12999775a620SHiroshi Yamauchi dbgs() << "TrueBiasedRegions "; 13009775a620SHiroshi Yamauchi for (Region *R : Scope->TrueBiasedRegions) { 13019775a620SHiroshi Yamauchi dbgs() << R->getNameStr() << ", "; 13029775a620SHiroshi Yamauchi } 13039775a620SHiroshi Yamauchi dbgs() << "\n"; 13049775a620SHiroshi Yamauchi dbgs() << "FalseBiasedRegions "; 13059775a620SHiroshi Yamauchi for (Region *R : Scope->FalseBiasedRegions) { 13069775a620SHiroshi Yamauchi dbgs() << R->getNameStr() << ", "; 13079775a620SHiroshi Yamauchi } 13089775a620SHiroshi Yamauchi dbgs() << "\n"; 13099775a620SHiroshi Yamauchi dbgs() << "TrueBiasedSelects "; 13109775a620SHiroshi Yamauchi for (SelectInst *SI : Scope->TrueBiasedSelects) { 13119775a620SHiroshi Yamauchi dbgs() << *SI << ", "; 13129775a620SHiroshi Yamauchi } 13139775a620SHiroshi Yamauchi dbgs() << "\n"; 13149775a620SHiroshi Yamauchi dbgs() << "FalseBiasedSelects "; 13159775a620SHiroshi Yamauchi for (SelectInst *SI : Scope->FalseBiasedSelects) { 13169775a620SHiroshi Yamauchi dbgs() << *SI << ", "; 13179775a620SHiroshi Yamauchi } 13189775a620SHiroshi Yamauchi dbgs() << "\n";); 13199775a620SHiroshi Yamauchi } 13209775a620SHiroshi Yamauchi } 13219775a620SHiroshi Yamauchi 13229775a620SHiroshi Yamauchi void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 13239775a620SHiroshi Yamauchi for (RegInfo &RI : Scope->RegInfos) { 13249775a620SHiroshi Yamauchi if (RI.HasBranch) { 13259775a620SHiroshi Yamauchi Region *R = RI.R; 132633bf1cadSKazu Hirata if (TrueBiasedRegionsGlobal.contains(R)) 13279775a620SHiroshi Yamauchi OutermostScope->TrueBiasedRegions.insert(R); 132833bf1cadSKazu Hirata else if (FalseBiasedRegionsGlobal.contains(R)) 13299775a620SHiroshi Yamauchi OutermostScope->FalseBiasedRegions.insert(R); 13309775a620SHiroshi Yamauchi else 13319775a620SHiroshi Yamauchi llvm_unreachable("Must be biased"); 13329775a620SHiroshi Yamauchi } 13339775a620SHiroshi Yamauchi for (SelectInst *SI : RI.Selects) { 133433bf1cadSKazu Hirata if (TrueBiasedSelectsGlobal.contains(SI)) 13359775a620SHiroshi Yamauchi OutermostScope->TrueBiasedSelects.insert(SI); 133633bf1cadSKazu Hirata else if (FalseBiasedSelectsGlobal.contains(SI)) 13379775a620SHiroshi Yamauchi OutermostScope->FalseBiasedSelects.insert(SI); 13389775a620SHiroshi Yamauchi else 13399775a620SHiroshi Yamauchi llvm_unreachable("Must be biased"); 13409775a620SHiroshi Yamauchi } 13419775a620SHiroshi Yamauchi } 13429775a620SHiroshi Yamauchi for (CHRScope *Sub : Scope->Subs) { 13439775a620SHiroshi Yamauchi classifyBiasedScopes(Sub, OutermostScope); 13449775a620SHiroshi Yamauchi } 13459775a620SHiroshi Yamauchi } 13469775a620SHiroshi Yamauchi 13479775a620SHiroshi Yamauchi static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 13489775a620SHiroshi Yamauchi unsigned NumBiased = Scope->TrueBiasedRegions.size() + 13499775a620SHiroshi Yamauchi Scope->FalseBiasedRegions.size() + 13509775a620SHiroshi Yamauchi Scope->TrueBiasedSelects.size() + 13519775a620SHiroshi Yamauchi Scope->FalseBiasedSelects.size(); 13529775a620SHiroshi Yamauchi return NumBiased >= CHRMergeThreshold; 13539775a620SHiroshi Yamauchi } 13549775a620SHiroshi Yamauchi 13559775a620SHiroshi Yamauchi void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 13569775a620SHiroshi Yamauchi SmallVectorImpl<CHRScope *> &Output) { 13579775a620SHiroshi Yamauchi for (CHRScope *Scope : Input) { 13589775a620SHiroshi Yamauchi // Filter out the ones with only one region and no subs. 13599775a620SHiroshi Yamauchi if (!hasAtLeastTwoBiasedBranches(Scope)) { 13609775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 13619775a620SHiroshi Yamauchi << Scope->TrueBiasedRegions.size() 13629775a620SHiroshi Yamauchi << " falsy-regions " << Scope->FalseBiasedRegions.size() 13639775a620SHiroshi Yamauchi << " true-selects " << Scope->TrueBiasedSelects.size() 13649775a620SHiroshi Yamauchi << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1365fd2c699dSHiroshi Yamauchi ORE.emit([&]() { 1366fd2c699dSHiroshi Yamauchi return OptimizationRemarkMissed( 1367fd2c699dSHiroshi Yamauchi DEBUG_TYPE, 1368fd2c699dSHiroshi Yamauchi "DropScopeWithOneBranchOrSelect", 1369fd2c699dSHiroshi Yamauchi Scope->RegInfos[0].R->getEntry()->getTerminator()) 1370fd2c699dSHiroshi Yamauchi << "Drop scope with < " 1371fd2c699dSHiroshi Yamauchi << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 1372fd2c699dSHiroshi Yamauchi << " biased branch(es) or select(s)"; 1373fd2c699dSHiroshi Yamauchi }); 13749775a620SHiroshi Yamauchi continue; 13759775a620SHiroshi Yamauchi } 13769775a620SHiroshi Yamauchi Output.push_back(Scope); 13779775a620SHiroshi Yamauchi } 13789775a620SHiroshi Yamauchi } 13799775a620SHiroshi Yamauchi 13809775a620SHiroshi Yamauchi void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 13819775a620SHiroshi Yamauchi SmallVectorImpl<CHRScope *> &Output) { 13829775a620SHiroshi Yamauchi for (CHRScope *Scope : Input) { 13839775a620SHiroshi Yamauchi assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 13849775a620SHiroshi Yamauchi "Empty"); 13859775a620SHiroshi Yamauchi setCHRRegions(Scope, Scope); 13869775a620SHiroshi Yamauchi Output.push_back(Scope); 13879775a620SHiroshi Yamauchi CHR_DEBUG( 13889775a620SHiroshi Yamauchi dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 13899775a620SHiroshi Yamauchi for (auto pair : Scope->HoistStopMap) { 13909775a620SHiroshi Yamauchi Region *R = pair.first; 13919775a620SHiroshi Yamauchi dbgs() << "Region " << R->getNameStr() << "\n"; 13929775a620SHiroshi Yamauchi for (Instruction *I : pair.second) { 13939775a620SHiroshi Yamauchi dbgs() << "HoistStop " << *I << "\n"; 13949775a620SHiroshi Yamauchi } 13959775a620SHiroshi Yamauchi } 13969775a620SHiroshi Yamauchi dbgs() << "CHRRegions" << "\n"; 13979775a620SHiroshi Yamauchi for (RegInfo &RI : Scope->CHRRegions) { 13989775a620SHiroshi Yamauchi dbgs() << RI.R->getNameStr() << "\n"; 13999775a620SHiroshi Yamauchi }); 14009775a620SHiroshi Yamauchi } 14019775a620SHiroshi Yamauchi } 14029775a620SHiroshi Yamauchi 14039775a620SHiroshi Yamauchi void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 14049775a620SHiroshi Yamauchi DenseSet<Instruction *> Unhoistables; 14059775a620SHiroshi Yamauchi // Put the biased selects in Unhoistables because they should stay where they 14069775a620SHiroshi Yamauchi // are and constant-folded after CHR (in case one biased select or a branch 14079775a620SHiroshi Yamauchi // can depend on another biased select.) 14089775a620SHiroshi Yamauchi for (RegInfo &RI : Scope->RegInfos) { 14099775a620SHiroshi Yamauchi for (SelectInst *SI : RI.Selects) { 14109775a620SHiroshi Yamauchi Unhoistables.insert(SI); 14119775a620SHiroshi Yamauchi } 14129775a620SHiroshi Yamauchi } 14139775a620SHiroshi Yamauchi Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 14149775a620SHiroshi Yamauchi for (RegInfo &RI : Scope->RegInfos) { 14159775a620SHiroshi Yamauchi Region *R = RI.R; 14169775a620SHiroshi Yamauchi DenseSet<Instruction *> HoistStops; 14179775a620SHiroshi Yamauchi bool IsHoisted = false; 14189775a620SHiroshi Yamauchi if (RI.HasBranch) { 141933bf1cadSKazu Hirata assert((OutermostScope->TrueBiasedRegions.contains(R) || 142033bf1cadSKazu Hirata OutermostScope->FalseBiasedRegions.contains(R)) && 14219775a620SHiroshi Yamauchi "Must be truthy or falsy"); 14229775a620SHiroshi Yamauchi auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 14239775a620SHiroshi Yamauchi // Note checkHoistValue fills in HoistStops. 1424dfeb7974SHiroshi Yamauchi DenseMap<Instruction *, bool> Visited; 14259775a620SHiroshi Yamauchi bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1426dfeb7974SHiroshi Yamauchi Unhoistables, &HoistStops, Visited); 14279775a620SHiroshi Yamauchi assert(IsHoistable && "Must be hoistable"); 14289775a620SHiroshi Yamauchi (void)(IsHoistable); // Unused in release build 14299775a620SHiroshi Yamauchi IsHoisted = true; 14309775a620SHiroshi Yamauchi } 14319775a620SHiroshi Yamauchi for (SelectInst *SI : RI.Selects) { 143233bf1cadSKazu Hirata assert((OutermostScope->TrueBiasedSelects.contains(SI) || 143333bf1cadSKazu Hirata OutermostScope->FalseBiasedSelects.contains(SI)) && 14349775a620SHiroshi Yamauchi "Must be true or false biased"); 14359775a620SHiroshi Yamauchi // Note checkHoistValue fills in HoistStops. 1436dfeb7974SHiroshi Yamauchi DenseMap<Instruction *, bool> Visited; 14379775a620SHiroshi Yamauchi bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1438dfeb7974SHiroshi Yamauchi Unhoistables, &HoistStops, Visited); 14399775a620SHiroshi Yamauchi assert(IsHoistable && "Must be hoistable"); 14409775a620SHiroshi Yamauchi (void)(IsHoistable); // Unused in release build 14419775a620SHiroshi Yamauchi IsHoisted = true; 14429775a620SHiroshi Yamauchi } 14439775a620SHiroshi Yamauchi if (IsHoisted) { 14449775a620SHiroshi Yamauchi OutermostScope->CHRRegions.push_back(RI); 14459775a620SHiroshi Yamauchi OutermostScope->HoistStopMap[R] = HoistStops; 14469775a620SHiroshi Yamauchi } 14479775a620SHiroshi Yamauchi } 14489775a620SHiroshi Yamauchi for (CHRScope *Sub : Scope->Subs) 14499775a620SHiroshi Yamauchi setCHRRegions(Sub, OutermostScope); 14509775a620SHiroshi Yamauchi } 14519775a620SHiroshi Yamauchi 1452f1542efdSBenjamin Kramer static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 14539775a620SHiroshi Yamauchi return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 14549775a620SHiroshi Yamauchi } 14559775a620SHiroshi Yamauchi 14569775a620SHiroshi Yamauchi void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 14579775a620SHiroshi Yamauchi SmallVectorImpl<CHRScope *> &Output) { 14589775a620SHiroshi Yamauchi Output.resize(Input.size()); 145975709329SFangrui Song llvm::copy(Input, Output.begin()); 1460efd94c56SFangrui Song llvm::stable_sort(Output, CHRScopeSorter); 14619775a620SHiroshi Yamauchi } 14629775a620SHiroshi Yamauchi 14639775a620SHiroshi Yamauchi // Return true if V is already hoisted or was hoisted (along with its operands) 14649775a620SHiroshi Yamauchi // to the insert point. 14659775a620SHiroshi Yamauchi static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 14669775a620SHiroshi Yamauchi HoistStopMapTy &HoistStopMap, 14679775a620SHiroshi Yamauchi DenseSet<Instruction *> &HoistedSet, 146816201040SHiroshi Yamauchi DenseSet<PHINode *> &TrivialPHIs, 146916201040SHiroshi Yamauchi DominatorTree &DT) { 14709775a620SHiroshi Yamauchi auto IT = HoistStopMap.find(R); 14719775a620SHiroshi Yamauchi assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 14729775a620SHiroshi Yamauchi DenseSet<Instruction *> &HoistStops = IT->second; 14739775a620SHiroshi Yamauchi if (auto *I = dyn_cast<Instruction>(V)) { 14749775a620SHiroshi Yamauchi if (I == HoistPoint) 14759775a620SHiroshi Yamauchi return; 14769775a620SHiroshi Yamauchi if (HoistStops.count(I)) 14779775a620SHiroshi Yamauchi return; 14789775a620SHiroshi Yamauchi if (auto *PN = dyn_cast<PHINode>(I)) 14799775a620SHiroshi Yamauchi if (TrivialPHIs.count(PN)) 14809775a620SHiroshi Yamauchi // The trivial phi inserted by the previous CHR scope could replace a 14819775a620SHiroshi Yamauchi // non-phi in HoistStops. Note that since this phi is at the exit of a 14829775a620SHiroshi Yamauchi // previous CHR scope, which dominates this scope, it's safe to stop 14839775a620SHiroshi Yamauchi // hoisting there. 14849775a620SHiroshi Yamauchi return; 14859775a620SHiroshi Yamauchi if (HoistedSet.count(I)) 14869775a620SHiroshi Yamauchi // Already hoisted, return. 14879775a620SHiroshi Yamauchi return; 14889775a620SHiroshi Yamauchi assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 148916201040SHiroshi Yamauchi assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 149016201040SHiroshi Yamauchi assert(DT.getNode(HoistPoint->getParent()) && 149116201040SHiroshi Yamauchi "DT must contain HoistPoint block"); 149216201040SHiroshi Yamauchi if (DT.dominates(I, HoistPoint)) 149316201040SHiroshi Yamauchi // We are already above the hoist point. Stop here. This may be necessary 149416201040SHiroshi Yamauchi // when multiple scopes would independently hoist the same 149516201040SHiroshi Yamauchi // instruction. Since an outer (dominating) scope would hoist it to its 149616201040SHiroshi Yamauchi // entry before an inner (dominated) scope would to its entry, the inner 149716201040SHiroshi Yamauchi // scope may see the instruction already hoisted, in which case it 149816201040SHiroshi Yamauchi // potentially wrong for the inner scope to hoist it and could cause bad 149916201040SHiroshi Yamauchi // IR (non-dominating def), but safe to skip hoisting it instead because 150016201040SHiroshi Yamauchi // it's already in a block that dominates the inner scope. 150116201040SHiroshi Yamauchi return; 15029775a620SHiroshi Yamauchi for (Value *Op : I->operands()) { 150316201040SHiroshi Yamauchi hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 15049775a620SHiroshi Yamauchi } 15059775a620SHiroshi Yamauchi I->moveBefore(HoistPoint); 15069775a620SHiroshi Yamauchi HoistedSet.insert(I); 15079775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 15089775a620SHiroshi Yamauchi } 15099775a620SHiroshi Yamauchi } 15109775a620SHiroshi Yamauchi 15119775a620SHiroshi Yamauchi // Hoist the dependent condition values of the branches and the selects in the 15129775a620SHiroshi Yamauchi // scope to the insert point. 15139775a620SHiroshi Yamauchi static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 151416201040SHiroshi Yamauchi DenseSet<PHINode *> &TrivialPHIs, 151516201040SHiroshi Yamauchi DominatorTree &DT) { 15169775a620SHiroshi Yamauchi DenseSet<Instruction *> HoistedSet; 15179775a620SHiroshi Yamauchi for (const RegInfo &RI : Scope->CHRRegions) { 15189775a620SHiroshi Yamauchi Region *R = RI.R; 15199775a620SHiroshi Yamauchi bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 15209775a620SHiroshi Yamauchi bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 15219775a620SHiroshi Yamauchi if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 15229775a620SHiroshi Yamauchi auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 15239775a620SHiroshi Yamauchi hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 152416201040SHiroshi Yamauchi HoistedSet, TrivialPHIs, DT); 15259775a620SHiroshi Yamauchi } 15269775a620SHiroshi Yamauchi for (SelectInst *SI : RI.Selects) { 15279775a620SHiroshi Yamauchi bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 15289775a620SHiroshi Yamauchi bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 15299775a620SHiroshi Yamauchi if (!(IsTrueBiased || IsFalseBiased)) 15309775a620SHiroshi Yamauchi continue; 15319775a620SHiroshi Yamauchi hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 153216201040SHiroshi Yamauchi HoistedSet, TrivialPHIs, DT); 15339775a620SHiroshi Yamauchi } 15349775a620SHiroshi Yamauchi } 15359775a620SHiroshi Yamauchi } 15369775a620SHiroshi Yamauchi 15379775a620SHiroshi Yamauchi // Negate the predicate if an ICmp if it's used only by branches or selects by 15389775a620SHiroshi Yamauchi // swapping the operands of the branches or the selects. Returns true if success. 1539b3b61de0SFangrui Song static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 15409775a620SHiroshi Yamauchi Instruction *ExcludedUser, 15419775a620SHiroshi Yamauchi CHRScope *Scope) { 15429775a620SHiroshi Yamauchi for (User *U : ICmp->users()) { 15439775a620SHiroshi Yamauchi if (U == ExcludedUser) 15449775a620SHiroshi Yamauchi continue; 15459775a620SHiroshi Yamauchi if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 15469775a620SHiroshi Yamauchi continue; 15479775a620SHiroshi Yamauchi if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 15489775a620SHiroshi Yamauchi continue; 15499775a620SHiroshi Yamauchi return false; 15509775a620SHiroshi Yamauchi } 15519775a620SHiroshi Yamauchi for (User *U : ICmp->users()) { 15529775a620SHiroshi Yamauchi if (U == ExcludedUser) 15539775a620SHiroshi Yamauchi continue; 15549775a620SHiroshi Yamauchi if (auto *BI = dyn_cast<BranchInst>(U)) { 15559775a620SHiroshi Yamauchi assert(BI->isConditional() && "Must be conditional"); 15569775a620SHiroshi Yamauchi BI->swapSuccessors(); 15579775a620SHiroshi Yamauchi // Don't need to swap this in terms of 15589775a620SHiroshi Yamauchi // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 15599775a620SHiroshi Yamauchi // mean whehter the branch is likely go into the if-then rather than 15609775a620SHiroshi Yamauchi // successor0/successor1 and because we can tell which edge is the then or 15619775a620SHiroshi Yamauchi // the else one by comparing the destination to the region exit block. 15629775a620SHiroshi Yamauchi continue; 15639775a620SHiroshi Yamauchi } 15649775a620SHiroshi Yamauchi if (auto *SI = dyn_cast<SelectInst>(U)) { 15659775a620SHiroshi Yamauchi // Swap operands 15660efeaa81SRoman Lebedev SI->swapValues(); 15679775a620SHiroshi Yamauchi SI->swapProfMetadata(); 15689775a620SHiroshi Yamauchi if (Scope->TrueBiasedSelects.count(SI)) { 1569c714da2cSKazu Hirata assert(!Scope->FalseBiasedSelects.contains(SI) && 15709775a620SHiroshi Yamauchi "Must not be already in"); 15719775a620SHiroshi Yamauchi Scope->FalseBiasedSelects.insert(SI); 15729775a620SHiroshi Yamauchi } else if (Scope->FalseBiasedSelects.count(SI)) { 1573c714da2cSKazu Hirata assert(!Scope->TrueBiasedSelects.contains(SI) && 15749775a620SHiroshi Yamauchi "Must not be already in"); 15759775a620SHiroshi Yamauchi Scope->TrueBiasedSelects.insert(SI); 15769775a620SHiroshi Yamauchi } 15779775a620SHiroshi Yamauchi continue; 15789775a620SHiroshi Yamauchi } 15799775a620SHiroshi Yamauchi llvm_unreachable("Must be a branch or a select"); 15809775a620SHiroshi Yamauchi } 15819775a620SHiroshi Yamauchi ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 15829775a620SHiroshi Yamauchi return true; 15839775a620SHiroshi Yamauchi } 15849775a620SHiroshi Yamauchi 15859775a620SHiroshi Yamauchi // A helper for transformScopes. Insert a trivial phi at the scope exit block 15869775a620SHiroshi Yamauchi // for a value that's defined in the scope but used outside it (meaning it's 15879775a620SHiroshi Yamauchi // alive at the exit block). 15889775a620SHiroshi Yamauchi static void insertTrivialPHIs(CHRScope *Scope, 15899775a620SHiroshi Yamauchi BasicBlock *EntryBlock, BasicBlock *ExitBlock, 15909775a620SHiroshi Yamauchi DenseSet<PHINode *> &TrivialPHIs) { 1591f1542efdSBenjamin Kramer SmallSetVector<BasicBlock *, 8> BlocksInScope; 15929775a620SHiroshi Yamauchi for (RegInfo &RI : Scope->RegInfos) { 15939775a620SHiroshi Yamauchi for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 15949775a620SHiroshi Yamauchi // sub-Scopes. 1595f1542efdSBenjamin Kramer BlocksInScope.insert(BB); 15969775a620SHiroshi Yamauchi } 15979775a620SHiroshi Yamauchi } 1598f1542efdSBenjamin Kramer CHR_DEBUG({ 1599f1542efdSBenjamin Kramer dbgs() << "Inserting redundant phis\n"; 1600f1542efdSBenjamin Kramer for (BasicBlock *BB : BlocksInScope) 16019775a620SHiroshi Yamauchi dbgs() << "BlockInScope " << BB->getName() << "\n"; 16029775a620SHiroshi Yamauchi }); 1603f1542efdSBenjamin Kramer for (BasicBlock *BB : BlocksInScope) { 16049775a620SHiroshi Yamauchi for (Instruction &I : *BB) { 16059775a620SHiroshi Yamauchi SmallVector<Instruction *, 8> Users; 16069775a620SHiroshi Yamauchi for (User *U : I.users()) { 16079775a620SHiroshi Yamauchi if (auto *UI = dyn_cast<Instruction>(U)) { 1608c714da2cSKazu Hirata if (!BlocksInScope.contains(UI->getParent()) && 16099775a620SHiroshi Yamauchi // Unless there's already a phi for I at the exit block. 16109775a620SHiroshi Yamauchi !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 16119775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "V " << I << "\n"); 16129775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 16139775a620SHiroshi Yamauchi Users.push_back(UI); 16149775a620SHiroshi Yamauchi } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 16159775a620SHiroshi Yamauchi // There's a loop backedge from a block that's dominated by this 16169775a620SHiroshi Yamauchi // scope to the entry block. 16179775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "V " << I << "\n"); 16189775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() 16199775a620SHiroshi Yamauchi << "Used at entry block (for a back edge) by a phi user " 16209775a620SHiroshi Yamauchi << *UI << "\n"); 16219775a620SHiroshi Yamauchi Users.push_back(UI); 16229775a620SHiroshi Yamauchi } 16239775a620SHiroshi Yamauchi } 16249775a620SHiroshi Yamauchi } 16259775a620SHiroshi Yamauchi if (Users.size() > 0) { 16269775a620SHiroshi Yamauchi // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 16279775a620SHiroshi Yamauchi // ExitBlock. Replace I with the new phi in UI unless UI is another 16289775a620SHiroshi Yamauchi // phi at ExitBlock. 16291c82d320SKazu Hirata PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "", 16309775a620SHiroshi Yamauchi &ExitBlock->front()); 16319775a620SHiroshi Yamauchi for (BasicBlock *Pred : predecessors(ExitBlock)) { 16329775a620SHiroshi Yamauchi PN->addIncoming(&I, Pred); 16339775a620SHiroshi Yamauchi } 16349775a620SHiroshi Yamauchi TrivialPHIs.insert(PN); 16359775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 16369775a620SHiroshi Yamauchi for (Instruction *UI : Users) { 16379775a620SHiroshi Yamauchi for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 16389775a620SHiroshi Yamauchi if (UI->getOperand(J) == &I) { 16399775a620SHiroshi Yamauchi UI->setOperand(J, PN); 16409775a620SHiroshi Yamauchi } 16419775a620SHiroshi Yamauchi } 16429775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 16439775a620SHiroshi Yamauchi } 16449775a620SHiroshi Yamauchi } 16459775a620SHiroshi Yamauchi } 16469775a620SHiroshi Yamauchi } 16479775a620SHiroshi Yamauchi } 16489775a620SHiroshi Yamauchi 16499775a620SHiroshi Yamauchi // Assert that all the CHR regions of the scope have a biased branch or select. 1650c8f348cbSFangrui Song static void LLVM_ATTRIBUTE_UNUSED 1651c8f348cbSFangrui Song assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 16529775a620SHiroshi Yamauchi #ifndef NDEBUG 16539775a620SHiroshi Yamauchi auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 16549775a620SHiroshi Yamauchi if (Scope->TrueBiasedRegions.count(RI.R) || 16559775a620SHiroshi Yamauchi Scope->FalseBiasedRegions.count(RI.R)) 16569775a620SHiroshi Yamauchi return true; 16579775a620SHiroshi Yamauchi for (SelectInst *SI : RI.Selects) 16589775a620SHiroshi Yamauchi if (Scope->TrueBiasedSelects.count(SI) || 16599775a620SHiroshi Yamauchi Scope->FalseBiasedSelects.count(SI)) 16609775a620SHiroshi Yamauchi return true; 16619775a620SHiroshi Yamauchi return false; 16629775a620SHiroshi Yamauchi }; 16639775a620SHiroshi Yamauchi for (RegInfo &RI : Scope->CHRRegions) { 16649775a620SHiroshi Yamauchi assert(HasBiasedBranchOrSelect(RI, Scope) && 16659775a620SHiroshi Yamauchi "Must have biased branch or select"); 16669775a620SHiroshi Yamauchi } 16679775a620SHiroshi Yamauchi #endif 16689775a620SHiroshi Yamauchi } 16699775a620SHiroshi Yamauchi 16709775a620SHiroshi Yamauchi // Assert that all the condition values of the biased branches and selects have 16719775a620SHiroshi Yamauchi // been hoisted to the pre-entry block or outside of the scope. 1672c8f348cbSFangrui Song static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1673c8f348cbSFangrui Song CHRScope *Scope, BasicBlock *PreEntryBlock) { 16749775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 16759775a620SHiroshi Yamauchi for (RegInfo &RI : Scope->CHRRegions) { 16769775a620SHiroshi Yamauchi Region *R = RI.R; 16779775a620SHiroshi Yamauchi bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 16789775a620SHiroshi Yamauchi bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 16799775a620SHiroshi Yamauchi if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 16809775a620SHiroshi Yamauchi auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 16819775a620SHiroshi Yamauchi Value *V = BI->getCondition(); 16829775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << *V << "\n"); 16839775a620SHiroshi Yamauchi if (auto *I = dyn_cast<Instruction>(V)) { 168472ee6d60SHiroshi Yamauchi (void)(I); // Unused in release build. 16859775a620SHiroshi Yamauchi assert((I->getParent() == PreEntryBlock || 16869775a620SHiroshi Yamauchi !Scope->contains(I)) && 16879775a620SHiroshi Yamauchi "Must have been hoisted to PreEntryBlock or outside the scope"); 16889775a620SHiroshi Yamauchi } 16899775a620SHiroshi Yamauchi } 16909775a620SHiroshi Yamauchi for (SelectInst *SI : RI.Selects) { 16919775a620SHiroshi Yamauchi bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 16929775a620SHiroshi Yamauchi bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 16939775a620SHiroshi Yamauchi if (!(IsTrueBiased || IsFalseBiased)) 16949775a620SHiroshi Yamauchi continue; 16959775a620SHiroshi Yamauchi Value *V = SI->getCondition(); 16969775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << *V << "\n"); 16979775a620SHiroshi Yamauchi if (auto *I = dyn_cast<Instruction>(V)) { 169872ee6d60SHiroshi Yamauchi (void)(I); // Unused in release build. 16999775a620SHiroshi Yamauchi assert((I->getParent() == PreEntryBlock || 17009775a620SHiroshi Yamauchi !Scope->contains(I)) && 17019775a620SHiroshi Yamauchi "Must have been hoisted to PreEntryBlock or outside the scope"); 17029775a620SHiroshi Yamauchi } 17039775a620SHiroshi Yamauchi } 17049775a620SHiroshi Yamauchi } 17059775a620SHiroshi Yamauchi } 17069775a620SHiroshi Yamauchi 17079775a620SHiroshi Yamauchi void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 17089775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 17099775a620SHiroshi Yamauchi 17109775a620SHiroshi Yamauchi assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 17119775a620SHiroshi Yamauchi Region *FirstRegion = Scope->RegInfos[0].R; 17129775a620SHiroshi Yamauchi BasicBlock *EntryBlock = FirstRegion->getEntry(); 17139775a620SHiroshi Yamauchi Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 17149775a620SHiroshi Yamauchi BasicBlock *ExitBlock = LastRegion->getExit(); 17159775a620SHiroshi Yamauchi Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 17169775a620SHiroshi Yamauchi 17179775a620SHiroshi Yamauchi if (ExitBlock) { 17189775a620SHiroshi Yamauchi // Insert a trivial phi at the exit block (where the CHR hot path and the 17199775a620SHiroshi Yamauchi // cold path merges) for a value that's defined in the scope but used 17209775a620SHiroshi Yamauchi // outside it (meaning it's alive at the exit block). We will add the 17219775a620SHiroshi Yamauchi // incoming values for the CHR cold paths to it below. Without this, we'd 17229775a620SHiroshi Yamauchi // miss updating phi's for such values unless there happens to already be a 17239775a620SHiroshi Yamauchi // phi for that value there. 17249775a620SHiroshi Yamauchi insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 17259775a620SHiroshi Yamauchi } 17269775a620SHiroshi Yamauchi 17279775a620SHiroshi Yamauchi // Split the entry block of the first region. The new block becomes the new 17289775a620SHiroshi Yamauchi // entry block of the first region. The old entry block becomes the block to 17299775a620SHiroshi Yamauchi // insert the CHR branch into. Note DT gets updated. Since DT gets updated 17309775a620SHiroshi Yamauchi // through the split, we update the entry of the first region after the split, 17319775a620SHiroshi Yamauchi // and Region only points to the entry and the exit blocks, rather than 17329775a620SHiroshi Yamauchi // keeping everything in a list or set, the blocks membership and the 17339775a620SHiroshi Yamauchi // entry/exit blocks of the region are still valid after the split. 17349775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 17359775a620SHiroshi Yamauchi << " at " << *Scope->BranchInsertPoint << "\n"); 17369775a620SHiroshi Yamauchi BasicBlock *NewEntryBlock = 17379775a620SHiroshi Yamauchi SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 17389775a620SHiroshi Yamauchi assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 17399775a620SHiroshi Yamauchi "NewEntryBlock's only pred must be EntryBlock"); 17409775a620SHiroshi Yamauchi FirstRegion->replaceEntryRecursive(NewEntryBlock); 17419775a620SHiroshi Yamauchi BasicBlock *PreEntryBlock = EntryBlock; 17429775a620SHiroshi Yamauchi 17439775a620SHiroshi Yamauchi ValueToValueMapTy VMap; 17449775a620SHiroshi Yamauchi // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 17459775a620SHiroshi Yamauchi // hot path (originals) and a cold path (clones) and update the PHIs at the 17469775a620SHiroshi Yamauchi // exit block. 17479775a620SHiroshi Yamauchi cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 17489775a620SHiroshi Yamauchi 17499775a620SHiroshi Yamauchi // Replace the old (placeholder) branch with the new (merged) conditional 17509775a620SHiroshi Yamauchi // branch. 17519775a620SHiroshi Yamauchi BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 17529775a620SHiroshi Yamauchi NewEntryBlock, VMap); 17539775a620SHiroshi Yamauchi 17549775a620SHiroshi Yamauchi #ifndef NDEBUG 17559775a620SHiroshi Yamauchi assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 17569775a620SHiroshi Yamauchi #endif 17579775a620SHiroshi Yamauchi 17589775a620SHiroshi Yamauchi // Hoist the conditional values of the branches/selects. 175916201040SHiroshi Yamauchi hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 17609775a620SHiroshi Yamauchi 17619775a620SHiroshi Yamauchi #ifndef NDEBUG 17629775a620SHiroshi Yamauchi assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 17639775a620SHiroshi Yamauchi #endif 17649775a620SHiroshi Yamauchi 17659775a620SHiroshi Yamauchi // Create the combined branch condition and constant-fold the branches/selects 17669775a620SHiroshi Yamauchi // in the hot path. 17679775a620SHiroshi Yamauchi fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1768c8b1ed5fSKazu Hirata ProfileCount.getValueOr(0)); 17699775a620SHiroshi Yamauchi } 17709775a620SHiroshi Yamauchi 17719775a620SHiroshi Yamauchi // A helper for transformScopes. Clone the blocks in the scope (excluding the 17729775a620SHiroshi Yamauchi // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 17739775a620SHiroshi Yamauchi // at the exit block. 17749775a620SHiroshi Yamauchi void CHR::cloneScopeBlocks(CHRScope *Scope, 17759775a620SHiroshi Yamauchi BasicBlock *PreEntryBlock, 17769775a620SHiroshi Yamauchi BasicBlock *ExitBlock, 17779775a620SHiroshi Yamauchi Region *LastRegion, 17789775a620SHiroshi Yamauchi ValueToValueMapTy &VMap) { 17799775a620SHiroshi Yamauchi // Clone all the blocks. The original blocks will be the hot-path 17809775a620SHiroshi Yamauchi // CHR-optimized code and the cloned blocks will be the original unoptimized 17819775a620SHiroshi Yamauchi // code. This is so that the block pointers from the 17829775a620SHiroshi Yamauchi // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 17839775a620SHiroshi Yamauchi // which CHR should apply to. 17849775a620SHiroshi Yamauchi SmallVector<BasicBlock*, 8> NewBlocks; 17859775a620SHiroshi Yamauchi for (RegInfo &RI : Scope->RegInfos) 17869775a620SHiroshi Yamauchi for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 17879775a620SHiroshi Yamauchi // sub-Scopes. 17889775a620SHiroshi Yamauchi assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 17899775a620SHiroshi Yamauchi BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 17909775a620SHiroshi Yamauchi NewBlocks.push_back(NewBB); 17919775a620SHiroshi Yamauchi VMap[BB] = NewBB; 17929775a620SHiroshi Yamauchi } 17939775a620SHiroshi Yamauchi 17949775a620SHiroshi Yamauchi // Place the cloned blocks right after the original blocks (right before the 17959775a620SHiroshi Yamauchi // exit block of.) 17969775a620SHiroshi Yamauchi if (ExitBlock) 17979775a620SHiroshi Yamauchi F.getBasicBlockList().splice(ExitBlock->getIterator(), 17989775a620SHiroshi Yamauchi F.getBasicBlockList(), 17999775a620SHiroshi Yamauchi NewBlocks[0]->getIterator(), F.end()); 18009775a620SHiroshi Yamauchi 18019775a620SHiroshi Yamauchi // Update the cloned blocks/instructions to refer to themselves. 18029775a620SHiroshi Yamauchi for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 18039775a620SHiroshi Yamauchi for (Instruction &I : *NewBlocks[i]) 18049775a620SHiroshi Yamauchi RemapInstruction(&I, VMap, 18059775a620SHiroshi Yamauchi RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 18069775a620SHiroshi Yamauchi 18079775a620SHiroshi Yamauchi // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 18089775a620SHiroshi Yamauchi // the top-level region but we don't need to add PHIs. The trivial PHIs 18099775a620SHiroshi Yamauchi // inserted above will be updated here. 18109775a620SHiroshi Yamauchi if (ExitBlock) 18119775a620SHiroshi Yamauchi for (PHINode &PN : ExitBlock->phis()) 18129775a620SHiroshi Yamauchi for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 18139775a620SHiroshi Yamauchi ++I) { 18149775a620SHiroshi Yamauchi BasicBlock *Pred = PN.getIncomingBlock(I); 18159775a620SHiroshi Yamauchi if (LastRegion->contains(Pred)) { 18169775a620SHiroshi Yamauchi Value *V = PN.getIncomingValue(I); 18179775a620SHiroshi Yamauchi auto It = VMap.find(V); 18189775a620SHiroshi Yamauchi if (It != VMap.end()) V = It->second; 18199775a620SHiroshi Yamauchi assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 18209775a620SHiroshi Yamauchi PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 18219775a620SHiroshi Yamauchi } 18229775a620SHiroshi Yamauchi } 18239775a620SHiroshi Yamauchi } 18249775a620SHiroshi Yamauchi 18259775a620SHiroshi Yamauchi // A helper for transformScope. Replace the old (placeholder) branch with the 18269775a620SHiroshi Yamauchi // new (merged) conditional branch. 18279775a620SHiroshi Yamauchi BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 18289775a620SHiroshi Yamauchi BasicBlock *EntryBlock, 18299775a620SHiroshi Yamauchi BasicBlock *NewEntryBlock, 18309775a620SHiroshi Yamauchi ValueToValueMapTy &VMap) { 18319775a620SHiroshi Yamauchi BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 18329775a620SHiroshi Yamauchi assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 18339775a620SHiroshi Yamauchi "SplitBlock did not work correctly!"); 18349775a620SHiroshi Yamauchi assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 18359775a620SHiroshi Yamauchi "NewEntryBlock's only pred must be EntryBlock"); 18369775a620SHiroshi Yamauchi assert(VMap.find(NewEntryBlock) != VMap.end() && 18379775a620SHiroshi Yamauchi "NewEntryBlock must have been copied"); 18389775a620SHiroshi Yamauchi OldBR->dropAllReferences(); 1839bd897a02SHiroshi Yamauchi OldBR->eraseFromParent(); 18409775a620SHiroshi Yamauchi // The true predicate is a placeholder. It will be replaced later in 18419775a620SHiroshi Yamauchi // fixupBranchesAndSelects(). 18429775a620SHiroshi Yamauchi BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 18439775a620SHiroshi Yamauchi cast<BasicBlock>(VMap[NewEntryBlock]), 18449775a620SHiroshi Yamauchi ConstantInt::getTrue(F.getContext())); 18459775a620SHiroshi Yamauchi PreEntryBlock->getInstList().push_back(NewBR); 18469775a620SHiroshi Yamauchi assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 18479775a620SHiroshi Yamauchi "NewEntryBlock's only pred must be EntryBlock"); 18489775a620SHiroshi Yamauchi return NewBR; 18499775a620SHiroshi Yamauchi } 18509775a620SHiroshi Yamauchi 18519775a620SHiroshi Yamauchi // A helper for transformScopes. Create the combined branch condition and 18529775a620SHiroshi Yamauchi // constant-fold the branches/selects in the hot path. 18539775a620SHiroshi Yamauchi void CHR::fixupBranchesAndSelects(CHRScope *Scope, 18549775a620SHiroshi Yamauchi BasicBlock *PreEntryBlock, 18559775a620SHiroshi Yamauchi BranchInst *MergedBR, 18569775a620SHiroshi Yamauchi uint64_t ProfileCount) { 18579775a620SHiroshi Yamauchi Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 18589775a620SHiroshi Yamauchi BranchProbability CHRBranchBias(1, 1); 18599775a620SHiroshi Yamauchi uint64_t NumCHRedBranches = 0; 18609775a620SHiroshi Yamauchi IRBuilder<> IRB(PreEntryBlock->getTerminator()); 18619775a620SHiroshi Yamauchi for (RegInfo &RI : Scope->CHRRegions) { 18629775a620SHiroshi Yamauchi Region *R = RI.R; 18639775a620SHiroshi Yamauchi if (RI.HasBranch) { 18649775a620SHiroshi Yamauchi fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 18659775a620SHiroshi Yamauchi ++NumCHRedBranches; 18669775a620SHiroshi Yamauchi } 18679775a620SHiroshi Yamauchi for (SelectInst *SI : RI.Selects) { 18689775a620SHiroshi Yamauchi fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 18699775a620SHiroshi Yamauchi ++NumCHRedBranches; 18709775a620SHiroshi Yamauchi } 18719775a620SHiroshi Yamauchi } 18729775a620SHiroshi Yamauchi Stats.NumBranchesDelta += NumCHRedBranches - 1; 18739775a620SHiroshi Yamauchi Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1874fd2c699dSHiroshi Yamauchi ORE.emit([&]() { 1875fd2c699dSHiroshi Yamauchi return OptimizationRemark(DEBUG_TYPE, 1876fd2c699dSHiroshi Yamauchi "CHR", 1877fd2c699dSHiroshi Yamauchi // Refer to the hot (original) path 1878fd2c699dSHiroshi Yamauchi MergedBR->getSuccessor(0)->getTerminator()) 1879fd2c699dSHiroshi Yamauchi << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 1880fd2c699dSHiroshi Yamauchi << " branches or selects"; 1881fd2c699dSHiroshi Yamauchi }); 18829775a620SHiroshi Yamauchi MergedBR->setCondition(MergedCondition); 18835c31b8b9SArthur Eubanks uint32_t Weights[] = { 18845c31b8b9SArthur Eubanks static_cast<uint32_t>(CHRBranchBias.scale(1000)), 18855c31b8b9SArthur Eubanks static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)), 1886f1542efdSBenjamin Kramer }; 18879775a620SHiroshi Yamauchi MDBuilder MDB(F.getContext()); 18889775a620SHiroshi Yamauchi MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 18899775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 18909775a620SHiroshi Yamauchi << "\n"); 18919775a620SHiroshi Yamauchi } 18929775a620SHiroshi Yamauchi 18939775a620SHiroshi Yamauchi // A helper for fixupBranchesAndSelects. Add to the combined branch condition 18949775a620SHiroshi Yamauchi // and constant-fold a branch in the hot path. 18959775a620SHiroshi Yamauchi void CHR::fixupBranch(Region *R, CHRScope *Scope, 18969775a620SHiroshi Yamauchi IRBuilder<> &IRB, 18979775a620SHiroshi Yamauchi Value *&MergedCondition, 18989775a620SHiroshi Yamauchi BranchProbability &CHRBranchBias) { 18999775a620SHiroshi Yamauchi bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 19009775a620SHiroshi Yamauchi assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 19019775a620SHiroshi Yamauchi "Must be truthy or falsy"); 19029775a620SHiroshi Yamauchi auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 19039775a620SHiroshi Yamauchi assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 19049775a620SHiroshi Yamauchi "Must be in the bias map"); 19059775a620SHiroshi Yamauchi BranchProbability Bias = BranchBiasMap[R]; 19069775a620SHiroshi Yamauchi assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 19079775a620SHiroshi Yamauchi // Take the min. 19089775a620SHiroshi Yamauchi if (CHRBranchBias > Bias) 19099775a620SHiroshi Yamauchi CHRBranchBias = Bias; 19109775a620SHiroshi Yamauchi BasicBlock *IfThen = BI->getSuccessor(1); 19119775a620SHiroshi Yamauchi BasicBlock *IfElse = BI->getSuccessor(0); 19129775a620SHiroshi Yamauchi BasicBlock *RegionExitBlock = R->getExit(); 19139775a620SHiroshi Yamauchi assert(RegionExitBlock && "Null ExitBlock"); 19149775a620SHiroshi Yamauchi assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 19159775a620SHiroshi Yamauchi IfThen != IfElse && "Invariant from findScopes"); 19169775a620SHiroshi Yamauchi if (IfThen == RegionExitBlock) { 19179775a620SHiroshi Yamauchi // Swap them so that IfThen means going into it and IfElse means skipping 19189775a620SHiroshi Yamauchi // it. 19199775a620SHiroshi Yamauchi std::swap(IfThen, IfElse); 19209775a620SHiroshi Yamauchi } 19219775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 19229775a620SHiroshi Yamauchi << " IfElse " << IfElse->getName() << "\n"); 19239775a620SHiroshi Yamauchi Value *Cond = BI->getCondition(); 19249775a620SHiroshi Yamauchi BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 19259775a620SHiroshi Yamauchi bool ConditionTrue = HotTarget == BI->getSuccessor(0); 19269775a620SHiroshi Yamauchi addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 19279775a620SHiroshi Yamauchi MergedCondition); 19289775a620SHiroshi Yamauchi // Constant-fold the branch at ClonedEntryBlock. 19299775a620SHiroshi Yamauchi assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 19309775a620SHiroshi Yamauchi "The successor shouldn't change"); 19319775a620SHiroshi Yamauchi Value *NewCondition = ConditionTrue ? 19329775a620SHiroshi Yamauchi ConstantInt::getTrue(F.getContext()) : 19339775a620SHiroshi Yamauchi ConstantInt::getFalse(F.getContext()); 19349775a620SHiroshi Yamauchi BI->setCondition(NewCondition); 19359775a620SHiroshi Yamauchi } 19369775a620SHiroshi Yamauchi 19379775a620SHiroshi Yamauchi // A helper for fixupBranchesAndSelects. Add to the combined branch condition 19389775a620SHiroshi Yamauchi // and constant-fold a select in the hot path. 19399775a620SHiroshi Yamauchi void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 19409775a620SHiroshi Yamauchi IRBuilder<> &IRB, 19419775a620SHiroshi Yamauchi Value *&MergedCondition, 19429775a620SHiroshi Yamauchi BranchProbability &CHRBranchBias) { 19439775a620SHiroshi Yamauchi bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 19449775a620SHiroshi Yamauchi assert((IsTrueBiased || 19459775a620SHiroshi Yamauchi Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 19469775a620SHiroshi Yamauchi assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 19479775a620SHiroshi Yamauchi "Must be in the bias map"); 19489775a620SHiroshi Yamauchi BranchProbability Bias = SelectBiasMap[SI]; 19499775a620SHiroshi Yamauchi assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 19509775a620SHiroshi Yamauchi // Take the min. 19519775a620SHiroshi Yamauchi if (CHRBranchBias > Bias) 19529775a620SHiroshi Yamauchi CHRBranchBias = Bias; 19539775a620SHiroshi Yamauchi Value *Cond = SI->getCondition(); 19549775a620SHiroshi Yamauchi addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 19559775a620SHiroshi Yamauchi MergedCondition); 19569775a620SHiroshi Yamauchi Value *NewCondition = IsTrueBiased ? 19579775a620SHiroshi Yamauchi ConstantInt::getTrue(F.getContext()) : 19589775a620SHiroshi Yamauchi ConstantInt::getFalse(F.getContext()); 19599775a620SHiroshi Yamauchi SI->setCondition(NewCondition); 19609775a620SHiroshi Yamauchi } 19619775a620SHiroshi Yamauchi 19629775a620SHiroshi Yamauchi // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 19639775a620SHiroshi Yamauchi // condition. 19649775a620SHiroshi Yamauchi void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1965afc21c7eSNikita Popov Instruction *BranchOrSelect, CHRScope *Scope, 1966afc21c7eSNikita Popov IRBuilder<> &IRB, Value *&MergedCondition) { 1967afc21c7eSNikita Popov if (!IsTrueBiased) { 19689775a620SHiroshi Yamauchi // If Cond is an icmp and all users of V except for BranchOrSelect is a 19699775a620SHiroshi Yamauchi // branch, negate the icmp predicate and swap the branch targets and avoid 19709775a620SHiroshi Yamauchi // inserting an Xor to negate Cond. 1971afc21c7eSNikita Popov auto *ICmp = dyn_cast<ICmpInst>(Cond); 1972afc21c7eSNikita Popov if (!ICmp || 1973afc21c7eSNikita Popov !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) 1974afc21c7eSNikita Popov Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond); 1975afc21c7eSNikita Popov } 1976afc21c7eSNikita Popov 19777ba48466SNikita Popov // Select conditions can be poison, while branching on poison is immediate 19787ba48466SNikita Popov // undefined behavior. As such, we need to freeze potentially poisonous 19797ba48466SNikita Popov // conditions derived from selects. 19807ba48466SNikita Popov if (isa<SelectInst>(BranchOrSelect) && 19817ba48466SNikita Popov !isGuaranteedNotToBeUndefOrPoison(Cond)) 19827ba48466SNikita Popov Cond = IRB.CreateFreeze(Cond); 19837ba48466SNikita Popov 1984c8eb83f2SNikita Popov // Use logical and to avoid propagating poison from later conditions. 1985c8eb83f2SNikita Popov MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond); 19869775a620SHiroshi Yamauchi } 19879775a620SHiroshi Yamauchi 19889775a620SHiroshi Yamauchi void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1989b3b61de0SFangrui Song unsigned I = 0; 19909775a620SHiroshi Yamauchi DenseSet<PHINode *> TrivialPHIs; 19919775a620SHiroshi Yamauchi for (CHRScope *Scope : CHRScopes) { 19929775a620SHiroshi Yamauchi transformScopes(Scope, TrivialPHIs); 19939775a620SHiroshi Yamauchi CHR_DEBUG( 19949775a620SHiroshi Yamauchi std::ostringstream oss; 1995b3b61de0SFangrui Song oss << " after transformScopes " << I++; 19969775a620SHiroshi Yamauchi dumpIR(F, oss.str().c_str(), nullptr)); 1997b3b61de0SFangrui Song (void)I; 19989775a620SHiroshi Yamauchi } 19999775a620SHiroshi Yamauchi } 20009775a620SHiroshi Yamauchi 2001c8f348cbSFangrui Song static void LLVM_ATTRIBUTE_UNUSED 2002c8f348cbSFangrui Song dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 20039775a620SHiroshi Yamauchi dbgs() << Label << " " << Scopes.size() << "\n"; 20049775a620SHiroshi Yamauchi for (CHRScope *Scope : Scopes) { 20059775a620SHiroshi Yamauchi dbgs() << *Scope << "\n"; 20069775a620SHiroshi Yamauchi } 20079775a620SHiroshi Yamauchi } 20089775a620SHiroshi Yamauchi 20099775a620SHiroshi Yamauchi bool CHR::run() { 20109775a620SHiroshi Yamauchi if (!shouldApply(F, PSI)) 20119775a620SHiroshi Yamauchi return false; 20129775a620SHiroshi Yamauchi 20139775a620SHiroshi Yamauchi CHR_DEBUG(dumpIR(F, "before", nullptr)); 20149775a620SHiroshi Yamauchi 20159775a620SHiroshi Yamauchi bool Changed = false; 20169775a620SHiroshi Yamauchi { 20179775a620SHiroshi Yamauchi CHR_DEBUG( 20189775a620SHiroshi Yamauchi dbgs() << "RegionInfo:\n"; 20199775a620SHiroshi Yamauchi RI.print(dbgs())); 20209775a620SHiroshi Yamauchi 20219775a620SHiroshi Yamauchi // Recursively traverse the region tree and find regions that have biased 20229775a620SHiroshi Yamauchi // branches and/or selects and create scopes. 20239775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> AllScopes; 20249775a620SHiroshi Yamauchi findScopes(AllScopes); 20259775a620SHiroshi Yamauchi CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 20269775a620SHiroshi Yamauchi 20279775a620SHiroshi Yamauchi // Split the scopes if 1) the conditiona values of the biased 20289775a620SHiroshi Yamauchi // branches/selects of the inner/lower scope can't be hoisted up to the 20299775a620SHiroshi Yamauchi // outermost/uppermost scope entry, or 2) the condition values of the biased 20309775a620SHiroshi Yamauchi // branches/selects in a scope (including subscopes) don't share at least 20319775a620SHiroshi Yamauchi // one common value. 20329775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> SplitScopes; 20339775a620SHiroshi Yamauchi splitScopes(AllScopes, SplitScopes); 20349775a620SHiroshi Yamauchi CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 20359775a620SHiroshi Yamauchi 20369775a620SHiroshi Yamauchi // After splitting, set the biased regions and selects of a scope (a tree 20379775a620SHiroshi Yamauchi // root) that include those of the subscopes. 20389775a620SHiroshi Yamauchi classifyBiasedScopes(SplitScopes); 20399775a620SHiroshi Yamauchi CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 20409775a620SHiroshi Yamauchi 20419775a620SHiroshi Yamauchi // Filter out the scopes that has only one biased region or select (CHR 20429775a620SHiroshi Yamauchi // isn't useful in such a case). 20439775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> FilteredScopes; 20449775a620SHiroshi Yamauchi filterScopes(SplitScopes, FilteredScopes); 20459775a620SHiroshi Yamauchi CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 20469775a620SHiroshi Yamauchi 20479775a620SHiroshi Yamauchi // Set the regions to be CHR'ed and their hoist stops for each scope. 20489775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> SetScopes; 20499775a620SHiroshi Yamauchi setCHRRegions(FilteredScopes, SetScopes); 20509775a620SHiroshi Yamauchi CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 20519775a620SHiroshi Yamauchi 20529775a620SHiroshi Yamauchi // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 20539775a620SHiroshi Yamauchi // ones. We need to apply CHR from outer to inner so that we apply CHR only 20549775a620SHiroshi Yamauchi // to the hot path, rather than both hot and cold paths. 20559775a620SHiroshi Yamauchi SmallVector<CHRScope *, 8> SortedScopes; 20569775a620SHiroshi Yamauchi sortScopes(SetScopes, SortedScopes); 20579775a620SHiroshi Yamauchi CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 20589775a620SHiroshi Yamauchi 20599775a620SHiroshi Yamauchi CHR_DEBUG( 20609775a620SHiroshi Yamauchi dbgs() << "RegionInfo:\n"; 20619775a620SHiroshi Yamauchi RI.print(dbgs())); 20629775a620SHiroshi Yamauchi 20639775a620SHiroshi Yamauchi // Apply the CHR transformation. 20649775a620SHiroshi Yamauchi if (!SortedScopes.empty()) { 20659775a620SHiroshi Yamauchi transformScopes(SortedScopes); 20669775a620SHiroshi Yamauchi Changed = true; 20679775a620SHiroshi Yamauchi } 20689775a620SHiroshi Yamauchi } 20699775a620SHiroshi Yamauchi 2070fd2c699dSHiroshi Yamauchi if (Changed) { 20719775a620SHiroshi Yamauchi CHR_DEBUG(dumpIR(F, "after", &Stats)); 2072fd2c699dSHiroshi Yamauchi ORE.emit([&]() { 2073fd2c699dSHiroshi Yamauchi return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 2074fd2c699dSHiroshi Yamauchi << ore::NV("Function", &F) << " " 2075fd2c699dSHiroshi Yamauchi << "Reduced the number of branches in hot paths by " 2076fd2c699dSHiroshi Yamauchi << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 2077fd2c699dSHiroshi Yamauchi << " (static) and " 2078fd2c699dSHiroshi Yamauchi << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 2079fd2c699dSHiroshi Yamauchi << " (weighted by PGO count)"; 2080fd2c699dSHiroshi Yamauchi }); 2081fd2c699dSHiroshi Yamauchi } 20829775a620SHiroshi Yamauchi 20839775a620SHiroshi Yamauchi return Changed; 20849775a620SHiroshi Yamauchi } 20859775a620SHiroshi Yamauchi 20869775a620SHiroshi Yamauchi bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 20879775a620SHiroshi Yamauchi BlockFrequencyInfo &BFI = 20889775a620SHiroshi Yamauchi getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 20899775a620SHiroshi Yamauchi DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 20909775a620SHiroshi Yamauchi ProfileSummaryInfo &PSI = 2091e7b789b5SVedant Kumar getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 20929775a620SHiroshi Yamauchi RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 2093fd2c699dSHiroshi Yamauchi std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = 20940eaee545SJonas Devlieghere std::make_unique<OptimizationRemarkEmitter>(&F); 2095bce1bf0eSKazu Hirata return CHR(F, BFI, DT, PSI, RI, *OwnedORE).run(); 20969775a620SHiroshi Yamauchi } 20979775a620SHiroshi Yamauchi 20989775a620SHiroshi Yamauchi namespace llvm { 20999775a620SHiroshi Yamauchi 21009775a620SHiroshi Yamauchi ControlHeightReductionPass::ControlHeightReductionPass() { 2101b3b61de0SFangrui Song parseCHRFilterFiles(); 21029775a620SHiroshi Yamauchi } 21039775a620SHiroshi Yamauchi 21049775a620SHiroshi Yamauchi PreservedAnalyses ControlHeightReductionPass::run( 21059775a620SHiroshi Yamauchi Function &F, 21069775a620SHiroshi Yamauchi FunctionAnalysisManager &FAM) { 21079775a620SHiroshi Yamauchi auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 21089775a620SHiroshi Yamauchi auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 21099775a620SHiroshi Yamauchi auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2110bd541b21SAlina Sbirlea auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 21119775a620SHiroshi Yamauchi auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2112fd2c699dSHiroshi Yamauchi auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2113fd2c699dSHiroshi Yamauchi bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 21149775a620SHiroshi Yamauchi if (!Changed) 21159775a620SHiroshi Yamauchi return PreservedAnalyses::all(); 21166b9524a0SArthur Eubanks return PreservedAnalyses::none(); 21179775a620SHiroshi Yamauchi } 21189775a620SHiroshi Yamauchi 21199775a620SHiroshi Yamauchi } // namespace llvm 2120