1 //===- StructurizeCFG.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/MapVector.h"
12 #include "llvm/ADT/SCCIterator.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/ADT/SmallPtrSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/Analysis/InstructionSimplify.h"
17 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
18 #include "llvm/Analysis/RegionInfo.h"
19 #include "llvm/Analysis/RegionIterator.h"
20 #include "llvm/Analysis/RegionPass.h"
21 #include "llvm/IR/BasicBlock.h"
22 #include "llvm/IR/CFG.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/Metadata.h"
30 #include "llvm/IR/PassManager.h"
31 #include "llvm/IR/PatternMatch.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Use.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/IR/ValueHandle.h"
36 #include "llvm/InitializePasses.h"
37 #include "llvm/Pass.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Transforms/Scalar.h"
43 #include "llvm/Transforms/Utils.h"
44 #include "llvm/Transforms/Utils/Local.h"
45 #include "llvm/Transforms/Utils/SSAUpdater.h"
46 #include <algorithm>
47 #include <cassert>
48 #include <utility>
49
50 using namespace llvm;
51 using namespace llvm::PatternMatch;
52
53 #define DEBUG_TYPE "structurizecfg"
54
55 // The name for newly created blocks.
56 const char FlowBlockName[] = "Flow";
57
58 namespace {
59
60 static cl::opt<bool> ForceSkipUniformRegions(
61 "structurizecfg-skip-uniform-regions",
62 cl::Hidden,
63 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
64 cl::init(false));
65
66 static cl::opt<bool>
67 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
68 cl::desc("Allow relaxed uniform region checks"),
69 cl::init(true));
70
71 // Definition of the complex types used in this pass.
72
73 using BBValuePair = std::pair<BasicBlock *, Value *>;
74
75 using RNVector = SmallVector<RegionNode *, 8>;
76 using BBVector = SmallVector<BasicBlock *, 8>;
77 using BranchVector = SmallVector<BranchInst *, 8>;
78 using BBValueVector = SmallVector<BBValuePair, 2>;
79
80 using BBSet = SmallPtrSet<BasicBlock *, 8>;
81
82 using PhiMap = MapVector<PHINode *, BBValueVector>;
83 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
84
85 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
86 using BBPredicates = DenseMap<BasicBlock *, Value *>;
87 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
88 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
89
90 // A traits type that is intended to be used in graph algorithms. The graph
91 // traits starts at an entry node, and traverses the RegionNodes that are in
92 // the Nodes set.
93 struct SubGraphTraits {
94 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
95 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
96
97 // This wraps a set of Nodes into the iterator, so we know which edges to
98 // filter out.
99 class WrappedSuccIterator
100 : public iterator_adaptor_base<
101 WrappedSuccIterator, BaseSuccIterator,
102 typename std::iterator_traits<BaseSuccIterator>::iterator_category,
103 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
104 SmallDenseSet<RegionNode *> *Nodes;
105
106 public:
WrappedSuccIterator(BaseSuccIterator It,SmallDenseSet<RegionNode * > * Nodes)107 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
108 : iterator_adaptor_base(It), Nodes(Nodes) {}
109
operator *() const110 NodeRef operator*() const { return {*I, Nodes}; }
111 };
112
filterAll__anond74a67a70111::SubGraphTraits113 static bool filterAll(const NodeRef &N) { return true; }
filterSet__anond74a67a70111::SubGraphTraits114 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
115
116 using ChildIteratorType =
117 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
118
getEntryNode__anond74a67a70111::SubGraphTraits119 static NodeRef getEntryNode(Region *R) {
120 return {GraphTraits<Region *>::getEntryNode(R), nullptr};
121 }
122
getEntryNode__anond74a67a70111::SubGraphTraits123 static NodeRef getEntryNode(NodeRef N) { return N; }
124
children__anond74a67a70111::SubGraphTraits125 static iterator_range<ChildIteratorType> children(const NodeRef &N) {
126 auto *filter = N.second ? &filterSet : &filterAll;
127 return make_filter_range(
128 make_range<WrappedSuccIterator>(
129 {GraphTraits<RegionNode *>::child_begin(N.first), N.second},
130 {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
131 filter);
132 }
133
child_begin__anond74a67a70111::SubGraphTraits134 static ChildIteratorType child_begin(const NodeRef &N) {
135 return children(N).begin();
136 }
137
child_end__anond74a67a70111::SubGraphTraits138 static ChildIteratorType child_end(const NodeRef &N) {
139 return children(N).end();
140 }
141 };
142
143 /// Finds the nearest common dominator of a set of BasicBlocks.
144 ///
145 /// For every BB you add to the set, you can specify whether we "remember" the
146 /// block. When you get the common dominator, you can also ask whether it's one
147 /// of the blocks we remembered.
148 class NearestCommonDominator {
149 DominatorTree *DT;
150 BasicBlock *Result = nullptr;
151 bool ResultIsRemembered = false;
152
153 /// Add BB to the resulting dominator.
addBlock(BasicBlock * BB,bool Remember)154 void addBlock(BasicBlock *BB, bool Remember) {
155 if (!Result) {
156 Result = BB;
157 ResultIsRemembered = Remember;
158 return;
159 }
160
161 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
162 if (NewResult != Result)
163 ResultIsRemembered = false;
164 if (NewResult == BB)
165 ResultIsRemembered |= Remember;
166 Result = NewResult;
167 }
168
169 public:
NearestCommonDominator(DominatorTree * DomTree)170 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
171
addBlock(BasicBlock * BB)172 void addBlock(BasicBlock *BB) {
173 addBlock(BB, /* Remember = */ false);
174 }
175
addAndRememberBlock(BasicBlock * BB)176 void addAndRememberBlock(BasicBlock *BB) {
177 addBlock(BB, /* Remember = */ true);
178 }
179
180 /// Get the nearest common dominator of all the BBs added via addBlock() and
181 /// addAndRememberBlock().
result()182 BasicBlock *result() { return Result; }
183
184 /// Is the BB returned by getResult() one of the blocks we added to the set
185 /// with addAndRememberBlock()?
resultIsRememberedBlock()186 bool resultIsRememberedBlock() { return ResultIsRemembered; }
187 };
188
189 /// Transforms the control flow graph on one single entry/exit region
190 /// at a time.
191 ///
192 /// After the transform all "If"/"Then"/"Else" style control flow looks like
193 /// this:
194 ///
195 /// \verbatim
196 /// 1
197 /// ||
198 /// | |
199 /// 2 |
200 /// | /
201 /// |/
202 /// 3
203 /// || Where:
204 /// | | 1 = "If" block, calculates the condition
205 /// 4 | 2 = "Then" subregion, runs if the condition is true
206 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
207 /// |/ 4 = "Else" optional subregion, runs if the condition is false
208 /// 5 5 = "End" block, also rejoins the control flow
209 /// \endverbatim
210 ///
211 /// Control flow is expressed as a branch where the true exit goes into the
212 /// "Then"/"Else" region, while the false exit skips the region
213 /// The condition for the optional "Else" region is expressed as a PHI node.
214 /// The incoming values of the PHI node are true for the "If" edge and false
215 /// for the "Then" edge.
216 ///
217 /// Additionally to that even complicated loops look like this:
218 ///
219 /// \verbatim
220 /// 1
221 /// ||
222 /// | |
223 /// 2 ^ Where:
224 /// | / 1 = "Entry" block
225 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
226 /// 3 3 = "Flow" block, with back edge to entry block
227 /// |
228 /// \endverbatim
229 ///
230 /// The back edge of the "Flow" block is always on the false side of the branch
231 /// while the true side continues the general flow. So the loop condition
232 /// consist of a network of PHI nodes where the true incoming values expresses
233 /// breaks and the false values expresses continue states.
234
235 class StructurizeCFG {
236 Type *Boolean;
237 ConstantInt *BoolTrue;
238 ConstantInt *BoolFalse;
239 UndefValue *BoolUndef;
240
241 Function *Func;
242 Region *ParentRegion;
243
244 LegacyDivergenceAnalysis *DA = nullptr;
245 DominatorTree *DT;
246
247 SmallVector<RegionNode *, 8> Order;
248 BBSet Visited;
249
250 SmallVector<WeakVH, 8> AffectedPhis;
251 BBPhiMap DeletedPhis;
252 BB2BBVecMap AddedPhis;
253
254 PredMap Predicates;
255 BranchVector Conditions;
256
257 BB2BBMap Loops;
258 PredMap LoopPreds;
259 BranchVector LoopConds;
260
261 RegionNode *PrevNode;
262
263 void orderNodes();
264
265 void analyzeLoops(RegionNode *N);
266
267 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
268
269 void gatherPredicates(RegionNode *N);
270
271 void collectInfos();
272
273 void insertConditions(bool Loops);
274
275 void simplifyConditions();
276
277 void delPhiValues(BasicBlock *From, BasicBlock *To);
278
279 void addPhiValues(BasicBlock *From, BasicBlock *To);
280
281 void setPhiValues();
282
283 void simplifyAffectedPhis();
284
285 void killTerminator(BasicBlock *BB);
286
287 void changeExit(RegionNode *Node, BasicBlock *NewExit,
288 bool IncludeDominator);
289
290 BasicBlock *getNextFlow(BasicBlock *Dominator);
291
292 BasicBlock *needPrefix(bool NeedEmpty);
293
294 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
295
296 void setPrevNode(BasicBlock *BB);
297
298 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
299
300 bool isPredictableTrue(RegionNode *Node);
301
302 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
303
304 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
305
306 void createFlow();
307
308 void rebuildSSA();
309
310 public:
311 void init(Region *R);
312 bool run(Region *R, DominatorTree *DT);
313 bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA);
314 };
315
316 class StructurizeCFGLegacyPass : public RegionPass {
317 bool SkipUniformRegions;
318
319 public:
320 static char ID;
321
StructurizeCFGLegacyPass(bool SkipUniformRegions_=false)322 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
323 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
324 if (ForceSkipUniformRegions.getNumOccurrences())
325 SkipUniformRegions = ForceSkipUniformRegions.getValue();
326 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
327 }
328
runOnRegion(Region * R,RGPassManager & RGM)329 bool runOnRegion(Region *R, RGPassManager &RGM) override {
330 StructurizeCFG SCFG;
331 SCFG.init(R);
332 if (SkipUniformRegions) {
333 LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
334 if (SCFG.makeUniformRegion(R, DA))
335 return false;
336 }
337 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
338 return SCFG.run(R, DT);
339 }
340
getPassName() const341 StringRef getPassName() const override { return "Structurize control flow"; }
342
getAnalysisUsage(AnalysisUsage & AU) const343 void getAnalysisUsage(AnalysisUsage &AU) const override {
344 if (SkipUniformRegions)
345 AU.addRequired<LegacyDivergenceAnalysis>();
346 AU.addRequiredID(LowerSwitchID);
347 AU.addRequired<DominatorTreeWrapperPass>();
348
349 AU.addPreserved<DominatorTreeWrapperPass>();
350 RegionPass::getAnalysisUsage(AU);
351 }
352 };
353
354 } // end anonymous namespace
355
356 char StructurizeCFGLegacyPass::ID = 0;
357
358 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
359 "Structurize the CFG", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)360 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
361 INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
362 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
363 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
364 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
365 "Structurize the CFG", false, false)
366
367 /// Build up the general order of nodes, by performing a topological sort of the
368 /// parent region's nodes, while ensuring that there is no outer cycle node
369 /// between any two inner cycle nodes.
370 void StructurizeCFG::orderNodes() {
371 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
372 GraphTraits<Region *>::nodes_end(ParentRegion)));
373 if (Order.empty())
374 return;
375
376 SmallDenseSet<RegionNode *> Nodes;
377 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
378
379 // A list of range indices of SCCs in Order, to be processed.
380 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
381 unsigned I = 0, E = Order.size();
382 while (true) {
383 // Run through all the SCCs in the subgraph starting with Entry.
384 for (auto SCCI =
385 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
386 EntryNode);
387 !SCCI.isAtEnd(); ++SCCI) {
388 auto &SCC = *SCCI;
389
390 // An SCC up to the size of 2, can be reduced to an entry (the last node),
391 // and a possible additional node. Therefore, it is already in order, and
392 // there is no need to add it to the work-list.
393 unsigned Size = SCC.size();
394 if (Size > 2)
395 WorkList.emplace_back(I, I + Size);
396
397 // Add the SCC nodes to the Order array.
398 for (auto &N : SCC) {
399 assert(I < E && "SCC size mismatch!");
400 Order[I++] = N.first;
401 }
402 }
403 assert(I == E && "SCC size mismatch!");
404
405 // If there are no more SCCs to order, then we are done.
406 if (WorkList.empty())
407 break;
408
409 std::tie(I, E) = WorkList.pop_back_val();
410
411 // Collect the set of nodes in the SCC's subgraph. These are only the
412 // possible child nodes; we do not add the entry (last node) otherwise we
413 // will have the same exact SCC all over again.
414 Nodes.clear();
415 Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
416
417 // Update the entry node.
418 EntryNode.first = Order[E - 1];
419 EntryNode.second = &Nodes;
420 }
421 }
422
423 /// Determine the end of the loops
analyzeLoops(RegionNode * N)424 void StructurizeCFG::analyzeLoops(RegionNode *N) {
425 if (N->isSubRegion()) {
426 // Test for exit as back edge
427 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
428 if (Visited.count(Exit))
429 Loops[Exit] = N->getEntry();
430
431 } else {
432 // Test for successors as back edge
433 BasicBlock *BB = N->getNodeAs<BasicBlock>();
434 BranchInst *Term = cast<BranchInst>(BB->getTerminator());
435
436 for (BasicBlock *Succ : Term->successors())
437 if (Visited.count(Succ))
438 Loops[Succ] = BB;
439 }
440 }
441
442 /// Build the condition for one edge
buildCondition(BranchInst * Term,unsigned Idx,bool Invert)443 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
444 bool Invert) {
445 Value *Cond = Invert ? BoolFalse : BoolTrue;
446 if (Term->isConditional()) {
447 Cond = Term->getCondition();
448
449 if (Idx != (unsigned)Invert)
450 Cond = invertCondition(Cond);
451 }
452 return Cond;
453 }
454
455 /// Analyze the predecessors of each block and build up predicates
gatherPredicates(RegionNode * N)456 void StructurizeCFG::gatherPredicates(RegionNode *N) {
457 RegionInfo *RI = ParentRegion->getRegionInfo();
458 BasicBlock *BB = N->getEntry();
459 BBPredicates &Pred = Predicates[BB];
460 BBPredicates &LPred = LoopPreds[BB];
461
462 for (BasicBlock *P : predecessors(BB)) {
463 // Ignore it if it's a branch from outside into our region entry
464 if (!ParentRegion->contains(P))
465 continue;
466
467 Region *R = RI->getRegionFor(P);
468 if (R == ParentRegion) {
469 // It's a top level block in our region
470 BranchInst *Term = cast<BranchInst>(P->getTerminator());
471 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
472 BasicBlock *Succ = Term->getSuccessor(i);
473 if (Succ != BB)
474 continue;
475
476 if (Visited.count(P)) {
477 // Normal forward edge
478 if (Term->isConditional()) {
479 // Try to treat it like an ELSE block
480 BasicBlock *Other = Term->getSuccessor(!i);
481 if (Visited.count(Other) && !Loops.count(Other) &&
482 !Pred.count(Other) && !Pred.count(P)) {
483
484 Pred[Other] = BoolFalse;
485 Pred[P] = BoolTrue;
486 continue;
487 }
488 }
489 Pred[P] = buildCondition(Term, i, false);
490 } else {
491 // Back edge
492 LPred[P] = buildCondition(Term, i, true);
493 }
494 }
495 } else {
496 // It's an exit from a sub region
497 while (R->getParent() != ParentRegion)
498 R = R->getParent();
499
500 // Edge from inside a subregion to its entry, ignore it
501 if (*R == *N)
502 continue;
503
504 BasicBlock *Entry = R->getEntry();
505 if (Visited.count(Entry))
506 Pred[Entry] = BoolTrue;
507 else
508 LPred[Entry] = BoolFalse;
509 }
510 }
511 }
512
513 /// Collect various loop and predicate infos
collectInfos()514 void StructurizeCFG::collectInfos() {
515 // Reset predicate
516 Predicates.clear();
517
518 // and loop infos
519 Loops.clear();
520 LoopPreds.clear();
521
522 // Reset the visited nodes
523 Visited.clear();
524
525 for (RegionNode *RN : reverse(Order)) {
526 LLVM_DEBUG(dbgs() << "Visiting: "
527 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
528 << RN->getEntry()->getName() << "\n");
529
530 // Analyze all the conditions leading to a node
531 gatherPredicates(RN);
532
533 // Remember that we've seen this node
534 Visited.insert(RN->getEntry());
535
536 // Find the last back edges
537 analyzeLoops(RN);
538 }
539 }
540
541 /// Insert the missing branch conditions
insertConditions(bool Loops)542 void StructurizeCFG::insertConditions(bool Loops) {
543 BranchVector &Conds = Loops ? LoopConds : Conditions;
544 Value *Default = Loops ? BoolTrue : BoolFalse;
545 SSAUpdater PhiInserter;
546
547 for (BranchInst *Term : Conds) {
548 assert(Term->isConditional());
549
550 BasicBlock *Parent = Term->getParent();
551 BasicBlock *SuccTrue = Term->getSuccessor(0);
552 BasicBlock *SuccFalse = Term->getSuccessor(1);
553
554 PhiInserter.Initialize(Boolean, "");
555 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
556 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
557
558 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
559
560 NearestCommonDominator Dominator(DT);
561 Dominator.addBlock(Parent);
562
563 Value *ParentValue = nullptr;
564 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
565 BasicBlock *BB = BBAndPred.first;
566 Value *Pred = BBAndPred.second;
567
568 if (BB == Parent) {
569 ParentValue = Pred;
570 break;
571 }
572 PhiInserter.AddAvailableValue(BB, Pred);
573 Dominator.addAndRememberBlock(BB);
574 }
575
576 if (ParentValue) {
577 Term->setCondition(ParentValue);
578 } else {
579 if (!Dominator.resultIsRememberedBlock())
580 PhiInserter.AddAvailableValue(Dominator.result(), Default);
581
582 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
583 }
584 }
585 }
586
587 /// Simplify any inverted conditions that were built by buildConditions.
simplifyConditions()588 void StructurizeCFG::simplifyConditions() {
589 SmallVector<Instruction *> InstToErase;
590 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
591 auto &Preds = I.second;
592 for (auto &J : Preds) {
593 auto &Cond = J.second;
594 Instruction *Inverted;
595 if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
596 !Cond->use_empty()) {
597 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
598 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
599 Cond->replaceAllUsesWith(InvertedCmp);
600 InstToErase.push_back(cast<Instruction>(Cond));
601 }
602 }
603 }
604 }
605 for (auto *I : InstToErase)
606 I->eraseFromParent();
607 }
608
609 /// Remove all PHI values coming from "From" into "To" and remember
610 /// them in DeletedPhis
delPhiValues(BasicBlock * From,BasicBlock * To)611 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
612 PhiMap &Map = DeletedPhis[To];
613 for (PHINode &Phi : To->phis()) {
614 bool Recorded = false;
615 while (Phi.getBasicBlockIndex(From) != -1) {
616 Value *Deleted = Phi.removeIncomingValue(From, false);
617 Map[&Phi].push_back(std::make_pair(From, Deleted));
618 if (!Recorded) {
619 AffectedPhis.push_back(&Phi);
620 Recorded = true;
621 }
622 }
623 }
624 }
625
626 /// Add a dummy PHI value as soon as we knew the new predecessor
addPhiValues(BasicBlock * From,BasicBlock * To)627 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
628 for (PHINode &Phi : To->phis()) {
629 Value *Undef = UndefValue::get(Phi.getType());
630 Phi.addIncoming(Undef, From);
631 }
632 AddedPhis[To].push_back(From);
633 }
634
635 /// Add the real PHI value as soon as everything is set up
setPhiValues()636 void StructurizeCFG::setPhiValues() {
637 SmallVector<PHINode *, 8> InsertedPhis;
638 SSAUpdater Updater(&InsertedPhis);
639 for (const auto &AddedPhi : AddedPhis) {
640 BasicBlock *To = AddedPhi.first;
641 const BBVector &From = AddedPhi.second;
642
643 if (!DeletedPhis.count(To))
644 continue;
645
646 PhiMap &Map = DeletedPhis[To];
647 for (const auto &PI : Map) {
648 PHINode *Phi = PI.first;
649 Value *Undef = UndefValue::get(Phi->getType());
650 Updater.Initialize(Phi->getType(), "");
651 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
652 Updater.AddAvailableValue(To, Undef);
653
654 NearestCommonDominator Dominator(DT);
655 Dominator.addBlock(To);
656 for (const auto &VI : PI.second) {
657 Updater.AddAvailableValue(VI.first, VI.second);
658 Dominator.addAndRememberBlock(VI.first);
659 }
660
661 if (!Dominator.resultIsRememberedBlock())
662 Updater.AddAvailableValue(Dominator.result(), Undef);
663
664 for (BasicBlock *FI : From)
665 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
666 AffectedPhis.push_back(Phi);
667 }
668
669 DeletedPhis.erase(To);
670 }
671 assert(DeletedPhis.empty());
672
673 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
674 }
675
simplifyAffectedPhis()676 void StructurizeCFG::simplifyAffectedPhis() {
677 bool Changed;
678 do {
679 Changed = false;
680 SimplifyQuery Q(Func->getParent()->getDataLayout());
681 Q.DT = DT;
682 for (WeakVH VH : AffectedPhis) {
683 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
684 if (auto NewValue = simplifyInstruction(Phi, Q)) {
685 Phi->replaceAllUsesWith(NewValue);
686 Phi->eraseFromParent();
687 Changed = true;
688 }
689 }
690 }
691 } while (Changed);
692 }
693
694 /// Remove phi values from all successors and then remove the terminator.
killTerminator(BasicBlock * BB)695 void StructurizeCFG::killTerminator(BasicBlock *BB) {
696 Instruction *Term = BB->getTerminator();
697 if (!Term)
698 return;
699
700 for (BasicBlock *Succ : successors(BB))
701 delPhiValues(BB, Succ);
702
703 if (DA)
704 DA->removeValue(Term);
705 Term->eraseFromParent();
706 }
707
708 /// Let node exit(s) point to NewExit
changeExit(RegionNode * Node,BasicBlock * NewExit,bool IncludeDominator)709 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
710 bool IncludeDominator) {
711 if (Node->isSubRegion()) {
712 Region *SubRegion = Node->getNodeAs<Region>();
713 BasicBlock *OldExit = SubRegion->getExit();
714 BasicBlock *Dominator = nullptr;
715
716 // Find all the edges from the sub region to the exit.
717 // We use make_early_inc_range here because we modify BB's terminator.
718 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
719 if (!SubRegion->contains(BB))
720 continue;
721
722 // Modify the edges to point to the new exit
723 delPhiValues(BB, OldExit);
724 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
725 addPhiValues(BB, NewExit);
726
727 // Find the new dominator (if requested)
728 if (IncludeDominator) {
729 if (!Dominator)
730 Dominator = BB;
731 else
732 Dominator = DT->findNearestCommonDominator(Dominator, BB);
733 }
734 }
735
736 // Change the dominator (if requested)
737 if (Dominator)
738 DT->changeImmediateDominator(NewExit, Dominator);
739
740 // Update the region info
741 SubRegion->replaceExit(NewExit);
742 } else {
743 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
744 killTerminator(BB);
745 BranchInst::Create(NewExit, BB);
746 addPhiValues(BB, NewExit);
747 if (IncludeDominator)
748 DT->changeImmediateDominator(NewExit, BB);
749 }
750 }
751
752 /// Create a new flow node and update dominator tree and region info
getNextFlow(BasicBlock * Dominator)753 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
754 LLVMContext &Context = Func->getContext();
755 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
756 Order.back()->getEntry();
757 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
758 Func, Insert);
759 DT->addNewBlock(Flow, Dominator);
760 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
761 return Flow;
762 }
763
764 /// Create a new or reuse the previous node as flow node
needPrefix(bool NeedEmpty)765 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
766 BasicBlock *Entry = PrevNode->getEntry();
767
768 if (!PrevNode->isSubRegion()) {
769 killTerminator(Entry);
770 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
771 return Entry;
772 }
773
774 // create a new flow node
775 BasicBlock *Flow = getNextFlow(Entry);
776
777 // and wire it up
778 changeExit(PrevNode, Flow, true);
779 PrevNode = ParentRegion->getBBNode(Flow);
780 return Flow;
781 }
782
783 /// Returns the region exit if possible, otherwise just a new flow node
needPostfix(BasicBlock * Flow,bool ExitUseAllowed)784 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
785 bool ExitUseAllowed) {
786 if (!Order.empty() || !ExitUseAllowed)
787 return getNextFlow(Flow);
788
789 BasicBlock *Exit = ParentRegion->getExit();
790 DT->changeImmediateDominator(Exit, Flow);
791 addPhiValues(Flow, Exit);
792 return Exit;
793 }
794
795 /// Set the previous node
setPrevNode(BasicBlock * BB)796 void StructurizeCFG::setPrevNode(BasicBlock *BB) {
797 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
798 : nullptr;
799 }
800
801 /// Does BB dominate all the predicates of Node?
dominatesPredicates(BasicBlock * BB,RegionNode * Node)802 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
803 BBPredicates &Preds = Predicates[Node->getEntry()];
804 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
805 return DT->dominates(BB, Pred.first);
806 });
807 }
808
809 /// Can we predict that this node will always be called?
isPredictableTrue(RegionNode * Node)810 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
811 BBPredicates &Preds = Predicates[Node->getEntry()];
812 bool Dominated = false;
813
814 // Regionentry is always true
815 if (!PrevNode)
816 return true;
817
818 for (std::pair<BasicBlock*, Value*> Pred : Preds) {
819 BasicBlock *BB = Pred.first;
820 Value *V = Pred.second;
821
822 if (V != BoolTrue)
823 return false;
824
825 if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
826 Dominated = true;
827 }
828
829 // TODO: The dominator check is too strict
830 return Dominated;
831 }
832
833 /// Take one node from the order vector and wire it up
wireFlow(bool ExitUseAllowed,BasicBlock * LoopEnd)834 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
835 BasicBlock *LoopEnd) {
836 RegionNode *Node = Order.pop_back_val();
837 Visited.insert(Node->getEntry());
838
839 if (isPredictableTrue(Node)) {
840 // Just a linear flow
841 if (PrevNode) {
842 changeExit(PrevNode, Node->getEntry(), true);
843 }
844 PrevNode = Node;
845 } else {
846 // Insert extra prefix node (or reuse last one)
847 BasicBlock *Flow = needPrefix(false);
848
849 // Insert extra postfix node (or use exit instead)
850 BasicBlock *Entry = Node->getEntry();
851 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
852
853 // let it point to entry and next block
854 Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
855 addPhiValues(Flow, Entry);
856 DT->changeImmediateDominator(Entry, Flow);
857
858 PrevNode = Node;
859 while (!Order.empty() && !Visited.count(LoopEnd) &&
860 dominatesPredicates(Entry, Order.back())) {
861 handleLoops(false, LoopEnd);
862 }
863
864 changeExit(PrevNode, Next, false);
865 setPrevNode(Next);
866 }
867 }
868
handleLoops(bool ExitUseAllowed,BasicBlock * LoopEnd)869 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
870 BasicBlock *LoopEnd) {
871 RegionNode *Node = Order.back();
872 BasicBlock *LoopStart = Node->getEntry();
873
874 if (!Loops.count(LoopStart)) {
875 wireFlow(ExitUseAllowed, LoopEnd);
876 return;
877 }
878
879 if (!isPredictableTrue(Node))
880 LoopStart = needPrefix(true);
881
882 LoopEnd = Loops[Node->getEntry()];
883 wireFlow(false, LoopEnd);
884 while (!Visited.count(LoopEnd)) {
885 handleLoops(false, LoopEnd);
886 }
887
888 // If the start of the loop is the entry block, we can't branch to it so
889 // insert a new dummy entry block.
890 Function *LoopFunc = LoopStart->getParent();
891 if (LoopStart == &LoopFunc->getEntryBlock()) {
892 LoopStart->setName("entry.orig");
893
894 BasicBlock *NewEntry =
895 BasicBlock::Create(LoopStart->getContext(),
896 "entry",
897 LoopFunc,
898 LoopStart);
899 BranchInst::Create(LoopStart, NewEntry);
900 DT->setNewRoot(NewEntry);
901 }
902
903 // Create an extra loop end node
904 LoopEnd = needPrefix(false);
905 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
906 LoopConds.push_back(BranchInst::Create(Next, LoopStart,
907 BoolUndef, LoopEnd));
908 addPhiValues(LoopEnd, LoopStart);
909 setPrevNode(Next);
910 }
911
912 /// After this function control flow looks like it should be, but
913 /// branches and PHI nodes only have undefined conditions.
createFlow()914 void StructurizeCFG::createFlow() {
915 BasicBlock *Exit = ParentRegion->getExit();
916 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
917
918 AffectedPhis.clear();
919 DeletedPhis.clear();
920 AddedPhis.clear();
921 Conditions.clear();
922 LoopConds.clear();
923
924 PrevNode = nullptr;
925 Visited.clear();
926
927 while (!Order.empty()) {
928 handleLoops(EntryDominatesExit, nullptr);
929 }
930
931 if (PrevNode)
932 changeExit(PrevNode, Exit, EntryDominatesExit);
933 else
934 assert(EntryDominatesExit);
935 }
936
937 /// Handle a rare case where the disintegrated nodes instructions
938 /// no longer dominate all their uses. Not sure if this is really necessary
rebuildSSA()939 void StructurizeCFG::rebuildSSA() {
940 SSAUpdater Updater;
941 for (BasicBlock *BB : ParentRegion->blocks())
942 for (Instruction &I : *BB) {
943 bool Initialized = false;
944 // We may modify the use list as we iterate over it, so we use
945 // make_early_inc_range.
946 for (Use &U : llvm::make_early_inc_range(I.uses())) {
947 Instruction *User = cast<Instruction>(U.getUser());
948 if (User->getParent() == BB) {
949 continue;
950 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
951 if (UserPN->getIncomingBlock(U) == BB)
952 continue;
953 }
954
955 if (DT->dominates(&I, User))
956 continue;
957
958 if (!Initialized) {
959 Value *Undef = UndefValue::get(I.getType());
960 Updater.Initialize(I.getType(), "");
961 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
962 Updater.AddAvailableValue(BB, &I);
963 Initialized = true;
964 }
965 Updater.RewriteUseAfterInsertions(U);
966 }
967 }
968 }
969
hasOnlyUniformBranches(Region * R,unsigned UniformMDKindID,const LegacyDivergenceAnalysis & DA)970 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
971 const LegacyDivergenceAnalysis &DA) {
972 // Bool for if all sub-regions are uniform.
973 bool SubRegionsAreUniform = true;
974 // Count of how many direct children are conditional.
975 unsigned ConditionalDirectChildren = 0;
976
977 for (auto E : R->elements()) {
978 if (!E->isSubRegion()) {
979 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
980 if (!Br || !Br->isConditional())
981 continue;
982
983 if (!DA.isUniform(Br))
984 return false;
985
986 // One of our direct children is conditional.
987 ConditionalDirectChildren++;
988
989 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
990 << " has uniform terminator\n");
991 } else {
992 // Explicitly refuse to treat regions as uniform if they have non-uniform
993 // subregions. We cannot rely on DivergenceAnalysis for branches in
994 // subregions because those branches may have been removed and re-created,
995 // so we look for our metadata instead.
996 //
997 // Warning: It would be nice to treat regions as uniform based only on
998 // their direct child basic blocks' terminators, regardless of whether
999 // subregions are uniform or not. However, this requires a very careful
1000 // look at SIAnnotateControlFlow to make sure nothing breaks there.
1001 for (auto BB : E->getNodeAs<Region>()->blocks()) {
1002 auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1003 if (!Br || !Br->isConditional())
1004 continue;
1005
1006 if (!Br->getMetadata(UniformMDKindID)) {
1007 // Early exit if we cannot have relaxed uniform regions.
1008 if (!RelaxedUniformRegions)
1009 return false;
1010
1011 SubRegionsAreUniform = false;
1012 break;
1013 }
1014 }
1015 }
1016 }
1017
1018 // Our region is uniform if:
1019 // 1. All conditional branches that are direct children are uniform (checked
1020 // above).
1021 // 2. And either:
1022 // a. All sub-regions are uniform.
1023 // b. There is one or less conditional branches among the direct children.
1024 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1025 }
1026
init(Region * R)1027 void StructurizeCFG::init(Region *R) {
1028 LLVMContext &Context = R->getEntry()->getContext();
1029
1030 Boolean = Type::getInt1Ty(Context);
1031 BoolTrue = ConstantInt::getTrue(Context);
1032 BoolFalse = ConstantInt::getFalse(Context);
1033 BoolUndef = UndefValue::get(Boolean);
1034
1035 this->DA = nullptr;
1036 }
1037
makeUniformRegion(Region * R,LegacyDivergenceAnalysis * DA)1038 bool StructurizeCFG::makeUniformRegion(Region *R,
1039 LegacyDivergenceAnalysis *DA) {
1040 if (R->isTopLevelRegion())
1041 return false;
1042
1043 this->DA = DA;
1044 // TODO: We could probably be smarter here with how we handle sub-regions.
1045 // We currently rely on the fact that metadata is set by earlier invocations
1046 // of the pass on sub-regions, and that this metadata doesn't get lost --
1047 // but we shouldn't rely on metadata for correctness!
1048 unsigned UniformMDKindID =
1049 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1050
1051 if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
1052 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1053 << '\n');
1054
1055 // Mark all direct child block terminators as having been treated as
1056 // uniform. To account for a possible future in which non-uniform
1057 // sub-regions are treated more cleverly, indirect children are not
1058 // marked as uniform.
1059 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1060 for (RegionNode *E : R->elements()) {
1061 if (E->isSubRegion())
1062 continue;
1063
1064 if (Instruction *Term = E->getEntry()->getTerminator())
1065 Term->setMetadata(UniformMDKindID, MD);
1066 }
1067
1068 return true;
1069 }
1070 return false;
1071 }
1072
1073 /// Run the transformation for each region found
run(Region * R,DominatorTree * DT)1074 bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
1075 if (R->isTopLevelRegion())
1076 return false;
1077
1078 this->DT = DT;
1079
1080 Func = R->getEntry()->getParent();
1081 ParentRegion = R;
1082
1083 orderNodes();
1084 collectInfos();
1085 createFlow();
1086 insertConditions(false);
1087 insertConditions(true);
1088 setPhiValues();
1089 simplifyConditions();
1090 simplifyAffectedPhis();
1091 rebuildSSA();
1092
1093 // Cleanup
1094 Order.clear();
1095 Visited.clear();
1096 DeletedPhis.clear();
1097 AddedPhis.clear();
1098 Predicates.clear();
1099 Conditions.clear();
1100 Loops.clear();
1101 LoopPreds.clear();
1102 LoopConds.clear();
1103
1104 return true;
1105 }
1106
createStructurizeCFGPass(bool SkipUniformRegions)1107 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1108 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1109 }
1110
addRegionIntoQueue(Region & R,std::vector<Region * > & Regions)1111 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1112 Regions.push_back(&R);
1113 for (const auto &E : R)
1114 addRegionIntoQueue(*E, Regions);
1115 }
1116
run(Function & F,FunctionAnalysisManager & AM)1117 PreservedAnalyses StructurizeCFGPass::run(Function &F,
1118 FunctionAnalysisManager &AM) {
1119
1120 bool Changed = false;
1121 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1122 auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1123 std::vector<Region *> Regions;
1124 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1125 while (!Regions.empty()) {
1126 Region *R = Regions.back();
1127 StructurizeCFG SCFG;
1128 SCFG.init(R);
1129 Changed |= SCFG.run(R, DT);
1130 Regions.pop_back();
1131 }
1132 if (!Changed)
1133 return PreservedAnalyses::all();
1134 PreservedAnalyses PA;
1135 PA.preserve<DominatorTreeAnalysis>();
1136 return PA;
1137 }
1138