//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/IslExprBuilder.h"
#include "polly/CodeGen/RuntimeDebugBuilder.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "isl/aff.h"
#include "isl/ast.h"
#include "isl/ast_build.h"
#include "isl/set.h"
#include <deque>

using namespace llvm;
using namespace polly;

static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assume aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

static cl::opt<bool> DebugPrinting(
    "polly-codegen-add-debug-printing",
    cl::desc("Add printf calls that show the values loaded/stored."),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
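
// Both flags are hidden 'opt' options; as a sketch (assuming Polly is loaded
// into 'opt' in the usual way), they would be combined with Polly's code
// generation pass, e.g.:
//   opt ... -polly-codegen -enable-polly-aligned \
//       -polly-codegen-add-debug-printing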

BlockGenerator::BlockGenerator(
    PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT,
    AllocaMapTy &ScalarMap, EscapeUsersAllocaMapTy &EscapeMap,
    ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, BasicBlock *StartBlock)
    : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
      EntryBB(nullptr), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
      GlobalMap(GlobalMap), StartBlock(StartBlock) {}

Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
                                             ValueMapT &BBMap,
                                             LoopToScevMapT &LTS,
                                             Loop *L) const {
  if (!SE.isSCEVable(Old->getType()))
    return nullptr;

  const SCEV *Scev = SE.getSCEVAtScope(Old, L);
  if (!Scev)
    return nullptr;

  if (isa<SCEVCouldNotCompute>(Scev))
    return nullptr;

  const SCEV *NewScev = SCEVLoopAddRecRewriter::rewrite(Scev, LTS, SE);
  ValueMapT VTV;
  VTV.insert(BBMap.begin(), BBMap.end());
  VTV.insert(GlobalMap.begin(), GlobalMap.end());

  Scop &S = *Stmt.getParent();
  const DataLayout &DL = S.getFunction().getParent()->getDataLayout();
  auto IP = Builder.GetInsertPoint();

  assert(IP != Builder.GetInsertBlock()->end() &&
         "Only instructions can be insert points for SCEVExpander");
  Value *Expanded =
      expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV,
                    StartBlock->getSinglePredecessor());

  BBMap[Old] = Expanded;
  return Expanded;
}

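// Order in which getNewValue resolves a value: trivial constants first, then
// the GlobalMap, then the statement-local BBMap, then SCEV-based synthesis,
// and finally values that are constant within the scop (globals, arguments,
// and instructions defined outside the scop).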
Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                                   LoopToScevMapT &LTS, Loop *L) const {
  // Constants that do not reference any named value can always remain
  // unchanged. Handle them early to avoid expensive map lookups. We do not take
  // the fast-path for external constants which are referenced through globals
  // as these may need to be rewritten when distributing code across different
  // LLVM modules.
  if (isa<Constant>(Old) && !isa<GlobalValue>(Old))
    return Old;

  // Inline asm is like a constant to us.
  if (isa<InlineAsm>(Old))
    return Old;

  if (Value *New = GlobalMap.lookup(Old)) {
    if (Value *NewRemapped = GlobalMap.lookup(New))
      New = NewRemapped;
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (Value *New = BBMap.lookup(Old))
    return New;

  if (Value *New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L))
    return New;

  // A scop-constant value defined by a global or a function parameter.
  if (isa<GlobalValue>(Old) || isa<Argument>(Old))
    return Old;

  // A scop-constant value defined by an instruction executed outside the scop.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (!Stmt.getParent()->contains(Inst->getParent()))
      return Old;

  // The scalar dependence is neither available nor SCEV-codegenable.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}

void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we have not investigated how to
  // copy them correctly. At the moment, they just crash code generation as
  // the metadata operands are not copied correctly.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForStmt(Stmt));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

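  // Copies are given a "p_" (polly) prefix, e.g. an original %add becomes
  // %p_add in the generated code.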
  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

Value *
BlockGenerator::generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &MA = Stmt.getArrayAccessFor(Inst);
  return generateLocationAccessed(
      Stmt, getLoopForStmt(Stmt),
      Inst.isNull() ? nullptr : Inst.getPointerOperand(), BBMap, LTS,
      NewAccesses, MA.getId(), MA.getAccessValue()->getType());
}

Value *BlockGenerator::generateLocationAccessed(
    ScopStmt &Stmt, Loop *L, Value *Pointer, ValueMapT &BBMap,
    LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses, __isl_take isl_id *Id,
    Type *ExpectedType) {
  isl_ast_expr *AccessExpr = isl_id_to_ast_expr_get(NewAccesses, Id);

  if (AccessExpr) {
    AccessExpr = isl_ast_expr_address_of(AccessExpr);
    auto Address = ExprBuilder->create(AccessExpr);

    // Cast the address of this memory access to a pointer type that has the
    // same element type as the original access, but uses the address space of
    // the newly generated pointer.
    auto OldPtrTy = ExpectedType->getPointerTo();
    auto NewPtrTy = Address->getType();
    OldPtrTy = PointerType::get(OldPtrTy->getElementType(),
                                NewPtrTy->getPointerAddressSpace());

    if (OldPtrTy != NewPtrTy)
      Address = Builder.CreateBitOrPointerCast(Address, OldPtrTy);
    return Address;
  }
  assert(
      Pointer &&
      "If expression was not generated, must use the original pointer value");
  return getNewValue(Stmt, Pointer, BBMap, LTS, L);
}

Value *
BlockGenerator::getImplicitAddress(MemoryAccess &Access, Loop *L,
                                   LoopToScevMapT &LTS, ValueMapT &BBMap,
                                   __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Access.isLatestArrayKind())
    return generateLocationAccessed(*Access.getStatement(), L, nullptr, BBMap,
                                    LTS, NewAccesses, Access.getId(),
                                    Access.getAccessValue()->getType());

  return getOrCreateAlloca(Access);
}

Loop *BlockGenerator::getLoopForStmt(const ScopStmt &Stmt) const {
  auto *StmtBB = Stmt.getEntryBlock();
  return LI.getLoopFor(StmtBB);
}

Value *BlockGenerator::generateArrayLoad(ScopStmt &Stmt, LoadInst *Load,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load))
    return PreloadLoad;

  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, LTS, NewAccesses);
  Value *ScalarLoad = Builder.CreateAlignedLoad(
      NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");

  if (DebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Load from ", NewPointer,
                                          ": ", ScalarLoad, "\n");

  return ScalarLoad;
}

void BlockGenerator::generateArrayStore(ScopStmt &Stmt, StoreInst *Store,
                                        ValueMapT &BBMap, LoopToScevMapT &LTS,
                                        isl_id_to_ast_expr *NewAccesses) {
  Value *NewPointer =
      generateLocationAccessed(Stmt, Store, BBMap, LTS, NewAccesses);
  Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap, LTS,
                                    getLoopForStmt(Stmt));

  if (DebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Store to  ", NewPointer,
                                          ": ", ValueOperand, "\n");

  Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlignment());
}

bool BlockGenerator::canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst) {
  Loop *L = getLoopForStmt(Stmt);
  return (Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
         canSynthesize(Inst, *Stmt.getParent(), &SE, L);
}

void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                     ValueMapT &BBMap, LoopToScevMapT &LTS,
                                     isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions define the control flow. They are explicitly
  // expressed in the generated AST and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Synthesizable statements will be generated on-demand.
  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateArrayLoad(Stmt, Load, BBMap, LTS, NewAccesses);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (auto *Store = dyn_cast<StoreInst>(Inst)) {
    generateArrayStore(Stmt, Store, BBMap, LTS, NewAccesses);
    return;
  }

  if (auto *PHI = dyn_cast<PHINode>(Inst)) {
    copyPHIInstruction(Stmt, PHI, BBMap, LTS);
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  if (isIgnoredIntrinsic(Inst))
    return;

  copyInstScalar(Stmt, Inst, BBMap, LTS);
}

void BlockGenerator::removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap) {
  auto NewBB = Builder.GetInsertBlock();
  for (auto I = NewBB->rbegin(); I != NewBB->rend(); I++) {
    Instruction *NewInst = &*I;

    if (!isInstructionTriviallyDead(NewInst))
      continue;

    for (auto Pair : BBMap)
      if (Pair.second == NewInst) {
        BBMap.erase(Pair.first);
      }

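    // Erasing invalidates the reverse iterator, so restart the scan from the
    // block's new last instruction afterwards.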
    NewInst->eraseFromParent();
    I = NewBB->rbegin();
  }
}

void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                              isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "Only block statements can be copied by the block generator");

  ValueMapT BBMap;

  BasicBlock *BB = Stmt.getBasicBlock();
  copyBB(Stmt, BB, BBMap, LTS, NewAccesses);
  removeDeadInstructions(BB, BBMap);
}

BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  return CopyBB;
}

BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
                                   isl_id_to_ast_expr *NewAccesses) {
  BasicBlock *CopyBB = splitBB(BB);
  Builder.SetInsertPoint(&CopyBB->front());
  generateScalarLoads(Stmt, LTS, BBMap, NewAccesses);

  copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses);

  // After a basic block was copied, store all scalars that escape this block
  // in their alloca.
  generateScalarStores(Stmt, LTS, BBMap, NewAccesses);
  return CopyBB;
}

void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
                            ValueMapT &BBMap, LoopToScevMapT &LTS,
                            isl_id_to_ast_expr *NewAccesses) {
  EntryBB = &CopyBB->getParent()->getEntryBlock();

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
}

Value *BlockGenerator::getOrCreateAlloca(const MemoryAccess &Access) {
  assert(!Access.isLatestArrayKind() && "Trying to get alloca for array kind");

  return getOrCreateAlloca(Access.getLatestScopArrayInfo());
}

Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
  assert(!Array->isArrayKind() && "Trying to get alloca for array kind");

  auto &Addr = ScalarMap[Array];

  if (Addr) {
    // Allow allocas to be (temporarily) redirected once by adding a new
    // old-alloca-addr to new-addr mapping to GlobalMap. This functionality
    // is used for example by the OpenMP code generation where a first use
    // of a scalar while still in the host code allocates a normal alloca with
    // getOrCreateAlloca. When the values of this scalar are accessed during
    // the generation of the parallel subfunction, these values are copied over
    // to the parallel subfunction and each request for a scalar alloca slot
    // must be forwarded to the temporary in-subfunction slot. This mapping is
    // removed when the subfunction has been generated and again normal host
    // code is generated. Due to the following reasons it is not possible to
    // perform the GlobalMap lookup right after creating the alloca below, but
    // instead we need to check GlobalMap at each call to getOrCreateAlloca:
    //
    //   1) GlobalMap may be changed multiple times (for each parallel loop),
    //   2) The temporary mapping is commonly only known after the initial
    //      alloca has already been generated, and
    //   3) The original alloca value must be restored after leaving the
    //      sub-function.
    if (Value *NewAddr = GlobalMap.lookup(&*Addr))
      return NewAddr;
    return Addr;
  }

  Type *Ty = Array->getElementType();
  Value *ScalarBase = Array->getBasePtr();
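  // Name the alloca after the demoted value: '.phiops' slots hold a PHI's
  // incoming values, '.s2a' (scalar-to-alloca) slots hold demoted scalar
  // definitions.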
  std::string NameExt;
  if (Array->isPHIKind())
    NameExt = ".phiops";
  else
    NameExt = ".s2a";

  Addr = new AllocaInst(Ty, ScalarBase->getName() + NameExt);
  EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
  Addr->insertBefore(&*EntryBB->getFirstInsertionPt());

  return Addr;
}

void BlockGenerator::handleOutsideUsers(const Scop &S, ScopArrayInfo *Array) {
  Instruction *Inst = cast<Instruction>(Array->getBasePtr());

  // If the instruction was copied multiple times we already handled it and
  // can exit early. Otherwise, if there are escape users, we get the alloca
  // for this instruction and put it in the EscapeMap for later finalization.
  if (EscapeMap.count(Inst))
    return;

  EscapeUserVectorTy EscapeUsers;
  for (User *U : Inst->users()) {

    // Non-instruction users will never escape.
    Instruction *UI = dyn_cast<Instruction>(U);
    if (!UI)
      continue;

    if (S.contains(UI))
      continue;

    EscapeUsers.push_back(UI);
  }

  // Exit if no escape uses were found.
  if (EscapeUsers.empty())
    return;

  // Get or create an escape alloca for this instruction.
  auto *ScalarAddr = getOrCreateAlloca(Array);

  // Remember that this instruction has escape uses and the escape alloca.
  EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
}

void BlockGenerator::generateScalarLoads(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isWrite())
      continue;

#ifndef NDEBUG
    auto *StmtDom = Stmt.getDomain();
    auto *AccDom = isl_map_domain(MA->getAccessRelation());
    assert(isl_set_is_subset(StmtDom, AccDom) &&
           "Scalar must be loaded in all statement instances");
    isl_set_free(StmtDom);
    isl_set_free(AccDom);
#endif

    auto *Address =
        getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    BBMap[MA->getAccessValue()] =
        Builder.CreateLoad(Address, Address->getName() + ".reload");
  }
}

void BlockGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  Loop *L = LI.getLoopFor(Stmt.getBasicBlock());

  assert(Stmt.isBlockStmt() &&
         "Region statements need to use the generateScalarStores() function in "
         "the RegionGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

#ifndef NDEBUG
    auto *StmtDom = Stmt.getDomain();
    auto *AccDom = isl_map_domain(MA->getAccessRelation());
    assert(isl_set_is_subset(StmtDom, AccDom) &&
           "Scalar must be stored in all statement instances");
    isl_set_free(StmtDom);
    isl_set_free(AccDom);
#endif

    Value *Val = MA->getAccessValue();
    if (MA->isAnyPHIKind()) {
      assert(MA->getIncoming().size() >= 1 &&
             "Block statements have exactly one exiting block, or multiple but "
             "with the same incoming block and value");
      assert(std::all_of(MA->getIncoming().begin(), MA->getIncoming().end(),
                         [&](std::pair<BasicBlock *, Value *> p) -> bool {
                           return p.first == Stmt.getBasicBlock();
                         }) &&
             "Incoming block must be statement's block");
      Val = MA->getIncoming()[0].second;
    }
    auto Address =
        getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);

    Val = getNewValue(Stmt, Val, BBMap, LTS, L);
    assert((!isa<Instruction>(Val) ||
            DT.dominates(cast<Instruction>(Val)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    Builder.CreateStore(Val, Address);
  }
}

void BlockGenerator::createScalarInitialization(Scop &S) {
  BasicBlock *ExitBB = S.getExit();
  BasicBlock *PreEntryBB = S.getEnteringBlock();

  Builder.SetInsertPoint(&*StartBlock->begin());

  for (auto &Array : S.arrays()) {
    if (Array->getNumberOfDimensions() != 0)
      continue;
    if (Array->isPHIKind()) {
      // For PHI nodes, the only values we need to store are the ones that
      // reach the PHI node from outside the region. In general there should
      // only be one such incoming edge and this edge should enter through
      // 'PreEntryBB'.
      auto PHI = cast<PHINode>(Array->getBasePtr());

      for (auto BI = PHI->block_begin(), BE = PHI->block_end(); BI != BE; BI++)
        if (!S.contains(*BI) && *BI != PreEntryBB)
          llvm_unreachable("Incoming edges from outside the scop should always "
                           "come from PreEntryBB");

      int Idx = PHI->getBasicBlockIndex(PreEntryBB);
      if (Idx < 0)
        continue;

      Value *ScalarValue = PHI->getIncomingValue(Idx);

      Builder.CreateStore(ScalarValue, getOrCreateAlloca(Array));
      continue;
    }

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (Inst && S.contains(Inst))
      continue;

    // PHI nodes that are not marked as such in their SAI object are either
    // exit PHI nodes we model as common scalars but without initialization,
    // or incoming PHI nodes that need to be initialized. Check if the former
    // is the case for Inst and do not create and initialize memory if so.
    if (auto *PHI = dyn_cast_or_null<PHINode>(Inst))
      if (!S.hasSingleExitEdge() && PHI->getBasicBlockIndex(ExitBB) >= 0)
        continue;

    Builder.CreateStore(Array->getBasePtr(), getOrCreateAlloca(Array));
  }
}

void BlockGenerator::createScalarFinalization(Scop &S) {
  // The exit block of the __unoptimized__ region.
  BasicBlock *ExitBB = S.getExitingBlock();
  // The merge block __just after__ the region and the optimized region.
  BasicBlock *MergeBB = S.getExit();

  // The exit block of the __optimized__ region.
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));
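  // MergeBB has exactly two predecessors, the exits of the original and of
  // the optimized region, so the predecessor that is not the original exit
  // must be the optimized one.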

  Builder.SetInsertPoint(OptExitBB->getTerminator());
  for (const auto &EscapeMapping : EscapeMap) {
    // Extract the escaping instruction and the escaping users as well as the
    // alloca the instruction was demoted to.
    Instruction *EscapeInst = EscapeMapping.first;
    const auto &EscapeMappingValue = EscapeMapping.second;
    const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second;
    Value *ScalarAddr = EscapeMappingValue.first;

    // Reload the demoted instruction in the optimized version of the SCoP.
    Value *EscapeInstReload =
        Builder.CreateLoad(ScalarAddr, EscapeInst->getName() + ".final_reload");
    EscapeInstReload =
        Builder.CreateBitOrPointerCast(EscapeInstReload, EscapeInst->getType());

    // Create the merge PHI that merges the optimized and unoptimized version.
    PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2,
                                        EscapeInst->getName() + ".merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());

    // Add the respective values to the merge PHI.
    MergePHI->addIncoming(EscapeInstReload, OptExitBB);
    MergePHI->addIncoming(EscapeInst, ExitBB);

    // Scalar evolution's information about the escaping instruction needs to
    // be revoked so the new merge PHI will be used.
    if (SE.isSCEVable(EscapeInst->getType()))
      SE.forgetValue(EscapeInst);

    // Replace all uses of the demoted instruction with the merge PHI.
    for (Instruction *EUser : EscapeUsers)
      EUser->replaceUsesOfWith(EscapeInst, MergePHI);
  }
}

void BlockGenerator::findOutsideUsers(Scop &S) {
  for (auto &Array : S.arrays()) {

    if (Array->getNumberOfDimensions() != 0)
      continue;

    if (Array->isPHIKind())
      continue;

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (!Inst)
      continue;

    // Scop invariant hoisting moves some of the base pointers out of the scop.
    // We can ignore these, as the invariant load hoisting already registers the
    // relevant outside users.
    if (!S.contains(Inst))
      continue;

    handleOutsideUsers(S, Array);
  }
}

void BlockGenerator::createExitPHINodeMerges(Scop &S) {
  if (S.hasSingleExitEdge())
    return;

  auto *ExitBB = S.getExitingBlock();
  auto *MergeBB = S.getExit();
  auto *AfterMergeBB = MergeBB->getSingleSuccessor();
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());

  for (auto &SAI : S.arrays()) {
    auto *Val = SAI->getBasePtr();

    // Only Value-like scalars need a merge PHI. Exit block PHIs receive either
    // the original PHI's value or the reloaded incoming values from the
    // generated code. An llvm::Value is merged between the original code's
    // value and the generated one.
    if (!SAI->isExitPHIKind())
      continue;

    PHINode *PHI = dyn_cast<PHINode>(Val);
    if (!PHI)
      continue;

    if (PHI->getParent() != AfterMergeBB)
      continue;

    std::string Name = PHI->getName();
    Value *ScalarAddr = getOrCreateAlloca(SAI);
    Value *Reload = Builder.CreateLoad(ScalarAddr, Name + ".ph.final_reload");
    Reload = Builder.CreateBitOrPointerCast(Reload, PHI->getType());
    Value *OriginalValue = PHI->getIncomingValueForBlock(MergeBB);
    assert((!isa<Instruction>(OriginalValue) ||
            cast<Instruction>(OriginalValue)->getParent() != MergeBB) &&
           "Original value must not be one we just generated.");
    auto *MergePHI = PHINode::Create(PHI->getType(), 2, Name + ".ph.merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());
    MergePHI->addIncoming(Reload, OptExitBB);
    MergePHI->addIncoming(OriginalValue, ExitBB);
    int Idx = PHI->getBasicBlockIndex(MergeBB);
    PHI->setIncomingValue(Idx, MergePHI);
  }
}

void BlockGenerator::invalidateScalarEvolution(Scop &S) {
  for (auto &Stmt : S)
    if (Stmt.isCopyStmt())
      continue;
    else if (Stmt.isBlockStmt())
      for (auto &Inst : *Stmt.getBasicBlock())
        SE.forgetValue(&Inst);
    else if (Stmt.isRegionStmt())
      for (auto *BB : Stmt.getRegion()->blocks())
        for (auto &Inst : *BB)
          SE.forgetValue(&Inst);
    else
      llvm_unreachable("Unexpected statement type found");
}

void BlockGenerator::finalizeSCoP(Scop &S) {
  findOutsideUsers(S);
  createScalarInitialization(S);
  createExitPHINodeMerges(S);
  createScalarFinalization(S);
  invalidateScalarEvolution(S);
}

VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), VLTS(VLTS), Schedule(Schedule) {
  assert(Schedule && "No statement schedule provided");
}

Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

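  // Assemble the vector lane by lane; each lane's scalar value is generated
  // under that lane's loop-to-SCEV mapping (VLTS[Lane]).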
729   for (int Lane = 0; Lane < Width; Lane++)
730     Vector = Builder.CreateInsertElement(
731         Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L),
732         Builder.getInt32(Lane));
733 
734   VectorMap[Old] = Vector;
735 
736   return Vector;
737 }
738 
739 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
740   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
741   assert(PointerTy && "PointerType expected");
742 
743   Type *ScalarType = PointerTy->getElementType();
744   VectorType *VectorType = VectorType::get(ScalarType, Width);
745 
746   return PointerType::getUnqual(VectorType);
747 }
748 
749 Value *VectorBlockGenerator::generateStrideOneLoad(
750     ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
751     __isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
752   unsigned VectorWidth = getVectorWidth();
753   auto *Pointer = Load->getPointerOperand();
754   Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
755   unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;
756 
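  // With a stride of -1, lane VectorWidth - 1 accesses the lowest address, so
  // the wide load starts at that lane's address and the result is reversed
  // into lane order below.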
  Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset],
                                               VLTS[Offset], NewAccesses);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

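  // Broadcast the single loaded element into all vector lanes: a
  // shufflevector with an all-zero mask replicates lane 0 of the one-element
  // load VectorWidth times.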
  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

Value *VectorBlockGenerator::generateUnknownStrideLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  int VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[i],
                                                 VLTS[i], NewAccesses);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}

void VectorBlockGenerator::generateLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
    VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
                                                Load->getName() + "_p");
    return;
  }

  if (!VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateArrayLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses);
    return;
  }

  const MemoryAccess &Access = Stmt.getArrayAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForStmt(Stmt));

  assert(isa<CastInst>(Inst) && "Cannot generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForStmt(Stmt);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

void VectorBlockGenerator::copyStore(
    ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &Access = Stmt.getArrayAccessFor(Store);

  auto *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForStmt(Stmt));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
    Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0],
                                                 VLTS[0], NewAccesses);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      Store->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[i],
                                                   VLTS[i], NewAccesses);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar element was extracted, all scalar elements have already
      // been extracted by the code here, so there is no need to check for the
      // existence of each of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
                                    VLTS[VectorLane], NewAccesses);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

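// One loop-to-SCEV mapping exists per vector lane, so the number of mappings
// defines the vector width.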
int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); }

void VectorBlockGenerator::copyInstruction(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions define the control flow. They are explicitly
  // expressed in the generated AST and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (auto *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses);
      return;
    }

    if (auto *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (auto *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
}

void VectorBlockGenerator::generateScalarVectorLoads(
    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isWrite())
      continue;

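    // The scalar read is uniform across all lanes of the vectorized statement
    // instance, so it is loaded once and splat into a vector below.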
    auto *Address = getOrCreateAlloca(*MA);
    Type *VectorPtrType = getVectorPtrTy(Address, 1);
    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
                                             Address->getName() + "_p_vec_p");
    auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload");
    Constant *SplatVector = Constant::getNullValue(
        VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

    Value *VectorVal = Builder.CreateShuffleVector(
        Val, Val, SplatVector, Address->getName() + "_p_splat");
    VectorBlockMap[MA->getAccessValue()] = VectorVal;
  }
}

void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isRead())
      continue;

    llvm_unreachable("Scalar stores not expected in vector loop");
  }
}

void VectorBlockGenerator::copyStmt(
    ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "TODO: Only block statements can be copied by the vector block "
         "generator");

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(&CopyBB->front());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (as it computes <vectorwidth> values at a time) or,
  // if the values are calculated using scalar operations, once in every
  // dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  generateScalarVectorLoads(Stmt, VectorBlockMap);

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);

  verifyNoScalarStores(Stmt);
}

BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
                                             BasicBlock *BBCopy) {

  BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
  BasicBlock *BBCopyIDom = BlockMap.lookup(BBIDom);

  if (BBCopyIDom)
    DT.changeImmediateDominator(BBCopy, BBCopyIDom);

  return BBCopyIDom;
}

// This is to determine whether an llvm::Value (defined in @p BB) is usable when
// leaving a subregion. The straightforward DT.dominates(BB, R->getExitBlock())
// does not work in cases where the exit block has edges from outside the
// region. In that case the llvm::Value would never be usable in the exit
// block. The RegionGenerator however creates a new exit block ('ExitBBCopy')
// for the subregion's exiting edges only. We need to determine whether an
// llvm::Value is usable in there. We do this by checking whether it dominates
// all exiting blocks individually.
static bool isDominatingSubregionExit(const DominatorTree &DT, Region *R,
                                      BasicBlock *BB) {
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    if (!DT.dominates(BB, ExitingBB))
      return false;
  }

  return true;
}

// Find the direct dominator of the subregion's exit block if the subregion
// was simplified.
static BasicBlock *findExitDominator(DominatorTree &DT, Region *R) {
  BasicBlock *Common = nullptr;
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    // First exiting edge.
    if (!Common) {
      Common = ExitingBB;
      continue;
    }

    Common = DT.findNearestCommonDominator(Common, ExitingBB);
  }

  assert(Common && R->contains(Common));
  return Common;
}

void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                               isl_id_to_ast_expr *IdToAstExp) {
  assert(Stmt.isRegionStmt() &&
         "Only region statements can be copied by the region generator");

  // Forget all old mappings.
  BlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();

  // Collection of all values related to this subregion.
  ValueMapT ValueMap;

  // The region represented by the statement.
  Region *R = Stmt.getRegion();

  // Create a dedicated entry for the region where we can reload all demoted
  // inputs.
  BasicBlock *EntryBB = R->getEntry();
  BasicBlock *EntryBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                       &*Builder.GetInsertPoint(), &DT, &LI);
  EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
  Builder.SetInsertPoint(&EntryBBCopy->front());

  ValueMapT &EntryBBMap = RegionMaps[EntryBBCopy];
  generateScalarLoads(Stmt, LTS, EntryBBMap, IdToAstExp);

  for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI)
    if (!R->contains(*PI))
      BlockMap[*PI] = EntryBBCopy;

  // Iterate over all blocks in the region in a breadth-first search.
  std::deque<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 8> SeenBlocks;
  Blocks.push_back(EntryBB);
  SeenBlocks.insert(EntryBB);

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.front();
    Blocks.pop_front();

    // First split the block and update dominance information.
    BasicBlock *BBCopy = splitBB(BB);
    BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy);

    // Get the mapping for this block and initialize it with either the scalar
    // loads from the generated entering block (which dominates all blocks of
    // this subregion) or the maps of the immediate dominator, if part of the
    // subregion. The latter necessarily includes the former.
    ValueMapT *InitBBMap;
    if (BBCopyIDom) {
      assert(RegionMaps.count(BBCopyIDom));
      InitBBMap = &RegionMaps[BBCopyIDom];
    } else
      InitBBMap = &EntryBBMap;
    auto Inserted = RegionMaps.insert(std::make_pair(BBCopy, *InitBBMap));
    ValueMapT &RegionMap = Inserted.first->second;

    // Copy the block with the BlockGenerator.
    Builder.SetInsertPoint(&BBCopy->front());
    copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp);

    // In order to remap PHI nodes we also store basic block mappings.
    BlockMap[BB] = BBCopy;

    // Add values to incomplete PHI nodes waiting for this block to be copied.
    for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB])
      addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, LTS);
    IncompletePHINodeMap[BB].clear();

    // And continue with new successors inside the region.
    for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++)
      if (R->contains(*SI) && SeenBlocks.insert(*SI))
        Blocks.push_back(*SI);

    // Remember value in case it is visible after this subregion.
    if (isDominatingSubregionExit(DT, R, BB))
      ValueMap.insert(RegionMap.begin(), RegionMap.end());
  }

  // Now create a new dedicated region exit block and add it to the region map.
  BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                      &*Builder.GetInsertPoint(), &DT, &LI);
  ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit");
  BlockMap[R->getExit()] = ExitBBCopy;

  BasicBlock *ExitDomBBCopy = BlockMap.lookup(findExitDominator(DT, R));
  assert(ExitDomBBCopy &&
         "Common exit dominator must be within region; at least the entry node "
         "must match");
  DT.changeImmediateDominator(ExitBBCopy, ExitDomBBCopy);

  // As the block generator doesn't handle control flow we need to add the
  // region control flow by hand after all blocks have been copied.
  for (BasicBlock *BB : SeenBlocks) {

    BasicBlock *BBCopy = BlockMap[BB];
    TerminatorInst *TI = BB->getTerminator();
    if (isa<UnreachableInst>(TI)) {
      while (!BBCopy->empty())
        BBCopy->begin()->eraseFromParent();
      new UnreachableInst(BBCopy->getContext(), BBCopy);
      continue;
    }

    Instruction *BICopy = BBCopy->getTerminator();

    ValueMapT &RegionMap = RegionMaps[BBCopy];
    RegionMap.insert(BlockMap.begin(), BlockMap.end());

    Builder.SetInsertPoint(BICopy);
    copyInstScalar(Stmt, TI, RegionMap, LTS);
    BICopy->eraseFromParent();
  }

  // Add counting PHI nodes to all loops in the region that can be used as
  // replacement for SCEVs referring to the old loop.
  for (BasicBlock *BB : SeenBlocks) {
    Loop *L = LI.getLoopFor(BB);
    if (L == nullptr || L->getHeader() != BB || !R->contains(L))
      continue;

    BasicBlock *BBCopy = BlockMap[BB];
    Value *NullVal = Builder.getInt32(0);
    PHINode *LoopPHI =
        PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv");
    Instruction *LoopPHIInc = BinaryOperator::CreateAdd(
        LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc");
    LoopPHI->insertBefore(&BBCopy->front());
    LoopPHIInc->insertBefore(BBCopy->getTerminator());

    for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) {
      if (!R->contains(PredBB))
        continue;
      if (L->contains(PredBB))
        LoopPHI->addIncoming(LoopPHIInc, BlockMap[PredBB]);
      else
        LoopPHI->addIncoming(NullVal, BlockMap[PredBB]);
    }

    for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy)))
      if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0)
        LoopPHI->addIncoming(NullVal, PredBBCopy);

    LTS[L] = SE.getUnknown(LoopPHI);
  }

  // Continue generating code in the exit block.
  Builder.SetInsertPoint(&*ExitBBCopy->getFirstInsertionPt());

  // Write values visible to other statements.
  generateScalarStores(Stmt, LTS, ValueMap, IdToAstExp);
  BlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();
}

PHINode *RegionGenerator::buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS,
                                       ValueMapT &BBMap, Loop *L) {
  ScopStmt *Stmt = MA->getStatement();
  Region *SubR = Stmt->getRegion();
  auto Incoming = MA->getIncoming();

  PollyIRBuilder::InsertPointGuard IPGuard(Builder);
  PHINode *OrigPHI = cast<PHINode>(MA->getAccessInstruction());
  BasicBlock *NewSubregionExit = Builder.GetInsertBlock();

  // This can happen if the subregion is simplified after the ScopStmts
  // have been created; simplification happens as part of CodeGeneration.
  if (OrigPHI->getParent() != SubR->getExit()) {
    BasicBlock *FormerExit = SubR->getExitingBlock();
    if (FormerExit)
      NewSubregionExit = BlockMap.lookup(FormerExit);
  }

  PHINode *NewPHI = PHINode::Create(OrigPHI->getType(), Incoming.size(),
                                    "polly." + OrigPHI->getName(),
                                    NewSubregionExit->getFirstNonPHI());

  // Add the incoming values to the PHI.
  for (auto &Pair : Incoming) {
    BasicBlock *OrigIncomingBlock = Pair.first;
    BasicBlock *NewIncomingBlock = BlockMap.lookup(OrigIncomingBlock);
    Builder.SetInsertPoint(NewIncomingBlock->getTerminator());
    assert(RegionMaps.count(NewIncomingBlock));
    ValueMapT *LocalBBMap = &RegionMaps[NewIncomingBlock];

    Value *OrigIncomingValue = Pair.second;
    Value *NewIncomingValue =
        getNewValue(*Stmt, OrigIncomingValue, *LocalBBMap, LTS, L);
    NewPHI->addIncoming(NewIncomingValue, NewIncomingBlock);
  }

  return NewPHI;
}

Value *RegionGenerator::getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS,
                                      ValueMapT &BBMap) {
  ScopStmt *Stmt = MA->getStatement();

  // TODO: Add some test cases that ensure this is really the right choice.
  Loop *L = LI.getLoopFor(Stmt->getRegion()->getExit());

  if (MA->isAnyPHIKind()) {
    auto Incoming = MA->getIncoming();
    assert(!Incoming.empty() &&
           "PHI WRITEs must originate from at least one incoming block");

    // If there is only one incoming value, we do not need to create a PHI.
    if (Incoming.size() == 1) {
      Value *OldVal = Incoming[0].second;
      return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
    }

    return buildExitPHI(MA, LTS, BBMap, L);
  }

  // MemoryKind::Value accesses leaving the subregion must dominate the exit
  // block; just pass the copied value.
  Value *OldVal = MA->getAccessValue();
  return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
}

void RegionGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.getRegion() &&
         "Block statements need to use the generateScalarStores() "
         "function in the BlockGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    Value *NewVal = getExitScalar(MA, LTS, BBMap);
    Value *Address =
        getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
    assert((!isa<Instruction>(NewVal) ||
            DT.dominates(cast<Instruction>(NewVal)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    Builder.CreateStore(NewVal, Address);
  }
}

void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, PHINode *PHI,
                                      PHINode *PHICopy, BasicBlock *IncomingBB,
                                      LoopToScevMapT &LTS) {
  Region *StmtR = Stmt.getRegion();

  // If the incoming block was not yet copied, mark this PHI as incomplete.
  // Once the block is copied, the incoming value will be added.
  BasicBlock *BBCopy = BlockMap[IncomingBB];
  if (!BBCopy) {
    assert(StmtR->contains(IncomingBB) &&
           "Bad incoming block for PHI in non-affine region");
    IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy));
    return;
  }

  assert(RegionMaps.count(BBCopy) && "Incoming PHI block did not have a BBMap");
  ValueMapT &BBCopyMap = RegionMaps[BBCopy];

  Value *OpCopy = nullptr;

  if (StmtR->contains(IncomingBB)) {
    Value *Op = PHI->getIncomingValueForBlock(IncomingBB);

    // If the current insert block is different from the PHI's incoming block
    // change it, otherwise do not.
    auto IP = Builder.GetInsertPoint();
    if (IP->getParent() != BBCopy)
      Builder.SetInsertPoint(BBCopy->getTerminator());
    OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForStmt(Stmt));
    if (IP->getParent() != BBCopy)
      Builder.SetInsertPoint(&*IP);
  } else {
    // All edges from outside the non-affine region become a single edge
    // in the new copy of the non-affine region. Make sure to only add the
    // corresponding edge the first time we encounter a basic block from
    // outside the non-affine region.
    if (PHICopy->getBasicBlockIndex(BBCopy) >= 0)
      return;

    // Get the reloaded value.
    OpCopy = getNewValue(Stmt, PHI, BBCopyMap, LTS, getLoopForStmt(Stmt));
  }

  assert(OpCopy && "Incoming PHI value was not copied properly");
  assert(BBCopy && "Incoming PHI block was not copied properly");
  PHICopy->addIncoming(OpCopy, BBCopy);
}

void RegionGenerator::copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI,
                                         ValueMapT &BBMap,
                                         LoopToScevMapT &LTS) {
  unsigned NumIncoming = PHI->getNumIncomingValues();
  PHINode *PHICopy =
      Builder.CreatePHI(PHI->getType(), NumIncoming, "polly." + PHI->getName());
  PHICopy->moveBefore(PHICopy->getParent()->getFirstNonPHI());
  BBMap[PHI] = PHICopy;

  for (BasicBlock *IncomingBB : PHI->blocks())
    addOperandToPHI(Stmt, PHI, PHICopy, IncomingBB, LTS);
}