1 //===- IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the IslNodeBuilder, a class to translate an isl AST into
10 // a LLVM-IR AST.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "polly/CodeGen/IslNodeBuilder.h"
15 #include "polly/CodeGen/BlockGenerators.h"
16 #include "polly/CodeGen/CodeGeneration.h"
17 #include "polly/CodeGen/IslAst.h"
18 #include "polly/CodeGen/IslExprBuilder.h"
19 #include "polly/CodeGen/LoopGeneratorsGOMP.h"
20 #include "polly/CodeGen/LoopGeneratorsKMP.h"
21 #include "polly/CodeGen/RuntimeDebugBuilder.h"
22 #include "polly/Options.h"
23 #include "polly/ScopInfo.h"
24 #include "polly/Support/ISLTools.h"
25 #include "polly/Support/SCEVValidator.h"
26 #include "polly/Support/ScopHelper.h"
27 #include "llvm/ADT/APInt.h"
28 #include "llvm/ADT/PostOrderIterator.h"
29 #include "llvm/ADT/SetVector.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/LoopInfo.h"
33 #include "llvm/Analysis/RegionInfo.h"
34 #include "llvm/Analysis/ScalarEvolution.h"
35 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
36 #include "llvm/IR/BasicBlock.h"
37 #include "llvm/IR/Constant.h"
38 #include "llvm/IR/Constants.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/IR/DerivedTypes.h"
41 #include "llvm/IR/Dominators.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/InstrTypes.h"
44 #include "llvm/IR/Instruction.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/IR/Value.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/CommandLine.h"
50 #include "llvm/Support/ErrorHandling.h"
51 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
52 #include "isl/aff.h"
53 #include "isl/aff_type.h"
54 #include "isl/ast.h"
55 #include "isl/ast_build.h"
56 #include "isl/isl-noexceptions.h"
57 #include "isl/map.h"
58 #include "isl/set.h"
59 #include "isl/union_map.h"
60 #include "isl/union_set.h"
61 #include "isl/val.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstring>
66 #include <string>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 using namespace polly;
72 
73 #define DEBUG_TYPE "polly-codegen"
74 
75 STATISTIC(VersionedScops, "Number of SCoPs that required versioning.");
76 
77 STATISTIC(SequentialLoops, "Number of generated sequential for-loops");
78 STATISTIC(ParallelLoops, "Number of generated parallel for-loops");
79 STATISTIC(VectorLoops, "Number of generated vector for-loops");
80 STATISTIC(IfConditions, "Number of generated if-conditions");
81 
82 /// OpenMP backend options
83 enum class OpenMPBackend { GNU, LLVM };
84 
85 static cl::opt<bool> PollyGenerateRTCPrint(
86     "polly-codegen-emit-rtc-print",
87     cl::desc("Emit code that prints the runtime check result dynamically."),
88     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
89 
90 // If this option is set we always use the isl AST generator to regenerate
91 // memory accesses. Without this option set we regenerate expressions using the
92 // original SCEV expressions and only generate new expressions in case the
93 // access relation has been changed and consequently must be regenerated.
94 static cl::opt<bool> PollyGenerateExpressions(
95     "polly-codegen-generate-expressions",
96     cl::desc("Generate AST expressions for unmodified and modified accesses"),
97     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
98 
99 static cl::opt<int> PollyTargetFirstLevelCacheLineSize(
100     "polly-target-first-level-cache-line-size",
101     cl::desc("The size of the first level cache line size specified in bytes."),
102     cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory));
103 
104 static cl::opt<OpenMPBackend> PollyOmpBackend(
105     "polly-omp-backend", cl::desc("Choose the OpenMP library to use:"),
106     cl::values(clEnumValN(OpenMPBackend::GNU, "GNU", "GNU OpenMP"),
107                clEnumValN(OpenMPBackend::LLVM, "LLVM", "LLVM OpenMP")),
108     cl::Hidden, cl::init(OpenMPBackend::GNU), cl::cat(PollyCategory));
109 
110 isl::ast_expr IslNodeBuilder::getUpperBound(isl::ast_node For,
111                                             ICmpInst::Predicate &Predicate) {
112   isl::ast_expr Cond = For.for_get_cond();
113   isl::ast_expr Iterator = For.for_get_iterator();
114   assert(isl_ast_expr_get_type(Cond.get()) == isl_ast_expr_op &&
115          "conditional expression is not an atomic upper bound");
116 
117   isl_ast_op_type OpType = isl_ast_expr_get_op_type(Cond.get());
118 
119   switch (OpType) {
120   case isl_ast_op_le:
121     Predicate = ICmpInst::ICMP_SLE;
122     break;
123   case isl_ast_op_lt:
124     Predicate = ICmpInst::ICMP_SLT;
125     break;
126   default:
127     llvm_unreachable("Unexpected comparison type in loop condition");
128   }
129 
130   isl::ast_expr Arg0 = Cond.get_op_arg(0);
131 
132   assert(isl_ast_expr_get_type(Arg0.get()) == isl_ast_expr_id &&
133          "conditional expression is not an atomic upper bound");
134 
135   isl::id UBID = Arg0.get_id();
136 
137   assert(isl_ast_expr_get_type(Iterator.get()) == isl_ast_expr_id &&
138          "Could not get the iterator");
139 
140   isl::id IteratorID = Iterator.get_id();
141 
142   assert(UBID.get() == IteratorID.get() &&
143          "conditional expression is not an atomic upper bound");
144 
145   return Cond.get_op_arg(1);
146 }
147 
148 /// Return true if a return value of Predicate is true for the value represented
149 /// by passed isl_ast_expr_int.
150 static bool checkIslAstExprInt(__isl_take isl_ast_expr *Expr,
151                                isl_bool (*Predicate)(__isl_keep isl_val *)) {
152   if (isl_ast_expr_get_type(Expr) != isl_ast_expr_int) {
153     isl_ast_expr_free(Expr);
154     return false;
155   }
156   auto ExprVal = isl_ast_expr_get_val(Expr);
157   isl_ast_expr_free(Expr);
158   if (Predicate(ExprVal) != isl_bool_true) {
159     isl_val_free(ExprVal);
160     return false;
161   }
162   isl_val_free(ExprVal);
163   return true;
164 }
165 
166 int IslNodeBuilder::getNumberOfIterations(isl::ast_node For) {
167   assert(isl_ast_node_get_type(For.get()) == isl_ast_node_for);
168   isl::ast_node Body = For.for_get_body();
169 
170   // First, check if we can actually handle this code.
171   switch (isl_ast_node_get_type(Body.get())) {
172   case isl_ast_node_user:
173     break;
174   case isl_ast_node_block: {
175     isl::ast_node_list List = Body.block_get_children();
176     for (isl::ast_node Node : List) {
177       isl_ast_node_type NodeType = isl_ast_node_get_type(Node.get());
178       if (NodeType != isl_ast_node_user)
179         return -1;
180     }
181     break;
182   }
183   default:
184     return -1;
185   }
186 
187   isl::ast_expr Init = For.for_get_init();
188   if (!checkIslAstExprInt(Init.release(), isl_val_is_zero))
189     return -1;
190   isl::ast_expr Inc = For.for_get_inc();
191   if (!checkIslAstExprInt(Inc.release(), isl_val_is_one))
192     return -1;
193   CmpInst::Predicate Predicate;
194   isl::ast_expr UB = getUpperBound(For, Predicate);
195   if (isl_ast_expr_get_type(UB.get()) != isl_ast_expr_int)
196     return -1;
197   isl::val UpVal = UB.get_val();
198   int NumberIterations = UpVal.get_num_si();
199   if (NumberIterations < 0)
200     return -1;
201   if (Predicate == CmpInst::ICMP_SLT)
202     return NumberIterations;
203   else
204     return NumberIterations + 1;
205 }
206 
207 /// Extract the values and SCEVs needed to generate code for a block.
208 static int findReferencesInBlock(struct SubtreeReferences &References,
209                                  const ScopStmt *Stmt, BasicBlock *BB) {
210   for (Instruction &Inst : *BB) {
211     // Include invariant loads
212     if (isa<LoadInst>(Inst))
213       if (Value *InvariantLoad = References.GlobalMap.lookup(&Inst))
214         References.Values.insert(InvariantLoad);
215 
216     for (Value *SrcVal : Inst.operands()) {
217       auto *Scope = References.LI.getLoopFor(BB);
218       if (canSynthesize(SrcVal, References.S, &References.SE, Scope)) {
219         References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope));
220         continue;
221       } else if (Value *NewVal = References.GlobalMap.lookup(SrcVal))
222         References.Values.insert(NewVal);
223     }
224   }
225   return 0;
226 }
227 
228 void addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
229                            bool CreateScalarRefs) {
230   auto &References = *static_cast<struct SubtreeReferences *>(UserPtr);
231 
232   if (Stmt->isBlockStmt())
233     findReferencesInBlock(References, Stmt, Stmt->getBasicBlock());
234   else {
235     assert(Stmt->isRegionStmt() &&
236            "Stmt was neither block nor region statement");
237     for (BasicBlock *BB : Stmt->getRegion()->blocks())
238       findReferencesInBlock(References, Stmt, BB);
239   }
240 
241   for (auto &Access : *Stmt) {
242     if (References.ParamSpace) {
243       isl::space ParamSpace = Access->getLatestAccessRelation().get_space();
244       (*References.ParamSpace) =
245           References.ParamSpace->align_params(ParamSpace);
246     }
247 
248     if (Access->isLatestArrayKind()) {
249       auto *BasePtr = Access->getLatestScopArrayInfo()->getBasePtr();
250       if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr))
251         if (Stmt->getParent()->contains(OpInst))
252           continue;
253 
254       References.Values.insert(BasePtr);
255       continue;
256     }
257 
258     if (CreateScalarRefs)
259       References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access));
260   }
261 }
262 
263 /// Extract the out-of-scop values and SCEVs referenced from a set describing
264 /// a ScopStmt.
265 ///
266 /// This includes the SCEVUnknowns referenced by the SCEVs used in the
267 /// statement and the base pointers of the memory accesses. For scalar
268 /// statements we force the generation of alloca memory locations and list
269 /// these locations in the set of out-of-scop values as well.
270 ///
271 /// @param Set     A set which references the ScopStmt we are interested in.
272 /// @param UserPtr A void pointer that can be casted to a SubtreeReferences
273 ///                structure.
274 static void addReferencesFromStmtSet(isl::set Set,
275                                      struct SubtreeReferences *UserPtr) {
276   isl::id Id = Set.get_tuple_id();
277   auto *Stmt = static_cast<const ScopStmt *>(Id.get_user());
278   return addReferencesFromStmt(Stmt, UserPtr);
279 }
280 
281 /// Extract the out-of-scop values and SCEVs referenced from a union set
282 /// referencing multiple ScopStmts.
283 ///
284 /// This includes the SCEVUnknowns referenced by the SCEVs used in the
285 /// statement and the base pointers of the memory accesses. For scalar
286 /// statements we force the generation of alloca memory locations and list
287 /// these locations in the set of out-of-scop values as well.
288 ///
289 /// @param USet       A union set referencing the ScopStmts we are interested
290 ///                   in.
291 /// @param References The SubtreeReferences data structure through which
292 ///                   results are returned and further information is
293 ///                   provided.
294 static void
295 addReferencesFromStmtUnionSet(isl::union_set USet,
296                               struct SubtreeReferences &References) {
297 
298   for (isl::set Set : USet.get_set_list())
299     addReferencesFromStmtSet(Set, &References);
300 }
301 
302 __isl_give isl_union_map *
303 IslNodeBuilder::getScheduleForAstNode(__isl_keep isl_ast_node *For) {
304   return IslAstInfo::getSchedule(For);
305 }
306 
307 void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For,
308                                             SetVector<Value *> &Values,
309                                             SetVector<const Loop *> &Loops) {
310   SetVector<const SCEV *> SCEVs;
311   struct SubtreeReferences References = {
312       LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr};
313 
314   for (const auto &I : IDToValue)
315     Values.insert(I.second);
316 
317   // NOTE: this is populated in IslNodeBuilder::addParameters
318   for (const auto &I : OutsideLoopIterations)
319     Values.insert(cast<SCEVUnknown>(I.second)->getValue());
320 
321   isl::union_set Schedule =
322       isl::manage(isl_union_map_domain(getScheduleForAstNode(For)));
323   addReferencesFromStmtUnionSet(Schedule, References);
324 
325   for (const SCEV *Expr : SCEVs) {
326     findValues(Expr, SE, Values);
327     findLoops(Expr, Loops);
328   }
329 
330   Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); });
331 
332   /// Note: Code generation of induction variables of loops outside Scops
333   ///
334   /// Remove loops that contain the scop or that are part of the scop, as they
335   /// are considered local. This leaves only loops that are before the scop, but
336   /// do not contain the scop itself.
337   /// We ignore loops perfectly contained in the Scop because these are already
338   /// generated at `IslNodeBuilder::addParameters`. These `Loops` are loops
339   /// whose induction variables are referred to by the Scop, but the Scop is not
340   /// fully contained in these Loops. Since there can be many of these,
341   /// we choose to codegen these on-demand.
342   /// @see IslNodeBuilder::materializeNonScopLoopInductionVariable.
343   Loops.remove_if([this](const Loop *L) {
344     return S.contains(L) || L->contains(S.getEntry());
345   });
346 
347   // Contains Values that may need to be replaced with other values
348   // due to replacements from the ValueMap. We should make sure
349   // that we return correctly remapped values.
350   // NOTE: this code path is tested by:
351   //     1.  test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
352   //     2.  test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
353   SetVector<Value *> ReplacedValues;
354   for (Value *V : Values) {
355     ReplacedValues.insert(getLatestValue(V));
356   }
357   Values = ReplacedValues;
358 }
359 
360 void IslNodeBuilder::updateValues(ValueMapT &NewValues) {
361   SmallPtrSet<Value *, 5> Inserted;
362 
363   for (const auto &I : IDToValue) {
364     IDToValue[I.first] = NewValues[I.second];
365     Inserted.insert(I.second);
366   }
367 
368   for (const auto &I : NewValues) {
369     if (Inserted.count(I.first))
370       continue;
371 
372     ValueMap[I.first] = I.second;
373   }
374 }
375 
376 Value *IslNodeBuilder::getLatestValue(Value *Original) const {
377   auto It = ValueMap.find(Original);
378   if (It == ValueMap.end())
379     return Original;
380   return It->second;
381 }
382 
383 void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
384                                       std::vector<Value *> &IVS,
385                                       __isl_take isl_id *IteratorID,
386                                       __isl_take isl_union_map *Schedule) {
387   isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
388   isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
389   isl_id *Id = isl_ast_expr_get_id(StmtExpr);
390   isl_ast_expr_free(StmtExpr);
391   ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id);
392   std::vector<LoopToScevMapT> VLTS(IVS.size());
393 
394   isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain().release());
395   Schedule = isl_union_map_intersect_domain(Schedule, Domain);
396   isl_map *S = isl_map_from_union_map(Schedule);
397 
398   auto *NewAccesses = createNewAccesses(Stmt, User);
399   createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID);
400   VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses);
401   isl_id_to_ast_expr_free(NewAccesses);
402   isl_map_free(S);
403   isl_id_free(Id);
404   isl_ast_node_free(User);
405 }
406 
407 void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
408   auto *Id = isl_ast_node_mark_get_id(Node);
409   auto Child = isl_ast_node_mark_get_node(Node);
410   isl_ast_node_free(Node);
411   // If a child node of a 'SIMD mark' is a loop that has a single iteration,
412   // it will be optimized away and we should skip it.
413   if (strcmp(isl_id_get_name(Id), "SIMD") == 0 &&
414       isl_ast_node_get_type(Child) == isl_ast_node_for) {
415     bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
416     int VectorWidth = getNumberOfIterations(isl::manage_copy(Child));
417     if (Vector && 1 < VectorWidth && VectorWidth <= 16)
418       createForVector(Child, VectorWidth);
419     else
420       createForSequential(isl::manage(Child), true);
421     isl_id_free(Id);
422     return;
423   }
424   if (strcmp(isl_id_get_name(Id), "Inter iteration alias-free") == 0) {
425     auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id));
426     Annotator.addInterIterationAliasFreeBasePtr(BasePtr);
427   }
428   create(Child);
429   isl_id_free(Id);
430 }
431 
432 void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
433                                      int VectorWidth) {
434   isl_ast_node *Body = isl_ast_node_for_get_body(For);
435   isl_ast_expr *Init = isl_ast_node_for_get_init(For);
436   isl_ast_expr *Inc = isl_ast_node_for_get_inc(For);
437   isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For);
438   isl_id *IteratorID = isl_ast_expr_get_id(Iterator);
439 
440   Value *ValueLB = ExprBuilder.create(Init);
441   Value *ValueInc = ExprBuilder.create(Inc);
442 
443   Type *MaxType = ExprBuilder.getType(Iterator);
444   MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
445   MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
446 
447   if (MaxType != ValueLB->getType())
448     ValueLB = Builder.CreateSExt(ValueLB, MaxType);
449   if (MaxType != ValueInc->getType())
450     ValueInc = Builder.CreateSExt(ValueInc, MaxType);
451 
452   std::vector<Value *> IVS(VectorWidth);
453   IVS[0] = ValueLB;
454 
455   for (int i = 1; i < VectorWidth; i++)
456     IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv");
457 
458   isl_union_map *Schedule = getScheduleForAstNode(For);
459   assert(Schedule && "For statement annotation does not contain its schedule");
460 
461   IDToValue[IteratorID] = ValueLB;
462 
463   switch (isl_ast_node_get_type(Body)) {
464   case isl_ast_node_user:
465     createUserVector(Body, IVS, isl_id_copy(IteratorID),
466                      isl_union_map_copy(Schedule));
467     break;
468   case isl_ast_node_block: {
469     isl_ast_node_list *List = isl_ast_node_block_get_children(Body);
470 
471     for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i)
472       createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS,
473                        isl_id_copy(IteratorID), isl_union_map_copy(Schedule));
474 
475     isl_ast_node_free(Body);
476     isl_ast_node_list_free(List);
477     break;
478   }
479   default:
480     isl_ast_node_dump(Body);
481     llvm_unreachable("Unhandled isl_ast_node in vectorizer");
482   }
483 
484   IDToValue.erase(IDToValue.find(IteratorID));
485   isl_id_free(IteratorID);
486   isl_union_map_free(Schedule);
487 
488   isl_ast_node_free(For);
489   isl_ast_expr_free(Iterator);
490 
491   VectorLoops++;
492 }
493 
494 /// Restore the initial ordering of dimensions of the band node
495 ///
496 /// In case the band node represents all the dimensions of the iteration
497 /// domain, recreate the band node to restore the initial ordering of the
498 /// dimensions.
499 ///
500 /// @param Node The band node to be modified.
501 /// @return The modified schedule node.
502 static bool IsLoopVectorizerDisabled(isl::ast_node Node) {
503   assert(isl_ast_node_get_type(Node.get()) == isl_ast_node_for);
504   auto Body = Node.for_get_body();
505   if (isl_ast_node_get_type(Body.get()) != isl_ast_node_mark)
506     return false;
507   auto Id = Body.mark_get_id();
508   if (strcmp(Id.get_name().c_str(), "Loop Vectorizer Disabled") == 0)
509     return true;
510   return false;
511 }
512 
513 void IslNodeBuilder::createForSequential(isl::ast_node For, bool MarkParallel) {
514   Value *ValueLB, *ValueUB, *ValueInc;
515   Type *MaxType;
516   BasicBlock *ExitBlock;
517   Value *IV;
518   CmpInst::Predicate Predicate;
519 
520   bool LoopVectorizerDisabled = IsLoopVectorizerDisabled(For);
521 
522   isl::ast_node Body = For.for_get_body();
523 
524   // isl_ast_node_for_is_degenerate(For)
525   //
526   // TODO: For degenerated loops we could generate a plain assignment.
527   //       However, for now we just reuse the logic for normal loops, which will
528   //       create a loop with a single iteration.
529 
530   isl::ast_expr Init = For.for_get_init();
531   isl::ast_expr Inc = For.for_get_inc();
532   isl::ast_expr Iterator = For.for_get_iterator();
533   isl::id IteratorID = Iterator.get_id();
534   isl::ast_expr UB = getUpperBound(For, Predicate);
535 
536   ValueLB = ExprBuilder.create(Init.release());
537   ValueUB = ExprBuilder.create(UB.release());
538   ValueInc = ExprBuilder.create(Inc.release());
539 
540   MaxType = ExprBuilder.getType(Iterator.get());
541   MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
542   MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
543   MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
544 
545   if (MaxType != ValueLB->getType())
546     ValueLB = Builder.CreateSExt(ValueLB, MaxType);
547   if (MaxType != ValueUB->getType())
548     ValueUB = Builder.CreateSExt(ValueUB, MaxType);
549   if (MaxType != ValueInc->getType())
550     ValueInc = Builder.CreateSExt(ValueInc, MaxType);
551 
552   // If we can show that LB <Predicate> UB holds at least once, we can
553   // omit the GuardBB in front of the loop.
554   bool UseGuardBB =
555       !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB));
556   IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock,
557                   Predicate, &Annotator, MarkParallel, UseGuardBB,
558                   LoopVectorizerDisabled);
559   IDToValue[IteratorID.get()] = IV;
560 
561   create(Body.release());
562 
563   Annotator.popLoop(MarkParallel);
564 
565   IDToValue.erase(IDToValue.find(IteratorID.get()));
566 
567   Builder.SetInsertPoint(&ExitBlock->front());
568 
569   SequentialLoops++;
570 }
571 
572 /// Remove the BBs contained in a (sub)function from the dominator tree.
573 ///
574 /// This function removes the basic blocks that are part of a subfunction from
575 /// the dominator tree. Specifically, when generating code it may happen that at
576 /// some point the code generation continues in a new sub-function (e.g., when
577 /// generating OpenMP code). The basic blocks that are created in this
578 /// sub-function are then still part of the dominator tree of the original
579 /// function, such that the dominator tree reaches over function boundaries.
580 /// This is not only incorrect, but also causes crashes. This function now
581 /// removes from the dominator tree all basic blocks that are dominated (and
582 /// consequently reachable) from the entry block of this (sub)function.
583 ///
584 /// FIXME: A LLVM (function or region) pass should not touch anything outside of
585 /// the function/region it runs on. Hence, the pure need for this function shows
586 /// that we do not comply to this rule. At the moment, this does not cause any
587 /// issues, but we should be aware that such issues may appear. Unfortunately
588 /// the current LLVM pass infrastructure does not allow to make Polly a module
589 /// or call-graph pass to solve this issue, as such a pass would not have access
590 /// to the per-function analyses passes needed by Polly. A future pass manager
591 /// infrastructure is supposed to enable such kind of access possibly allowing
592 /// us to create a cleaner solution here.
593 ///
594 /// FIXME: Instead of adding the dominance information and then dropping it
595 /// later on, we should try to just not add it in the first place. This requires
596 /// some careful testing to make sure this does not break in interaction with
597 /// the SCEVBuilder and SplitBlock which may rely on the dominator tree or
598 /// which may try to update it.
599 ///
600 /// @param F The function which contains the BBs to removed.
601 /// @param DT The dominator tree from which to remove the BBs.
602 static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) {
603   DomTreeNode *N = DT.getNode(&F->getEntryBlock());
604   std::vector<BasicBlock *> Nodes;
605 
606   // We can only remove an element from the dominator tree, if all its children
607   // have been removed. To ensure this we obtain the list of nodes to remove
608   // using a post-order tree traversal.
609   for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I)
610     Nodes.push_back(I->getBlock());
611 
612   for (BasicBlock *BB : Nodes)
613     DT.eraseNode(BB);
614 }
615 
616 void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
617   isl_ast_node *Body;
618   isl_ast_expr *Init, *Inc, *Iterator, *UB;
619   isl_id *IteratorID;
620   Value *ValueLB, *ValueUB, *ValueInc;
621   Type *MaxType;
622   Value *IV;
623   CmpInst::Predicate Predicate;
624 
625   // The preamble of parallel code interacts different than normal code with
626   // e.g., scalar initialization. Therefore, we ensure the parallel code is
627   // separated from the last basic block.
628   BasicBlock *ParBB = SplitBlock(Builder.GetInsertBlock(),
629                                  &*Builder.GetInsertPoint(), &DT, &LI);
630   ParBB->setName("polly.parallel.for");
631   Builder.SetInsertPoint(&ParBB->front());
632 
633   Body = isl_ast_node_for_get_body(For);
634   Init = isl_ast_node_for_get_init(For);
635   Inc = isl_ast_node_for_get_inc(For);
636   Iterator = isl_ast_node_for_get_iterator(For);
637   IteratorID = isl_ast_expr_get_id(Iterator);
638   UB = getUpperBound(isl::manage_copy(For), Predicate).release();
639 
640   ValueLB = ExprBuilder.create(Init);
641   ValueUB = ExprBuilder.create(UB);
642   ValueInc = ExprBuilder.create(Inc);
643 
644   // OpenMP always uses SLE. In case the isl generated AST uses a SLT
645   // expression, we need to adjust the loop bound by one.
646   if (Predicate == CmpInst::ICMP_SLT)
647     ValueUB = Builder.CreateAdd(
648         ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType()));
649 
650   MaxType = ExprBuilder.getType(Iterator);
651   MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
652   MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
653   MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
654 
655   if (MaxType != ValueLB->getType())
656     ValueLB = Builder.CreateSExt(ValueLB, MaxType);
657   if (MaxType != ValueUB->getType())
658     ValueUB = Builder.CreateSExt(ValueUB, MaxType);
659   if (MaxType != ValueInc->getType())
660     ValueInc = Builder.CreateSExt(ValueInc, MaxType);
661 
662   BasicBlock::iterator LoopBody;
663 
664   SetVector<Value *> SubtreeValues;
665   SetVector<const Loop *> Loops;
666 
667   getReferencesInSubtree(For, SubtreeValues, Loops);
668 
669   // Create for all loops we depend on values that contain the current loop
670   // iteration. These values are necessary to generate code for SCEVs that
671   // depend on such loops. As a result we need to pass them to the subfunction.
672   // See [Code generation of induction variables of loops outside Scops]
673   for (const Loop *L : Loops) {
674     Value *LoopInductionVar = materializeNonScopLoopInductionVariable(L);
675     SubtreeValues.insert(LoopInductionVar);
676   }
677 
678   ValueMapT NewValues;
679 
680   std::unique_ptr<ParallelLoopGenerator> ParallelLoopGenPtr;
681 
682   switch (PollyOmpBackend) {
683   case OpenMPBackend::GNU:
684     ParallelLoopGenPtr.reset(
685         new ParallelLoopGeneratorGOMP(Builder, LI, DT, DL));
686     break;
687   case OpenMPBackend::LLVM:
688     ParallelLoopGenPtr.reset(new ParallelLoopGeneratorKMP(Builder, LI, DT, DL));
689     break;
690   }
691 
692   IV = ParallelLoopGenPtr->createParallelLoop(
693       ValueLB, ValueUB, ValueInc, SubtreeValues, NewValues, &LoopBody);
694   BasicBlock::iterator AfterLoop = Builder.GetInsertPoint();
695   Builder.SetInsertPoint(&*LoopBody);
696 
697   // Remember the parallel subfunction
698   ParallelSubfunctions.push_back(LoopBody->getFunction());
699 
700   // Save the current values.
701   auto ValueMapCopy = ValueMap;
702   IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue;
703 
704   updateValues(NewValues);
705   IDToValue[IteratorID] = IV;
706 
707   ValueMapT NewValuesReverse;
708 
709   for (auto P : NewValues)
710     NewValuesReverse[P.second] = P.first;
711 
712   Annotator.addAlternativeAliasBases(NewValuesReverse);
713 
714   create(Body);
715 
716   Annotator.resetAlternativeAliasBases();
717   // Restore the original values.
718   ValueMap = ValueMapCopy;
719   IDToValue = IDToValueCopy;
720 
721   Builder.SetInsertPoint(&*AfterLoop);
722   removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT);
723 
724   for (const Loop *L : Loops)
725     OutsideLoopIterations.erase(L);
726 
727   isl_ast_node_free(For);
728   isl_ast_expr_free(Iterator);
729   isl_id_free(IteratorID);
730 
731   ParallelLoops++;
732 }
733 
734 /// Return whether any of @p Node's statements contain partial accesses.
735 ///
736 /// Partial accesses are not supported by Polly's vector code generator.
737 static bool hasPartialAccesses(__isl_take isl_ast_node *Node) {
738   return isl_ast_node_foreach_descendant_top_down(
739              Node,
740              [](isl_ast_node *Node, void *User) -> isl_bool {
741                if (isl_ast_node_get_type(Node) != isl_ast_node_user)
742                  return isl_bool_true;
743 
744                isl::ast_expr Expr =
745                    isl::manage(isl_ast_node_user_get_expr(Node));
746                isl::ast_expr StmtExpr = Expr.get_op_arg(0);
747                isl::id Id = StmtExpr.get_id();
748 
749                ScopStmt *Stmt =
750                    static_cast<ScopStmt *>(isl_id_get_user(Id.get()));
751                isl::set StmtDom = Stmt->getDomain();
752                for (auto *MA : *Stmt) {
753                  if (MA->isLatestPartialAccess())
754                    return isl_bool_error;
755                }
756                return isl_bool_true;
757              },
758              nullptr) == isl_stat_error;
759 }
760 
761 void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
762   bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
763 
764   if (Vector && IslAstInfo::isInnermostParallel(For) &&
765       !IslAstInfo::isReductionParallel(For)) {
766     int VectorWidth = getNumberOfIterations(isl::manage_copy(For));
767     if (1 < VectorWidth && VectorWidth <= 16 && !hasPartialAccesses(For)) {
768       createForVector(For, VectorWidth);
769       return;
770     }
771   }
772 
773   if (IslAstInfo::isExecutedInParallel(For)) {
774     createForParallel(For);
775     return;
776   }
777   bool Parallel =
778       (IslAstInfo::isParallel(For) && !IslAstInfo::isReductionParallel(For));
779   createForSequential(isl::manage(For), Parallel);
780 }
781 
782 void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
783   isl_ast_expr *Cond = isl_ast_node_if_get_cond(If);
784 
785   Function *F = Builder.GetInsertBlock()->getParent();
786   LLVMContext &Context = F->getContext();
787 
788   BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
789                                   &*Builder.GetInsertPoint(), &DT, &LI);
790   CondBB->setName("polly.cond");
791   BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
792   MergeBB->setName("polly.merge");
793   BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F);
794   BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F);
795 
796   DT.addNewBlock(ThenBB, CondBB);
797   DT.addNewBlock(ElseBB, CondBB);
798   DT.changeImmediateDominator(MergeBB, CondBB);
799 
800   Loop *L = LI.getLoopFor(CondBB);
801   if (L) {
802     L->addBasicBlockToLoop(ThenBB, LI);
803     L->addBasicBlockToLoop(ElseBB, LI);
804   }
805 
806   CondBB->getTerminator()->eraseFromParent();
807 
808   Builder.SetInsertPoint(CondBB);
809   Value *Predicate = ExprBuilder.create(Cond);
810   Builder.CreateCondBr(Predicate, ThenBB, ElseBB);
811   Builder.SetInsertPoint(ThenBB);
812   Builder.CreateBr(MergeBB);
813   Builder.SetInsertPoint(ElseBB);
814   Builder.CreateBr(MergeBB);
815   Builder.SetInsertPoint(&ThenBB->front());
816 
817   create(isl_ast_node_if_get_then(If));
818 
819   Builder.SetInsertPoint(&ElseBB->front());
820 
821   if (isl_ast_node_if_has_else(If))
822     create(isl_ast_node_if_get_else(If));
823 
824   Builder.SetInsertPoint(&MergeBB->front());
825 
826   isl_ast_node_free(If);
827 
828   IfConditions++;
829 }
830 
831 __isl_give isl_id_to_ast_expr *
832 IslNodeBuilder::createNewAccesses(ScopStmt *Stmt,
833                                   __isl_keep isl_ast_node *Node) {
834   isl_id_to_ast_expr *NewAccesses =
835       isl_id_to_ast_expr_alloc(Stmt->getParent()->getIslCtx().get(), 0);
836 
837   auto *Build = IslAstInfo::getBuild(Node);
838   assert(Build && "Could not obtain isl_ast_build from user node");
839   Stmt->setAstBuild(isl::manage_copy(Build));
840 
841   for (auto *MA : *Stmt) {
842     if (!MA->hasNewAccessRelation()) {
843       if (PollyGenerateExpressions) {
844         if (!MA->isAffine())
845           continue;
846         if (MA->getLatestScopArrayInfo()->getBasePtrOriginSAI())
847           continue;
848 
849         auto *BasePtr =
850             dyn_cast<Instruction>(MA->getLatestScopArrayInfo()->getBasePtr());
851         if (BasePtr && Stmt->getParent()->getRegion().contains(BasePtr))
852           continue;
853       } else {
854         continue;
855       }
856     }
857     assert(MA->isAffine() &&
858            "Only affine memory accesses can be code generated");
859 
860     auto Schedule = isl_ast_build_get_schedule(Build);
861 
862 #ifndef NDEBUG
863     if (MA->isRead()) {
864       auto Dom = Stmt->getDomain().release();
865       auto SchedDom = isl_set_from_union_set(
866           isl_union_map_domain(isl_union_map_copy(Schedule)));
867       auto AccDom = isl_map_domain(MA->getAccessRelation().release());
868       Dom = isl_set_intersect_params(Dom,
869                                      Stmt->getParent()->getContext().release());
870       SchedDom = isl_set_intersect_params(
871           SchedDom, Stmt->getParent()->getContext().release());
872       assert(isl_set_is_subset(SchedDom, AccDom) &&
873              "Access relation not defined on full schedule domain");
874       assert(isl_set_is_subset(Dom, AccDom) &&
875              "Access relation not defined on full domain");
876       isl_set_free(AccDom);
877       isl_set_free(SchedDom);
878       isl_set_free(Dom);
879     }
880 #endif
881 
882     auto PWAccRel =
883         MA->applyScheduleToAccessRelation(isl::manage(Schedule)).release();
884 
885     // isl cannot generate an index expression for access-nothing accesses.
886     isl::set AccDomain =
887         isl::manage(isl_pw_multi_aff_domain(isl_pw_multi_aff_copy(PWAccRel)));
888     isl::set Context = S.getContext();
889     AccDomain = AccDomain.intersect_params(Context);
890     if (AccDomain.is_empty()) {
891       isl_pw_multi_aff_free(PWAccRel);
892       continue;
893     }
894 
895     auto AccessExpr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
896     NewAccesses =
897         isl_id_to_ast_expr_set(NewAccesses, MA->getId().release(), AccessExpr);
898   }
899 
900   return NewAccesses;
901 }
902 
903 void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr *Expr,
904                                          ScopStmt *Stmt, LoopToScevMapT &LTS) {
905   assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op &&
906          "Expression of type 'op' expected");
907   assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call &&
908          "Operation of type 'call' expected");
909   for (int i = 0; i < isl_ast_expr_get_op_n_arg(Expr) - 1; ++i) {
910     isl_ast_expr *SubExpr;
911     Value *V;
912 
913     SubExpr = isl_ast_expr_get_op_arg(Expr, i + 1);
914     V = ExprBuilder.create(SubExpr);
915     ScalarEvolution *SE = Stmt->getParent()->getSE();
916     LTS[Stmt->getLoopForDimension(i)] = SE->getUnknown(V);
917   }
918 
919   isl_ast_expr_free(Expr);
920 }
921 
922 void IslNodeBuilder::createSubstitutionsVector(
923     __isl_take isl_ast_expr *Expr, ScopStmt *Stmt,
924     std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS,
925     __isl_take isl_id *IteratorID) {
926   int i = 0;
927 
928   Value *OldValue = IDToValue[IteratorID];
929   for (Value *IV : IVS) {
930     IDToValue[IteratorID] = IV;
931     createSubstitutions(isl_ast_expr_copy(Expr), Stmt, VLTS[i]);
932     i++;
933   }
934 
935   IDToValue[IteratorID] = OldValue;
936   isl_id_free(IteratorID);
937   isl_ast_expr_free(Expr);
938 }
939 
940 void IslNodeBuilder::generateCopyStmt(
941     ScopStmt *Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
942   assert(Stmt->size() == 2);
943   auto ReadAccess = Stmt->begin();
944   auto WriteAccess = ReadAccess++;
945   assert((*ReadAccess)->isRead() && (*WriteAccess)->isMustWrite());
946   assert((*ReadAccess)->getElementType() == (*WriteAccess)->getElementType() &&
947          "Accesses use the same data type");
948   assert((*ReadAccess)->isArrayKind() && (*WriteAccess)->isArrayKind());
949   auto *AccessExpr =
950       isl_id_to_ast_expr_get(NewAccesses, (*ReadAccess)->getId().release());
951   auto *LoadValue = ExprBuilder.create(AccessExpr);
952   AccessExpr =
953       isl_id_to_ast_expr_get(NewAccesses, (*WriteAccess)->getId().release());
954   auto *StoreAddr = ExprBuilder.createAccessAddress(AccessExpr);
955   Builder.CreateStore(LoadValue, StoreAddr);
956 }
957 
958 Value *IslNodeBuilder::materializeNonScopLoopInductionVariable(const Loop *L) {
959   assert(OutsideLoopIterations.find(L) == OutsideLoopIterations.end() &&
960          "trying to materialize loop induction variable twice");
961   const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)),
962                                           SE.getUnknown(Builder.getInt64(1)), L,
963                                           SCEV::FlagAnyWrap);
964   Value *V = generateSCEV(OuterLIV);
965   OutsideLoopIterations[L] = SE.getUnknown(V);
966   return V;
967 }
968 
969 void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) {
970   LoopToScevMapT LTS;
971   isl_id *Id;
972   ScopStmt *Stmt;
973 
974   isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
975   isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
976   Id = isl_ast_expr_get_id(StmtExpr);
977   isl_ast_expr_free(StmtExpr);
978 
979   LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end());
980 
981   Stmt = (ScopStmt *)isl_id_get_user(Id);
982   auto *NewAccesses = createNewAccesses(Stmt, User);
983   if (Stmt->isCopyStmt()) {
984     generateCopyStmt(Stmt, NewAccesses);
985     isl_ast_expr_free(Expr);
986   } else {
987     createSubstitutions(Expr, Stmt, LTS);
988 
989     if (Stmt->isBlockStmt())
990       BlockGen.copyStmt(*Stmt, LTS, NewAccesses);
991     else
992       RegionGen.copyStmt(*Stmt, LTS, NewAccesses);
993   }
994 
995   isl_id_to_ast_expr_free(NewAccesses);
996   isl_ast_node_free(User);
997   isl_id_free(Id);
998 }
999 
1000 void IslNodeBuilder::createBlock(__isl_take isl_ast_node *Block) {
1001   isl_ast_node_list *List = isl_ast_node_block_get_children(Block);
1002 
1003   for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i)
1004     create(isl_ast_node_list_get_ast_node(List, i));
1005 
1006   isl_ast_node_free(Block);
1007   isl_ast_node_list_free(List);
1008 }
1009 
1010 void IslNodeBuilder::create(__isl_take isl_ast_node *Node) {
1011   switch (isl_ast_node_get_type(Node)) {
1012   case isl_ast_node_error:
1013     llvm_unreachable("code generation error");
1014   case isl_ast_node_mark:
1015     createMark(Node);
1016     return;
1017   case isl_ast_node_for:
1018     createFor(Node);
1019     return;
1020   case isl_ast_node_if:
1021     createIf(Node);
1022     return;
1023   case isl_ast_node_user:
1024     createUser(Node);
1025     return;
1026   case isl_ast_node_block:
1027     createBlock(Node);
1028     return;
1029   }
1030 
1031   llvm_unreachable("Unknown isl_ast_node type");
1032 }
1033 
1034 bool IslNodeBuilder::materializeValue(isl_id *Id) {
1035   // If the Id is already mapped, skip it.
1036   if (!IDToValue.count(Id)) {
1037     auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id);
1038     Value *V = nullptr;
1039 
1040     // Parameters could refer to invariant loads that need to be
1041     // preloaded before we can generate code for the parameter. Thus,
1042     // check if any value referred to in ParamSCEV is an invariant load
1043     // and if so make sure its equivalence class is preloaded.
1044     SetVector<Value *> Values;
1045     findValues(ParamSCEV, SE, Values);
1046     for (auto *Val : Values) {
1047       // Check if the value is an instruction in a dead block within the SCoP
1048       // and if so do not code generate it.
1049       if (auto *Inst = dyn_cast<Instruction>(Val)) {
1050         if (S.contains(Inst)) {
1051           bool IsDead = true;
1052 
1053           // Check for "undef" loads first, then if there is a statement for
1054           // the parent of Inst and lastly if the parent of Inst has an empty
1055           // domain. In the first and last case the instruction is dead but if
1056           // there is a statement or the domain is not empty Inst is not dead.
1057           auto MemInst = MemAccInst::dyn_cast(Inst);
1058           auto Address = MemInst ? MemInst.getPointerOperand() : nullptr;
1059           if (Address && SE.getUnknown(UndefValue::get(Address->getType())) ==
1060                              SE.getPointerBase(SE.getSCEV(Address))) {
1061           } else if (S.getStmtFor(Inst)) {
1062             IsDead = false;
1063           } else {
1064             auto *Domain = S.getDomainConditions(Inst->getParent()).release();
1065             IsDead = isl_set_is_empty(Domain);
1066             isl_set_free(Domain);
1067           }
1068 
1069           if (IsDead) {
1070             V = UndefValue::get(ParamSCEV->getType());
1071             break;
1072           }
1073         }
1074       }
1075 
1076       if (auto *IAClass = S.lookupInvariantEquivClass(Val)) {
1077         // Check if this invariant access class is empty, hence if we never
1078         // actually added a loads instruction to it. In that case it has no
1079         // (meaningful) users and we should not try to code generate it.
1080         if (IAClass->InvariantAccesses.empty())
1081           V = UndefValue::get(ParamSCEV->getType());
1082 
1083         if (!preloadInvariantEquivClass(*IAClass)) {
1084           isl_id_free(Id);
1085           return false;
1086         }
1087       }
1088     }
1089 
1090     V = V ? V : generateSCEV(ParamSCEV);
1091     IDToValue[Id] = V;
1092   }
1093 
1094   isl_id_free(Id);
1095   return true;
1096 }
1097 
1098 bool IslNodeBuilder::materializeParameters(isl_set *Set) {
1099   for (unsigned i = 0, e = isl_set_dim(Set, isl_dim_param); i < e; ++i) {
1100     if (!isl_set_involves_dims(Set, isl_dim_param, i, 1))
1101       continue;
1102     isl_id *Id = isl_set_get_dim_id(Set, isl_dim_param, i);
1103     if (!materializeValue(Id))
1104       return false;
1105   }
1106   return true;
1107 }
1108 
1109 bool IslNodeBuilder::materializeParameters() {
1110   for (const SCEV *Param : S.parameters()) {
1111     isl_id *Id = S.getIdForParam(Param).release();
1112     if (!materializeValue(Id))
1113       return false;
1114   }
1115   return true;
1116 }
1117 
1118 /// Generate the computation of the size of the outermost dimension from the
1119 /// Fortran array descriptor (in this case, `@g_arr`). The final `%size`
1120 /// contains the size of the array.
1121 ///
1122 /// %arrty = type { i8*, i64, i64, [3 x %desc.dimensionty] }
1123 /// %desc.dimensionty = type { i64, i64, i64 }
1124 /// @g_arr = global %arrty zeroinitializer, align 32
1125 /// ...
1126 /// %0 = load i64, i64* getelementptr inbounds
1127 ///                       (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 2)
1128 /// %1 = load i64, i64* getelementptr inbounds
1129 ///                      (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 1)
1130 /// %2 = sub nsw i64 %0, %1
1131 /// %size = add nsw i64 %2, 1
1132 static Value *buildFADOutermostDimensionLoad(Value *GlobalDescriptor,
1133                                              PollyIRBuilder &Builder,
1134                                              std::string ArrayName) {
1135   assert(GlobalDescriptor && "invalid global descriptor given");
1136 
1137   Value *endIdx[4] = {Builder.getInt64(0), Builder.getInt32(3),
1138                       Builder.getInt64(0), Builder.getInt32(2)};
1139   Value *endPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, endIdx,
1140                                             ArrayName + "_end_ptr");
1141   Value *end = Builder.CreateLoad(endPtr, ArrayName + "_end");
1142 
1143   Value *beginIdx[4] = {Builder.getInt64(0), Builder.getInt32(3),
1144                         Builder.getInt64(0), Builder.getInt32(1)};
1145   Value *beginPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, beginIdx,
1146                                               ArrayName + "_begin_ptr");
1147   Value *begin = Builder.CreateLoad(beginPtr, ArrayName + "_begin");
1148 
1149   Value *size =
1150       Builder.CreateNSWSub(end, begin, ArrayName + "_end_begin_delta");
1151   Type *endType = dyn_cast<IntegerType>(end->getType());
1152   assert(endType && "expected type of end to be integral");
1153 
1154   size = Builder.CreateNSWAdd(end,
1155                               ConstantInt::get(endType, 1, /* signed = */ true),
1156                               ArrayName + "_size");
1157 
1158   return size;
1159 }
1160 
1161 bool IslNodeBuilder::materializeFortranArrayOutermostDimension() {
1162   for (ScopArrayInfo *Array : S.arrays()) {
1163     if (Array->getNumberOfDimensions() == 0)
1164       continue;
1165 
1166     Value *FAD = Array->getFortranArrayDescriptor();
1167     if (!FAD)
1168       continue;
1169 
1170     isl_pw_aff *ParametricPwAff = Array->getDimensionSizePw(0).release();
1171     assert(ParametricPwAff && "parametric pw_aff corresponding "
1172                               "to outermost dimension does not "
1173                               "exist");
1174 
1175     isl_id *Id = isl_pw_aff_get_dim_id(ParametricPwAff, isl_dim_param, 0);
1176     isl_pw_aff_free(ParametricPwAff);
1177 
1178     assert(Id && "pw_aff is not parametric");
1179 
1180     if (IDToValue.count(Id)) {
1181       isl_id_free(Id);
1182       continue;
1183     }
1184 
1185     Value *FinalValue =
1186         buildFADOutermostDimensionLoad(FAD, Builder, Array->getName());
1187     assert(FinalValue && "unable to build Fortran array "
1188                          "descriptor load of outermost dimension");
1189     IDToValue[Id] = FinalValue;
1190     isl_id_free(Id);
1191   }
1192   return true;
1193 }
1194 
1195 Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange,
1196                                               isl_ast_build *Build,
1197                                               Instruction *AccInst) {
1198   isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange);
1199   isl_ast_expr *Access =
1200       isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
1201   auto *Address = isl_ast_expr_address_of(Access);
1202   auto *AddressValue = ExprBuilder.create(Address);
1203   Value *PreloadVal;
1204 
1205   // Correct the type as the SAI might have a different type than the user
1206   // expects, especially if the base pointer is a struct.
1207   Type *Ty = AccInst->getType();
1208 
1209   auto *Ptr = AddressValue;
1210   auto Name = Ptr->getName();
1211   auto AS = Ptr->getType()->getPointerAddressSpace();
1212   Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(AS), Name + ".cast");
1213   PreloadVal = Builder.CreateLoad(Ptr, Name + ".load");
1214   if (LoadInst *PreloadInst = dyn_cast<LoadInst>(PreloadVal))
1215     PreloadInst->setAlignment(dyn_cast<LoadInst>(AccInst)->getAlignment());
1216 
1217   // TODO: This is only a hot fix for SCoP sequences that use the same load
1218   //       instruction contained and hoisted by one of the SCoPs.
1219   if (SE.isSCEVable(Ty))
1220     SE.forgetValue(AccInst);
1221 
1222   return PreloadVal;
1223 }
1224 
1225 Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
1226                                             isl_set *Domain) {
1227   isl_set *AccessRange = isl_map_range(MA.getAddressFunction().release());
1228   AccessRange = isl_set_gist_params(AccessRange, S.getContext().release());
1229 
1230   if (!materializeParameters(AccessRange)) {
1231     isl_set_free(AccessRange);
1232     isl_set_free(Domain);
1233     return nullptr;
1234   }
1235 
1236   auto *Build =
1237       isl_ast_build_from_context(isl_set_universe(S.getParamSpace().release()));
1238   isl_set *Universe = isl_set_universe(isl_set_get_space(Domain));
1239   bool AlwaysExecuted = isl_set_is_equal(Domain, Universe);
1240   isl_set_free(Universe);
1241 
1242   Instruction *AccInst = MA.getAccessInstruction();
1243   Type *AccInstTy = AccInst->getType();
1244 
1245   Value *PreloadVal = nullptr;
1246   if (AlwaysExecuted) {
1247     PreloadVal = preloadUnconditionally(AccessRange, Build, AccInst);
1248     isl_ast_build_free(Build);
1249     isl_set_free(Domain);
1250     return PreloadVal;
1251   }
1252 
1253   if (!materializeParameters(Domain)) {
1254     isl_ast_build_free(Build);
1255     isl_set_free(AccessRange);
1256     isl_set_free(Domain);
1257     return nullptr;
1258   }
1259 
1260   isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain);
1261   Domain = nullptr;
1262 
1263   ExprBuilder.setTrackOverflow(true);
1264   Value *Cond = ExprBuilder.create(DomainCond);
1265   Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(),
1266                                               "polly.preload.cond.overflown");
1267   Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result");
1268   ExprBuilder.setTrackOverflow(false);
1269 
1270   if (!Cond->getType()->isIntegerTy(1))
1271     Cond = Builder.CreateIsNotNull(Cond);
1272 
1273   BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
1274                                   &*Builder.GetInsertPoint(), &DT, &LI);
1275   CondBB->setName("polly.preload.cond");
1276 
1277   BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
1278   MergeBB->setName("polly.preload.merge");
1279 
1280   Function *F = Builder.GetInsertBlock()->getParent();
1281   LLVMContext &Context = F->getContext();
1282   BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F);
1283 
1284   DT.addNewBlock(ExecBB, CondBB);
1285   if (Loop *L = LI.getLoopFor(CondBB))
1286     L->addBasicBlockToLoop(ExecBB, LI);
1287 
1288   auto *CondBBTerminator = CondBB->getTerminator();
1289   Builder.SetInsertPoint(CondBBTerminator);
1290   Builder.CreateCondBr(Cond, ExecBB, MergeBB);
1291   CondBBTerminator->eraseFromParent();
1292 
1293   Builder.SetInsertPoint(ExecBB);
1294   Builder.CreateBr(MergeBB);
1295 
1296   Builder.SetInsertPoint(ExecBB->getTerminator());
1297   Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInst);
1298   Builder.SetInsertPoint(MergeBB->getTerminator());
1299   auto *MergePHI = Builder.CreatePHI(
1300       AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge");
1301   PreloadVal = MergePHI;
1302 
1303   if (!PreAccInst) {
1304     PreloadVal = nullptr;
1305     PreAccInst = UndefValue::get(AccInstTy);
1306   }
1307 
1308   MergePHI->addIncoming(PreAccInst, ExecBB);
1309   MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB);
1310 
1311   isl_ast_build_free(Build);
1312   return PreloadVal;
1313 }
1314 
1315 bool IslNodeBuilder::preloadInvariantEquivClass(
1316     InvariantEquivClassTy &IAClass) {
1317   // For an equivalence class of invariant loads we pre-load the representing
1318   // element with the unified execution context. However, we have to map all
1319   // elements of the class to the one preloaded load as they are referenced
1320   // during the code generation and therefor need to be mapped.
1321   const MemoryAccessList &MAs = IAClass.InvariantAccesses;
1322   if (MAs.empty())
1323     return true;
1324 
1325   MemoryAccess *MA = MAs.front();
1326   assert(MA->isArrayKind() && MA->isRead());
1327 
1328   // If the access function was already mapped, the preload of this equivalence
1329   // class was triggered earlier already and doesn't need to be done again.
1330   if (ValueMap.count(MA->getAccessInstruction()))
1331     return true;
1332 
1333   // Check for recursion which can be caused by additional constraints, e.g.,
1334   // non-finite loop constraints. In such a case we have to bail out and insert
1335   // a "false" runtime check that will cause the original code to be executed.
1336   auto PtrId = std::make_pair(IAClass.IdentifyingPointer, IAClass.AccessType);
1337   if (!PreloadedPtrs.insert(PtrId).second)
1338     return false;
1339 
1340   // The execution context of the IAClass.
1341   isl::set &ExecutionCtx = IAClass.ExecutionContext;
1342 
1343   // If the base pointer of this class is dependent on another one we have to
1344   // make sure it was preloaded already.
1345   auto *SAI = MA->getScopArrayInfo();
1346   if (auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) {
1347     if (!preloadInvariantEquivClass(*BaseIAClass))
1348       return false;
1349 
1350     // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and
1351     // we need to refine the ExecutionCtx.
1352     isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext;
1353     ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx);
1354   }
1355 
1356   // If the size of a dimension is dependent on another class, make sure it is
1357   // preloaded.
1358   for (unsigned i = 1, e = SAI->getNumberOfDimensions(); i < e; ++i) {
1359     const SCEV *Dim = SAI->getDimensionSize(i);
1360     SetVector<Value *> Values;
1361     findValues(Dim, SE, Values);
1362     for (auto *Val : Values) {
1363       if (auto *BaseIAClass = S.lookupInvariantEquivClass(Val)) {
1364         if (!preloadInvariantEquivClass(*BaseIAClass))
1365           return false;
1366 
1367         // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx
1368         // and we need to refine the ExecutionCtx.
1369         isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext;
1370         ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx);
1371       }
1372     }
1373   }
1374 
1375   Instruction *AccInst = MA->getAccessInstruction();
1376   Type *AccInstTy = AccInst->getType();
1377 
1378   Value *PreloadVal = preloadInvariantLoad(*MA, ExecutionCtx.copy());
1379   if (!PreloadVal)
1380     return false;
1381 
1382   for (const MemoryAccess *MA : MAs) {
1383     Instruction *MAAccInst = MA->getAccessInstruction();
1384     assert(PreloadVal->getType() == MAAccInst->getType());
1385     ValueMap[MAAccInst] = PreloadVal;
1386   }
1387 
1388   if (SE.isSCEVable(AccInstTy)) {
1389     isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)).release();
1390     if (ParamId)
1391       IDToValue[ParamId] = PreloadVal;
1392     isl_id_free(ParamId);
1393   }
1394 
1395   BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
1396   auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(),
1397                                 AccInst->getName() + ".preload.s2a");
1398   Alloca->insertBefore(&*EntryBB->getFirstInsertionPt());
1399   Builder.CreateStore(PreloadVal, Alloca);
1400   ValueMapT PreloadedPointer;
1401   PreloadedPointer[PreloadVal] = AccInst;
1402   Annotator.addAlternativeAliasBases(PreloadedPointer);
1403 
1404   for (auto *DerivedSAI : SAI->getDerivedSAIs()) {
1405     Value *BasePtr = DerivedSAI->getBasePtr();
1406 
1407     for (const MemoryAccess *MA : MAs) {
1408       // As the derived SAI information is quite coarse, any load from the
1409       // current SAI could be the base pointer of the derived SAI, however we
1410       // should only change the base pointer of the derived SAI if we actually
1411       // preloaded it.
1412       if (BasePtr == MA->getOriginalBaseAddr()) {
1413         assert(BasePtr->getType() == PreloadVal->getType());
1414         DerivedSAI->setBasePtr(PreloadVal);
1415       }
1416 
1417       // For scalar derived SAIs we remap the alloca used for the derived value.
1418       if (BasePtr == MA->getAccessInstruction())
1419         ScalarMap[DerivedSAI] = Alloca;
1420     }
1421   }
1422 
1423   for (const MemoryAccess *MA : MAs) {
1424     Instruction *MAAccInst = MA->getAccessInstruction();
1425     // Use the escape system to get the correct value to users outside the SCoP.
1426     BlockGenerator::EscapeUserVectorTy EscapeUsers;
1427     for (auto *U : MAAccInst->users())
1428       if (Instruction *UI = dyn_cast<Instruction>(U))
1429         if (!S.contains(UI))
1430           EscapeUsers.push_back(UI);
1431 
1432     if (EscapeUsers.empty())
1433       continue;
1434 
1435     EscapeMap[MA->getAccessInstruction()] =
1436         std::make_pair(Alloca, std::move(EscapeUsers));
1437   }
1438 
1439   return true;
1440 }
1441 
1442 void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) {
1443   for (auto &SAI : S.arrays()) {
1444     if (SAI->getBasePtr())
1445       continue;
1446 
1447     assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) &&
1448            "The size of the outermost dimension is used to declare newly "
1449            "created arrays that require memory allocation.");
1450 
1451     Type *NewArrayType = nullptr;
1452 
1453     // Get the size of the array = size(dim_1)*...*size(dim_n)
1454     uint64_t ArraySizeInt = 1;
1455     for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; i--) {
1456       auto *DimSize = SAI->getDimensionSize(i);
1457       unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize)
1458                                      ->getAPInt()
1459                                      .getLimitedValue();
1460 
1461       if (!NewArrayType)
1462         NewArrayType = SAI->getElementType();
1463 
1464       NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize);
1465       ArraySizeInt *= UnsignedDimSize;
1466     }
1467 
1468     if (SAI->isOnHeap()) {
1469       LLVMContext &Ctx = NewArrayType->getContext();
1470 
1471       // Get the IntPtrTy from the Datalayout
1472       auto IntPtrTy = DL.getIntPtrType(Ctx);
1473 
1474       // Get the size of the element type in bits
1475       unsigned Size = SAI->getElemSizeInBytes();
1476 
1477       // Insert the malloc call at polly.start
1478       auto InstIt = std::get<0>(StartExitBlocks)->getTerminator();
1479       auto *CreatedArray = CallInst::CreateMalloc(
1480           &*InstIt, IntPtrTy, SAI->getElementType(),
1481           ConstantInt::get(Type::getInt64Ty(Ctx), Size),
1482           ConstantInt::get(Type::getInt64Ty(Ctx), ArraySizeInt), nullptr,
1483           SAI->getName());
1484 
1485       SAI->setBasePtr(CreatedArray);
1486 
1487       // Insert the free call at polly.exiting
1488       CallInst::CreateFree(CreatedArray,
1489                            std::get<1>(StartExitBlocks)->getTerminator());
1490     } else {
1491       auto InstIt = Builder.GetInsertBlock()
1492                         ->getParent()
1493                         ->getEntryBlock()
1494                         .getTerminator();
1495 
1496       auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(),
1497                                           SAI->getName(), &*InstIt);
1498       CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize);
1499       SAI->setBasePtr(CreatedArray);
1500     }
1501   }
1502 }
1503 
1504 bool IslNodeBuilder::preloadInvariantLoads() {
1505   auto &InvariantEquivClasses = S.getInvariantAccesses();
1506   if (InvariantEquivClasses.empty())
1507     return true;
1508 
1509   BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(),
1510                                      &*Builder.GetInsertPoint(), &DT, &LI);
1511   PreLoadBB->setName("polly.preload.begin");
1512   Builder.SetInsertPoint(&PreLoadBB->front());
1513 
1514   for (auto &IAClass : InvariantEquivClasses)
1515     if (!preloadInvariantEquivClass(IAClass))
1516       return false;
1517 
1518   return true;
1519 }
1520 
1521 void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
1522   // Materialize values for the parameters of the SCoP.
1523   materializeParameters();
1524 
1525   // materialize the outermost dimension parameters for a Fortran array.
1526   // NOTE: materializeParameters() does not work since it looks through
1527   // the SCEVs. We don't have a corresponding SCEV for the array size
1528   // parameter
1529   materializeFortranArrayOutermostDimension();
1530 
1531   // Generate values for the current loop iteration for all surrounding loops.
1532   //
1533   // We may also reference loops outside of the scop which do not contain the
1534   // scop itself, but as the number of such scops may be arbitrarily large we do
1535   // not generate code for them here, but only at the point of code generation
1536   // where these values are needed.
1537   Loop *L = LI.getLoopFor(S.getEntry());
1538 
1539   while (L != nullptr && S.contains(L))
1540     L = L->getParentLoop();
1541 
1542   while (L != nullptr) {
1543     materializeNonScopLoopInductionVariable(L);
1544     L = L->getParentLoop();
1545   }
1546 
1547   isl_set_free(Context);
1548 }
1549 
1550 Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) {
1551   /// We pass the insert location of our Builder, as Polly ensures during IR
1552   /// generation that there is always a valid CFG into which instructions are
1553   /// inserted. As a result, the insertpoint is known to be always followed by a
1554   /// terminator instruction. This means the insert point may be specified by a
1555   /// terminator instruction, but it can never point to an ->end() iterator
1556   /// which does not have a corresponding instruction. Hence, dereferencing
1557   /// the insertpoint to obtain an instruction is known to be save.
1558   ///
1559   /// We also do not need to update the Builder here, as new instructions are
1560   /// always inserted _before_ the given InsertLocation. As a result, the
1561   /// insert location remains valid.
1562   assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() &&
1563          "Insert location points after last valid instruction");
1564   Instruction *InsertLocation = &*Builder.GetInsertPoint();
1565   return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(),
1566                        InsertLocation, &ValueMap,
1567                        StartBlock->getSinglePredecessor());
1568 }
1569 
1570 /// The AST expression we generate to perform the run-time check assumes
1571 /// computations on integer types of infinite size. As we only use 64-bit
1572 /// arithmetic we check for overflows, in case of which we set the result
1573 /// of this run-time check to false to be conservatively correct,
1574 Value *IslNodeBuilder::createRTC(isl_ast_expr *Condition) {
1575   auto ExprBuilder = getExprBuilder();
1576 
1577   // In case the AST expression has integers larger than 64 bit, bail out. The
1578   // resulting LLVM-IR will contain operations on types that use more than 64
1579   // bits. These are -- in case wrapping intrinsics are used -- translated to
1580   // runtime library calls that are not available on all systems (e.g., Android)
1581   // and consequently will result in linker errors.
1582   if (ExprBuilder.hasLargeInts(isl::manage_copy(Condition))) {
1583     isl_ast_expr_free(Condition);
1584     return Builder.getFalse();
1585   }
1586 
1587   ExprBuilder.setTrackOverflow(true);
1588   Value *RTC = ExprBuilder.create(Condition);
1589   if (!RTC->getType()->isIntegerTy(1))
1590     RTC = Builder.CreateIsNotNull(RTC);
1591   Value *OverflowHappened =
1592       Builder.CreateNot(ExprBuilder.getOverflowState(), "polly.rtc.overflown");
1593 
1594   if (PollyGenerateRTCPrint) {
1595     auto *F = Builder.GetInsertBlock()->getParent();
1596     RuntimeDebugBuilder::createCPUPrinter(
1597         Builder,
1598         "F: " + F->getName().str() + " R: " + S.getRegion().getNameStr() +
1599             "RTC: ",
1600         RTC, " Overflow: ", OverflowHappened,
1601         "\n"
1602         "  (0 failed, -1 succeeded)\n"
1603         "  (if one or both are 0 falling back to original code, if both are -1 "
1604         "executing Polly code)\n");
1605   }
1606 
1607   RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result");
1608   ExprBuilder.setTrackOverflow(false);
1609 
1610   if (!isa<ConstantInt>(RTC))
1611     VersionedScops++;
1612 
1613   return RTC;
1614 }
1615