1 //===- IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the IslNodeBuilder, a class to translate an isl AST into
10 // a LLVM-IR AST.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "polly/CodeGen/IslNodeBuilder.h"
15 #include "polly/CodeGen/BlockGenerators.h"
16 #include "polly/CodeGen/CodeGeneration.h"
17 #include "polly/CodeGen/IslAst.h"
18 #include "polly/CodeGen/IslExprBuilder.h"
19 #include "polly/CodeGen/LoopGeneratorsGOMP.h"
20 #include "polly/CodeGen/LoopGeneratorsKMP.h"
21 #include "polly/CodeGen/RuntimeDebugBuilder.h"
22 #include "polly/Config/config.h"
23 #include "polly/Options.h"
24 #include "polly/ScopInfo.h"
25 #include "polly/Support/GICHelper.h"
26 #include "polly/Support/ISLTools.h"
27 #include "polly/Support/SCEVValidator.h"
28 #include "polly/Support/ScopHelper.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/PostOrderIterator.h"
31 #include "llvm/ADT/SetVector.h"
32 #include "llvm/ADT/SmallPtrSet.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/Analysis/LoopInfo.h"
35 #include "llvm/Analysis/RegionInfo.h"
36 #include "llvm/Analysis/ScalarEvolution.h"
37 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
38 #include "llvm/IR/BasicBlock.h"
39 #include "llvm/IR/Constant.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DerivedTypes.h"
43 #include "llvm/IR/Dominators.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/IR/InstrTypes.h"
46 #include "llvm/IR/Instruction.h"
47 #include "llvm/IR/Instructions.h"
48 #include "llvm/IR/Type.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/CommandLine.h"
52 #include "llvm/Support/ErrorHandling.h"
53 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
54 #include "isl/aff.h"
55 #include "isl/aff_type.h"
56 #include "isl/ast.h"
57 #include "isl/ast_build.h"
58 #include "isl/isl-noexceptions.h"
59 #include "isl/map.h"
60 #include "isl/set.h"
61 #include "isl/union_map.h"
62 #include "isl/union_set.h"
63 #include "isl/val.h"
64 #include <algorithm>
65 #include <cassert>
66 #include <cstdint>
67 #include <cstring>
68 #include <string>
69 #include <utility>
70 #include <vector>
71 
72 using namespace llvm;
73 using namespace polly;
74 
75 #define DEBUG_TYPE "polly-codegen"
76 
77 STATISTIC(VersionedScops, "Number of SCoPs that required versioning.");
78 
79 STATISTIC(SequentialLoops, "Number of generated sequential for-loops");
80 STATISTIC(ParallelLoops, "Number of generated parallel for-loops");
81 STATISTIC(VectorLoops, "Number of generated vector for-loops");
82 STATISTIC(IfConditions, "Number of generated if-conditions");
83 
84 /// OpenMP backend options
85 enum class OpenMPBackend { GNU, LLVM };
86 
87 static cl::opt<bool> PollyGenerateRTCPrint(
88     "polly-codegen-emit-rtc-print",
89     cl::desc("Emit code that prints the runtime check result dynamically."),
90     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
91 
92 // If this option is set we always use the isl AST generator to regenerate
93 // memory accesses. Without this option set we regenerate expressions using the
94 // original SCEV expressions and only generate new expressions in case the
95 // access relation has been changed and consequently must be regenerated.
96 static cl::opt<bool> PollyGenerateExpressions(
97     "polly-codegen-generate-expressions",
98     cl::desc("Generate AST expressions for unmodified and modified accesses"),
99     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
100 
101 static cl::opt<int> PollyTargetFirstLevelCacheLineSize(
102     "polly-target-first-level-cache-line-size",
103     cl::desc("The size of the first level cache line size specified in bytes."),
104     cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory));
105 
106 static cl::opt<OpenMPBackend> PollyOmpBackend(
107     "polly-omp-backend", cl::desc("Choose the OpenMP library to use:"),
108     cl::values(clEnumValN(OpenMPBackend::GNU, "GNU", "GNU OpenMP"),
109                clEnumValN(OpenMPBackend::LLVM, "LLVM", "LLVM OpenMP")),
110     cl::Hidden, cl::init(OpenMPBackend::GNU), cl::cat(PollyCategory));
111 
112 isl::ast_expr IslNodeBuilder::getUpperBound(isl::ast_node For,
113                                             ICmpInst::Predicate &Predicate) {
114   isl::ast_expr Cond = For.for_get_cond();
115   isl::ast_expr Iterator = For.for_get_iterator();
116   assert(isl_ast_expr_get_type(Cond.get()) == isl_ast_expr_op &&
117          "conditional expression is not an atomic upper bound");
118 
119   isl_ast_op_type OpType = isl_ast_expr_get_op_type(Cond.get());
120 
121   switch (OpType) {
122   case isl_ast_op_le:
123     Predicate = ICmpInst::ICMP_SLE;
124     break;
125   case isl_ast_op_lt:
126     Predicate = ICmpInst::ICMP_SLT;
127     break;
128   default:
129     llvm_unreachable("Unexpected comparison type in loop condition");
130   }
131 
132   isl::ast_expr Arg0 = Cond.get_op_arg(0);
133 
134   assert(isl_ast_expr_get_type(Arg0.get()) == isl_ast_expr_id &&
135          "conditional expression is not an atomic upper bound");
136 
137   isl::id UBID = Arg0.get_id();
138 
139   assert(isl_ast_expr_get_type(Iterator.get()) == isl_ast_expr_id &&
140          "Could not get the iterator");
141 
142   isl::id IteratorID = Iterator.get_id();
143 
144   assert(UBID.get() == IteratorID.get() &&
145          "conditional expression is not an atomic upper bound");
146 
147   return Cond.get_op_arg(1);
148 }
149 
150 /// Return true if a return value of Predicate is true for the value represented
151 /// by passed isl_ast_expr_int.
152 static bool checkIslAstExprInt(__isl_take isl_ast_expr *Expr,
153                                isl_bool (*Predicate)(__isl_keep isl_val *)) {
154   if (isl_ast_expr_get_type(Expr) != isl_ast_expr_int) {
155     isl_ast_expr_free(Expr);
156     return false;
157   }
158   auto ExprVal = isl_ast_expr_get_val(Expr);
159   isl_ast_expr_free(Expr);
160   if (Predicate(ExprVal) != isl_bool_true) {
161     isl_val_free(ExprVal);
162     return false;
163   }
164   isl_val_free(ExprVal);
165   return true;
166 }
167 
168 int IslNodeBuilder::getNumberOfIterations(isl::ast_node For) {
169   assert(isl_ast_node_get_type(For.get()) == isl_ast_node_for);
170   isl::ast_node Body = For.for_get_body();
171 
172   // First, check if we can actually handle this code.
173   switch (isl_ast_node_get_type(Body.get())) {
174   case isl_ast_node_user:
175     break;
176   case isl_ast_node_block: {
177     isl::ast_node_list List = Body.block_get_children();
178     for (isl::ast_node Node : List) {
179       isl_ast_node_type NodeType = isl_ast_node_get_type(Node.get());
180       if (NodeType != isl_ast_node_user)
181         return -1;
182     }
183     break;
184   }
185   default:
186     return -1;
187   }
188 
189   isl::ast_expr Init = For.for_get_init();
190   if (!checkIslAstExprInt(Init.release(), isl_val_is_zero))
191     return -1;
192   isl::ast_expr Inc = For.for_get_inc();
193   if (!checkIslAstExprInt(Inc.release(), isl_val_is_one))
194     return -1;
195   CmpInst::Predicate Predicate;
196   isl::ast_expr UB = getUpperBound(For, Predicate);
197   if (isl_ast_expr_get_type(UB.get()) != isl_ast_expr_int)
198     return -1;
199   isl::val UpVal = UB.get_val();
200   int NumberIterations = UpVal.get_num_si();
201   if (NumberIterations < 0)
202     return -1;
203   if (Predicate == CmpInst::ICMP_SLT)
204     return NumberIterations;
205   else
206     return NumberIterations + 1;
207 }
208 
209 /// Extract the values and SCEVs needed to generate code for a block.
210 static int findReferencesInBlock(struct SubtreeReferences &References,
211                                  const ScopStmt *Stmt, BasicBlock *BB) {
212   for (Instruction &Inst : *BB) {
213     // Include invariant loads
214     if (isa<LoadInst>(Inst))
215       if (Value *InvariantLoad = References.GlobalMap.lookup(&Inst))
216         References.Values.insert(InvariantLoad);
217 
218     for (Value *SrcVal : Inst.operands()) {
219       auto *Scope = References.LI.getLoopFor(BB);
220       if (canSynthesize(SrcVal, References.S, &References.SE, Scope)) {
221         References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope));
222         continue;
223       } else if (Value *NewVal = References.GlobalMap.lookup(SrcVal))
224         References.Values.insert(NewVal);
225     }
226   }
227   return 0;
228 }
229 
230 void addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
231                            bool CreateScalarRefs) {
232   auto &References = *static_cast<struct SubtreeReferences *>(UserPtr);
233 
234   if (Stmt->isBlockStmt())
235     findReferencesInBlock(References, Stmt, Stmt->getBasicBlock());
236   else {
237     assert(Stmt->isRegionStmt() &&
238            "Stmt was neither block nor region statement");
239     for (BasicBlock *BB : Stmt->getRegion()->blocks())
240       findReferencesInBlock(References, Stmt, BB);
241   }
242 
243   for (auto &Access : *Stmt) {
244     if (References.ParamSpace) {
245       isl::space ParamSpace = Access->getLatestAccessRelation().get_space();
246       (*References.ParamSpace) =
247           References.ParamSpace->align_params(ParamSpace);
248     }
249 
250     if (Access->isLatestArrayKind()) {
251       auto *BasePtr = Access->getLatestScopArrayInfo()->getBasePtr();
252       if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr))
253         if (Stmt->getParent()->contains(OpInst))
254           continue;
255 
256       References.Values.insert(BasePtr);
257       continue;
258     }
259 
260     if (CreateScalarRefs)
261       References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access));
262   }
263 }
264 
265 /// Extract the out-of-scop values and SCEVs referenced from a set describing
266 /// a ScopStmt.
267 ///
268 /// This includes the SCEVUnknowns referenced by the SCEVs used in the
269 /// statement and the base pointers of the memory accesses. For scalar
270 /// statements we force the generation of alloca memory locations and list
271 /// these locations in the set of out-of-scop values as well.
272 ///
273 /// @param Set     A set which references the ScopStmt we are interested in.
274 /// @param UserPtr A void pointer that can be casted to a SubtreeReferences
275 ///                structure.
276 static void addReferencesFromStmtSet(isl::set Set,
277                                      struct SubtreeReferences *UserPtr) {
278   isl::id Id = Set.get_tuple_id();
279   auto *Stmt = static_cast<const ScopStmt *>(Id.get_user());
280   return addReferencesFromStmt(Stmt, UserPtr);
281 }
282 
283 /// Extract the out-of-scop values and SCEVs referenced from a union set
284 /// referencing multiple ScopStmts.
285 ///
286 /// This includes the SCEVUnknowns referenced by the SCEVs used in the
287 /// statement and the base pointers of the memory accesses. For scalar
288 /// statements we force the generation of alloca memory locations and list
289 /// these locations in the set of out-of-scop values as well.
290 ///
291 /// @param USet       A union set referencing the ScopStmts we are interested
292 ///                   in.
293 /// @param References The SubtreeReferences data structure through which
294 ///                   results are returned and further information is
295 ///                   provided.
296 static void
297 addReferencesFromStmtUnionSet(isl::union_set USet,
298                               struct SubtreeReferences &References) {
299 
300   for (isl::set Set : USet.get_set_list())
301     addReferencesFromStmtSet(Set, &References);
302 }
303 
304 __isl_give isl_union_map *
305 IslNodeBuilder::getScheduleForAstNode(__isl_keep isl_ast_node *For) {
306   return IslAstInfo::getSchedule(For);
307 }
308 
309 void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For,
310                                             SetVector<Value *> &Values,
311                                             SetVector<const Loop *> &Loops) {
312   SetVector<const SCEV *> SCEVs;
313   struct SubtreeReferences References = {
314       LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr};
315 
316   for (const auto &I : IDToValue)
317     Values.insert(I.second);
318 
319   // NOTE: this is populated in IslNodeBuilder::addParameters
320   for (const auto &I : OutsideLoopIterations)
321     Values.insert(cast<SCEVUnknown>(I.second)->getValue());
322 
323   isl::union_set Schedule =
324       isl::manage(isl_union_map_domain(getScheduleForAstNode(For)));
325   addReferencesFromStmtUnionSet(Schedule, References);
326 
327   for (const SCEV *Expr : SCEVs) {
328     findValues(Expr, SE, Values);
329     findLoops(Expr, Loops);
330   }
331 
332   Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); });
333 
334   /// Note: Code generation of induction variables of loops outside Scops
335   ///
336   /// Remove loops that contain the scop or that are part of the scop, as they
337   /// are considered local. This leaves only loops that are before the scop, but
338   /// do not contain the scop itself.
339   /// We ignore loops perfectly contained in the Scop because these are already
340   /// generated at `IslNodeBuilder::addParameters`. These `Loops` are loops
341   /// whose induction variables are referred to by the Scop, but the Scop is not
342   /// fully contained in these Loops. Since there can be many of these,
343   /// we choose to codegen these on-demand.
344   /// @see IslNodeBuilder::materializeNonScopLoopInductionVariable.
345   Loops.remove_if([this](const Loop *L) {
346     return S.contains(L) || L->contains(S.getEntry());
347   });
348 
349   // Contains Values that may need to be replaced with other values
350   // due to replacements from the ValueMap. We should make sure
351   // that we return correctly remapped values.
352   // NOTE: this code path is tested by:
353   //     1.  test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
354   //     2.  test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
355   SetVector<Value *> ReplacedValues;
356   for (Value *V : Values) {
357     ReplacedValues.insert(getLatestValue(V));
358   }
359   Values = ReplacedValues;
360 }
361 
362 void IslNodeBuilder::updateValues(ValueMapT &NewValues) {
363   SmallPtrSet<Value *, 5> Inserted;
364 
365   for (const auto &I : IDToValue) {
366     IDToValue[I.first] = NewValues[I.second];
367     Inserted.insert(I.second);
368   }
369 
370   for (const auto &I : NewValues) {
371     if (Inserted.count(I.first))
372       continue;
373 
374     ValueMap[I.first] = I.second;
375   }
376 }
377 
378 Value *IslNodeBuilder::getLatestValue(Value *Original) const {
379   auto It = ValueMap.find(Original);
380   if (It == ValueMap.end())
381     return Original;
382   return It->second;
383 }
384 
385 void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
386                                       std::vector<Value *> &IVS,
387                                       __isl_take isl_id *IteratorID,
388                                       __isl_take isl_union_map *Schedule) {
389   isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
390   isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
391   isl_id *Id = isl_ast_expr_get_id(StmtExpr);
392   isl_ast_expr_free(StmtExpr);
393   ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id);
394   std::vector<LoopToScevMapT> VLTS(IVS.size());
395 
396   isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain().release());
397   Schedule = isl_union_map_intersect_domain(Schedule, Domain);
398   isl_map *S = isl_map_from_union_map(Schedule);
399 
400   auto *NewAccesses = createNewAccesses(Stmt, User);
401   createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID);
402   VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses);
403   isl_id_to_ast_expr_free(NewAccesses);
404   isl_map_free(S);
405   isl_id_free(Id);
406   isl_ast_node_free(User);
407 }
408 
409 void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
410   auto *Id = isl_ast_node_mark_get_id(Node);
411   auto Child = isl_ast_node_mark_get_node(Node);
412   isl_ast_node_free(Node);
413   // If a child node of a 'SIMD mark' is a loop that has a single iteration,
414   // it will be optimized away and we should skip it.
415   if (strcmp(isl_id_get_name(Id), "SIMD") == 0 &&
416       isl_ast_node_get_type(Child) == isl_ast_node_for) {
417     bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
418     int VectorWidth = getNumberOfIterations(isl::manage_copy(Child));
419     if (Vector && 1 < VectorWidth && VectorWidth <= 16)
420       createForVector(Child, VectorWidth);
421     else
422       createForSequential(isl::manage(Child), true);
423     isl_id_free(Id);
424     return;
425   }
426   if (strcmp(isl_id_get_name(Id), "Inter iteration alias-free") == 0) {
427     auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id));
428     Annotator.addInterIterationAliasFreeBasePtr(BasePtr);
429   }
430   create(Child);
431   isl_id_free(Id);
432 }
433 
434 void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
435                                      int VectorWidth) {
436   isl_ast_node *Body = isl_ast_node_for_get_body(For);
437   isl_ast_expr *Init = isl_ast_node_for_get_init(For);
438   isl_ast_expr *Inc = isl_ast_node_for_get_inc(For);
439   isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For);
440   isl_id *IteratorID = isl_ast_expr_get_id(Iterator);
441 
442   Value *ValueLB = ExprBuilder.create(Init);
443   Value *ValueInc = ExprBuilder.create(Inc);
444 
445   Type *MaxType = ExprBuilder.getType(Iterator);
446   MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
447   MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
448 
449   if (MaxType != ValueLB->getType())
450     ValueLB = Builder.CreateSExt(ValueLB, MaxType);
451   if (MaxType != ValueInc->getType())
452     ValueInc = Builder.CreateSExt(ValueInc, MaxType);
453 
454   std::vector<Value *> IVS(VectorWidth);
455   IVS[0] = ValueLB;
456 
457   for (int i = 1; i < VectorWidth; i++)
458     IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv");
459 
460   isl_union_map *Schedule = getScheduleForAstNode(For);
461   assert(Schedule && "For statement annotation does not contain its schedule");
462 
463   IDToValue[IteratorID] = ValueLB;
464 
465   switch (isl_ast_node_get_type(Body)) {
466   case isl_ast_node_user:
467     createUserVector(Body, IVS, isl_id_copy(IteratorID),
468                      isl_union_map_copy(Schedule));
469     break;
470   case isl_ast_node_block: {
471     isl_ast_node_list *List = isl_ast_node_block_get_children(Body);
472 
473     for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i)
474       createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS,
475                        isl_id_copy(IteratorID), isl_union_map_copy(Schedule));
476 
477     isl_ast_node_free(Body);
478     isl_ast_node_list_free(List);
479     break;
480   }
481   default:
482     isl_ast_node_dump(Body);
483     llvm_unreachable("Unhandled isl_ast_node in vectorizer");
484   }
485 
486   IDToValue.erase(IDToValue.find(IteratorID));
487   isl_id_free(IteratorID);
488   isl_union_map_free(Schedule);
489 
490   isl_ast_node_free(For);
491   isl_ast_expr_free(Iterator);
492 
493   VectorLoops++;
494 }
495 
496 /// Restore the initial ordering of dimensions of the band node
497 ///
498 /// In case the band node represents all the dimensions of the iteration
499 /// domain, recreate the band node to restore the initial ordering of the
500 /// dimensions.
501 ///
502 /// @param Node The band node to be modified.
503 /// @return The modified schedule node.
504 static bool IsLoopVectorizerDisabled(isl::ast_node Node) {
505   assert(isl_ast_node_get_type(Node.get()) == isl_ast_node_for);
506   auto Body = Node.for_get_body();
507   if (isl_ast_node_get_type(Body.get()) != isl_ast_node_mark)
508     return false;
509   auto Id = Body.mark_get_id();
510   if (strcmp(Id.get_name().c_str(), "Loop Vectorizer Disabled") == 0)
511     return true;
512   return false;
513 }
514 
515 void IslNodeBuilder::createForSequential(isl::ast_node For, bool MarkParallel) {
516   Value *ValueLB, *ValueUB, *ValueInc;
517   Type *MaxType;
518   BasicBlock *ExitBlock;
519   Value *IV;
520   CmpInst::Predicate Predicate;
521 
522   bool LoopVectorizerDisabled = IsLoopVectorizerDisabled(For);
523 
524   isl::ast_node Body = For.for_get_body();
525 
526   // isl_ast_node_for_is_degenerate(For)
527   //
528   // TODO: For degenerated loops we could generate a plain assignment.
529   //       However, for now we just reuse the logic for normal loops, which will
530   //       create a loop with a single iteration.
531 
532   isl::ast_expr Init = For.for_get_init();
533   isl::ast_expr Inc = For.for_get_inc();
534   isl::ast_expr Iterator = For.for_get_iterator();
535   isl::id IteratorID = Iterator.get_id();
536   isl::ast_expr UB = getUpperBound(For, Predicate);
537 
538   ValueLB = ExprBuilder.create(Init.release());
539   ValueUB = ExprBuilder.create(UB.release());
540   ValueInc = ExprBuilder.create(Inc.release());
541 
542   MaxType = ExprBuilder.getType(Iterator.get());
543   MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
544   MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
545   MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
546 
547   if (MaxType != ValueLB->getType())
548     ValueLB = Builder.CreateSExt(ValueLB, MaxType);
549   if (MaxType != ValueUB->getType())
550     ValueUB = Builder.CreateSExt(ValueUB, MaxType);
551   if (MaxType != ValueInc->getType())
552     ValueInc = Builder.CreateSExt(ValueInc, MaxType);
553 
554   // If we can show that LB <Predicate> UB holds at least once, we can
555   // omit the GuardBB in front of the loop.
556   bool UseGuardBB =
557       !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB));
558   IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock,
559                   Predicate, &Annotator, MarkParallel, UseGuardBB,
560                   LoopVectorizerDisabled);
561   IDToValue[IteratorID.get()] = IV;
562 
563   create(Body.release());
564 
565   Annotator.popLoop(MarkParallel);
566 
567   IDToValue.erase(IDToValue.find(IteratorID.get()));
568 
569   Builder.SetInsertPoint(&ExitBlock->front());
570 
571   SequentialLoops++;
572 }
573 
574 /// Remove the BBs contained in a (sub)function from the dominator tree.
575 ///
576 /// This function removes the basic blocks that are part of a subfunction from
577 /// the dominator tree. Specifically, when generating code it may happen that at
578 /// some point the code generation continues in a new sub-function (e.g., when
579 /// generating OpenMP code). The basic blocks that are created in this
580 /// sub-function are then still part of the dominator tree of the original
581 /// function, such that the dominator tree reaches over function boundaries.
582 /// This is not only incorrect, but also causes crashes. This function now
583 /// removes from the dominator tree all basic blocks that are dominated (and
584 /// consequently reachable) from the entry block of this (sub)function.
585 ///
586 /// FIXME: A LLVM (function or region) pass should not touch anything outside of
587 /// the function/region it runs on. Hence, the pure need for this function shows
588 /// that we do not comply to this rule. At the moment, this does not cause any
589 /// issues, but we should be aware that such issues may appear. Unfortunately
590 /// the current LLVM pass infrastructure does not allow to make Polly a module
591 /// or call-graph pass to solve this issue, as such a pass would not have access
592 /// to the per-function analyses passes needed by Polly. A future pass manager
593 /// infrastructure is supposed to enable such kind of access possibly allowing
594 /// us to create a cleaner solution here.
595 ///
596 /// FIXME: Instead of adding the dominance information and then dropping it
597 /// later on, we should try to just not add it in the first place. This requires
598 /// some careful testing to make sure this does not break in interaction with
599 /// the SCEVBuilder and SplitBlock which may rely on the dominator tree or
600 /// which may try to update it.
601 ///
602 /// @param F The function which contains the BBs to removed.
603 /// @param DT The dominator tree from which to remove the BBs.
604 static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) {
605   DomTreeNode *N = DT.getNode(&F->getEntryBlock());
606   std::vector<BasicBlock *> Nodes;
607 
608   // We can only remove an element from the dominator tree, if all its children
609   // have been removed. To ensure this we obtain the list of nodes to remove
610   // using a post-order tree traversal.
611   for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I)
612     Nodes.push_back(I->getBlock());
613 
614   for (BasicBlock *BB : Nodes)
615     DT.eraseNode(BB);
616 }
617 
618 void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
619   isl_ast_node *Body;
620   isl_ast_expr *Init, *Inc, *Iterator, *UB;
621   isl_id *IteratorID;
622   Value *ValueLB, *ValueUB, *ValueInc;
623   Type *MaxType;
624   Value *IV;
625   CmpInst::Predicate Predicate;
626 
627   // The preamble of parallel code interacts different than normal code with
628   // e.g., scalar initialization. Therefore, we ensure the parallel code is
629   // separated from the last basic block.
630   BasicBlock *ParBB = SplitBlock(Builder.GetInsertBlock(),
631                                  &*Builder.GetInsertPoint(), &DT, &LI);
632   ParBB->setName("polly.parallel.for");
633   Builder.SetInsertPoint(&ParBB->front());
634 
635   Body = isl_ast_node_for_get_body(For);
636   Init = isl_ast_node_for_get_init(For);
637   Inc = isl_ast_node_for_get_inc(For);
638   Iterator = isl_ast_node_for_get_iterator(For);
639   IteratorID = isl_ast_expr_get_id(Iterator);
640   UB = getUpperBound(isl::manage_copy(For), Predicate).release();
641 
642   ValueLB = ExprBuilder.create(Init);
643   ValueUB = ExprBuilder.create(UB);
644   ValueInc = ExprBuilder.create(Inc);
645 
646   // OpenMP always uses SLE. In case the isl generated AST uses a SLT
647   // expression, we need to adjust the loop bound by one.
648   if (Predicate == CmpInst::ICMP_SLT)
649     ValueUB = Builder.CreateAdd(
650         ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType()));
651 
652   MaxType = ExprBuilder.getType(Iterator);
653   MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
654   MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
655   MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
656 
657   if (MaxType != ValueLB->getType())
658     ValueLB = Builder.CreateSExt(ValueLB, MaxType);
659   if (MaxType != ValueUB->getType())
660     ValueUB = Builder.CreateSExt(ValueUB, MaxType);
661   if (MaxType != ValueInc->getType())
662     ValueInc = Builder.CreateSExt(ValueInc, MaxType);
663 
664   BasicBlock::iterator LoopBody;
665 
666   SetVector<Value *> SubtreeValues;
667   SetVector<const Loop *> Loops;
668 
669   getReferencesInSubtree(For, SubtreeValues, Loops);
670 
671   // Create for all loops we depend on values that contain the current loop
672   // iteration. These values are necessary to generate code for SCEVs that
673   // depend on such loops. As a result we need to pass them to the subfunction.
674   // See [Code generation of induction variables of loops outside Scops]
675   for (const Loop *L : Loops) {
676     Value *LoopInductionVar = materializeNonScopLoopInductionVariable(L);
677     SubtreeValues.insert(LoopInductionVar);
678   }
679 
680   ValueMapT NewValues;
681 
682   std::unique_ptr<ParallelLoopGenerator> ParallelLoopGenPtr;
683 
684   switch (PollyOmpBackend) {
685   case OpenMPBackend::GNU:
686     ParallelLoopGenPtr.reset(
687         new ParallelLoopGeneratorGOMP(Builder, LI, DT, DL));
688     break;
689   case OpenMPBackend::LLVM:
690     ParallelLoopGenPtr.reset(new ParallelLoopGeneratorKMP(Builder, LI, DT, DL));
691     break;
692   }
693 
694   IV = ParallelLoopGenPtr->createParallelLoop(
695       ValueLB, ValueUB, ValueInc, SubtreeValues, NewValues, &LoopBody);
696   BasicBlock::iterator AfterLoop = Builder.GetInsertPoint();
697   Builder.SetInsertPoint(&*LoopBody);
698 
699   // Remember the parallel subfunction
700   ParallelSubfunctions.push_back(LoopBody->getFunction());
701 
702   // Save the current values.
703   auto ValueMapCopy = ValueMap;
704   IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue;
705 
706   updateValues(NewValues);
707   IDToValue[IteratorID] = IV;
708 
709   ValueMapT NewValuesReverse;
710 
711   for (auto P : NewValues)
712     NewValuesReverse[P.second] = P.first;
713 
714   Annotator.addAlternativeAliasBases(NewValuesReverse);
715 
716   create(Body);
717 
718   Annotator.resetAlternativeAliasBases();
719   // Restore the original values.
720   ValueMap = ValueMapCopy;
721   IDToValue = IDToValueCopy;
722 
723   Builder.SetInsertPoint(&*AfterLoop);
724   removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT);
725 
726   for (const Loop *L : Loops)
727     OutsideLoopIterations.erase(L);
728 
729   isl_ast_node_free(For);
730   isl_ast_expr_free(Iterator);
731   isl_id_free(IteratorID);
732 
733   ParallelLoops++;
734 }
735 
736 /// Return whether any of @p Node's statements contain partial accesses.
737 ///
738 /// Partial accesses are not supported by Polly's vector code generator.
739 static bool hasPartialAccesses(__isl_take isl_ast_node *Node) {
740   return isl_ast_node_foreach_descendant_top_down(
741              Node,
742              [](isl_ast_node *Node, void *User) -> isl_bool {
743                if (isl_ast_node_get_type(Node) != isl_ast_node_user)
744                  return isl_bool_true;
745 
746                isl::ast_expr Expr =
747                    isl::manage(isl_ast_node_user_get_expr(Node));
748                isl::ast_expr StmtExpr = Expr.get_op_arg(0);
749                isl::id Id = StmtExpr.get_id();
750 
751                ScopStmt *Stmt =
752                    static_cast<ScopStmt *>(isl_id_get_user(Id.get()));
753                isl::set StmtDom = Stmt->getDomain();
754                for (auto *MA : *Stmt) {
755                  if (MA->isLatestPartialAccess())
756                    return isl_bool_error;
757                }
758                return isl_bool_true;
759              },
760              nullptr) == isl_stat_error;
761 }
762 
763 void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
764   bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
765 
766   if (Vector && IslAstInfo::isInnermostParallel(For) &&
767       !IslAstInfo::isReductionParallel(For)) {
768     int VectorWidth = getNumberOfIterations(isl::manage_copy(For));
769     if (1 < VectorWidth && VectorWidth <= 16 && !hasPartialAccesses(For)) {
770       createForVector(For, VectorWidth);
771       return;
772     }
773   }
774 
775   if (IslAstInfo::isExecutedInParallel(For)) {
776     createForParallel(For);
777     return;
778   }
779   bool Parallel =
780       (IslAstInfo::isParallel(For) && !IslAstInfo::isReductionParallel(For));
781   createForSequential(isl::manage(For), Parallel);
782 }
783 
784 void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
785   isl_ast_expr *Cond = isl_ast_node_if_get_cond(If);
786 
787   Function *F = Builder.GetInsertBlock()->getParent();
788   LLVMContext &Context = F->getContext();
789 
790   BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
791                                   &*Builder.GetInsertPoint(), &DT, &LI);
792   CondBB->setName("polly.cond");
793   BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
794   MergeBB->setName("polly.merge");
795   BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F);
796   BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F);
797 
798   DT.addNewBlock(ThenBB, CondBB);
799   DT.addNewBlock(ElseBB, CondBB);
800   DT.changeImmediateDominator(MergeBB, CondBB);
801 
802   Loop *L = LI.getLoopFor(CondBB);
803   if (L) {
804     L->addBasicBlockToLoop(ThenBB, LI);
805     L->addBasicBlockToLoop(ElseBB, LI);
806   }
807 
808   CondBB->getTerminator()->eraseFromParent();
809 
810   Builder.SetInsertPoint(CondBB);
811   Value *Predicate = ExprBuilder.create(Cond);
812   Builder.CreateCondBr(Predicate, ThenBB, ElseBB);
813   Builder.SetInsertPoint(ThenBB);
814   Builder.CreateBr(MergeBB);
815   Builder.SetInsertPoint(ElseBB);
816   Builder.CreateBr(MergeBB);
817   Builder.SetInsertPoint(&ThenBB->front());
818 
819   create(isl_ast_node_if_get_then(If));
820 
821   Builder.SetInsertPoint(&ElseBB->front());
822 
823   if (isl_ast_node_if_has_else(If))
824     create(isl_ast_node_if_get_else(If));
825 
826   Builder.SetInsertPoint(&MergeBB->front());
827 
828   isl_ast_node_free(If);
829 
830   IfConditions++;
831 }
832 
833 __isl_give isl_id_to_ast_expr *
834 IslNodeBuilder::createNewAccesses(ScopStmt *Stmt,
835                                   __isl_keep isl_ast_node *Node) {
836   isl_id_to_ast_expr *NewAccesses =
837       isl_id_to_ast_expr_alloc(Stmt->getParent()->getIslCtx().get(), 0);
838 
839   auto *Build = IslAstInfo::getBuild(Node);
840   assert(Build && "Could not obtain isl_ast_build from user node");
841   Stmt->setAstBuild(isl::manage_copy(Build));
842 
843   for (auto *MA : *Stmt) {
844     if (!MA->hasNewAccessRelation()) {
845       if (PollyGenerateExpressions) {
846         if (!MA->isAffine())
847           continue;
848         if (MA->getLatestScopArrayInfo()->getBasePtrOriginSAI())
849           continue;
850 
851         auto *BasePtr =
852             dyn_cast<Instruction>(MA->getLatestScopArrayInfo()->getBasePtr());
853         if (BasePtr && Stmt->getParent()->getRegion().contains(BasePtr))
854           continue;
855       } else {
856         continue;
857       }
858     }
859     assert(MA->isAffine() &&
860            "Only affine memory accesses can be code generated");
861 
862     auto Schedule = isl_ast_build_get_schedule(Build);
863 
864 #ifndef NDEBUG
865     if (MA->isRead()) {
866       auto Dom = Stmt->getDomain().release();
867       auto SchedDom = isl_set_from_union_set(
868           isl_union_map_domain(isl_union_map_copy(Schedule)));
869       auto AccDom = isl_map_domain(MA->getAccessRelation().release());
870       Dom = isl_set_intersect_params(Dom,
871                                      Stmt->getParent()->getContext().release());
872       SchedDom = isl_set_intersect_params(
873           SchedDom, Stmt->getParent()->getContext().release());
874       assert(isl_set_is_subset(SchedDom, AccDom) &&
875              "Access relation not defined on full schedule domain");
876       assert(isl_set_is_subset(Dom, AccDom) &&
877              "Access relation not defined on full domain");
878       isl_set_free(AccDom);
879       isl_set_free(SchedDom);
880       isl_set_free(Dom);
881     }
882 #endif
883 
884     auto PWAccRel =
885         MA->applyScheduleToAccessRelation(isl::manage(Schedule)).release();
886 
887     // isl cannot generate an index expression for access-nothing accesses.
888     isl::set AccDomain =
889         isl::manage(isl_pw_multi_aff_domain(isl_pw_multi_aff_copy(PWAccRel)));
890     isl::set Context = S.getContext();
891     AccDomain = AccDomain.intersect_params(Context);
892     if (AccDomain.is_empty()) {
893       isl_pw_multi_aff_free(PWAccRel);
894       continue;
895     }
896 
897     auto AccessExpr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
898     NewAccesses =
899         isl_id_to_ast_expr_set(NewAccesses, MA->getId().release(), AccessExpr);
900   }
901 
902   return NewAccesses;
903 }
904 
905 void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr *Expr,
906                                          ScopStmt *Stmt, LoopToScevMapT &LTS) {
907   assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op &&
908          "Expression of type 'op' expected");
909   assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call &&
910          "Operation of type 'call' expected");
911   for (int i = 0; i < isl_ast_expr_get_op_n_arg(Expr) - 1; ++i) {
912     isl_ast_expr *SubExpr;
913     Value *V;
914 
915     SubExpr = isl_ast_expr_get_op_arg(Expr, i + 1);
916     V = ExprBuilder.create(SubExpr);
917     ScalarEvolution *SE = Stmt->getParent()->getSE();
918     LTS[Stmt->getLoopForDimension(i)] = SE->getUnknown(V);
919   }
920 
921   isl_ast_expr_free(Expr);
922 }
923 
924 void IslNodeBuilder::createSubstitutionsVector(
925     __isl_take isl_ast_expr *Expr, ScopStmt *Stmt,
926     std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS,
927     __isl_take isl_id *IteratorID) {
928   int i = 0;
929 
930   Value *OldValue = IDToValue[IteratorID];
931   for (Value *IV : IVS) {
932     IDToValue[IteratorID] = IV;
933     createSubstitutions(isl_ast_expr_copy(Expr), Stmt, VLTS[i]);
934     i++;
935   }
936 
937   IDToValue[IteratorID] = OldValue;
938   isl_id_free(IteratorID);
939   isl_ast_expr_free(Expr);
940 }
941 
942 void IslNodeBuilder::generateCopyStmt(
943     ScopStmt *Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
944   assert(Stmt->size() == 2);
945   auto ReadAccess = Stmt->begin();
946   auto WriteAccess = ReadAccess++;
947   assert((*ReadAccess)->isRead() && (*WriteAccess)->isMustWrite());
948   assert((*ReadAccess)->getElementType() == (*WriteAccess)->getElementType() &&
949          "Accesses use the same data type");
950   assert((*ReadAccess)->isArrayKind() && (*WriteAccess)->isArrayKind());
951   auto *AccessExpr =
952       isl_id_to_ast_expr_get(NewAccesses, (*ReadAccess)->getId().release());
953   auto *LoadValue = ExprBuilder.create(AccessExpr);
954   AccessExpr =
955       isl_id_to_ast_expr_get(NewAccesses, (*WriteAccess)->getId().release());
956   auto *StoreAddr = ExprBuilder.createAccessAddress(AccessExpr);
957   Builder.CreateStore(LoadValue, StoreAddr);
958 }
959 
960 Value *IslNodeBuilder::materializeNonScopLoopInductionVariable(const Loop *L) {
961   assert(OutsideLoopIterations.find(L) == OutsideLoopIterations.end() &&
962          "trying to materialize loop induction variable twice");
963   const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)),
964                                           SE.getUnknown(Builder.getInt64(1)), L,
965                                           SCEV::FlagAnyWrap);
966   Value *V = generateSCEV(OuterLIV);
967   OutsideLoopIterations[L] = SE.getUnknown(V);
968   return V;
969 }
970 
971 void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) {
972   LoopToScevMapT LTS;
973   isl_id *Id;
974   ScopStmt *Stmt;
975 
976   isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
977   isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
978   Id = isl_ast_expr_get_id(StmtExpr);
979   isl_ast_expr_free(StmtExpr);
980 
981   LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end());
982 
983   Stmt = (ScopStmt *)isl_id_get_user(Id);
984   auto *NewAccesses = createNewAccesses(Stmt, User);
985   if (Stmt->isCopyStmt()) {
986     generateCopyStmt(Stmt, NewAccesses);
987     isl_ast_expr_free(Expr);
988   } else {
989     createSubstitutions(Expr, Stmt, LTS);
990 
991     if (Stmt->isBlockStmt())
992       BlockGen.copyStmt(*Stmt, LTS, NewAccesses);
993     else
994       RegionGen.copyStmt(*Stmt, LTS, NewAccesses);
995   }
996 
997   isl_id_to_ast_expr_free(NewAccesses);
998   isl_ast_node_free(User);
999   isl_id_free(Id);
1000 }
1001 
1002 void IslNodeBuilder::createBlock(__isl_take isl_ast_node *Block) {
1003   isl_ast_node_list *List = isl_ast_node_block_get_children(Block);
1004 
1005   for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i)
1006     create(isl_ast_node_list_get_ast_node(List, i));
1007 
1008   isl_ast_node_free(Block);
1009   isl_ast_node_list_free(List);
1010 }
1011 
1012 void IslNodeBuilder::create(__isl_take isl_ast_node *Node) {
1013   switch (isl_ast_node_get_type(Node)) {
1014   case isl_ast_node_error:
1015     llvm_unreachable("code generation error");
1016   case isl_ast_node_mark:
1017     createMark(Node);
1018     return;
1019   case isl_ast_node_for:
1020     createFor(Node);
1021     return;
1022   case isl_ast_node_if:
1023     createIf(Node);
1024     return;
1025   case isl_ast_node_user:
1026     createUser(Node);
1027     return;
1028   case isl_ast_node_block:
1029     createBlock(Node);
1030     return;
1031   }
1032 
1033   llvm_unreachable("Unknown isl_ast_node type");
1034 }
1035 
1036 bool IslNodeBuilder::materializeValue(isl_id *Id) {
1037   // If the Id is already mapped, skip it.
1038   if (!IDToValue.count(Id)) {
1039     auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id);
1040     Value *V = nullptr;
1041 
1042     // Parameters could refer to invariant loads that need to be
1043     // preloaded before we can generate code for the parameter. Thus,
1044     // check if any value referred to in ParamSCEV is an invariant load
1045     // and if so make sure its equivalence class is preloaded.
1046     SetVector<Value *> Values;
1047     findValues(ParamSCEV, SE, Values);
1048     for (auto *Val : Values) {
1049       // Check if the value is an instruction in a dead block within the SCoP
1050       // and if so do not code generate it.
1051       if (auto *Inst = dyn_cast<Instruction>(Val)) {
1052         if (S.contains(Inst)) {
1053           bool IsDead = true;
1054 
1055           // Check for "undef" loads first, then if there is a statement for
1056           // the parent of Inst and lastly if the parent of Inst has an empty
1057           // domain. In the first and last case the instruction is dead but if
1058           // there is a statement or the domain is not empty Inst is not dead.
1059           auto MemInst = MemAccInst::dyn_cast(Inst);
1060           auto Address = MemInst ? MemInst.getPointerOperand() : nullptr;
1061           if (Address && SE.getUnknown(UndefValue::get(Address->getType())) ==
1062                              SE.getPointerBase(SE.getSCEV(Address))) {
1063           } else if (S.getStmtFor(Inst)) {
1064             IsDead = false;
1065           } else {
1066             auto *Domain = S.getDomainConditions(Inst->getParent()).release();
1067             IsDead = isl_set_is_empty(Domain);
1068             isl_set_free(Domain);
1069           }
1070 
1071           if (IsDead) {
1072             V = UndefValue::get(ParamSCEV->getType());
1073             break;
1074           }
1075         }
1076       }
1077 
1078       if (auto *IAClass = S.lookupInvariantEquivClass(Val)) {
1079         // Check if this invariant access class is empty, hence if we never
1080         // actually added a loads instruction to it. In that case it has no
1081         // (meaningful) users and we should not try to code generate it.
1082         if (IAClass->InvariantAccesses.empty())
1083           V = UndefValue::get(ParamSCEV->getType());
1084 
1085         if (!preloadInvariantEquivClass(*IAClass)) {
1086           isl_id_free(Id);
1087           return false;
1088         }
1089       }
1090     }
1091 
1092     V = V ? V : generateSCEV(ParamSCEV);
1093     IDToValue[Id] = V;
1094   }
1095 
1096   isl_id_free(Id);
1097   return true;
1098 }
1099 
1100 bool IslNodeBuilder::materializeParameters(isl_set *Set) {
1101   for (unsigned i = 0, e = isl_set_dim(Set, isl_dim_param); i < e; ++i) {
1102     if (!isl_set_involves_dims(Set, isl_dim_param, i, 1))
1103       continue;
1104     isl_id *Id = isl_set_get_dim_id(Set, isl_dim_param, i);
1105     if (!materializeValue(Id))
1106       return false;
1107   }
1108   return true;
1109 }
1110 
1111 bool IslNodeBuilder::materializeParameters() {
1112   for (const SCEV *Param : S.parameters()) {
1113     isl_id *Id = S.getIdForParam(Param).release();
1114     if (!materializeValue(Id))
1115       return false;
1116   }
1117   return true;
1118 }
1119 
1120 /// Generate the computation of the size of the outermost dimension from the
1121 /// Fortran array descriptor (in this case, `@g_arr`). The final `%size`
1122 /// contains the size of the array.
1123 ///
1124 /// %arrty = type { i8*, i64, i64, [3 x %desc.dimensionty] }
1125 /// %desc.dimensionty = type { i64, i64, i64 }
1126 /// @g_arr = global %arrty zeroinitializer, align 32
1127 /// ...
1128 /// %0 = load i64, i64* getelementptr inbounds
1129 ///                       (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 2)
1130 /// %1 = load i64, i64* getelementptr inbounds
1131 ///                      (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 1)
1132 /// %2 = sub nsw i64 %0, %1
1133 /// %size = add nsw i64 %2, 1
1134 static Value *buildFADOutermostDimensionLoad(Value *GlobalDescriptor,
1135                                              PollyIRBuilder &Builder,
1136                                              std::string ArrayName) {
1137   assert(GlobalDescriptor && "invalid global descriptor given");
1138 
1139   Value *endIdx[4] = {Builder.getInt64(0), Builder.getInt32(3),
1140                       Builder.getInt64(0), Builder.getInt32(2)};
1141   Value *endPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, endIdx,
1142                                             ArrayName + "_end_ptr");
1143   Value *end = Builder.CreateLoad(endPtr, ArrayName + "_end");
1144 
1145   Value *beginIdx[4] = {Builder.getInt64(0), Builder.getInt32(3),
1146                         Builder.getInt64(0), Builder.getInt32(1)};
1147   Value *beginPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, beginIdx,
1148                                               ArrayName + "_begin_ptr");
1149   Value *begin = Builder.CreateLoad(beginPtr, ArrayName + "_begin");
1150 
1151   Value *size =
1152       Builder.CreateNSWSub(end, begin, ArrayName + "_end_begin_delta");
1153   Type *endType = dyn_cast<IntegerType>(end->getType());
1154   assert(endType && "expected type of end to be integral");
1155 
1156   size = Builder.CreateNSWAdd(end,
1157                               ConstantInt::get(endType, 1, /* signed = */ true),
1158                               ArrayName + "_size");
1159 
1160   return size;
1161 }
1162 
1163 bool IslNodeBuilder::materializeFortranArrayOutermostDimension() {
1164   for (ScopArrayInfo *Array : S.arrays()) {
1165     if (Array->getNumberOfDimensions() == 0)
1166       continue;
1167 
1168     Value *FAD = Array->getFortranArrayDescriptor();
1169     if (!FAD)
1170       continue;
1171 
1172     isl_pw_aff *ParametricPwAff = Array->getDimensionSizePw(0).release();
1173     assert(ParametricPwAff && "parametric pw_aff corresponding "
1174                               "to outermost dimension does not "
1175                               "exist");
1176 
1177     isl_id *Id = isl_pw_aff_get_dim_id(ParametricPwAff, isl_dim_param, 0);
1178     isl_pw_aff_free(ParametricPwAff);
1179 
1180     assert(Id && "pw_aff is not parametric");
1181 
1182     if (IDToValue.count(Id)) {
1183       isl_id_free(Id);
1184       continue;
1185     }
1186 
1187     Value *FinalValue =
1188         buildFADOutermostDimensionLoad(FAD, Builder, Array->getName());
1189     assert(FinalValue && "unable to build Fortran array "
1190                          "descriptor load of outermost dimension");
1191     IDToValue[Id] = FinalValue;
1192     isl_id_free(Id);
1193   }
1194   return true;
1195 }
1196 
1197 Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange,
1198                                               isl_ast_build *Build,
1199                                               Instruction *AccInst) {
1200   isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange);
1201   isl_ast_expr *Access =
1202       isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
1203   auto *Address = isl_ast_expr_address_of(Access);
1204   auto *AddressValue = ExprBuilder.create(Address);
1205   Value *PreloadVal;
1206 
1207   // Correct the type as the SAI might have a different type than the user
1208   // expects, especially if the base pointer is a struct.
1209   Type *Ty = AccInst->getType();
1210 
1211   auto *Ptr = AddressValue;
1212   auto Name = Ptr->getName();
1213   auto AS = Ptr->getType()->getPointerAddressSpace();
1214   Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(AS), Name + ".cast");
1215   PreloadVal = Builder.CreateLoad(Ptr, Name + ".load");
1216   if (LoadInst *PreloadInst = dyn_cast<LoadInst>(PreloadVal))
1217     PreloadInst->setAlignment(dyn_cast<LoadInst>(AccInst)->getAlignment());
1218 
1219   // TODO: This is only a hot fix for SCoP sequences that use the same load
1220   //       instruction contained and hoisted by one of the SCoPs.
1221   if (SE.isSCEVable(Ty))
1222     SE.forgetValue(AccInst);
1223 
1224   return PreloadVal;
1225 }
1226 
1227 Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
1228                                             isl_set *Domain) {
1229   isl_set *AccessRange = isl_map_range(MA.getAddressFunction().release());
1230   AccessRange = isl_set_gist_params(AccessRange, S.getContext().release());
1231 
1232   if (!materializeParameters(AccessRange)) {
1233     isl_set_free(AccessRange);
1234     isl_set_free(Domain);
1235     return nullptr;
1236   }
1237 
1238   auto *Build =
1239       isl_ast_build_from_context(isl_set_universe(S.getParamSpace().release()));
1240   isl_set *Universe = isl_set_universe(isl_set_get_space(Domain));
1241   bool AlwaysExecuted = isl_set_is_equal(Domain, Universe);
1242   isl_set_free(Universe);
1243 
1244   Instruction *AccInst = MA.getAccessInstruction();
1245   Type *AccInstTy = AccInst->getType();
1246 
1247   Value *PreloadVal = nullptr;
1248   if (AlwaysExecuted) {
1249     PreloadVal = preloadUnconditionally(AccessRange, Build, AccInst);
1250     isl_ast_build_free(Build);
1251     isl_set_free(Domain);
1252     return PreloadVal;
1253   }
1254 
1255   if (!materializeParameters(Domain)) {
1256     isl_ast_build_free(Build);
1257     isl_set_free(AccessRange);
1258     isl_set_free(Domain);
1259     return nullptr;
1260   }
1261 
1262   isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain);
1263   Domain = nullptr;
1264 
1265   ExprBuilder.setTrackOverflow(true);
1266   Value *Cond = ExprBuilder.create(DomainCond);
1267   Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(),
1268                                               "polly.preload.cond.overflown");
1269   Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result");
1270   ExprBuilder.setTrackOverflow(false);
1271 
1272   if (!Cond->getType()->isIntegerTy(1))
1273     Cond = Builder.CreateIsNotNull(Cond);
1274 
1275   BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
1276                                   &*Builder.GetInsertPoint(), &DT, &LI);
1277   CondBB->setName("polly.preload.cond");
1278 
1279   BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
1280   MergeBB->setName("polly.preload.merge");
1281 
1282   Function *F = Builder.GetInsertBlock()->getParent();
1283   LLVMContext &Context = F->getContext();
1284   BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F);
1285 
1286   DT.addNewBlock(ExecBB, CondBB);
1287   if (Loop *L = LI.getLoopFor(CondBB))
1288     L->addBasicBlockToLoop(ExecBB, LI);
1289 
1290   auto *CondBBTerminator = CondBB->getTerminator();
1291   Builder.SetInsertPoint(CondBBTerminator);
1292   Builder.CreateCondBr(Cond, ExecBB, MergeBB);
1293   CondBBTerminator->eraseFromParent();
1294 
1295   Builder.SetInsertPoint(ExecBB);
1296   Builder.CreateBr(MergeBB);
1297 
1298   Builder.SetInsertPoint(ExecBB->getTerminator());
1299   Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInst);
1300   Builder.SetInsertPoint(MergeBB->getTerminator());
1301   auto *MergePHI = Builder.CreatePHI(
1302       AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge");
1303   PreloadVal = MergePHI;
1304 
1305   if (!PreAccInst) {
1306     PreloadVal = nullptr;
1307     PreAccInst = UndefValue::get(AccInstTy);
1308   }
1309 
1310   MergePHI->addIncoming(PreAccInst, ExecBB);
1311   MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB);
1312 
1313   isl_ast_build_free(Build);
1314   return PreloadVal;
1315 }
1316 
1317 bool IslNodeBuilder::preloadInvariantEquivClass(
1318     InvariantEquivClassTy &IAClass) {
1319   // For an equivalence class of invariant loads we pre-load the representing
1320   // element with the unified execution context. However, we have to map all
1321   // elements of the class to the one preloaded load as they are referenced
1322   // during the code generation and therefor need to be mapped.
1323   const MemoryAccessList &MAs = IAClass.InvariantAccesses;
1324   if (MAs.empty())
1325     return true;
1326 
1327   MemoryAccess *MA = MAs.front();
1328   assert(MA->isArrayKind() && MA->isRead());
1329 
1330   // If the access function was already mapped, the preload of this equivalence
1331   // class was triggered earlier already and doesn't need to be done again.
1332   if (ValueMap.count(MA->getAccessInstruction()))
1333     return true;
1334 
1335   // Check for recursion which can be caused by additional constraints, e.g.,
1336   // non-finite loop constraints. In such a case we have to bail out and insert
1337   // a "false" runtime check that will cause the original code to be executed.
1338   auto PtrId = std::make_pair(IAClass.IdentifyingPointer, IAClass.AccessType);
1339   if (!PreloadedPtrs.insert(PtrId).second)
1340     return false;
1341 
1342   // The execution context of the IAClass.
1343   isl::set &ExecutionCtx = IAClass.ExecutionContext;
1344 
1345   // If the base pointer of this class is dependent on another one we have to
1346   // make sure it was preloaded already.
1347   auto *SAI = MA->getScopArrayInfo();
1348   if (auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) {
1349     if (!preloadInvariantEquivClass(*BaseIAClass))
1350       return false;
1351 
1352     // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and
1353     // we need to refine the ExecutionCtx.
1354     isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext;
1355     ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx);
1356   }
1357 
1358   // If the size of a dimension is dependent on another class, make sure it is
1359   // preloaded.
1360   for (unsigned i = 1, e = SAI->getNumberOfDimensions(); i < e; ++i) {
1361     const SCEV *Dim = SAI->getDimensionSize(i);
1362     SetVector<Value *> Values;
1363     findValues(Dim, SE, Values);
1364     for (auto *Val : Values) {
1365       if (auto *BaseIAClass = S.lookupInvariantEquivClass(Val)) {
1366         if (!preloadInvariantEquivClass(*BaseIAClass))
1367           return false;
1368 
1369         // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx
1370         // and we need to refine the ExecutionCtx.
1371         isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext;
1372         ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx);
1373       }
1374     }
1375   }
1376 
1377   Instruction *AccInst = MA->getAccessInstruction();
1378   Type *AccInstTy = AccInst->getType();
1379 
1380   Value *PreloadVal = preloadInvariantLoad(*MA, ExecutionCtx.copy());
1381   if (!PreloadVal)
1382     return false;
1383 
1384   for (const MemoryAccess *MA : MAs) {
1385     Instruction *MAAccInst = MA->getAccessInstruction();
1386     assert(PreloadVal->getType() == MAAccInst->getType());
1387     ValueMap[MAAccInst] = PreloadVal;
1388   }
1389 
1390   if (SE.isSCEVable(AccInstTy)) {
1391     isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)).release();
1392     if (ParamId)
1393       IDToValue[ParamId] = PreloadVal;
1394     isl_id_free(ParamId);
1395   }
1396 
1397   BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
1398   auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(),
1399                                 AccInst->getName() + ".preload.s2a");
1400   Alloca->insertBefore(&*EntryBB->getFirstInsertionPt());
1401   Builder.CreateStore(PreloadVal, Alloca);
1402   ValueMapT PreloadedPointer;
1403   PreloadedPointer[PreloadVal] = AccInst;
1404   Annotator.addAlternativeAliasBases(PreloadedPointer);
1405 
1406   for (auto *DerivedSAI : SAI->getDerivedSAIs()) {
1407     Value *BasePtr = DerivedSAI->getBasePtr();
1408 
1409     for (const MemoryAccess *MA : MAs) {
1410       // As the derived SAI information is quite coarse, any load from the
1411       // current SAI could be the base pointer of the derived SAI, however we
1412       // should only change the base pointer of the derived SAI if we actually
1413       // preloaded it.
1414       if (BasePtr == MA->getOriginalBaseAddr()) {
1415         assert(BasePtr->getType() == PreloadVal->getType());
1416         DerivedSAI->setBasePtr(PreloadVal);
1417       }
1418 
1419       // For scalar derived SAIs we remap the alloca used for the derived value.
1420       if (BasePtr == MA->getAccessInstruction())
1421         ScalarMap[DerivedSAI] = Alloca;
1422     }
1423   }
1424 
1425   for (const MemoryAccess *MA : MAs) {
1426     Instruction *MAAccInst = MA->getAccessInstruction();
1427     // Use the escape system to get the correct value to users outside the SCoP.
1428     BlockGenerator::EscapeUserVectorTy EscapeUsers;
1429     for (auto *U : MAAccInst->users())
1430       if (Instruction *UI = dyn_cast<Instruction>(U))
1431         if (!S.contains(UI))
1432           EscapeUsers.push_back(UI);
1433 
1434     if (EscapeUsers.empty())
1435       continue;
1436 
1437     EscapeMap[MA->getAccessInstruction()] =
1438         std::make_pair(Alloca, std::move(EscapeUsers));
1439   }
1440 
1441   return true;
1442 }
1443 
1444 void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) {
1445   for (auto &SAI : S.arrays()) {
1446     if (SAI->getBasePtr())
1447       continue;
1448 
1449     assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) &&
1450            "The size of the outermost dimension is used to declare newly "
1451            "created arrays that require memory allocation.");
1452 
1453     Type *NewArrayType = nullptr;
1454 
1455     // Get the size of the array = size(dim_1)*...*size(dim_n)
1456     uint64_t ArraySizeInt = 1;
1457     for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; i--) {
1458       auto *DimSize = SAI->getDimensionSize(i);
1459       unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize)
1460                                      ->getAPInt()
1461                                      .getLimitedValue();
1462 
1463       if (!NewArrayType)
1464         NewArrayType = SAI->getElementType();
1465 
1466       NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize);
1467       ArraySizeInt *= UnsignedDimSize;
1468     }
1469 
1470     if (SAI->isOnHeap()) {
1471       LLVMContext &Ctx = NewArrayType->getContext();
1472 
1473       // Get the IntPtrTy from the Datalayout
1474       auto IntPtrTy = DL.getIntPtrType(Ctx);
1475 
1476       // Get the size of the element type in bits
1477       unsigned Size = SAI->getElemSizeInBytes();
1478 
1479       // Insert the malloc call at polly.start
1480       auto InstIt = std::get<0>(StartExitBlocks)->getTerminator();
1481       auto *CreatedArray = CallInst::CreateMalloc(
1482           &*InstIt, IntPtrTy, SAI->getElementType(),
1483           ConstantInt::get(Type::getInt64Ty(Ctx), Size),
1484           ConstantInt::get(Type::getInt64Ty(Ctx), ArraySizeInt), nullptr,
1485           SAI->getName());
1486 
1487       SAI->setBasePtr(CreatedArray);
1488 
1489       // Insert the free call at polly.exiting
1490       CallInst::CreateFree(CreatedArray,
1491                            std::get<1>(StartExitBlocks)->getTerminator());
1492     } else {
1493       auto InstIt = Builder.GetInsertBlock()
1494                         ->getParent()
1495                         ->getEntryBlock()
1496                         .getTerminator();
1497 
1498       auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(),
1499                                           SAI->getName(), &*InstIt);
1500       CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize);
1501       SAI->setBasePtr(CreatedArray);
1502     }
1503   }
1504 }
1505 
1506 bool IslNodeBuilder::preloadInvariantLoads() {
1507   auto &InvariantEquivClasses = S.getInvariantAccesses();
1508   if (InvariantEquivClasses.empty())
1509     return true;
1510 
1511   BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(),
1512                                      &*Builder.GetInsertPoint(), &DT, &LI);
1513   PreLoadBB->setName("polly.preload.begin");
1514   Builder.SetInsertPoint(&PreLoadBB->front());
1515 
1516   for (auto &IAClass : InvariantEquivClasses)
1517     if (!preloadInvariantEquivClass(IAClass))
1518       return false;
1519 
1520   return true;
1521 }
1522 
1523 void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
1524   // Materialize values for the parameters of the SCoP.
1525   materializeParameters();
1526 
1527   // materialize the outermost dimension parameters for a Fortran array.
1528   // NOTE: materializeParameters() does not work since it looks through
1529   // the SCEVs. We don't have a corresponding SCEV for the array size
1530   // parameter
1531   materializeFortranArrayOutermostDimension();
1532 
1533   // Generate values for the current loop iteration for all surrounding loops.
1534   //
1535   // We may also reference loops outside of the scop which do not contain the
1536   // scop itself, but as the number of such scops may be arbitrarily large we do
1537   // not generate code for them here, but only at the point of code generation
1538   // where these values are needed.
1539   Loop *L = LI.getLoopFor(S.getEntry());
1540 
1541   while (L != nullptr && S.contains(L))
1542     L = L->getParentLoop();
1543 
1544   while (L != nullptr) {
1545     materializeNonScopLoopInductionVariable(L);
1546     L = L->getParentLoop();
1547   }
1548 
1549   isl_set_free(Context);
1550 }
1551 
1552 Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) {
1553   /// We pass the insert location of our Builder, as Polly ensures during IR
1554   /// generation that there is always a valid CFG into which instructions are
1555   /// inserted. As a result, the insertpoint is known to be always followed by a
1556   /// terminator instruction. This means the insert point may be specified by a
1557   /// terminator instruction, but it can never point to an ->end() iterator
1558   /// which does not have a corresponding instruction. Hence, dereferencing
1559   /// the insertpoint to obtain an instruction is known to be save.
1560   ///
1561   /// We also do not need to update the Builder here, as new instructions are
1562   /// always inserted _before_ the given InsertLocation. As a result, the
1563   /// insert location remains valid.
1564   assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() &&
1565          "Insert location points after last valid instruction");
1566   Instruction *InsertLocation = &*Builder.GetInsertPoint();
1567   return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(),
1568                        InsertLocation, &ValueMap,
1569                        StartBlock->getSinglePredecessor());
1570 }
1571 
1572 /// The AST expression we generate to perform the run-time check assumes
1573 /// computations on integer types of infinite size. As we only use 64-bit
1574 /// arithmetic we check for overflows, in case of which we set the result
1575 /// of this run-time check to false to be conservatively correct,
1576 Value *IslNodeBuilder::createRTC(isl_ast_expr *Condition) {
1577   auto ExprBuilder = getExprBuilder();
1578 
1579   // In case the AST expression has integers larger than 64 bit, bail out. The
1580   // resulting LLVM-IR will contain operations on types that use more than 64
1581   // bits. These are -- in case wrapping intrinsics are used -- translated to
1582   // runtime library calls that are not available on all systems (e.g., Android)
1583   // and consequently will result in linker errors.
1584   if (ExprBuilder.hasLargeInts(isl::manage_copy(Condition))) {
1585     isl_ast_expr_free(Condition);
1586     return Builder.getFalse();
1587   }
1588 
1589   ExprBuilder.setTrackOverflow(true);
1590   Value *RTC = ExprBuilder.create(Condition);
1591   if (!RTC->getType()->isIntegerTy(1))
1592     RTC = Builder.CreateIsNotNull(RTC);
1593   Value *OverflowHappened =
1594       Builder.CreateNot(ExprBuilder.getOverflowState(), "polly.rtc.overflown");
1595 
1596   if (PollyGenerateRTCPrint) {
1597     auto *F = Builder.GetInsertBlock()->getParent();
1598     RuntimeDebugBuilder::createCPUPrinter(
1599         Builder,
1600         "F: " + F->getName().str() + " R: " + S.getRegion().getNameStr() +
1601             "RTC: ",
1602         RTC, " Overflow: ", OverflowHappened,
1603         "\n"
1604         "  (0 failed, -1 succeeded)\n"
1605         "  (if one or both are 0 falling back to original code, if both are -1 "
1606         "executing Polly code)\n");
1607   }
1608 
1609   RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result");
1610   ExprBuilder.setTrackOverflow(false);
1611 
1612   if (!isa<ConstantInt>(RTC))
1613     VersionedScops++;
1614 
1615   return RTC;
1616 }
1617