1 //===- IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST -------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the IslNodeBuilder, a class to translate an isl AST into 11 // a LLVM-IR AST. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "polly/CodeGen/IslNodeBuilder.h" 16 #include "polly/CodeGen/BlockGenerators.h" 17 #include "polly/CodeGen/CodeGeneration.h" 18 #include "polly/CodeGen/IslAst.h" 19 #include "polly/CodeGen/IslExprBuilder.h" 20 #include "polly/CodeGen/LoopGenerators.h" 21 #include "polly/CodeGen/RuntimeDebugBuilder.h" 22 #include "polly/Config/config.h" 23 #include "polly/Options.h" 24 #include "polly/ScopInfo.h" 25 #include "polly/Support/GICHelper.h" 26 #include "polly/Support/ISLTools.h" 27 #include "polly/Support/SCEVValidator.h" 28 #include "polly/Support/ScopHelper.h" 29 #include "llvm/ADT/APInt.h" 30 #include "llvm/ADT/PostOrderIterator.h" 31 #include "llvm/ADT/SetVector.h" 32 #include "llvm/ADT/SmallPtrSet.h" 33 #include "llvm/ADT/Statistic.h" 34 #include "llvm/Analysis/LoopInfo.h" 35 #include "llvm/Analysis/RegionInfo.h" 36 #include "llvm/Analysis/ScalarEvolution.h" 37 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 38 #include "llvm/IR/BasicBlock.h" 39 #include "llvm/IR/Constant.h" 40 #include "llvm/IR/Constants.h" 41 #include "llvm/IR/DataLayout.h" 42 #include "llvm/IR/DerivedTypes.h" 43 #include "llvm/IR/Dominators.h" 44 #include "llvm/IR/Function.h" 45 #include "llvm/IR/InstrTypes.h" 46 #include "llvm/IR/Instruction.h" 47 #include "llvm/IR/Instructions.h" 48 #include "llvm/IR/Type.h" 49 #include "llvm/IR/Value.h" 50 #include "llvm/Support/Casting.h" 51 #include "llvm/Support/CommandLine.h" 52 #include "llvm/Support/ErrorHandling.h" 53 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 54 #include "isl/aff.h" 55 #include "isl/aff_type.h" 56 #include "isl/ast.h" 57 #include "isl/ast_build.h" 58 #include "isl/isl-noexceptions.h" 59 #include "isl/map.h" 60 #include "isl/set.h" 61 #include "isl/union_map.h" 62 #include "isl/union_set.h" 63 #include "isl/val.h" 64 #include <algorithm> 65 #include <cassert> 66 #include <cstdint> 67 #include <cstring> 68 #include <string> 69 #include <utility> 70 #include <vector> 71 72 using namespace llvm; 73 using namespace polly; 74 75 #define DEBUG_TYPE "polly-codegen" 76 77 STATISTIC(VersionedScops, "Number of SCoPs that required versioning."); 78 79 STATISTIC(SequentialLoops, "Number of generated sequential for-loops"); 80 STATISTIC(ParallelLoops, "Number of generated parallel for-loops"); 81 STATISTIC(VectorLoops, "Number of generated vector for-loops"); 82 STATISTIC(IfConditions, "Number of generated if-conditions"); 83 84 static cl::opt<bool> PollyGenerateRTCPrint( 85 "polly-codegen-emit-rtc-print", 86 cl::desc("Emit code that prints the runtime check result dynamically."), 87 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 88 89 // If this option is set we always use the isl AST generator to regenerate 90 // memory accesses. Without this option set we regenerate expressions using the 91 // original SCEV expressions and only generate new expressions in case the 92 // access relation has been changed and consequently must be regenerated. 93 static cl::opt<bool> PollyGenerateExpressions( 94 "polly-codegen-generate-expressions", 95 cl::desc("Generate AST expressions for unmodified and modified accesses"), 96 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 97 98 static cl::opt<int> PollyTargetFirstLevelCacheLineSize( 99 "polly-target-first-level-cache-line-size", 100 cl::desc("The size of the first level cache line size specified in bytes."), 101 cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory)); 102 103 isl::ast_expr IslNodeBuilder::getUpperBound(isl::ast_node For, 104 ICmpInst::Predicate &Predicate) { 105 isl::ast_expr Cond = For.for_get_cond(); 106 isl::ast_expr Iterator = For.for_get_iterator(); 107 assert(isl_ast_expr_get_type(Cond.get()) == isl_ast_expr_op && 108 "conditional expression is not an atomic upper bound"); 109 110 isl_ast_op_type OpType = isl_ast_expr_get_op_type(Cond.get()); 111 112 switch (OpType) { 113 case isl_ast_op_le: 114 Predicate = ICmpInst::ICMP_SLE; 115 break; 116 case isl_ast_op_lt: 117 Predicate = ICmpInst::ICMP_SLT; 118 break; 119 default: 120 llvm_unreachable("Unexpected comparison type in loop condition"); 121 } 122 123 isl::ast_expr Arg0 = Cond.get_op_arg(0); 124 125 assert(isl_ast_expr_get_type(Arg0.get()) == isl_ast_expr_id && 126 "conditional expression is not an atomic upper bound"); 127 128 isl::id UBID = Arg0.get_id(); 129 130 assert(isl_ast_expr_get_type(Iterator.get()) == isl_ast_expr_id && 131 "Could not get the iterator"); 132 133 isl::id IteratorID = Iterator.get_id(); 134 135 assert(UBID.get() == IteratorID.get() && 136 "conditional expression is not an atomic upper bound"); 137 138 return Cond.get_op_arg(1); 139 } 140 141 /// Return true if a return value of Predicate is true for the value represented 142 /// by passed isl_ast_expr_int. 143 static bool checkIslAstExprInt(__isl_take isl_ast_expr *Expr, 144 isl_bool (*Predicate)(__isl_keep isl_val *)) { 145 if (isl_ast_expr_get_type(Expr) != isl_ast_expr_int) { 146 isl_ast_expr_free(Expr); 147 return false; 148 } 149 auto ExprVal = isl_ast_expr_get_val(Expr); 150 isl_ast_expr_free(Expr); 151 if (Predicate(ExprVal) != isl_bool_true) { 152 isl_val_free(ExprVal); 153 return false; 154 } 155 isl_val_free(ExprVal); 156 return true; 157 } 158 159 int IslNodeBuilder::getNumberOfIterations(isl::ast_node For) { 160 assert(isl_ast_node_get_type(For.get()) == isl_ast_node_for); 161 isl::ast_node Body = For.for_get_body(); 162 163 // First, check if we can actually handle this code. 164 switch (isl_ast_node_get_type(Body.get())) { 165 case isl_ast_node_user: 166 break; 167 case isl_ast_node_block: { 168 isl::ast_node_list List = Body.block_get_children(); 169 for (isl::ast_node Node : List) { 170 isl_ast_node_type NodeType = isl_ast_node_get_type(Node.get()); 171 if (NodeType != isl_ast_node_user) 172 return -1; 173 } 174 break; 175 } 176 default: 177 return -1; 178 } 179 180 isl::ast_expr Init = For.for_get_init(); 181 if (!checkIslAstExprInt(Init.release(), isl_val_is_zero)) 182 return -1; 183 isl::ast_expr Inc = For.for_get_inc(); 184 if (!checkIslAstExprInt(Inc.release(), isl_val_is_one)) 185 return -1; 186 CmpInst::Predicate Predicate; 187 isl::ast_expr UB = getUpperBound(For, Predicate); 188 if (isl_ast_expr_get_type(UB.get()) != isl_ast_expr_int) 189 return -1; 190 isl::val UpVal = UB.get_val(); 191 int NumberIterations = UpVal.get_num_si(); 192 if (NumberIterations < 0) 193 return -1; 194 if (Predicate == CmpInst::ICMP_SLT) 195 return NumberIterations; 196 else 197 return NumberIterations + 1; 198 } 199 200 /// Extract the values and SCEVs needed to generate code for a block. 201 static int findReferencesInBlock(struct SubtreeReferences &References, 202 const ScopStmt *Stmt, BasicBlock *BB) { 203 for (Instruction &Inst : *BB) { 204 // Include invariant loads 205 if (isa<LoadInst>(Inst)) 206 if (Value *InvariantLoad = References.GlobalMap.lookup(&Inst)) 207 References.Values.insert(InvariantLoad); 208 209 for (Value *SrcVal : Inst.operands()) { 210 auto *Scope = References.LI.getLoopFor(BB); 211 if (canSynthesize(SrcVal, References.S, &References.SE, Scope)) { 212 References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope)); 213 continue; 214 } else if (Value *NewVal = References.GlobalMap.lookup(SrcVal)) 215 References.Values.insert(NewVal); 216 } 217 } 218 return 0; 219 } 220 221 void addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr, 222 bool CreateScalarRefs) { 223 auto &References = *static_cast<struct SubtreeReferences *>(UserPtr); 224 225 if (Stmt->isBlockStmt()) 226 findReferencesInBlock(References, Stmt, Stmt->getBasicBlock()); 227 else { 228 assert(Stmt->isRegionStmt() && 229 "Stmt was neither block nor region statement"); 230 for (BasicBlock *BB : Stmt->getRegion()->blocks()) 231 findReferencesInBlock(References, Stmt, BB); 232 } 233 234 for (auto &Access : *Stmt) { 235 if (References.ParamSpace) { 236 isl::space ParamSpace = Access->getLatestAccessRelation().get_space(); 237 (*References.ParamSpace) = 238 References.ParamSpace->align_params(ParamSpace); 239 } 240 241 if (Access->isLatestArrayKind()) { 242 auto *BasePtr = Access->getLatestScopArrayInfo()->getBasePtr(); 243 if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr)) 244 if (Stmt->getParent()->contains(OpInst)) 245 continue; 246 247 References.Values.insert(BasePtr); 248 continue; 249 } 250 251 if (CreateScalarRefs) 252 References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access)); 253 } 254 } 255 256 /// Extract the out-of-scop values and SCEVs referenced from a set describing 257 /// a ScopStmt. 258 /// 259 /// This includes the SCEVUnknowns referenced by the SCEVs used in the 260 /// statement and the base pointers of the memory accesses. For scalar 261 /// statements we force the generation of alloca memory locations and list 262 /// these locations in the set of out-of-scop values as well. 263 /// 264 /// @param Set A set which references the ScopStmt we are interested in. 265 /// @param UserPtr A void pointer that can be casted to a SubtreeReferences 266 /// structure. 267 static void addReferencesFromStmtSet(isl::set Set, 268 struct SubtreeReferences *UserPtr) { 269 isl::id Id = Set.get_tuple_id(); 270 auto *Stmt = static_cast<const ScopStmt *>(Id.get_user()); 271 return addReferencesFromStmt(Stmt, UserPtr); 272 } 273 274 /// Extract the out-of-scop values and SCEVs referenced from a union set 275 /// referencing multiple ScopStmts. 276 /// 277 /// This includes the SCEVUnknowns referenced by the SCEVs used in the 278 /// statement and the base pointers of the memory accesses. For scalar 279 /// statements we force the generation of alloca memory locations and list 280 /// these locations in the set of out-of-scop values as well. 281 /// 282 /// @param USet A union set referencing the ScopStmts we are interested 283 /// in. 284 /// @param References The SubtreeReferences data structure through which 285 /// results are returned and further information is 286 /// provided. 287 static void 288 addReferencesFromStmtUnionSet(isl::union_set USet, 289 struct SubtreeReferences &References) { 290 291 for (isl::set Set : USet.get_set_list()) 292 addReferencesFromStmtSet(Set, &References); 293 } 294 295 __isl_give isl_union_map * 296 IslNodeBuilder::getScheduleForAstNode(__isl_keep isl_ast_node *For) { 297 return IslAstInfo::getSchedule(For); 298 } 299 300 void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For, 301 SetVector<Value *> &Values, 302 SetVector<const Loop *> &Loops) { 303 SetVector<const SCEV *> SCEVs; 304 struct SubtreeReferences References = { 305 LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr}; 306 307 for (const auto &I : IDToValue) 308 Values.insert(I.second); 309 310 // NOTE: this is populated in IslNodeBuilder::addParameters 311 for (const auto &I : OutsideLoopIterations) 312 Values.insert(cast<SCEVUnknown>(I.second)->getValue()); 313 314 isl::union_set Schedule = 315 isl::manage(isl_union_map_domain(getScheduleForAstNode(For))); 316 addReferencesFromStmtUnionSet(Schedule, References); 317 318 for (const SCEV *Expr : SCEVs) { 319 findValues(Expr, SE, Values); 320 findLoops(Expr, Loops); 321 } 322 323 Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); }); 324 325 /// Note: Code generation of induction variables of loops outside Scops 326 /// 327 /// Remove loops that contain the scop or that are part of the scop, as they 328 /// are considered local. This leaves only loops that are before the scop, but 329 /// do not contain the scop itself. 330 /// We ignore loops perfectly contained in the Scop because these are already 331 /// generated at `IslNodeBuilder::addParameters`. These `Loops` are loops 332 /// whose induction variables are referred to by the Scop, but the Scop is not 333 /// fully contained in these Loops. Since there can be many of these, 334 /// we choose to codegen these on-demand. 335 /// @see IslNodeBuilder::materializeNonScopLoopInductionVariable. 336 Loops.remove_if([this](const Loop *L) { 337 return S.contains(L) || L->contains(S.getEntry()); 338 }); 339 340 // Contains Values that may need to be replaced with other values 341 // due to replacements from the ValueMap. We should make sure 342 // that we return correctly remapped values. 343 // NOTE: this code path is tested by: 344 // 1. test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll 345 // 2. test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll 346 SetVector<Value *> ReplacedValues; 347 for (Value *V : Values) { 348 ReplacedValues.insert(getLatestValue(V)); 349 } 350 Values = ReplacedValues; 351 } 352 353 void IslNodeBuilder::updateValues(ValueMapT &NewValues) { 354 SmallPtrSet<Value *, 5> Inserted; 355 356 for (const auto &I : IDToValue) { 357 IDToValue[I.first] = NewValues[I.second]; 358 Inserted.insert(I.second); 359 } 360 361 for (const auto &I : NewValues) { 362 if (Inserted.count(I.first)) 363 continue; 364 365 ValueMap[I.first] = I.second; 366 } 367 } 368 369 Value *IslNodeBuilder::getLatestValue(Value *Original) const { 370 auto It = ValueMap.find(Original); 371 if (It == ValueMap.end()) 372 return Original; 373 return It->second; 374 } 375 376 void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, 377 std::vector<Value *> &IVS, 378 __isl_take isl_id *IteratorID, 379 __isl_take isl_union_map *Schedule) { 380 isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); 381 isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); 382 isl_id *Id = isl_ast_expr_get_id(StmtExpr); 383 isl_ast_expr_free(StmtExpr); 384 ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id); 385 std::vector<LoopToScevMapT> VLTS(IVS.size()); 386 387 isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain().release()); 388 Schedule = isl_union_map_intersect_domain(Schedule, Domain); 389 isl_map *S = isl_map_from_union_map(Schedule); 390 391 auto *NewAccesses = createNewAccesses(Stmt, User); 392 createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID); 393 VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses); 394 isl_id_to_ast_expr_free(NewAccesses); 395 isl_map_free(S); 396 isl_id_free(Id); 397 isl_ast_node_free(User); 398 } 399 400 void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) { 401 auto *Id = isl_ast_node_mark_get_id(Node); 402 auto Child = isl_ast_node_mark_get_node(Node); 403 isl_ast_node_free(Node); 404 // If a child node of a 'SIMD mark' is a loop that has a single iteration, 405 // it will be optimized away and we should skip it. 406 if (strcmp(isl_id_get_name(Id), "SIMD") == 0 && 407 isl_ast_node_get_type(Child) == isl_ast_node_for) { 408 bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY; 409 int VectorWidth = getNumberOfIterations(isl::manage_copy(Child)); 410 if (Vector && 1 < VectorWidth && VectorWidth <= 16) 411 createForVector(Child, VectorWidth); 412 else 413 createForSequential(isl::manage(Child), true); 414 isl_id_free(Id); 415 return; 416 } 417 if (strcmp(isl_id_get_name(Id), "Inter iteration alias-free") == 0) { 418 auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id)); 419 Annotator.addInterIterationAliasFreeBasePtr(BasePtr); 420 } 421 create(Child); 422 isl_id_free(Id); 423 } 424 425 void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For, 426 int VectorWidth) { 427 isl_ast_node *Body = isl_ast_node_for_get_body(For); 428 isl_ast_expr *Init = isl_ast_node_for_get_init(For); 429 isl_ast_expr *Inc = isl_ast_node_for_get_inc(For); 430 isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For); 431 isl_id *IteratorID = isl_ast_expr_get_id(Iterator); 432 433 Value *ValueLB = ExprBuilder.create(Init); 434 Value *ValueInc = ExprBuilder.create(Inc); 435 436 Type *MaxType = ExprBuilder.getType(Iterator); 437 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); 438 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); 439 440 if (MaxType != ValueLB->getType()) 441 ValueLB = Builder.CreateSExt(ValueLB, MaxType); 442 if (MaxType != ValueInc->getType()) 443 ValueInc = Builder.CreateSExt(ValueInc, MaxType); 444 445 std::vector<Value *> IVS(VectorWidth); 446 IVS[0] = ValueLB; 447 448 for (int i = 1; i < VectorWidth; i++) 449 IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv"); 450 451 isl_union_map *Schedule = getScheduleForAstNode(For); 452 assert(Schedule && "For statement annotation does not contain its schedule"); 453 454 IDToValue[IteratorID] = ValueLB; 455 456 switch (isl_ast_node_get_type(Body)) { 457 case isl_ast_node_user: 458 createUserVector(Body, IVS, isl_id_copy(IteratorID), 459 isl_union_map_copy(Schedule)); 460 break; 461 case isl_ast_node_block: { 462 isl_ast_node_list *List = isl_ast_node_block_get_children(Body); 463 464 for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) 465 createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS, 466 isl_id_copy(IteratorID), isl_union_map_copy(Schedule)); 467 468 isl_ast_node_free(Body); 469 isl_ast_node_list_free(List); 470 break; 471 } 472 default: 473 isl_ast_node_dump(Body); 474 llvm_unreachable("Unhandled isl_ast_node in vectorizer"); 475 } 476 477 IDToValue.erase(IDToValue.find(IteratorID)); 478 isl_id_free(IteratorID); 479 isl_union_map_free(Schedule); 480 481 isl_ast_node_free(For); 482 isl_ast_expr_free(Iterator); 483 484 VectorLoops++; 485 } 486 487 /// Restore the initial ordering of dimensions of the band node 488 /// 489 /// In case the band node represents all the dimensions of the iteration 490 /// domain, recreate the band node to restore the initial ordering of the 491 /// dimensions. 492 /// 493 /// @param Node The band node to be modified. 494 /// @return The modified schedule node. 495 static bool IsLoopVectorizerDisabled(isl::ast_node Node) { 496 assert(isl_ast_node_get_type(Node.get()) == isl_ast_node_for); 497 auto Body = Node.for_get_body(); 498 if (isl_ast_node_get_type(Body.get()) != isl_ast_node_mark) 499 return false; 500 auto Id = Body.mark_get_id(); 501 if (strcmp(Id.get_name().c_str(), "Loop Vectorizer Disabled") == 0) 502 return true; 503 return false; 504 } 505 506 void IslNodeBuilder::createForSequential(isl::ast_node For, bool MarkParallel) { 507 Value *ValueLB, *ValueUB, *ValueInc; 508 Type *MaxType; 509 BasicBlock *ExitBlock; 510 Value *IV; 511 CmpInst::Predicate Predicate; 512 513 bool LoopVectorizerDisabled = IsLoopVectorizerDisabled(For); 514 515 isl::ast_node Body = For.for_get_body(); 516 517 // isl_ast_node_for_is_degenerate(For) 518 // 519 // TODO: For degenerated loops we could generate a plain assignment. 520 // However, for now we just reuse the logic for normal loops, which will 521 // create a loop with a single iteration. 522 523 isl::ast_expr Init = For.for_get_init(); 524 isl::ast_expr Inc = For.for_get_inc(); 525 isl::ast_expr Iterator = For.for_get_iterator(); 526 isl::id IteratorID = Iterator.get_id(); 527 isl::ast_expr UB = getUpperBound(For, Predicate); 528 529 ValueLB = ExprBuilder.create(Init.release()); 530 ValueUB = ExprBuilder.create(UB.release()); 531 ValueInc = ExprBuilder.create(Inc.release()); 532 533 MaxType = ExprBuilder.getType(Iterator.get()); 534 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); 535 MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); 536 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); 537 538 if (MaxType != ValueLB->getType()) 539 ValueLB = Builder.CreateSExt(ValueLB, MaxType); 540 if (MaxType != ValueUB->getType()) 541 ValueUB = Builder.CreateSExt(ValueUB, MaxType); 542 if (MaxType != ValueInc->getType()) 543 ValueInc = Builder.CreateSExt(ValueInc, MaxType); 544 545 // If we can show that LB <Predicate> UB holds at least once, we can 546 // omit the GuardBB in front of the loop. 547 bool UseGuardBB = 548 !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB)); 549 IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock, 550 Predicate, &Annotator, MarkParallel, UseGuardBB, 551 LoopVectorizerDisabled); 552 IDToValue[IteratorID.get()] = IV; 553 554 create(Body.release()); 555 556 Annotator.popLoop(MarkParallel); 557 558 IDToValue.erase(IDToValue.find(IteratorID.get())); 559 560 Builder.SetInsertPoint(&ExitBlock->front()); 561 562 SequentialLoops++; 563 } 564 565 /// Remove the BBs contained in a (sub)function from the dominator tree. 566 /// 567 /// This function removes the basic blocks that are part of a subfunction from 568 /// the dominator tree. Specifically, when generating code it may happen that at 569 /// some point the code generation continues in a new sub-function (e.g., when 570 /// generating OpenMP code). The basic blocks that are created in this 571 /// sub-function are then still part of the dominator tree of the original 572 /// function, such that the dominator tree reaches over function boundaries. 573 /// This is not only incorrect, but also causes crashes. This function now 574 /// removes from the dominator tree all basic blocks that are dominated (and 575 /// consequently reachable) from the entry block of this (sub)function. 576 /// 577 /// FIXME: A LLVM (function or region) pass should not touch anything outside of 578 /// the function/region it runs on. Hence, the pure need for this function shows 579 /// that we do not comply to this rule. At the moment, this does not cause any 580 /// issues, but we should be aware that such issues may appear. Unfortunately 581 /// the current LLVM pass infrastructure does not allow to make Polly a module 582 /// or call-graph pass to solve this issue, as such a pass would not have access 583 /// to the per-function analyses passes needed by Polly. A future pass manager 584 /// infrastructure is supposed to enable such kind of access possibly allowing 585 /// us to create a cleaner solution here. 586 /// 587 /// FIXME: Instead of adding the dominance information and then dropping it 588 /// later on, we should try to just not add it in the first place. This requires 589 /// some careful testing to make sure this does not break in interaction with 590 /// the SCEVBuilder and SplitBlock which may rely on the dominator tree or 591 /// which may try to update it. 592 /// 593 /// @param F The function which contains the BBs to removed. 594 /// @param DT The dominator tree from which to remove the BBs. 595 static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) { 596 DomTreeNode *N = DT.getNode(&F->getEntryBlock()); 597 std::vector<BasicBlock *> Nodes; 598 599 // We can only remove an element from the dominator tree, if all its children 600 // have been removed. To ensure this we obtain the list of nodes to remove 601 // using a post-order tree traversal. 602 for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I) 603 Nodes.push_back(I->getBlock()); 604 605 for (BasicBlock *BB : Nodes) 606 DT.eraseNode(BB); 607 } 608 609 void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) { 610 isl_ast_node *Body; 611 isl_ast_expr *Init, *Inc, *Iterator, *UB; 612 isl_id *IteratorID; 613 Value *ValueLB, *ValueUB, *ValueInc; 614 Type *MaxType; 615 Value *IV; 616 CmpInst::Predicate Predicate; 617 618 // The preamble of parallel code interacts different than normal code with 619 // e.g., scalar initialization. Therefore, we ensure the parallel code is 620 // separated from the last basic block. 621 BasicBlock *ParBB = SplitBlock(Builder.GetInsertBlock(), 622 &*Builder.GetInsertPoint(), &DT, &LI); 623 ParBB->setName("polly.parallel.for"); 624 Builder.SetInsertPoint(&ParBB->front()); 625 626 Body = isl_ast_node_for_get_body(For); 627 Init = isl_ast_node_for_get_init(For); 628 Inc = isl_ast_node_for_get_inc(For); 629 Iterator = isl_ast_node_for_get_iterator(For); 630 IteratorID = isl_ast_expr_get_id(Iterator); 631 UB = getUpperBound(isl::manage_copy(For), Predicate).release(); 632 633 ValueLB = ExprBuilder.create(Init); 634 ValueUB = ExprBuilder.create(UB); 635 ValueInc = ExprBuilder.create(Inc); 636 637 // OpenMP always uses SLE. In case the isl generated AST uses a SLT 638 // expression, we need to adjust the loop bound by one. 639 if (Predicate == CmpInst::ICMP_SLT) 640 ValueUB = Builder.CreateAdd( 641 ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType())); 642 643 MaxType = ExprBuilder.getType(Iterator); 644 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); 645 MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); 646 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); 647 648 if (MaxType != ValueLB->getType()) 649 ValueLB = Builder.CreateSExt(ValueLB, MaxType); 650 if (MaxType != ValueUB->getType()) 651 ValueUB = Builder.CreateSExt(ValueUB, MaxType); 652 if (MaxType != ValueInc->getType()) 653 ValueInc = Builder.CreateSExt(ValueInc, MaxType); 654 655 BasicBlock::iterator LoopBody; 656 657 SetVector<Value *> SubtreeValues; 658 SetVector<const Loop *> Loops; 659 660 getReferencesInSubtree(For, SubtreeValues, Loops); 661 662 // Create for all loops we depend on values that contain the current loop 663 // iteration. These values are necessary to generate code for SCEVs that 664 // depend on such loops. As a result we need to pass them to the subfunction. 665 // See [Code generation of induction variables of loops outside Scops] 666 for (const Loop *L : Loops) { 667 Value *LoopInductionVar = materializeNonScopLoopInductionVariable(L); 668 SubtreeValues.insert(LoopInductionVar); 669 } 670 671 ValueMapT NewValues; 672 ParallelLoopGenerator ParallelLoopGen(Builder, LI, DT, DL); 673 674 IV = ParallelLoopGen.createParallelLoop(ValueLB, ValueUB, ValueInc, 675 SubtreeValues, NewValues, &LoopBody); 676 BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); 677 Builder.SetInsertPoint(&*LoopBody); 678 679 // Remember the parallel subfunction 680 ParallelSubfunctions.push_back(LoopBody->getFunction()); 681 682 // Save the current values. 683 auto ValueMapCopy = ValueMap; 684 IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue; 685 686 updateValues(NewValues); 687 IDToValue[IteratorID] = IV; 688 689 ValueMapT NewValuesReverse; 690 691 for (auto P : NewValues) 692 NewValuesReverse[P.second] = P.first; 693 694 Annotator.addAlternativeAliasBases(NewValuesReverse); 695 696 create(Body); 697 698 Annotator.resetAlternativeAliasBases(); 699 // Restore the original values. 700 ValueMap = ValueMapCopy; 701 IDToValue = IDToValueCopy; 702 703 Builder.SetInsertPoint(&*AfterLoop); 704 removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT); 705 706 for (const Loop *L : Loops) 707 OutsideLoopIterations.erase(L); 708 709 isl_ast_node_free(For); 710 isl_ast_expr_free(Iterator); 711 isl_id_free(IteratorID); 712 713 ParallelLoops++; 714 } 715 716 /// Return whether any of @p Node's statements contain partial accesses. 717 /// 718 /// Partial accesses are not supported by Polly's vector code generator. 719 static bool hasPartialAccesses(__isl_take isl_ast_node *Node) { 720 return isl_ast_node_foreach_descendant_top_down( 721 Node, 722 [](isl_ast_node *Node, void *User) -> isl_bool { 723 if (isl_ast_node_get_type(Node) != isl_ast_node_user) 724 return isl_bool_true; 725 726 isl::ast_expr Expr = 727 isl::manage(isl_ast_node_user_get_expr(Node)); 728 isl::ast_expr StmtExpr = Expr.get_op_arg(0); 729 isl::id Id = StmtExpr.get_id(); 730 731 ScopStmt *Stmt = 732 static_cast<ScopStmt *>(isl_id_get_user(Id.get())); 733 isl::set StmtDom = Stmt->getDomain(); 734 for (auto *MA : *Stmt) { 735 if (MA->isLatestPartialAccess()) 736 return isl_bool_error; 737 } 738 return isl_bool_true; 739 }, 740 nullptr) == isl_stat_error; 741 } 742 743 void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) { 744 bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY; 745 746 if (Vector && IslAstInfo::isInnermostParallel(For) && 747 !IslAstInfo::isReductionParallel(For)) { 748 int VectorWidth = getNumberOfIterations(isl::manage_copy(For)); 749 if (1 < VectorWidth && VectorWidth <= 16 && !hasPartialAccesses(For)) { 750 createForVector(For, VectorWidth); 751 return; 752 } 753 } 754 755 if (IslAstInfo::isExecutedInParallel(For)) { 756 createForParallel(For); 757 return; 758 } 759 bool Parallel = 760 (IslAstInfo::isParallel(For) && !IslAstInfo::isReductionParallel(For)); 761 createForSequential(isl::manage(For), Parallel); 762 } 763 764 void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) { 765 isl_ast_expr *Cond = isl_ast_node_if_get_cond(If); 766 767 Function *F = Builder.GetInsertBlock()->getParent(); 768 LLVMContext &Context = F->getContext(); 769 770 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 771 &*Builder.GetInsertPoint(), &DT, &LI); 772 CondBB->setName("polly.cond"); 773 BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI); 774 MergeBB->setName("polly.merge"); 775 BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F); 776 BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F); 777 778 DT.addNewBlock(ThenBB, CondBB); 779 DT.addNewBlock(ElseBB, CondBB); 780 DT.changeImmediateDominator(MergeBB, CondBB); 781 782 Loop *L = LI.getLoopFor(CondBB); 783 if (L) { 784 L->addBasicBlockToLoop(ThenBB, LI); 785 L->addBasicBlockToLoop(ElseBB, LI); 786 } 787 788 CondBB->getTerminator()->eraseFromParent(); 789 790 Builder.SetInsertPoint(CondBB); 791 Value *Predicate = ExprBuilder.create(Cond); 792 Builder.CreateCondBr(Predicate, ThenBB, ElseBB); 793 Builder.SetInsertPoint(ThenBB); 794 Builder.CreateBr(MergeBB); 795 Builder.SetInsertPoint(ElseBB); 796 Builder.CreateBr(MergeBB); 797 Builder.SetInsertPoint(&ThenBB->front()); 798 799 create(isl_ast_node_if_get_then(If)); 800 801 Builder.SetInsertPoint(&ElseBB->front()); 802 803 if (isl_ast_node_if_has_else(If)) 804 create(isl_ast_node_if_get_else(If)); 805 806 Builder.SetInsertPoint(&MergeBB->front()); 807 808 isl_ast_node_free(If); 809 810 IfConditions++; 811 } 812 813 __isl_give isl_id_to_ast_expr * 814 IslNodeBuilder::createNewAccesses(ScopStmt *Stmt, 815 __isl_keep isl_ast_node *Node) { 816 isl_id_to_ast_expr *NewAccesses = 817 isl_id_to_ast_expr_alloc(Stmt->getParent()->getIslCtx().get(), 0); 818 819 auto *Build = IslAstInfo::getBuild(Node); 820 assert(Build && "Could not obtain isl_ast_build from user node"); 821 Stmt->setAstBuild(isl::manage_copy(Build)); 822 823 for (auto *MA : *Stmt) { 824 if (!MA->hasNewAccessRelation()) { 825 if (PollyGenerateExpressions) { 826 if (!MA->isAffine()) 827 continue; 828 if (MA->getLatestScopArrayInfo()->getBasePtrOriginSAI()) 829 continue; 830 831 auto *BasePtr = 832 dyn_cast<Instruction>(MA->getLatestScopArrayInfo()->getBasePtr()); 833 if (BasePtr && Stmt->getParent()->getRegion().contains(BasePtr)) 834 continue; 835 } else { 836 continue; 837 } 838 } 839 assert(MA->isAffine() && 840 "Only affine memory accesses can be code generated"); 841 842 auto Schedule = isl_ast_build_get_schedule(Build); 843 844 #ifndef NDEBUG 845 if (MA->isRead()) { 846 auto Dom = Stmt->getDomain().release(); 847 auto SchedDom = isl_set_from_union_set( 848 isl_union_map_domain(isl_union_map_copy(Schedule))); 849 auto AccDom = isl_map_domain(MA->getAccessRelation().release()); 850 Dom = isl_set_intersect_params(Dom, 851 Stmt->getParent()->getContext().release()); 852 SchedDom = isl_set_intersect_params( 853 SchedDom, Stmt->getParent()->getContext().release()); 854 assert(isl_set_is_subset(SchedDom, AccDom) && 855 "Access relation not defined on full schedule domain"); 856 assert(isl_set_is_subset(Dom, AccDom) && 857 "Access relation not defined on full domain"); 858 isl_set_free(AccDom); 859 isl_set_free(SchedDom); 860 isl_set_free(Dom); 861 } 862 #endif 863 864 auto PWAccRel = 865 MA->applyScheduleToAccessRelation(isl::manage(Schedule)).release(); 866 867 // isl cannot generate an index expression for access-nothing accesses. 868 isl::set AccDomain = 869 isl::manage(isl_pw_multi_aff_domain(isl_pw_multi_aff_copy(PWAccRel))); 870 isl::set Context = S.getContext(); 871 AccDomain = AccDomain.intersect_params(Context); 872 if (AccDomain.is_empty()) { 873 isl_pw_multi_aff_free(PWAccRel); 874 continue; 875 } 876 877 auto AccessExpr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); 878 NewAccesses = 879 isl_id_to_ast_expr_set(NewAccesses, MA->getId().release(), AccessExpr); 880 } 881 882 return NewAccesses; 883 } 884 885 void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr *Expr, 886 ScopStmt *Stmt, LoopToScevMapT <S) { 887 assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op && 888 "Expression of type 'op' expected"); 889 assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call && 890 "Operation of type 'call' expected"); 891 for (int i = 0; i < isl_ast_expr_get_op_n_arg(Expr) - 1; ++i) { 892 isl_ast_expr *SubExpr; 893 Value *V; 894 895 SubExpr = isl_ast_expr_get_op_arg(Expr, i + 1); 896 V = ExprBuilder.create(SubExpr); 897 ScalarEvolution *SE = Stmt->getParent()->getSE(); 898 LTS[Stmt->getLoopForDimension(i)] = SE->getUnknown(V); 899 } 900 901 isl_ast_expr_free(Expr); 902 } 903 904 void IslNodeBuilder::createSubstitutionsVector( 905 __isl_take isl_ast_expr *Expr, ScopStmt *Stmt, 906 std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS, 907 __isl_take isl_id *IteratorID) { 908 int i = 0; 909 910 Value *OldValue = IDToValue[IteratorID]; 911 for (Value *IV : IVS) { 912 IDToValue[IteratorID] = IV; 913 createSubstitutions(isl_ast_expr_copy(Expr), Stmt, VLTS[i]); 914 i++; 915 } 916 917 IDToValue[IteratorID] = OldValue; 918 isl_id_free(IteratorID); 919 isl_ast_expr_free(Expr); 920 } 921 922 void IslNodeBuilder::generateCopyStmt( 923 ScopStmt *Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) { 924 assert(Stmt->size() == 2); 925 auto ReadAccess = Stmt->begin(); 926 auto WriteAccess = ReadAccess++; 927 assert((*ReadAccess)->isRead() && (*WriteAccess)->isMustWrite()); 928 assert((*ReadAccess)->getElementType() == (*WriteAccess)->getElementType() && 929 "Accesses use the same data type"); 930 assert((*ReadAccess)->isArrayKind() && (*WriteAccess)->isArrayKind()); 931 auto *AccessExpr = 932 isl_id_to_ast_expr_get(NewAccesses, (*ReadAccess)->getId().release()); 933 auto *LoadValue = ExprBuilder.create(AccessExpr); 934 AccessExpr = 935 isl_id_to_ast_expr_get(NewAccesses, (*WriteAccess)->getId().release()); 936 auto *StoreAddr = ExprBuilder.createAccessAddress(AccessExpr); 937 Builder.CreateStore(LoadValue, StoreAddr); 938 } 939 940 Value *IslNodeBuilder::materializeNonScopLoopInductionVariable(const Loop *L) { 941 assert(OutsideLoopIterations.find(L) == OutsideLoopIterations.end() && 942 "trying to materialize loop induction variable twice"); 943 const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), 944 SE.getUnknown(Builder.getInt64(1)), L, 945 SCEV::FlagAnyWrap); 946 Value *V = generateSCEV(OuterLIV); 947 OutsideLoopIterations[L] = SE.getUnknown(V); 948 return V; 949 } 950 951 void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) { 952 LoopToScevMapT LTS; 953 isl_id *Id; 954 ScopStmt *Stmt; 955 956 isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); 957 isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); 958 Id = isl_ast_expr_get_id(StmtExpr); 959 isl_ast_expr_free(StmtExpr); 960 961 LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end()); 962 963 Stmt = (ScopStmt *)isl_id_get_user(Id); 964 auto *NewAccesses = createNewAccesses(Stmt, User); 965 if (Stmt->isCopyStmt()) { 966 generateCopyStmt(Stmt, NewAccesses); 967 isl_ast_expr_free(Expr); 968 } else { 969 createSubstitutions(Expr, Stmt, LTS); 970 971 if (Stmt->isBlockStmt()) 972 BlockGen.copyStmt(*Stmt, LTS, NewAccesses); 973 else 974 RegionGen.copyStmt(*Stmt, LTS, NewAccesses); 975 } 976 977 isl_id_to_ast_expr_free(NewAccesses); 978 isl_ast_node_free(User); 979 isl_id_free(Id); 980 } 981 982 void IslNodeBuilder::createBlock(__isl_take isl_ast_node *Block) { 983 isl_ast_node_list *List = isl_ast_node_block_get_children(Block); 984 985 for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) 986 create(isl_ast_node_list_get_ast_node(List, i)); 987 988 isl_ast_node_free(Block); 989 isl_ast_node_list_free(List); 990 } 991 992 void IslNodeBuilder::create(__isl_take isl_ast_node *Node) { 993 switch (isl_ast_node_get_type(Node)) { 994 case isl_ast_node_error: 995 llvm_unreachable("code generation error"); 996 case isl_ast_node_mark: 997 createMark(Node); 998 return; 999 case isl_ast_node_for: 1000 createFor(Node); 1001 return; 1002 case isl_ast_node_if: 1003 createIf(Node); 1004 return; 1005 case isl_ast_node_user: 1006 createUser(Node); 1007 return; 1008 case isl_ast_node_block: 1009 createBlock(Node); 1010 return; 1011 } 1012 1013 llvm_unreachable("Unknown isl_ast_node type"); 1014 } 1015 1016 bool IslNodeBuilder::materializeValue(isl_id *Id) { 1017 // If the Id is already mapped, skip it. 1018 if (!IDToValue.count(Id)) { 1019 auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id); 1020 Value *V = nullptr; 1021 1022 // Parameters could refer to invariant loads that need to be 1023 // preloaded before we can generate code for the parameter. Thus, 1024 // check if any value referred to in ParamSCEV is an invariant load 1025 // and if so make sure its equivalence class is preloaded. 1026 SetVector<Value *> Values; 1027 findValues(ParamSCEV, SE, Values); 1028 for (auto *Val : Values) { 1029 // Check if the value is an instruction in a dead block within the SCoP 1030 // and if so do not code generate it. 1031 if (auto *Inst = dyn_cast<Instruction>(Val)) { 1032 if (S.contains(Inst)) { 1033 bool IsDead = true; 1034 1035 // Check for "undef" loads first, then if there is a statement for 1036 // the parent of Inst and lastly if the parent of Inst has an empty 1037 // domain. In the first and last case the instruction is dead but if 1038 // there is a statement or the domain is not empty Inst is not dead. 1039 auto MemInst = MemAccInst::dyn_cast(Inst); 1040 auto Address = MemInst ? MemInst.getPointerOperand() : nullptr; 1041 if (Address && SE.getUnknown(UndefValue::get(Address->getType())) == 1042 SE.getPointerBase(SE.getSCEV(Address))) { 1043 } else if (S.getStmtFor(Inst)) { 1044 IsDead = false; 1045 } else { 1046 auto *Domain = S.getDomainConditions(Inst->getParent()).release(); 1047 IsDead = isl_set_is_empty(Domain); 1048 isl_set_free(Domain); 1049 } 1050 1051 if (IsDead) { 1052 V = UndefValue::get(ParamSCEV->getType()); 1053 break; 1054 } 1055 } 1056 } 1057 1058 if (auto *IAClass = S.lookupInvariantEquivClass(Val)) { 1059 // Check if this invariant access class is empty, hence if we never 1060 // actually added a loads instruction to it. In that case it has no 1061 // (meaningful) users and we should not try to code generate it. 1062 if (IAClass->InvariantAccesses.empty()) 1063 V = UndefValue::get(ParamSCEV->getType()); 1064 1065 if (!preloadInvariantEquivClass(*IAClass)) { 1066 isl_id_free(Id); 1067 return false; 1068 } 1069 } 1070 } 1071 1072 V = V ? V : generateSCEV(ParamSCEV); 1073 IDToValue[Id] = V; 1074 } 1075 1076 isl_id_free(Id); 1077 return true; 1078 } 1079 1080 bool IslNodeBuilder::materializeParameters(isl_set *Set) { 1081 for (unsigned i = 0, e = isl_set_dim(Set, isl_dim_param); i < e; ++i) { 1082 if (!isl_set_involves_dims(Set, isl_dim_param, i, 1)) 1083 continue; 1084 isl_id *Id = isl_set_get_dim_id(Set, isl_dim_param, i); 1085 if (!materializeValue(Id)) 1086 return false; 1087 } 1088 return true; 1089 } 1090 1091 bool IslNodeBuilder::materializeParameters() { 1092 for (const SCEV *Param : S.parameters()) { 1093 isl_id *Id = S.getIdForParam(Param).release(); 1094 if (!materializeValue(Id)) 1095 return false; 1096 } 1097 return true; 1098 } 1099 1100 /// Generate the computation of the size of the outermost dimension from the 1101 /// Fortran array descriptor (in this case, `@g_arr`). The final `%size` 1102 /// contains the size of the array. 1103 /// 1104 /// %arrty = type { i8*, i64, i64, [3 x %desc.dimensionty] } 1105 /// %desc.dimensionty = type { i64, i64, i64 } 1106 /// @g_arr = global %arrty zeroinitializer, align 32 1107 /// ... 1108 /// %0 = load i64, i64* getelementptr inbounds 1109 /// (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 2) 1110 /// %1 = load i64, i64* getelementptr inbounds 1111 /// (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 1) 1112 /// %2 = sub nsw i64 %0, %1 1113 /// %size = add nsw i64 %2, 1 1114 static Value *buildFADOutermostDimensionLoad(Value *GlobalDescriptor, 1115 PollyIRBuilder &Builder, 1116 std::string ArrayName) { 1117 assert(GlobalDescriptor && "invalid global descriptor given"); 1118 1119 Value *endIdx[4] = {Builder.getInt64(0), Builder.getInt32(3), 1120 Builder.getInt64(0), Builder.getInt32(2)}; 1121 Value *endPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, endIdx, 1122 ArrayName + "_end_ptr"); 1123 Value *end = Builder.CreateLoad(endPtr, ArrayName + "_end"); 1124 1125 Value *beginIdx[4] = {Builder.getInt64(0), Builder.getInt32(3), 1126 Builder.getInt64(0), Builder.getInt32(1)}; 1127 Value *beginPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, beginIdx, 1128 ArrayName + "_begin_ptr"); 1129 Value *begin = Builder.CreateLoad(beginPtr, ArrayName + "_begin"); 1130 1131 Value *size = 1132 Builder.CreateNSWSub(end, begin, ArrayName + "_end_begin_delta"); 1133 Type *endType = dyn_cast<IntegerType>(end->getType()); 1134 assert(endType && "expected type of end to be integral"); 1135 1136 size = Builder.CreateNSWAdd(end, 1137 ConstantInt::get(endType, 1, /* signed = */ true), 1138 ArrayName + "_size"); 1139 1140 return size; 1141 } 1142 1143 bool IslNodeBuilder::materializeFortranArrayOutermostDimension() { 1144 for (ScopArrayInfo *Array : S.arrays()) { 1145 if (Array->getNumberOfDimensions() == 0) 1146 continue; 1147 1148 Value *FAD = Array->getFortranArrayDescriptor(); 1149 if (!FAD) 1150 continue; 1151 1152 isl_pw_aff *ParametricPwAff = Array->getDimensionSizePw(0).release(); 1153 assert(ParametricPwAff && "parametric pw_aff corresponding " 1154 "to outermost dimension does not " 1155 "exist"); 1156 1157 isl_id *Id = isl_pw_aff_get_dim_id(ParametricPwAff, isl_dim_param, 0); 1158 isl_pw_aff_free(ParametricPwAff); 1159 1160 assert(Id && "pw_aff is not parametric"); 1161 1162 if (IDToValue.count(Id)) { 1163 isl_id_free(Id); 1164 continue; 1165 } 1166 1167 Value *FinalValue = 1168 buildFADOutermostDimensionLoad(FAD, Builder, Array->getName()); 1169 assert(FinalValue && "unable to build Fortran array " 1170 "descriptor load of outermost dimension"); 1171 IDToValue[Id] = FinalValue; 1172 isl_id_free(Id); 1173 } 1174 return true; 1175 } 1176 1177 Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange, 1178 isl_ast_build *Build, 1179 Instruction *AccInst) { 1180 isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange); 1181 isl_ast_expr *Access = 1182 isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); 1183 auto *Address = isl_ast_expr_address_of(Access); 1184 auto *AddressValue = ExprBuilder.create(Address); 1185 Value *PreloadVal; 1186 1187 // Correct the type as the SAI might have a different type than the user 1188 // expects, especially if the base pointer is a struct. 1189 Type *Ty = AccInst->getType(); 1190 1191 auto *Ptr = AddressValue; 1192 auto Name = Ptr->getName(); 1193 auto AS = Ptr->getType()->getPointerAddressSpace(); 1194 Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(AS), Name + ".cast"); 1195 PreloadVal = Builder.CreateLoad(Ptr, Name + ".load"); 1196 if (LoadInst *PreloadInst = dyn_cast<LoadInst>(PreloadVal)) 1197 PreloadInst->setAlignment(dyn_cast<LoadInst>(AccInst)->getAlignment()); 1198 1199 // TODO: This is only a hot fix for SCoP sequences that use the same load 1200 // instruction contained and hoisted by one of the SCoPs. 1201 if (SE.isSCEVable(Ty)) 1202 SE.forgetValue(AccInst); 1203 1204 return PreloadVal; 1205 } 1206 1207 Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, 1208 isl_set *Domain) { 1209 isl_set *AccessRange = isl_map_range(MA.getAddressFunction().release()); 1210 AccessRange = isl_set_gist_params(AccessRange, S.getContext().release()); 1211 1212 if (!materializeParameters(AccessRange)) { 1213 isl_set_free(AccessRange); 1214 isl_set_free(Domain); 1215 return nullptr; 1216 } 1217 1218 auto *Build = 1219 isl_ast_build_from_context(isl_set_universe(S.getParamSpace().release())); 1220 isl_set *Universe = isl_set_universe(isl_set_get_space(Domain)); 1221 bool AlwaysExecuted = isl_set_is_equal(Domain, Universe); 1222 isl_set_free(Universe); 1223 1224 Instruction *AccInst = MA.getAccessInstruction(); 1225 Type *AccInstTy = AccInst->getType(); 1226 1227 Value *PreloadVal = nullptr; 1228 if (AlwaysExecuted) { 1229 PreloadVal = preloadUnconditionally(AccessRange, Build, AccInst); 1230 isl_ast_build_free(Build); 1231 isl_set_free(Domain); 1232 return PreloadVal; 1233 } 1234 1235 if (!materializeParameters(Domain)) { 1236 isl_ast_build_free(Build); 1237 isl_set_free(AccessRange); 1238 isl_set_free(Domain); 1239 return nullptr; 1240 } 1241 1242 isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); 1243 Domain = nullptr; 1244 1245 ExprBuilder.setTrackOverflow(true); 1246 Value *Cond = ExprBuilder.create(DomainCond); 1247 Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(), 1248 "polly.preload.cond.overflown"); 1249 Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result"); 1250 ExprBuilder.setTrackOverflow(false); 1251 1252 if (!Cond->getType()->isIntegerTy(1)) 1253 Cond = Builder.CreateIsNotNull(Cond); 1254 1255 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 1256 &*Builder.GetInsertPoint(), &DT, &LI); 1257 CondBB->setName("polly.preload.cond"); 1258 1259 BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI); 1260 MergeBB->setName("polly.preload.merge"); 1261 1262 Function *F = Builder.GetInsertBlock()->getParent(); 1263 LLVMContext &Context = F->getContext(); 1264 BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F); 1265 1266 DT.addNewBlock(ExecBB, CondBB); 1267 if (Loop *L = LI.getLoopFor(CondBB)) 1268 L->addBasicBlockToLoop(ExecBB, LI); 1269 1270 auto *CondBBTerminator = CondBB->getTerminator(); 1271 Builder.SetInsertPoint(CondBBTerminator); 1272 Builder.CreateCondBr(Cond, ExecBB, MergeBB); 1273 CondBBTerminator->eraseFromParent(); 1274 1275 Builder.SetInsertPoint(ExecBB); 1276 Builder.CreateBr(MergeBB); 1277 1278 Builder.SetInsertPoint(ExecBB->getTerminator()); 1279 Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInst); 1280 Builder.SetInsertPoint(MergeBB->getTerminator()); 1281 auto *MergePHI = Builder.CreatePHI( 1282 AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge"); 1283 PreloadVal = MergePHI; 1284 1285 if (!PreAccInst) { 1286 PreloadVal = nullptr; 1287 PreAccInst = UndefValue::get(AccInstTy); 1288 } 1289 1290 MergePHI->addIncoming(PreAccInst, ExecBB); 1291 MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB); 1292 1293 isl_ast_build_free(Build); 1294 return PreloadVal; 1295 } 1296 1297 bool IslNodeBuilder::preloadInvariantEquivClass( 1298 InvariantEquivClassTy &IAClass) { 1299 // For an equivalence class of invariant loads we pre-load the representing 1300 // element with the unified execution context. However, we have to map all 1301 // elements of the class to the one preloaded load as they are referenced 1302 // during the code generation and therefor need to be mapped. 1303 const MemoryAccessList &MAs = IAClass.InvariantAccesses; 1304 if (MAs.empty()) 1305 return true; 1306 1307 MemoryAccess *MA = MAs.front(); 1308 assert(MA->isArrayKind() && MA->isRead()); 1309 1310 // If the access function was already mapped, the preload of this equivalence 1311 // class was triggered earlier already and doesn't need to be done again. 1312 if (ValueMap.count(MA->getAccessInstruction())) 1313 return true; 1314 1315 // Check for recursion which can be caused by additional constraints, e.g., 1316 // non-finite loop constraints. In such a case we have to bail out and insert 1317 // a "false" runtime check that will cause the original code to be executed. 1318 auto PtrId = std::make_pair(IAClass.IdentifyingPointer, IAClass.AccessType); 1319 if (!PreloadedPtrs.insert(PtrId).second) 1320 return false; 1321 1322 // The execution context of the IAClass. 1323 isl::set &ExecutionCtx = IAClass.ExecutionContext; 1324 1325 // If the base pointer of this class is dependent on another one we have to 1326 // make sure it was preloaded already. 1327 auto *SAI = MA->getScopArrayInfo(); 1328 if (auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) { 1329 if (!preloadInvariantEquivClass(*BaseIAClass)) 1330 return false; 1331 1332 // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and 1333 // we need to refine the ExecutionCtx. 1334 isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext; 1335 ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx); 1336 } 1337 1338 // If the size of a dimension is dependent on another class, make sure it is 1339 // preloaded. 1340 for (unsigned i = 1, e = SAI->getNumberOfDimensions(); i < e; ++i) { 1341 const SCEV *Dim = SAI->getDimensionSize(i); 1342 SetVector<Value *> Values; 1343 findValues(Dim, SE, Values); 1344 for (auto *Val : Values) { 1345 if (auto *BaseIAClass = S.lookupInvariantEquivClass(Val)) { 1346 if (!preloadInvariantEquivClass(*BaseIAClass)) 1347 return false; 1348 1349 // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx 1350 // and we need to refine the ExecutionCtx. 1351 isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext; 1352 ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx); 1353 } 1354 } 1355 } 1356 1357 Instruction *AccInst = MA->getAccessInstruction(); 1358 Type *AccInstTy = AccInst->getType(); 1359 1360 Value *PreloadVal = preloadInvariantLoad(*MA, ExecutionCtx.copy()); 1361 if (!PreloadVal) 1362 return false; 1363 1364 for (const MemoryAccess *MA : MAs) { 1365 Instruction *MAAccInst = MA->getAccessInstruction(); 1366 assert(PreloadVal->getType() == MAAccInst->getType()); 1367 ValueMap[MAAccInst] = PreloadVal; 1368 } 1369 1370 if (SE.isSCEVable(AccInstTy)) { 1371 isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)).release(); 1372 if (ParamId) 1373 IDToValue[ParamId] = PreloadVal; 1374 isl_id_free(ParamId); 1375 } 1376 1377 BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock(); 1378 auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(), 1379 AccInst->getName() + ".preload.s2a"); 1380 Alloca->insertBefore(&*EntryBB->getFirstInsertionPt()); 1381 Builder.CreateStore(PreloadVal, Alloca); 1382 ValueMapT PreloadedPointer; 1383 PreloadedPointer[PreloadVal] = AccInst; 1384 Annotator.addAlternativeAliasBases(PreloadedPointer); 1385 1386 for (auto *DerivedSAI : SAI->getDerivedSAIs()) { 1387 Value *BasePtr = DerivedSAI->getBasePtr(); 1388 1389 for (const MemoryAccess *MA : MAs) { 1390 // As the derived SAI information is quite coarse, any load from the 1391 // current SAI could be the base pointer of the derived SAI, however we 1392 // should only change the base pointer of the derived SAI if we actually 1393 // preloaded it. 1394 if (BasePtr == MA->getOriginalBaseAddr()) { 1395 assert(BasePtr->getType() == PreloadVal->getType()); 1396 DerivedSAI->setBasePtr(PreloadVal); 1397 } 1398 1399 // For scalar derived SAIs we remap the alloca used for the derived value. 1400 if (BasePtr == MA->getAccessInstruction()) 1401 ScalarMap[DerivedSAI] = Alloca; 1402 } 1403 } 1404 1405 for (const MemoryAccess *MA : MAs) { 1406 Instruction *MAAccInst = MA->getAccessInstruction(); 1407 // Use the escape system to get the correct value to users outside the SCoP. 1408 BlockGenerator::EscapeUserVectorTy EscapeUsers; 1409 for (auto *U : MAAccInst->users()) 1410 if (Instruction *UI = dyn_cast<Instruction>(U)) 1411 if (!S.contains(UI)) 1412 EscapeUsers.push_back(UI); 1413 1414 if (EscapeUsers.empty()) 1415 continue; 1416 1417 EscapeMap[MA->getAccessInstruction()] = 1418 std::make_pair(Alloca, std::move(EscapeUsers)); 1419 } 1420 1421 return true; 1422 } 1423 1424 void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) { 1425 for (auto &SAI : S.arrays()) { 1426 if (SAI->getBasePtr()) 1427 continue; 1428 1429 assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) && 1430 "The size of the outermost dimension is used to declare newly " 1431 "created arrays that require memory allocation."); 1432 1433 Type *NewArrayType = nullptr; 1434 1435 // Get the size of the array = size(dim_1)*...*size(dim_n) 1436 uint64_t ArraySizeInt = 1; 1437 for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; i--) { 1438 auto *DimSize = SAI->getDimensionSize(i); 1439 unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize) 1440 ->getAPInt() 1441 .getLimitedValue(); 1442 1443 if (!NewArrayType) 1444 NewArrayType = SAI->getElementType(); 1445 1446 NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize); 1447 ArraySizeInt *= UnsignedDimSize; 1448 } 1449 1450 if (SAI->isOnHeap()) { 1451 LLVMContext &Ctx = NewArrayType->getContext(); 1452 1453 // Get the IntPtrTy from the Datalayout 1454 auto IntPtrTy = DL.getIntPtrType(Ctx); 1455 1456 // Get the size of the element type in bits 1457 unsigned Size = SAI->getElemSizeInBytes(); 1458 1459 // Insert the malloc call at polly.start 1460 auto InstIt = std::get<0>(StartExitBlocks)->getTerminator(); 1461 auto *CreatedArray = CallInst::CreateMalloc( 1462 &*InstIt, IntPtrTy, SAI->getElementType(), 1463 ConstantInt::get(Type::getInt64Ty(Ctx), Size), 1464 ConstantInt::get(Type::getInt64Ty(Ctx), ArraySizeInt), nullptr, 1465 SAI->getName()); 1466 1467 SAI->setBasePtr(CreatedArray); 1468 1469 // Insert the free call at polly.exiting 1470 CallInst::CreateFree(CreatedArray, 1471 std::get<1>(StartExitBlocks)->getTerminator()); 1472 } else { 1473 auto InstIt = Builder.GetInsertBlock() 1474 ->getParent() 1475 ->getEntryBlock() 1476 .getTerminator(); 1477 1478 auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(), 1479 SAI->getName(), &*InstIt); 1480 CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize); 1481 SAI->setBasePtr(CreatedArray); 1482 } 1483 } 1484 } 1485 1486 bool IslNodeBuilder::preloadInvariantLoads() { 1487 auto &InvariantEquivClasses = S.getInvariantAccesses(); 1488 if (InvariantEquivClasses.empty()) 1489 return true; 1490 1491 BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(), 1492 &*Builder.GetInsertPoint(), &DT, &LI); 1493 PreLoadBB->setName("polly.preload.begin"); 1494 Builder.SetInsertPoint(&PreLoadBB->front()); 1495 1496 for (auto &IAClass : InvariantEquivClasses) 1497 if (!preloadInvariantEquivClass(IAClass)) 1498 return false; 1499 1500 return true; 1501 } 1502 1503 void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { 1504 // Materialize values for the parameters of the SCoP. 1505 materializeParameters(); 1506 1507 // materialize the outermost dimension parameters for a Fortran array. 1508 // NOTE: materializeParameters() does not work since it looks through 1509 // the SCEVs. We don't have a corresponding SCEV for the array size 1510 // parameter 1511 materializeFortranArrayOutermostDimension(); 1512 1513 // Generate values for the current loop iteration for all surrounding loops. 1514 // 1515 // We may also reference loops outside of the scop which do not contain the 1516 // scop itself, but as the number of such scops may be arbitrarily large we do 1517 // not generate code for them here, but only at the point of code generation 1518 // where these values are needed. 1519 Loop *L = LI.getLoopFor(S.getEntry()); 1520 1521 while (L != nullptr && S.contains(L)) 1522 L = L->getParentLoop(); 1523 1524 while (L != nullptr) { 1525 materializeNonScopLoopInductionVariable(L); 1526 L = L->getParentLoop(); 1527 } 1528 1529 isl_set_free(Context); 1530 } 1531 1532 Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) { 1533 /// We pass the insert location of our Builder, as Polly ensures during IR 1534 /// generation that there is always a valid CFG into which instructions are 1535 /// inserted. As a result, the insertpoint is known to be always followed by a 1536 /// terminator instruction. This means the insert point may be specified by a 1537 /// terminator instruction, but it can never point to an ->end() iterator 1538 /// which does not have a corresponding instruction. Hence, dereferencing 1539 /// the insertpoint to obtain an instruction is known to be save. 1540 /// 1541 /// We also do not need to update the Builder here, as new instructions are 1542 /// always inserted _before_ the given InsertLocation. As a result, the 1543 /// insert location remains valid. 1544 assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() && 1545 "Insert location points after last valid instruction"); 1546 Instruction *InsertLocation = &*Builder.GetInsertPoint(); 1547 return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(), 1548 InsertLocation, &ValueMap, 1549 StartBlock->getSinglePredecessor()); 1550 } 1551 1552 /// The AST expression we generate to perform the run-time check assumes 1553 /// computations on integer types of infinite size. As we only use 64-bit 1554 /// arithmetic we check for overflows, in case of which we set the result 1555 /// of this run-time check to false to be conservatively correct, 1556 Value *IslNodeBuilder::createRTC(isl_ast_expr *Condition) { 1557 auto ExprBuilder = getExprBuilder(); 1558 1559 // In case the AST expression has integers larger than 64 bit, bail out. The 1560 // resulting LLVM-IR will contain operations on types that use more than 64 1561 // bits. These are -- in case wrapping intrinsics are used -- translated to 1562 // runtime library calls that are not available on all systems (e.g., Android) 1563 // and consequently will result in linker errors. 1564 if (ExprBuilder.hasLargeInts(isl::manage_copy(Condition))) { 1565 isl_ast_expr_free(Condition); 1566 return Builder.getFalse(); 1567 } 1568 1569 ExprBuilder.setTrackOverflow(true); 1570 Value *RTC = ExprBuilder.create(Condition); 1571 if (!RTC->getType()->isIntegerTy(1)) 1572 RTC = Builder.CreateIsNotNull(RTC); 1573 Value *OverflowHappened = 1574 Builder.CreateNot(ExprBuilder.getOverflowState(), "polly.rtc.overflown"); 1575 1576 if (PollyGenerateRTCPrint) { 1577 auto *F = Builder.GetInsertBlock()->getParent(); 1578 RuntimeDebugBuilder::createCPUPrinter( 1579 Builder, 1580 "F: " + F->getName().str() + " R: " + S.getRegion().getNameStr() + 1581 "RTC: ", 1582 RTC, " Overflow: ", OverflowHappened, 1583 "\n" 1584 " (0 failed, -1 succeeded)\n" 1585 " (if one or both are 0 falling back to original code, if both are -1 " 1586 "executing Polly code)\n"); 1587 } 1588 1589 RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result"); 1590 ExprBuilder.setTrackOverflow(false); 1591 1592 if (!isa<ConstantInt>(RTC)) 1593 VersionedScops++; 1594 1595 return RTC; 1596 } 1597