1 //===- IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the IslNodeBuilder, a class to translate an isl AST into 10 // a LLVM-IR AST. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/CodeGen/IslNodeBuilder.h" 15 #include "polly/CodeGen/BlockGenerators.h" 16 #include "polly/CodeGen/CodeGeneration.h" 17 #include "polly/CodeGen/IslAst.h" 18 #include "polly/CodeGen/IslExprBuilder.h" 19 #include "polly/CodeGen/LoopGenerators.h" 20 #include "polly/CodeGen/RuntimeDebugBuilder.h" 21 #include "polly/Config/config.h" 22 #include "polly/Options.h" 23 #include "polly/ScopInfo.h" 24 #include "polly/Support/GICHelper.h" 25 #include "polly/Support/ISLTools.h" 26 #include "polly/Support/SCEVValidator.h" 27 #include "polly/Support/ScopHelper.h" 28 #include "llvm/ADT/APInt.h" 29 #include "llvm/ADT/PostOrderIterator.h" 30 #include "llvm/ADT/SetVector.h" 31 #include "llvm/ADT/SmallPtrSet.h" 32 #include "llvm/ADT/Statistic.h" 33 #include "llvm/Analysis/LoopInfo.h" 34 #include "llvm/Analysis/RegionInfo.h" 35 #include "llvm/Analysis/ScalarEvolution.h" 36 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 37 #include "llvm/IR/BasicBlock.h" 38 #include "llvm/IR/Constant.h" 39 #include "llvm/IR/Constants.h" 40 #include "llvm/IR/DataLayout.h" 41 #include "llvm/IR/DerivedTypes.h" 42 #include "llvm/IR/Dominators.h" 43 #include "llvm/IR/Function.h" 44 #include "llvm/IR/InstrTypes.h" 45 #include "llvm/IR/Instruction.h" 46 #include "llvm/IR/Instructions.h" 47 #include "llvm/IR/Type.h" 48 #include "llvm/IR/Value.h" 49 #include "llvm/Support/Casting.h" 50 #include "llvm/Support/CommandLine.h" 51 #include "llvm/Support/ErrorHandling.h" 52 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 53 #include "isl/aff.h" 54 #include "isl/aff_type.h" 55 #include "isl/ast.h" 56 #include "isl/ast_build.h" 57 #include "isl/isl-noexceptions.h" 58 #include "isl/map.h" 59 #include "isl/set.h" 60 #include "isl/union_map.h" 61 #include "isl/union_set.h" 62 #include "isl/val.h" 63 #include <algorithm> 64 #include <cassert> 65 #include <cstdint> 66 #include <cstring> 67 #include <string> 68 #include <utility> 69 #include <vector> 70 71 using namespace llvm; 72 using namespace polly; 73 74 #define DEBUG_TYPE "polly-codegen" 75 76 STATISTIC(VersionedScops, "Number of SCoPs that required versioning."); 77 78 STATISTIC(SequentialLoops, "Number of generated sequential for-loops"); 79 STATISTIC(ParallelLoops, "Number of generated parallel for-loops"); 80 STATISTIC(VectorLoops, "Number of generated vector for-loops"); 81 STATISTIC(IfConditions, "Number of generated if-conditions"); 82 83 static cl::opt<bool> PollyGenerateRTCPrint( 84 "polly-codegen-emit-rtc-print", 85 cl::desc("Emit code that prints the runtime check result dynamically."), 86 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 87 88 // If this option is set we always use the isl AST generator to regenerate 89 // memory accesses. Without this option set we regenerate expressions using the 90 // original SCEV expressions and only generate new expressions in case the 91 // access relation has been changed and consequently must be regenerated. 92 static cl::opt<bool> PollyGenerateExpressions( 93 "polly-codegen-generate-expressions", 94 cl::desc("Generate AST expressions for unmodified and modified accesses"), 95 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 96 97 static cl::opt<int> PollyTargetFirstLevelCacheLineSize( 98 "polly-target-first-level-cache-line-size", 99 cl::desc("The size of the first level cache line size specified in bytes."), 100 cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory)); 101 102 isl::ast_expr IslNodeBuilder::getUpperBound(isl::ast_node For, 103 ICmpInst::Predicate &Predicate) { 104 isl::ast_expr Cond = For.for_get_cond(); 105 isl::ast_expr Iterator = For.for_get_iterator(); 106 assert(isl_ast_expr_get_type(Cond.get()) == isl_ast_expr_op && 107 "conditional expression is not an atomic upper bound"); 108 109 isl_ast_op_type OpType = isl_ast_expr_get_op_type(Cond.get()); 110 111 switch (OpType) { 112 case isl_ast_op_le: 113 Predicate = ICmpInst::ICMP_SLE; 114 break; 115 case isl_ast_op_lt: 116 Predicate = ICmpInst::ICMP_SLT; 117 break; 118 default: 119 llvm_unreachable("Unexpected comparison type in loop condition"); 120 } 121 122 isl::ast_expr Arg0 = Cond.get_op_arg(0); 123 124 assert(isl_ast_expr_get_type(Arg0.get()) == isl_ast_expr_id && 125 "conditional expression is not an atomic upper bound"); 126 127 isl::id UBID = Arg0.get_id(); 128 129 assert(isl_ast_expr_get_type(Iterator.get()) == isl_ast_expr_id && 130 "Could not get the iterator"); 131 132 isl::id IteratorID = Iterator.get_id(); 133 134 assert(UBID.get() == IteratorID.get() && 135 "conditional expression is not an atomic upper bound"); 136 137 return Cond.get_op_arg(1); 138 } 139 140 /// Return true if a return value of Predicate is true for the value represented 141 /// by passed isl_ast_expr_int. 142 static bool checkIslAstExprInt(__isl_take isl_ast_expr *Expr, 143 isl_bool (*Predicate)(__isl_keep isl_val *)) { 144 if (isl_ast_expr_get_type(Expr) != isl_ast_expr_int) { 145 isl_ast_expr_free(Expr); 146 return false; 147 } 148 auto ExprVal = isl_ast_expr_get_val(Expr); 149 isl_ast_expr_free(Expr); 150 if (Predicate(ExprVal) != isl_bool_true) { 151 isl_val_free(ExprVal); 152 return false; 153 } 154 isl_val_free(ExprVal); 155 return true; 156 } 157 158 int IslNodeBuilder::getNumberOfIterations(isl::ast_node For) { 159 assert(isl_ast_node_get_type(For.get()) == isl_ast_node_for); 160 isl::ast_node Body = For.for_get_body(); 161 162 // First, check if we can actually handle this code. 163 switch (isl_ast_node_get_type(Body.get())) { 164 case isl_ast_node_user: 165 break; 166 case isl_ast_node_block: { 167 isl::ast_node_list List = Body.block_get_children(); 168 for (isl::ast_node Node : List) { 169 isl_ast_node_type NodeType = isl_ast_node_get_type(Node.get()); 170 if (NodeType != isl_ast_node_user) 171 return -1; 172 } 173 break; 174 } 175 default: 176 return -1; 177 } 178 179 isl::ast_expr Init = For.for_get_init(); 180 if (!checkIslAstExprInt(Init.release(), isl_val_is_zero)) 181 return -1; 182 isl::ast_expr Inc = For.for_get_inc(); 183 if (!checkIslAstExprInt(Inc.release(), isl_val_is_one)) 184 return -1; 185 CmpInst::Predicate Predicate; 186 isl::ast_expr UB = getUpperBound(For, Predicate); 187 if (isl_ast_expr_get_type(UB.get()) != isl_ast_expr_int) 188 return -1; 189 isl::val UpVal = UB.get_val(); 190 int NumberIterations = UpVal.get_num_si(); 191 if (NumberIterations < 0) 192 return -1; 193 if (Predicate == CmpInst::ICMP_SLT) 194 return NumberIterations; 195 else 196 return NumberIterations + 1; 197 } 198 199 /// Extract the values and SCEVs needed to generate code for a block. 200 static int findReferencesInBlock(struct SubtreeReferences &References, 201 const ScopStmt *Stmt, BasicBlock *BB) { 202 for (Instruction &Inst : *BB) { 203 // Include invariant loads 204 if (isa<LoadInst>(Inst)) 205 if (Value *InvariantLoad = References.GlobalMap.lookup(&Inst)) 206 References.Values.insert(InvariantLoad); 207 208 for (Value *SrcVal : Inst.operands()) { 209 auto *Scope = References.LI.getLoopFor(BB); 210 if (canSynthesize(SrcVal, References.S, &References.SE, Scope)) { 211 References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope)); 212 continue; 213 } else if (Value *NewVal = References.GlobalMap.lookup(SrcVal)) 214 References.Values.insert(NewVal); 215 } 216 } 217 return 0; 218 } 219 220 void addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr, 221 bool CreateScalarRefs) { 222 auto &References = *static_cast<struct SubtreeReferences *>(UserPtr); 223 224 if (Stmt->isBlockStmt()) 225 findReferencesInBlock(References, Stmt, Stmt->getBasicBlock()); 226 else { 227 assert(Stmt->isRegionStmt() && 228 "Stmt was neither block nor region statement"); 229 for (BasicBlock *BB : Stmt->getRegion()->blocks()) 230 findReferencesInBlock(References, Stmt, BB); 231 } 232 233 for (auto &Access : *Stmt) { 234 if (References.ParamSpace) { 235 isl::space ParamSpace = Access->getLatestAccessRelation().get_space(); 236 (*References.ParamSpace) = 237 References.ParamSpace->align_params(ParamSpace); 238 } 239 240 if (Access->isLatestArrayKind()) { 241 auto *BasePtr = Access->getLatestScopArrayInfo()->getBasePtr(); 242 if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr)) 243 if (Stmt->getParent()->contains(OpInst)) 244 continue; 245 246 References.Values.insert(BasePtr); 247 continue; 248 } 249 250 if (CreateScalarRefs) 251 References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access)); 252 } 253 } 254 255 /// Extract the out-of-scop values and SCEVs referenced from a set describing 256 /// a ScopStmt. 257 /// 258 /// This includes the SCEVUnknowns referenced by the SCEVs used in the 259 /// statement and the base pointers of the memory accesses. For scalar 260 /// statements we force the generation of alloca memory locations and list 261 /// these locations in the set of out-of-scop values as well. 262 /// 263 /// @param Set A set which references the ScopStmt we are interested in. 264 /// @param UserPtr A void pointer that can be casted to a SubtreeReferences 265 /// structure. 266 static void addReferencesFromStmtSet(isl::set Set, 267 struct SubtreeReferences *UserPtr) { 268 isl::id Id = Set.get_tuple_id(); 269 auto *Stmt = static_cast<const ScopStmt *>(Id.get_user()); 270 return addReferencesFromStmt(Stmt, UserPtr); 271 } 272 273 /// Extract the out-of-scop values and SCEVs referenced from a union set 274 /// referencing multiple ScopStmts. 275 /// 276 /// This includes the SCEVUnknowns referenced by the SCEVs used in the 277 /// statement and the base pointers of the memory accesses. For scalar 278 /// statements we force the generation of alloca memory locations and list 279 /// these locations in the set of out-of-scop values as well. 280 /// 281 /// @param USet A union set referencing the ScopStmts we are interested 282 /// in. 283 /// @param References The SubtreeReferences data structure through which 284 /// results are returned and further information is 285 /// provided. 286 static void 287 addReferencesFromStmtUnionSet(isl::union_set USet, 288 struct SubtreeReferences &References) { 289 290 for (isl::set Set : USet.get_set_list()) 291 addReferencesFromStmtSet(Set, &References); 292 } 293 294 __isl_give isl_union_map * 295 IslNodeBuilder::getScheduleForAstNode(__isl_keep isl_ast_node *For) { 296 return IslAstInfo::getSchedule(For); 297 } 298 299 void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For, 300 SetVector<Value *> &Values, 301 SetVector<const Loop *> &Loops) { 302 SetVector<const SCEV *> SCEVs; 303 struct SubtreeReferences References = { 304 LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr}; 305 306 for (const auto &I : IDToValue) 307 Values.insert(I.second); 308 309 // NOTE: this is populated in IslNodeBuilder::addParameters 310 for (const auto &I : OutsideLoopIterations) 311 Values.insert(cast<SCEVUnknown>(I.second)->getValue()); 312 313 isl::union_set Schedule = 314 isl::manage(isl_union_map_domain(getScheduleForAstNode(For))); 315 addReferencesFromStmtUnionSet(Schedule, References); 316 317 for (const SCEV *Expr : SCEVs) { 318 findValues(Expr, SE, Values); 319 findLoops(Expr, Loops); 320 } 321 322 Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); }); 323 324 /// Note: Code generation of induction variables of loops outside Scops 325 /// 326 /// Remove loops that contain the scop or that are part of the scop, as they 327 /// are considered local. This leaves only loops that are before the scop, but 328 /// do not contain the scop itself. 329 /// We ignore loops perfectly contained in the Scop because these are already 330 /// generated at `IslNodeBuilder::addParameters`. These `Loops` are loops 331 /// whose induction variables are referred to by the Scop, but the Scop is not 332 /// fully contained in these Loops. Since there can be many of these, 333 /// we choose to codegen these on-demand. 334 /// @see IslNodeBuilder::materializeNonScopLoopInductionVariable. 335 Loops.remove_if([this](const Loop *L) { 336 return S.contains(L) || L->contains(S.getEntry()); 337 }); 338 339 // Contains Values that may need to be replaced with other values 340 // due to replacements from the ValueMap. We should make sure 341 // that we return correctly remapped values. 342 // NOTE: this code path is tested by: 343 // 1. test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll 344 // 2. test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll 345 SetVector<Value *> ReplacedValues; 346 for (Value *V : Values) { 347 ReplacedValues.insert(getLatestValue(V)); 348 } 349 Values = ReplacedValues; 350 } 351 352 void IslNodeBuilder::updateValues(ValueMapT &NewValues) { 353 SmallPtrSet<Value *, 5> Inserted; 354 355 for (const auto &I : IDToValue) { 356 IDToValue[I.first] = NewValues[I.second]; 357 Inserted.insert(I.second); 358 } 359 360 for (const auto &I : NewValues) { 361 if (Inserted.count(I.first)) 362 continue; 363 364 ValueMap[I.first] = I.second; 365 } 366 } 367 368 Value *IslNodeBuilder::getLatestValue(Value *Original) const { 369 auto It = ValueMap.find(Original); 370 if (It == ValueMap.end()) 371 return Original; 372 return It->second; 373 } 374 375 void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, 376 std::vector<Value *> &IVS, 377 __isl_take isl_id *IteratorID, 378 __isl_take isl_union_map *Schedule) { 379 isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); 380 isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); 381 isl_id *Id = isl_ast_expr_get_id(StmtExpr); 382 isl_ast_expr_free(StmtExpr); 383 ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id); 384 std::vector<LoopToScevMapT> VLTS(IVS.size()); 385 386 isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain().release()); 387 Schedule = isl_union_map_intersect_domain(Schedule, Domain); 388 isl_map *S = isl_map_from_union_map(Schedule); 389 390 auto *NewAccesses = createNewAccesses(Stmt, User); 391 createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID); 392 VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses); 393 isl_id_to_ast_expr_free(NewAccesses); 394 isl_map_free(S); 395 isl_id_free(Id); 396 isl_ast_node_free(User); 397 } 398 399 void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) { 400 auto *Id = isl_ast_node_mark_get_id(Node); 401 auto Child = isl_ast_node_mark_get_node(Node); 402 isl_ast_node_free(Node); 403 // If a child node of a 'SIMD mark' is a loop that has a single iteration, 404 // it will be optimized away and we should skip it. 405 if (strcmp(isl_id_get_name(Id), "SIMD") == 0 && 406 isl_ast_node_get_type(Child) == isl_ast_node_for) { 407 bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY; 408 int VectorWidth = getNumberOfIterations(isl::manage_copy(Child)); 409 if (Vector && 1 < VectorWidth && VectorWidth <= 16) 410 createForVector(Child, VectorWidth); 411 else 412 createForSequential(isl::manage(Child), true); 413 isl_id_free(Id); 414 return; 415 } 416 if (strcmp(isl_id_get_name(Id), "Inter iteration alias-free") == 0) { 417 auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id)); 418 Annotator.addInterIterationAliasFreeBasePtr(BasePtr); 419 } 420 create(Child); 421 isl_id_free(Id); 422 } 423 424 void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For, 425 int VectorWidth) { 426 isl_ast_node *Body = isl_ast_node_for_get_body(For); 427 isl_ast_expr *Init = isl_ast_node_for_get_init(For); 428 isl_ast_expr *Inc = isl_ast_node_for_get_inc(For); 429 isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For); 430 isl_id *IteratorID = isl_ast_expr_get_id(Iterator); 431 432 Value *ValueLB = ExprBuilder.create(Init); 433 Value *ValueInc = ExprBuilder.create(Inc); 434 435 Type *MaxType = ExprBuilder.getType(Iterator); 436 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); 437 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); 438 439 if (MaxType != ValueLB->getType()) 440 ValueLB = Builder.CreateSExt(ValueLB, MaxType); 441 if (MaxType != ValueInc->getType()) 442 ValueInc = Builder.CreateSExt(ValueInc, MaxType); 443 444 std::vector<Value *> IVS(VectorWidth); 445 IVS[0] = ValueLB; 446 447 for (int i = 1; i < VectorWidth; i++) 448 IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv"); 449 450 isl_union_map *Schedule = getScheduleForAstNode(For); 451 assert(Schedule && "For statement annotation does not contain its schedule"); 452 453 IDToValue[IteratorID] = ValueLB; 454 455 switch (isl_ast_node_get_type(Body)) { 456 case isl_ast_node_user: 457 createUserVector(Body, IVS, isl_id_copy(IteratorID), 458 isl_union_map_copy(Schedule)); 459 break; 460 case isl_ast_node_block: { 461 isl_ast_node_list *List = isl_ast_node_block_get_children(Body); 462 463 for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) 464 createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS, 465 isl_id_copy(IteratorID), isl_union_map_copy(Schedule)); 466 467 isl_ast_node_free(Body); 468 isl_ast_node_list_free(List); 469 break; 470 } 471 default: 472 isl_ast_node_dump(Body); 473 llvm_unreachable("Unhandled isl_ast_node in vectorizer"); 474 } 475 476 IDToValue.erase(IDToValue.find(IteratorID)); 477 isl_id_free(IteratorID); 478 isl_union_map_free(Schedule); 479 480 isl_ast_node_free(For); 481 isl_ast_expr_free(Iterator); 482 483 VectorLoops++; 484 } 485 486 /// Restore the initial ordering of dimensions of the band node 487 /// 488 /// In case the band node represents all the dimensions of the iteration 489 /// domain, recreate the band node to restore the initial ordering of the 490 /// dimensions. 491 /// 492 /// @param Node The band node to be modified. 493 /// @return The modified schedule node. 494 static bool IsLoopVectorizerDisabled(isl::ast_node Node) { 495 assert(isl_ast_node_get_type(Node.get()) == isl_ast_node_for); 496 auto Body = Node.for_get_body(); 497 if (isl_ast_node_get_type(Body.get()) != isl_ast_node_mark) 498 return false; 499 auto Id = Body.mark_get_id(); 500 if (strcmp(Id.get_name().c_str(), "Loop Vectorizer Disabled") == 0) 501 return true; 502 return false; 503 } 504 505 void IslNodeBuilder::createForSequential(isl::ast_node For, bool MarkParallel) { 506 Value *ValueLB, *ValueUB, *ValueInc; 507 Type *MaxType; 508 BasicBlock *ExitBlock; 509 Value *IV; 510 CmpInst::Predicate Predicate; 511 512 bool LoopVectorizerDisabled = IsLoopVectorizerDisabled(For); 513 514 isl::ast_node Body = For.for_get_body(); 515 516 // isl_ast_node_for_is_degenerate(For) 517 // 518 // TODO: For degenerated loops we could generate a plain assignment. 519 // However, for now we just reuse the logic for normal loops, which will 520 // create a loop with a single iteration. 521 522 isl::ast_expr Init = For.for_get_init(); 523 isl::ast_expr Inc = For.for_get_inc(); 524 isl::ast_expr Iterator = For.for_get_iterator(); 525 isl::id IteratorID = Iterator.get_id(); 526 isl::ast_expr UB = getUpperBound(For, Predicate); 527 528 ValueLB = ExprBuilder.create(Init.release()); 529 ValueUB = ExprBuilder.create(UB.release()); 530 ValueInc = ExprBuilder.create(Inc.release()); 531 532 MaxType = ExprBuilder.getType(Iterator.get()); 533 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); 534 MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); 535 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); 536 537 if (MaxType != ValueLB->getType()) 538 ValueLB = Builder.CreateSExt(ValueLB, MaxType); 539 if (MaxType != ValueUB->getType()) 540 ValueUB = Builder.CreateSExt(ValueUB, MaxType); 541 if (MaxType != ValueInc->getType()) 542 ValueInc = Builder.CreateSExt(ValueInc, MaxType); 543 544 // If we can show that LB <Predicate> UB holds at least once, we can 545 // omit the GuardBB in front of the loop. 546 bool UseGuardBB = 547 !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB)); 548 IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock, 549 Predicate, &Annotator, MarkParallel, UseGuardBB, 550 LoopVectorizerDisabled); 551 IDToValue[IteratorID.get()] = IV; 552 553 create(Body.release()); 554 555 Annotator.popLoop(MarkParallel); 556 557 IDToValue.erase(IDToValue.find(IteratorID.get())); 558 559 Builder.SetInsertPoint(&ExitBlock->front()); 560 561 SequentialLoops++; 562 } 563 564 /// Remove the BBs contained in a (sub)function from the dominator tree. 565 /// 566 /// This function removes the basic blocks that are part of a subfunction from 567 /// the dominator tree. Specifically, when generating code it may happen that at 568 /// some point the code generation continues in a new sub-function (e.g., when 569 /// generating OpenMP code). The basic blocks that are created in this 570 /// sub-function are then still part of the dominator tree of the original 571 /// function, such that the dominator tree reaches over function boundaries. 572 /// This is not only incorrect, but also causes crashes. This function now 573 /// removes from the dominator tree all basic blocks that are dominated (and 574 /// consequently reachable) from the entry block of this (sub)function. 575 /// 576 /// FIXME: A LLVM (function or region) pass should not touch anything outside of 577 /// the function/region it runs on. Hence, the pure need for this function shows 578 /// that we do not comply to this rule. At the moment, this does not cause any 579 /// issues, but we should be aware that such issues may appear. Unfortunately 580 /// the current LLVM pass infrastructure does not allow to make Polly a module 581 /// or call-graph pass to solve this issue, as such a pass would not have access 582 /// to the per-function analyses passes needed by Polly. A future pass manager 583 /// infrastructure is supposed to enable such kind of access possibly allowing 584 /// us to create a cleaner solution here. 585 /// 586 /// FIXME: Instead of adding the dominance information and then dropping it 587 /// later on, we should try to just not add it in the first place. This requires 588 /// some careful testing to make sure this does not break in interaction with 589 /// the SCEVBuilder and SplitBlock which may rely on the dominator tree or 590 /// which may try to update it. 591 /// 592 /// @param F The function which contains the BBs to removed. 593 /// @param DT The dominator tree from which to remove the BBs. 594 static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) { 595 DomTreeNode *N = DT.getNode(&F->getEntryBlock()); 596 std::vector<BasicBlock *> Nodes; 597 598 // We can only remove an element from the dominator tree, if all its children 599 // have been removed. To ensure this we obtain the list of nodes to remove 600 // using a post-order tree traversal. 601 for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I) 602 Nodes.push_back(I->getBlock()); 603 604 for (BasicBlock *BB : Nodes) 605 DT.eraseNode(BB); 606 } 607 608 void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) { 609 isl_ast_node *Body; 610 isl_ast_expr *Init, *Inc, *Iterator, *UB; 611 isl_id *IteratorID; 612 Value *ValueLB, *ValueUB, *ValueInc; 613 Type *MaxType; 614 Value *IV; 615 CmpInst::Predicate Predicate; 616 617 // The preamble of parallel code interacts different than normal code with 618 // e.g., scalar initialization. Therefore, we ensure the parallel code is 619 // separated from the last basic block. 620 BasicBlock *ParBB = SplitBlock(Builder.GetInsertBlock(), 621 &*Builder.GetInsertPoint(), &DT, &LI); 622 ParBB->setName("polly.parallel.for"); 623 Builder.SetInsertPoint(&ParBB->front()); 624 625 Body = isl_ast_node_for_get_body(For); 626 Init = isl_ast_node_for_get_init(For); 627 Inc = isl_ast_node_for_get_inc(For); 628 Iterator = isl_ast_node_for_get_iterator(For); 629 IteratorID = isl_ast_expr_get_id(Iterator); 630 UB = getUpperBound(isl::manage_copy(For), Predicate).release(); 631 632 ValueLB = ExprBuilder.create(Init); 633 ValueUB = ExprBuilder.create(UB); 634 ValueInc = ExprBuilder.create(Inc); 635 636 // OpenMP always uses SLE. In case the isl generated AST uses a SLT 637 // expression, we need to adjust the loop bound by one. 638 if (Predicate == CmpInst::ICMP_SLT) 639 ValueUB = Builder.CreateAdd( 640 ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType())); 641 642 MaxType = ExprBuilder.getType(Iterator); 643 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); 644 MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); 645 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); 646 647 if (MaxType != ValueLB->getType()) 648 ValueLB = Builder.CreateSExt(ValueLB, MaxType); 649 if (MaxType != ValueUB->getType()) 650 ValueUB = Builder.CreateSExt(ValueUB, MaxType); 651 if (MaxType != ValueInc->getType()) 652 ValueInc = Builder.CreateSExt(ValueInc, MaxType); 653 654 BasicBlock::iterator LoopBody; 655 656 SetVector<Value *> SubtreeValues; 657 SetVector<const Loop *> Loops; 658 659 getReferencesInSubtree(For, SubtreeValues, Loops); 660 661 // Create for all loops we depend on values that contain the current loop 662 // iteration. These values are necessary to generate code for SCEVs that 663 // depend on such loops. As a result we need to pass them to the subfunction. 664 // See [Code generation of induction variables of loops outside Scops] 665 for (const Loop *L : Loops) { 666 Value *LoopInductionVar = materializeNonScopLoopInductionVariable(L); 667 SubtreeValues.insert(LoopInductionVar); 668 } 669 670 ValueMapT NewValues; 671 ParallelLoopGenerator ParallelLoopGen(Builder, LI, DT, DL); 672 673 IV = ParallelLoopGen.createParallelLoop(ValueLB, ValueUB, ValueInc, 674 SubtreeValues, NewValues, &LoopBody); 675 BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); 676 Builder.SetInsertPoint(&*LoopBody); 677 678 // Remember the parallel subfunction 679 ParallelSubfunctions.push_back(LoopBody->getFunction()); 680 681 // Save the current values. 682 auto ValueMapCopy = ValueMap; 683 IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue; 684 685 updateValues(NewValues); 686 IDToValue[IteratorID] = IV; 687 688 ValueMapT NewValuesReverse; 689 690 for (auto P : NewValues) 691 NewValuesReverse[P.second] = P.first; 692 693 Annotator.addAlternativeAliasBases(NewValuesReverse); 694 695 create(Body); 696 697 Annotator.resetAlternativeAliasBases(); 698 // Restore the original values. 699 ValueMap = ValueMapCopy; 700 IDToValue = IDToValueCopy; 701 702 Builder.SetInsertPoint(&*AfterLoop); 703 removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT); 704 705 for (const Loop *L : Loops) 706 OutsideLoopIterations.erase(L); 707 708 isl_ast_node_free(For); 709 isl_ast_expr_free(Iterator); 710 isl_id_free(IteratorID); 711 712 ParallelLoops++; 713 } 714 715 /// Return whether any of @p Node's statements contain partial accesses. 716 /// 717 /// Partial accesses are not supported by Polly's vector code generator. 718 static bool hasPartialAccesses(__isl_take isl_ast_node *Node) { 719 return isl_ast_node_foreach_descendant_top_down( 720 Node, 721 [](isl_ast_node *Node, void *User) -> isl_bool { 722 if (isl_ast_node_get_type(Node) != isl_ast_node_user) 723 return isl_bool_true; 724 725 isl::ast_expr Expr = 726 isl::manage(isl_ast_node_user_get_expr(Node)); 727 isl::ast_expr StmtExpr = Expr.get_op_arg(0); 728 isl::id Id = StmtExpr.get_id(); 729 730 ScopStmt *Stmt = 731 static_cast<ScopStmt *>(isl_id_get_user(Id.get())); 732 isl::set StmtDom = Stmt->getDomain(); 733 for (auto *MA : *Stmt) { 734 if (MA->isLatestPartialAccess()) 735 return isl_bool_error; 736 } 737 return isl_bool_true; 738 }, 739 nullptr) == isl_stat_error; 740 } 741 742 void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) { 743 bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY; 744 745 if (Vector && IslAstInfo::isInnermostParallel(For) && 746 !IslAstInfo::isReductionParallel(For)) { 747 int VectorWidth = getNumberOfIterations(isl::manage_copy(For)); 748 if (1 < VectorWidth && VectorWidth <= 16 && !hasPartialAccesses(For)) { 749 createForVector(For, VectorWidth); 750 return; 751 } 752 } 753 754 if (IslAstInfo::isExecutedInParallel(For)) { 755 createForParallel(For); 756 return; 757 } 758 bool Parallel = 759 (IslAstInfo::isParallel(For) && !IslAstInfo::isReductionParallel(For)); 760 createForSequential(isl::manage(For), Parallel); 761 } 762 763 void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) { 764 isl_ast_expr *Cond = isl_ast_node_if_get_cond(If); 765 766 Function *F = Builder.GetInsertBlock()->getParent(); 767 LLVMContext &Context = F->getContext(); 768 769 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 770 &*Builder.GetInsertPoint(), &DT, &LI); 771 CondBB->setName("polly.cond"); 772 BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI); 773 MergeBB->setName("polly.merge"); 774 BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F); 775 BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F); 776 777 DT.addNewBlock(ThenBB, CondBB); 778 DT.addNewBlock(ElseBB, CondBB); 779 DT.changeImmediateDominator(MergeBB, CondBB); 780 781 Loop *L = LI.getLoopFor(CondBB); 782 if (L) { 783 L->addBasicBlockToLoop(ThenBB, LI); 784 L->addBasicBlockToLoop(ElseBB, LI); 785 } 786 787 CondBB->getTerminator()->eraseFromParent(); 788 789 Builder.SetInsertPoint(CondBB); 790 Value *Predicate = ExprBuilder.create(Cond); 791 Builder.CreateCondBr(Predicate, ThenBB, ElseBB); 792 Builder.SetInsertPoint(ThenBB); 793 Builder.CreateBr(MergeBB); 794 Builder.SetInsertPoint(ElseBB); 795 Builder.CreateBr(MergeBB); 796 Builder.SetInsertPoint(&ThenBB->front()); 797 798 create(isl_ast_node_if_get_then(If)); 799 800 Builder.SetInsertPoint(&ElseBB->front()); 801 802 if (isl_ast_node_if_has_else(If)) 803 create(isl_ast_node_if_get_else(If)); 804 805 Builder.SetInsertPoint(&MergeBB->front()); 806 807 isl_ast_node_free(If); 808 809 IfConditions++; 810 } 811 812 __isl_give isl_id_to_ast_expr * 813 IslNodeBuilder::createNewAccesses(ScopStmt *Stmt, 814 __isl_keep isl_ast_node *Node) { 815 isl_id_to_ast_expr *NewAccesses = 816 isl_id_to_ast_expr_alloc(Stmt->getParent()->getIslCtx().get(), 0); 817 818 auto *Build = IslAstInfo::getBuild(Node); 819 assert(Build && "Could not obtain isl_ast_build from user node"); 820 Stmt->setAstBuild(isl::manage_copy(Build)); 821 822 for (auto *MA : *Stmt) { 823 if (!MA->hasNewAccessRelation()) { 824 if (PollyGenerateExpressions) { 825 if (!MA->isAffine()) 826 continue; 827 if (MA->getLatestScopArrayInfo()->getBasePtrOriginSAI()) 828 continue; 829 830 auto *BasePtr = 831 dyn_cast<Instruction>(MA->getLatestScopArrayInfo()->getBasePtr()); 832 if (BasePtr && Stmt->getParent()->getRegion().contains(BasePtr)) 833 continue; 834 } else { 835 continue; 836 } 837 } 838 assert(MA->isAffine() && 839 "Only affine memory accesses can be code generated"); 840 841 auto Schedule = isl_ast_build_get_schedule(Build); 842 843 #ifndef NDEBUG 844 if (MA->isRead()) { 845 auto Dom = Stmt->getDomain().release(); 846 auto SchedDom = isl_set_from_union_set( 847 isl_union_map_domain(isl_union_map_copy(Schedule))); 848 auto AccDom = isl_map_domain(MA->getAccessRelation().release()); 849 Dom = isl_set_intersect_params(Dom, 850 Stmt->getParent()->getContext().release()); 851 SchedDom = isl_set_intersect_params( 852 SchedDom, Stmt->getParent()->getContext().release()); 853 assert(isl_set_is_subset(SchedDom, AccDom) && 854 "Access relation not defined on full schedule domain"); 855 assert(isl_set_is_subset(Dom, AccDom) && 856 "Access relation not defined on full domain"); 857 isl_set_free(AccDom); 858 isl_set_free(SchedDom); 859 isl_set_free(Dom); 860 } 861 #endif 862 863 auto PWAccRel = 864 MA->applyScheduleToAccessRelation(isl::manage(Schedule)).release(); 865 866 // isl cannot generate an index expression for access-nothing accesses. 867 isl::set AccDomain = 868 isl::manage(isl_pw_multi_aff_domain(isl_pw_multi_aff_copy(PWAccRel))); 869 isl::set Context = S.getContext(); 870 AccDomain = AccDomain.intersect_params(Context); 871 if (AccDomain.is_empty()) { 872 isl_pw_multi_aff_free(PWAccRel); 873 continue; 874 } 875 876 auto AccessExpr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); 877 NewAccesses = 878 isl_id_to_ast_expr_set(NewAccesses, MA->getId().release(), AccessExpr); 879 } 880 881 return NewAccesses; 882 } 883 884 void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr *Expr, 885 ScopStmt *Stmt, LoopToScevMapT <S) { 886 assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op && 887 "Expression of type 'op' expected"); 888 assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call && 889 "Operation of type 'call' expected"); 890 for (int i = 0; i < isl_ast_expr_get_op_n_arg(Expr) - 1; ++i) { 891 isl_ast_expr *SubExpr; 892 Value *V; 893 894 SubExpr = isl_ast_expr_get_op_arg(Expr, i + 1); 895 V = ExprBuilder.create(SubExpr); 896 ScalarEvolution *SE = Stmt->getParent()->getSE(); 897 LTS[Stmt->getLoopForDimension(i)] = SE->getUnknown(V); 898 } 899 900 isl_ast_expr_free(Expr); 901 } 902 903 void IslNodeBuilder::createSubstitutionsVector( 904 __isl_take isl_ast_expr *Expr, ScopStmt *Stmt, 905 std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS, 906 __isl_take isl_id *IteratorID) { 907 int i = 0; 908 909 Value *OldValue = IDToValue[IteratorID]; 910 for (Value *IV : IVS) { 911 IDToValue[IteratorID] = IV; 912 createSubstitutions(isl_ast_expr_copy(Expr), Stmt, VLTS[i]); 913 i++; 914 } 915 916 IDToValue[IteratorID] = OldValue; 917 isl_id_free(IteratorID); 918 isl_ast_expr_free(Expr); 919 } 920 921 void IslNodeBuilder::generateCopyStmt( 922 ScopStmt *Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) { 923 assert(Stmt->size() == 2); 924 auto ReadAccess = Stmt->begin(); 925 auto WriteAccess = ReadAccess++; 926 assert((*ReadAccess)->isRead() && (*WriteAccess)->isMustWrite()); 927 assert((*ReadAccess)->getElementType() == (*WriteAccess)->getElementType() && 928 "Accesses use the same data type"); 929 assert((*ReadAccess)->isArrayKind() && (*WriteAccess)->isArrayKind()); 930 auto *AccessExpr = 931 isl_id_to_ast_expr_get(NewAccesses, (*ReadAccess)->getId().release()); 932 auto *LoadValue = ExprBuilder.create(AccessExpr); 933 AccessExpr = 934 isl_id_to_ast_expr_get(NewAccesses, (*WriteAccess)->getId().release()); 935 auto *StoreAddr = ExprBuilder.createAccessAddress(AccessExpr); 936 Builder.CreateStore(LoadValue, StoreAddr); 937 } 938 939 Value *IslNodeBuilder::materializeNonScopLoopInductionVariable(const Loop *L) { 940 assert(OutsideLoopIterations.find(L) == OutsideLoopIterations.end() && 941 "trying to materialize loop induction variable twice"); 942 const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), 943 SE.getUnknown(Builder.getInt64(1)), L, 944 SCEV::FlagAnyWrap); 945 Value *V = generateSCEV(OuterLIV); 946 OutsideLoopIterations[L] = SE.getUnknown(V); 947 return V; 948 } 949 950 void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) { 951 LoopToScevMapT LTS; 952 isl_id *Id; 953 ScopStmt *Stmt; 954 955 isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); 956 isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); 957 Id = isl_ast_expr_get_id(StmtExpr); 958 isl_ast_expr_free(StmtExpr); 959 960 LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end()); 961 962 Stmt = (ScopStmt *)isl_id_get_user(Id); 963 auto *NewAccesses = createNewAccesses(Stmt, User); 964 if (Stmt->isCopyStmt()) { 965 generateCopyStmt(Stmt, NewAccesses); 966 isl_ast_expr_free(Expr); 967 } else { 968 createSubstitutions(Expr, Stmt, LTS); 969 970 if (Stmt->isBlockStmt()) 971 BlockGen.copyStmt(*Stmt, LTS, NewAccesses); 972 else 973 RegionGen.copyStmt(*Stmt, LTS, NewAccesses); 974 } 975 976 isl_id_to_ast_expr_free(NewAccesses); 977 isl_ast_node_free(User); 978 isl_id_free(Id); 979 } 980 981 void IslNodeBuilder::createBlock(__isl_take isl_ast_node *Block) { 982 isl_ast_node_list *List = isl_ast_node_block_get_children(Block); 983 984 for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) 985 create(isl_ast_node_list_get_ast_node(List, i)); 986 987 isl_ast_node_free(Block); 988 isl_ast_node_list_free(List); 989 } 990 991 void IslNodeBuilder::create(__isl_take isl_ast_node *Node) { 992 switch (isl_ast_node_get_type(Node)) { 993 case isl_ast_node_error: 994 llvm_unreachable("code generation error"); 995 case isl_ast_node_mark: 996 createMark(Node); 997 return; 998 case isl_ast_node_for: 999 createFor(Node); 1000 return; 1001 case isl_ast_node_if: 1002 createIf(Node); 1003 return; 1004 case isl_ast_node_user: 1005 createUser(Node); 1006 return; 1007 case isl_ast_node_block: 1008 createBlock(Node); 1009 return; 1010 } 1011 1012 llvm_unreachable("Unknown isl_ast_node type"); 1013 } 1014 1015 bool IslNodeBuilder::materializeValue(isl_id *Id) { 1016 // If the Id is already mapped, skip it. 1017 if (!IDToValue.count(Id)) { 1018 auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id); 1019 Value *V = nullptr; 1020 1021 // Parameters could refer to invariant loads that need to be 1022 // preloaded before we can generate code for the parameter. Thus, 1023 // check if any value referred to in ParamSCEV is an invariant load 1024 // and if so make sure its equivalence class is preloaded. 1025 SetVector<Value *> Values; 1026 findValues(ParamSCEV, SE, Values); 1027 for (auto *Val : Values) { 1028 // Check if the value is an instruction in a dead block within the SCoP 1029 // and if so do not code generate it. 1030 if (auto *Inst = dyn_cast<Instruction>(Val)) { 1031 if (S.contains(Inst)) { 1032 bool IsDead = true; 1033 1034 // Check for "undef" loads first, then if there is a statement for 1035 // the parent of Inst and lastly if the parent of Inst has an empty 1036 // domain. In the first and last case the instruction is dead but if 1037 // there is a statement or the domain is not empty Inst is not dead. 1038 auto MemInst = MemAccInst::dyn_cast(Inst); 1039 auto Address = MemInst ? MemInst.getPointerOperand() : nullptr; 1040 if (Address && SE.getUnknown(UndefValue::get(Address->getType())) == 1041 SE.getPointerBase(SE.getSCEV(Address))) { 1042 } else if (S.getStmtFor(Inst)) { 1043 IsDead = false; 1044 } else { 1045 auto *Domain = S.getDomainConditions(Inst->getParent()).release(); 1046 IsDead = isl_set_is_empty(Domain); 1047 isl_set_free(Domain); 1048 } 1049 1050 if (IsDead) { 1051 V = UndefValue::get(ParamSCEV->getType()); 1052 break; 1053 } 1054 } 1055 } 1056 1057 if (auto *IAClass = S.lookupInvariantEquivClass(Val)) { 1058 // Check if this invariant access class is empty, hence if we never 1059 // actually added a loads instruction to it. In that case it has no 1060 // (meaningful) users and we should not try to code generate it. 1061 if (IAClass->InvariantAccesses.empty()) 1062 V = UndefValue::get(ParamSCEV->getType()); 1063 1064 if (!preloadInvariantEquivClass(*IAClass)) { 1065 isl_id_free(Id); 1066 return false; 1067 } 1068 } 1069 } 1070 1071 V = V ? V : generateSCEV(ParamSCEV); 1072 IDToValue[Id] = V; 1073 } 1074 1075 isl_id_free(Id); 1076 return true; 1077 } 1078 1079 bool IslNodeBuilder::materializeParameters(isl_set *Set) { 1080 for (unsigned i = 0, e = isl_set_dim(Set, isl_dim_param); i < e; ++i) { 1081 if (!isl_set_involves_dims(Set, isl_dim_param, i, 1)) 1082 continue; 1083 isl_id *Id = isl_set_get_dim_id(Set, isl_dim_param, i); 1084 if (!materializeValue(Id)) 1085 return false; 1086 } 1087 return true; 1088 } 1089 1090 bool IslNodeBuilder::materializeParameters() { 1091 for (const SCEV *Param : S.parameters()) { 1092 isl_id *Id = S.getIdForParam(Param).release(); 1093 if (!materializeValue(Id)) 1094 return false; 1095 } 1096 return true; 1097 } 1098 1099 /// Generate the computation of the size of the outermost dimension from the 1100 /// Fortran array descriptor (in this case, `@g_arr`). The final `%size` 1101 /// contains the size of the array. 1102 /// 1103 /// %arrty = type { i8*, i64, i64, [3 x %desc.dimensionty] } 1104 /// %desc.dimensionty = type { i64, i64, i64 } 1105 /// @g_arr = global %arrty zeroinitializer, align 32 1106 /// ... 1107 /// %0 = load i64, i64* getelementptr inbounds 1108 /// (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 2) 1109 /// %1 = load i64, i64* getelementptr inbounds 1110 /// (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 1) 1111 /// %2 = sub nsw i64 %0, %1 1112 /// %size = add nsw i64 %2, 1 1113 static Value *buildFADOutermostDimensionLoad(Value *GlobalDescriptor, 1114 PollyIRBuilder &Builder, 1115 std::string ArrayName) { 1116 assert(GlobalDescriptor && "invalid global descriptor given"); 1117 1118 Value *endIdx[4] = {Builder.getInt64(0), Builder.getInt32(3), 1119 Builder.getInt64(0), Builder.getInt32(2)}; 1120 Value *endPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, endIdx, 1121 ArrayName + "_end_ptr"); 1122 Value *end = Builder.CreateLoad(endPtr, ArrayName + "_end"); 1123 1124 Value *beginIdx[4] = {Builder.getInt64(0), Builder.getInt32(3), 1125 Builder.getInt64(0), Builder.getInt32(1)}; 1126 Value *beginPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, beginIdx, 1127 ArrayName + "_begin_ptr"); 1128 Value *begin = Builder.CreateLoad(beginPtr, ArrayName + "_begin"); 1129 1130 Value *size = 1131 Builder.CreateNSWSub(end, begin, ArrayName + "_end_begin_delta"); 1132 Type *endType = dyn_cast<IntegerType>(end->getType()); 1133 assert(endType && "expected type of end to be integral"); 1134 1135 size = Builder.CreateNSWAdd(end, 1136 ConstantInt::get(endType, 1, /* signed = */ true), 1137 ArrayName + "_size"); 1138 1139 return size; 1140 } 1141 1142 bool IslNodeBuilder::materializeFortranArrayOutermostDimension() { 1143 for (ScopArrayInfo *Array : S.arrays()) { 1144 if (Array->getNumberOfDimensions() == 0) 1145 continue; 1146 1147 Value *FAD = Array->getFortranArrayDescriptor(); 1148 if (!FAD) 1149 continue; 1150 1151 isl_pw_aff *ParametricPwAff = Array->getDimensionSizePw(0).release(); 1152 assert(ParametricPwAff && "parametric pw_aff corresponding " 1153 "to outermost dimension does not " 1154 "exist"); 1155 1156 isl_id *Id = isl_pw_aff_get_dim_id(ParametricPwAff, isl_dim_param, 0); 1157 isl_pw_aff_free(ParametricPwAff); 1158 1159 assert(Id && "pw_aff is not parametric"); 1160 1161 if (IDToValue.count(Id)) { 1162 isl_id_free(Id); 1163 continue; 1164 } 1165 1166 Value *FinalValue = 1167 buildFADOutermostDimensionLoad(FAD, Builder, Array->getName()); 1168 assert(FinalValue && "unable to build Fortran array " 1169 "descriptor load of outermost dimension"); 1170 IDToValue[Id] = FinalValue; 1171 isl_id_free(Id); 1172 } 1173 return true; 1174 } 1175 1176 Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange, 1177 isl_ast_build *Build, 1178 Instruction *AccInst) { 1179 isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange); 1180 isl_ast_expr *Access = 1181 isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); 1182 auto *Address = isl_ast_expr_address_of(Access); 1183 auto *AddressValue = ExprBuilder.create(Address); 1184 Value *PreloadVal; 1185 1186 // Correct the type as the SAI might have a different type than the user 1187 // expects, especially if the base pointer is a struct. 1188 Type *Ty = AccInst->getType(); 1189 1190 auto *Ptr = AddressValue; 1191 auto Name = Ptr->getName(); 1192 auto AS = Ptr->getType()->getPointerAddressSpace(); 1193 Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(AS), Name + ".cast"); 1194 PreloadVal = Builder.CreateLoad(Ptr, Name + ".load"); 1195 if (LoadInst *PreloadInst = dyn_cast<LoadInst>(PreloadVal)) 1196 PreloadInst->setAlignment(dyn_cast<LoadInst>(AccInst)->getAlignment()); 1197 1198 // TODO: This is only a hot fix for SCoP sequences that use the same load 1199 // instruction contained and hoisted by one of the SCoPs. 1200 if (SE.isSCEVable(Ty)) 1201 SE.forgetValue(AccInst); 1202 1203 return PreloadVal; 1204 } 1205 1206 Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, 1207 isl_set *Domain) { 1208 isl_set *AccessRange = isl_map_range(MA.getAddressFunction().release()); 1209 AccessRange = isl_set_gist_params(AccessRange, S.getContext().release()); 1210 1211 if (!materializeParameters(AccessRange)) { 1212 isl_set_free(AccessRange); 1213 isl_set_free(Domain); 1214 return nullptr; 1215 } 1216 1217 auto *Build = 1218 isl_ast_build_from_context(isl_set_universe(S.getParamSpace().release())); 1219 isl_set *Universe = isl_set_universe(isl_set_get_space(Domain)); 1220 bool AlwaysExecuted = isl_set_is_equal(Domain, Universe); 1221 isl_set_free(Universe); 1222 1223 Instruction *AccInst = MA.getAccessInstruction(); 1224 Type *AccInstTy = AccInst->getType(); 1225 1226 Value *PreloadVal = nullptr; 1227 if (AlwaysExecuted) { 1228 PreloadVal = preloadUnconditionally(AccessRange, Build, AccInst); 1229 isl_ast_build_free(Build); 1230 isl_set_free(Domain); 1231 return PreloadVal; 1232 } 1233 1234 if (!materializeParameters(Domain)) { 1235 isl_ast_build_free(Build); 1236 isl_set_free(AccessRange); 1237 isl_set_free(Domain); 1238 return nullptr; 1239 } 1240 1241 isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); 1242 Domain = nullptr; 1243 1244 ExprBuilder.setTrackOverflow(true); 1245 Value *Cond = ExprBuilder.create(DomainCond); 1246 Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(), 1247 "polly.preload.cond.overflown"); 1248 Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result"); 1249 ExprBuilder.setTrackOverflow(false); 1250 1251 if (!Cond->getType()->isIntegerTy(1)) 1252 Cond = Builder.CreateIsNotNull(Cond); 1253 1254 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 1255 &*Builder.GetInsertPoint(), &DT, &LI); 1256 CondBB->setName("polly.preload.cond"); 1257 1258 BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI); 1259 MergeBB->setName("polly.preload.merge"); 1260 1261 Function *F = Builder.GetInsertBlock()->getParent(); 1262 LLVMContext &Context = F->getContext(); 1263 BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F); 1264 1265 DT.addNewBlock(ExecBB, CondBB); 1266 if (Loop *L = LI.getLoopFor(CondBB)) 1267 L->addBasicBlockToLoop(ExecBB, LI); 1268 1269 auto *CondBBTerminator = CondBB->getTerminator(); 1270 Builder.SetInsertPoint(CondBBTerminator); 1271 Builder.CreateCondBr(Cond, ExecBB, MergeBB); 1272 CondBBTerminator->eraseFromParent(); 1273 1274 Builder.SetInsertPoint(ExecBB); 1275 Builder.CreateBr(MergeBB); 1276 1277 Builder.SetInsertPoint(ExecBB->getTerminator()); 1278 Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInst); 1279 Builder.SetInsertPoint(MergeBB->getTerminator()); 1280 auto *MergePHI = Builder.CreatePHI( 1281 AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge"); 1282 PreloadVal = MergePHI; 1283 1284 if (!PreAccInst) { 1285 PreloadVal = nullptr; 1286 PreAccInst = UndefValue::get(AccInstTy); 1287 } 1288 1289 MergePHI->addIncoming(PreAccInst, ExecBB); 1290 MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB); 1291 1292 isl_ast_build_free(Build); 1293 return PreloadVal; 1294 } 1295 1296 bool IslNodeBuilder::preloadInvariantEquivClass( 1297 InvariantEquivClassTy &IAClass) { 1298 // For an equivalence class of invariant loads we pre-load the representing 1299 // element with the unified execution context. However, we have to map all 1300 // elements of the class to the one preloaded load as they are referenced 1301 // during the code generation and therefor need to be mapped. 1302 const MemoryAccessList &MAs = IAClass.InvariantAccesses; 1303 if (MAs.empty()) 1304 return true; 1305 1306 MemoryAccess *MA = MAs.front(); 1307 assert(MA->isArrayKind() && MA->isRead()); 1308 1309 // If the access function was already mapped, the preload of this equivalence 1310 // class was triggered earlier already and doesn't need to be done again. 1311 if (ValueMap.count(MA->getAccessInstruction())) 1312 return true; 1313 1314 // Check for recursion which can be caused by additional constraints, e.g., 1315 // non-finite loop constraints. In such a case we have to bail out and insert 1316 // a "false" runtime check that will cause the original code to be executed. 1317 auto PtrId = std::make_pair(IAClass.IdentifyingPointer, IAClass.AccessType); 1318 if (!PreloadedPtrs.insert(PtrId).second) 1319 return false; 1320 1321 // The execution context of the IAClass. 1322 isl::set &ExecutionCtx = IAClass.ExecutionContext; 1323 1324 // If the base pointer of this class is dependent on another one we have to 1325 // make sure it was preloaded already. 1326 auto *SAI = MA->getScopArrayInfo(); 1327 if (auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) { 1328 if (!preloadInvariantEquivClass(*BaseIAClass)) 1329 return false; 1330 1331 // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and 1332 // we need to refine the ExecutionCtx. 1333 isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext; 1334 ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx); 1335 } 1336 1337 // If the size of a dimension is dependent on another class, make sure it is 1338 // preloaded. 1339 for (unsigned i = 1, e = SAI->getNumberOfDimensions(); i < e; ++i) { 1340 const SCEV *Dim = SAI->getDimensionSize(i); 1341 SetVector<Value *> Values; 1342 findValues(Dim, SE, Values); 1343 for (auto *Val : Values) { 1344 if (auto *BaseIAClass = S.lookupInvariantEquivClass(Val)) { 1345 if (!preloadInvariantEquivClass(*BaseIAClass)) 1346 return false; 1347 1348 // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx 1349 // and we need to refine the ExecutionCtx. 1350 isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext; 1351 ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx); 1352 } 1353 } 1354 } 1355 1356 Instruction *AccInst = MA->getAccessInstruction(); 1357 Type *AccInstTy = AccInst->getType(); 1358 1359 Value *PreloadVal = preloadInvariantLoad(*MA, ExecutionCtx.copy()); 1360 if (!PreloadVal) 1361 return false; 1362 1363 for (const MemoryAccess *MA : MAs) { 1364 Instruction *MAAccInst = MA->getAccessInstruction(); 1365 assert(PreloadVal->getType() == MAAccInst->getType()); 1366 ValueMap[MAAccInst] = PreloadVal; 1367 } 1368 1369 if (SE.isSCEVable(AccInstTy)) { 1370 isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)).release(); 1371 if (ParamId) 1372 IDToValue[ParamId] = PreloadVal; 1373 isl_id_free(ParamId); 1374 } 1375 1376 BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock(); 1377 auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(), 1378 AccInst->getName() + ".preload.s2a"); 1379 Alloca->insertBefore(&*EntryBB->getFirstInsertionPt()); 1380 Builder.CreateStore(PreloadVal, Alloca); 1381 ValueMapT PreloadedPointer; 1382 PreloadedPointer[PreloadVal] = AccInst; 1383 Annotator.addAlternativeAliasBases(PreloadedPointer); 1384 1385 for (auto *DerivedSAI : SAI->getDerivedSAIs()) { 1386 Value *BasePtr = DerivedSAI->getBasePtr(); 1387 1388 for (const MemoryAccess *MA : MAs) { 1389 // As the derived SAI information is quite coarse, any load from the 1390 // current SAI could be the base pointer of the derived SAI, however we 1391 // should only change the base pointer of the derived SAI if we actually 1392 // preloaded it. 1393 if (BasePtr == MA->getOriginalBaseAddr()) { 1394 assert(BasePtr->getType() == PreloadVal->getType()); 1395 DerivedSAI->setBasePtr(PreloadVal); 1396 } 1397 1398 // For scalar derived SAIs we remap the alloca used for the derived value. 1399 if (BasePtr == MA->getAccessInstruction()) 1400 ScalarMap[DerivedSAI] = Alloca; 1401 } 1402 } 1403 1404 for (const MemoryAccess *MA : MAs) { 1405 Instruction *MAAccInst = MA->getAccessInstruction(); 1406 // Use the escape system to get the correct value to users outside the SCoP. 1407 BlockGenerator::EscapeUserVectorTy EscapeUsers; 1408 for (auto *U : MAAccInst->users()) 1409 if (Instruction *UI = dyn_cast<Instruction>(U)) 1410 if (!S.contains(UI)) 1411 EscapeUsers.push_back(UI); 1412 1413 if (EscapeUsers.empty()) 1414 continue; 1415 1416 EscapeMap[MA->getAccessInstruction()] = 1417 std::make_pair(Alloca, std::move(EscapeUsers)); 1418 } 1419 1420 return true; 1421 } 1422 1423 void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) { 1424 for (auto &SAI : S.arrays()) { 1425 if (SAI->getBasePtr()) 1426 continue; 1427 1428 assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) && 1429 "The size of the outermost dimension is used to declare newly " 1430 "created arrays that require memory allocation."); 1431 1432 Type *NewArrayType = nullptr; 1433 1434 // Get the size of the array = size(dim_1)*...*size(dim_n) 1435 uint64_t ArraySizeInt = 1; 1436 for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; i--) { 1437 auto *DimSize = SAI->getDimensionSize(i); 1438 unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize) 1439 ->getAPInt() 1440 .getLimitedValue(); 1441 1442 if (!NewArrayType) 1443 NewArrayType = SAI->getElementType(); 1444 1445 NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize); 1446 ArraySizeInt *= UnsignedDimSize; 1447 } 1448 1449 if (SAI->isOnHeap()) { 1450 LLVMContext &Ctx = NewArrayType->getContext(); 1451 1452 // Get the IntPtrTy from the Datalayout 1453 auto IntPtrTy = DL.getIntPtrType(Ctx); 1454 1455 // Get the size of the element type in bits 1456 unsigned Size = SAI->getElemSizeInBytes(); 1457 1458 // Insert the malloc call at polly.start 1459 auto InstIt = std::get<0>(StartExitBlocks)->getTerminator(); 1460 auto *CreatedArray = CallInst::CreateMalloc( 1461 &*InstIt, IntPtrTy, SAI->getElementType(), 1462 ConstantInt::get(Type::getInt64Ty(Ctx), Size), 1463 ConstantInt::get(Type::getInt64Ty(Ctx), ArraySizeInt), nullptr, 1464 SAI->getName()); 1465 1466 SAI->setBasePtr(CreatedArray); 1467 1468 // Insert the free call at polly.exiting 1469 CallInst::CreateFree(CreatedArray, 1470 std::get<1>(StartExitBlocks)->getTerminator()); 1471 } else { 1472 auto InstIt = Builder.GetInsertBlock() 1473 ->getParent() 1474 ->getEntryBlock() 1475 .getTerminator(); 1476 1477 auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(), 1478 SAI->getName(), &*InstIt); 1479 CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize); 1480 SAI->setBasePtr(CreatedArray); 1481 } 1482 } 1483 } 1484 1485 bool IslNodeBuilder::preloadInvariantLoads() { 1486 auto &InvariantEquivClasses = S.getInvariantAccesses(); 1487 if (InvariantEquivClasses.empty()) 1488 return true; 1489 1490 BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(), 1491 &*Builder.GetInsertPoint(), &DT, &LI); 1492 PreLoadBB->setName("polly.preload.begin"); 1493 Builder.SetInsertPoint(&PreLoadBB->front()); 1494 1495 for (auto &IAClass : InvariantEquivClasses) 1496 if (!preloadInvariantEquivClass(IAClass)) 1497 return false; 1498 1499 return true; 1500 } 1501 1502 void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { 1503 // Materialize values for the parameters of the SCoP. 1504 materializeParameters(); 1505 1506 // materialize the outermost dimension parameters for a Fortran array. 1507 // NOTE: materializeParameters() does not work since it looks through 1508 // the SCEVs. We don't have a corresponding SCEV for the array size 1509 // parameter 1510 materializeFortranArrayOutermostDimension(); 1511 1512 // Generate values for the current loop iteration for all surrounding loops. 1513 // 1514 // We may also reference loops outside of the scop which do not contain the 1515 // scop itself, but as the number of such scops may be arbitrarily large we do 1516 // not generate code for them here, but only at the point of code generation 1517 // where these values are needed. 1518 Loop *L = LI.getLoopFor(S.getEntry()); 1519 1520 while (L != nullptr && S.contains(L)) 1521 L = L->getParentLoop(); 1522 1523 while (L != nullptr) { 1524 materializeNonScopLoopInductionVariable(L); 1525 L = L->getParentLoop(); 1526 } 1527 1528 isl_set_free(Context); 1529 } 1530 1531 Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) { 1532 /// We pass the insert location of our Builder, as Polly ensures during IR 1533 /// generation that there is always a valid CFG into which instructions are 1534 /// inserted. As a result, the insertpoint is known to be always followed by a 1535 /// terminator instruction. This means the insert point may be specified by a 1536 /// terminator instruction, but it can never point to an ->end() iterator 1537 /// which does not have a corresponding instruction. Hence, dereferencing 1538 /// the insertpoint to obtain an instruction is known to be save. 1539 /// 1540 /// We also do not need to update the Builder here, as new instructions are 1541 /// always inserted _before_ the given InsertLocation. As a result, the 1542 /// insert location remains valid. 1543 assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() && 1544 "Insert location points after last valid instruction"); 1545 Instruction *InsertLocation = &*Builder.GetInsertPoint(); 1546 return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(), 1547 InsertLocation, &ValueMap, 1548 StartBlock->getSinglePredecessor()); 1549 } 1550 1551 /// The AST expression we generate to perform the run-time check assumes 1552 /// computations on integer types of infinite size. As we only use 64-bit 1553 /// arithmetic we check for overflows, in case of which we set the result 1554 /// of this run-time check to false to be conservatively correct, 1555 Value *IslNodeBuilder::createRTC(isl_ast_expr *Condition) { 1556 auto ExprBuilder = getExprBuilder(); 1557 1558 // In case the AST expression has integers larger than 64 bit, bail out. The 1559 // resulting LLVM-IR will contain operations on types that use more than 64 1560 // bits. These are -- in case wrapping intrinsics are used -- translated to 1561 // runtime library calls that are not available on all systems (e.g., Android) 1562 // and consequently will result in linker errors. 1563 if (ExprBuilder.hasLargeInts(isl::manage_copy(Condition))) { 1564 isl_ast_expr_free(Condition); 1565 return Builder.getFalse(); 1566 } 1567 1568 ExprBuilder.setTrackOverflow(true); 1569 Value *RTC = ExprBuilder.create(Condition); 1570 if (!RTC->getType()->isIntegerTy(1)) 1571 RTC = Builder.CreateIsNotNull(RTC); 1572 Value *OverflowHappened = 1573 Builder.CreateNot(ExprBuilder.getOverflowState(), "polly.rtc.overflown"); 1574 1575 if (PollyGenerateRTCPrint) { 1576 auto *F = Builder.GetInsertBlock()->getParent(); 1577 RuntimeDebugBuilder::createCPUPrinter( 1578 Builder, 1579 "F: " + F->getName().str() + " R: " + S.getRegion().getNameStr() + 1580 "RTC: ", 1581 RTC, " Overflow: ", OverflowHappened, 1582 "\n" 1583 " (0 failed, -1 succeeded)\n" 1584 " (if one or both are 0 falling back to original code, if both are -1 " 1585 "executing Polly code)\n"); 1586 } 1587 1588 RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result"); 1589 ExprBuilder.setTrackOverflow(false); 1590 1591 if (!isa<ConstantInt>(RTC)) 1592 VersionedScops++; 1593 1594 return RTC; 1595 } 1596