1 //===- IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the IslNodeBuilder, a class to translate an isl AST into 10 // a LLVM-IR AST. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/CodeGen/IslNodeBuilder.h" 15 #include "polly/CodeGen/BlockGenerators.h" 16 #include "polly/CodeGen/CodeGeneration.h" 17 #include "polly/CodeGen/IslAst.h" 18 #include "polly/CodeGen/IslExprBuilder.h" 19 #include "polly/CodeGen/LoopGeneratorsGOMP.h" 20 #include "polly/CodeGen/LoopGeneratorsKMP.h" 21 #include "polly/CodeGen/RuntimeDebugBuilder.h" 22 #include "polly/Options.h" 23 #include "polly/ScopInfo.h" 24 #include "polly/Support/ISLTools.h" 25 #include "polly/Support/SCEVValidator.h" 26 #include "polly/Support/ScopHelper.h" 27 #include "polly/Support/VirtualInstruction.h" 28 #include "llvm/ADT/APInt.h" 29 #include "llvm/ADT/PostOrderIterator.h" 30 #include "llvm/ADT/SetVector.h" 31 #include "llvm/ADT/SmallPtrSet.h" 32 #include "llvm/ADT/Statistic.h" 33 #include "llvm/Analysis/LoopInfo.h" 34 #include "llvm/Analysis/RegionInfo.h" 35 #include "llvm/Analysis/ScalarEvolution.h" 36 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 37 #include "llvm/IR/BasicBlock.h" 38 #include "llvm/IR/Constant.h" 39 #include "llvm/IR/Constants.h" 40 #include "llvm/IR/DataLayout.h" 41 #include "llvm/IR/DerivedTypes.h" 42 #include "llvm/IR/Dominators.h" 43 #include "llvm/IR/Function.h" 44 #include "llvm/IR/InstrTypes.h" 45 #include "llvm/IR/Instruction.h" 46 #include "llvm/IR/Instructions.h" 47 #include "llvm/IR/Type.h" 48 #include "llvm/IR/Value.h" 49 #include "llvm/Support/Casting.h" 50 #include "llvm/Support/CommandLine.h" 51 #include "llvm/Support/ErrorHandling.h" 52 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 53 #include "isl/aff.h" 54 #include "isl/aff_type.h" 55 #include "isl/ast.h" 56 #include "isl/ast_build.h" 57 #include "isl/isl-noexceptions.h" 58 #include "isl/map.h" 59 #include "isl/set.h" 60 #include "isl/union_map.h" 61 #include "isl/union_set.h" 62 #include "isl/val.h" 63 #include <algorithm> 64 #include <cassert> 65 #include <cstdint> 66 #include <cstring> 67 #include <string> 68 #include <utility> 69 #include <vector> 70 71 using namespace llvm; 72 using namespace polly; 73 74 #define DEBUG_TYPE "polly-codegen" 75 76 STATISTIC(VersionedScops, "Number of SCoPs that required versioning."); 77 78 STATISTIC(SequentialLoops, "Number of generated sequential for-loops"); 79 STATISTIC(ParallelLoops, "Number of generated parallel for-loops"); 80 STATISTIC(VectorLoops, "Number of generated vector for-loops"); 81 STATISTIC(IfConditions, "Number of generated if-conditions"); 82 83 /// OpenMP backend options 84 enum class OpenMPBackend { GNU, LLVM }; 85 86 static cl::opt<bool> PollyGenerateRTCPrint( 87 "polly-codegen-emit-rtc-print", 88 cl::desc("Emit code that prints the runtime check result dynamically."), 89 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 90 91 // If this option is set we always use the isl AST generator to regenerate 92 // memory accesses. Without this option set we regenerate expressions using the 93 // original SCEV expressions and only generate new expressions in case the 94 // access relation has been changed and consequently must be regenerated. 95 static cl::opt<bool> PollyGenerateExpressions( 96 "polly-codegen-generate-expressions", 97 cl::desc("Generate AST expressions for unmodified and modified accesses"), 98 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 99 100 static cl::opt<int> PollyTargetFirstLevelCacheLineSize( 101 "polly-target-first-level-cache-line-size", 102 cl::desc("The size of the first level cache line size specified in bytes."), 103 cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory)); 104 105 static cl::opt<OpenMPBackend> PollyOmpBackend( 106 "polly-omp-backend", cl::desc("Choose the OpenMP library to use:"), 107 cl::values(clEnumValN(OpenMPBackend::GNU, "GNU", "GNU OpenMP"), 108 clEnumValN(OpenMPBackend::LLVM, "LLVM", "LLVM OpenMP")), 109 cl::Hidden, cl::init(OpenMPBackend::GNU), cl::cat(PollyCategory)); 110 111 isl::ast_expr IslNodeBuilder::getUpperBound(isl::ast_node_for For, 112 ICmpInst::Predicate &Predicate) { 113 isl::ast_expr Cond = For.cond(); 114 isl::ast_expr Iterator = For.iterator(); 115 assert(isl_ast_expr_get_type(Cond.get()) == isl_ast_expr_op && 116 "conditional expression is not an atomic upper bound"); 117 118 isl_ast_op_type OpType = isl_ast_expr_get_op_type(Cond.get()); 119 120 switch (OpType) { 121 case isl_ast_op_le: 122 Predicate = ICmpInst::ICMP_SLE; 123 break; 124 case isl_ast_op_lt: 125 Predicate = ICmpInst::ICMP_SLT; 126 break; 127 default: 128 llvm_unreachable("Unexpected comparison type in loop condition"); 129 } 130 131 isl::ast_expr Arg0 = Cond.get_op_arg(0); 132 133 assert(isl_ast_expr_get_type(Arg0.get()) == isl_ast_expr_id && 134 "conditional expression is not an atomic upper bound"); 135 136 isl::id UBID = Arg0.get_id(); 137 138 assert(isl_ast_expr_get_type(Iterator.get()) == isl_ast_expr_id && 139 "Could not get the iterator"); 140 141 isl::id IteratorID = Iterator.get_id(); 142 143 assert(UBID.get() == IteratorID.get() && 144 "conditional expression is not an atomic upper bound"); 145 146 return Cond.get_op_arg(1); 147 } 148 149 int IslNodeBuilder::getNumberOfIterations(isl::ast_node_for For) { 150 assert(isl_ast_node_get_type(For.get()) == isl_ast_node_for); 151 isl::ast_node Body = For.body(); 152 153 // First, check if we can actually handle this code. 154 switch (isl_ast_node_get_type(Body.get())) { 155 case isl_ast_node_user: 156 break; 157 case isl_ast_node_block: { 158 isl::ast_node_block BodyBlock = Body.as<isl::ast_node_block>(); 159 isl::ast_node_list List = BodyBlock.children(); 160 for (isl::ast_node Node : List) { 161 isl_ast_node_type NodeType = isl_ast_node_get_type(Node.get()); 162 if (NodeType != isl_ast_node_user) 163 return -1; 164 } 165 break; 166 } 167 default: 168 return -1; 169 } 170 171 isl::ast_expr Init = For.init(); 172 if (!Init.isa<isl::ast_expr_int>() || !Init.val().is_zero()) 173 return -1; 174 isl::ast_expr Inc = For.inc(); 175 if (!Inc.isa<isl::ast_expr_int>() || !Inc.val().is_one()) 176 return -1; 177 CmpInst::Predicate Predicate; 178 isl::ast_expr UB = getUpperBound(For, Predicate); 179 if (!UB.isa<isl::ast_expr_int>()) 180 return -1; 181 isl::val UpVal = UB.get_val(); 182 int NumberIterations = UpVal.get_num_si(); 183 if (NumberIterations < 0) 184 return -1; 185 if (Predicate == CmpInst::ICMP_SLT) 186 return NumberIterations; 187 else 188 return NumberIterations + 1; 189 } 190 191 static void findReferencesByUse(Value *SrcVal, ScopStmt *UserStmt, 192 Loop *UserScope, const ValueMapT &GlobalMap, 193 SetVector<Value *> &Values, 194 SetVector<const SCEV *> &SCEVs) { 195 VirtualUse VUse = VirtualUse::create(UserStmt, UserScope, SrcVal, true); 196 switch (VUse.getKind()) { 197 case VirtualUse::Constant: 198 // When accelerator-offloading, GlobalValue is a host address whose content 199 // must still be transferred to the GPU. 200 if (isa<GlobalValue>(SrcVal)) 201 Values.insert(SrcVal); 202 break; 203 204 case VirtualUse::Synthesizable: 205 SCEVs.insert(VUse.getScevExpr()); 206 return; 207 208 case VirtualUse::Block: 209 case VirtualUse::ReadOnly: 210 case VirtualUse::Hoisted: 211 case VirtualUse::Intra: 212 case VirtualUse::Inter: 213 break; 214 } 215 216 if (Value *NewVal = GlobalMap.lookup(SrcVal)) 217 Values.insert(NewVal); 218 } 219 220 static void findReferencesInInst(Instruction *Inst, ScopStmt *UserStmt, 221 Loop *UserScope, const ValueMapT &GlobalMap, 222 SetVector<Value *> &Values, 223 SetVector<const SCEV *> &SCEVs) { 224 for (Use &U : Inst->operands()) 225 findReferencesByUse(U.get(), UserStmt, UserScope, GlobalMap, Values, SCEVs); 226 } 227 228 static void findReferencesInStmt(ScopStmt *Stmt, SetVector<Value *> &Values, 229 ValueMapT &GlobalMap, 230 SetVector<const SCEV *> &SCEVs) { 231 LoopInfo *LI = Stmt->getParent()->getLI(); 232 233 BasicBlock *BB = Stmt->getBasicBlock(); 234 Loop *Scope = LI->getLoopFor(BB); 235 for (Instruction *Inst : Stmt->getInstructions()) 236 findReferencesInInst(Inst, Stmt, Scope, GlobalMap, Values, SCEVs); 237 238 if (Stmt->isRegionStmt()) { 239 for (BasicBlock *BB : Stmt->getRegion()->blocks()) { 240 Loop *Scope = LI->getLoopFor(BB); 241 for (Instruction &Inst : *BB) 242 findReferencesInInst(&Inst, Stmt, Scope, GlobalMap, Values, SCEVs); 243 } 244 } 245 } 246 247 void polly::addReferencesFromStmt(ScopStmt *Stmt, void *UserPtr, 248 bool CreateScalarRefs) { 249 auto &References = *static_cast<struct SubtreeReferences *>(UserPtr); 250 251 findReferencesInStmt(Stmt, References.Values, References.GlobalMap, 252 References.SCEVs); 253 254 for (auto &Access : *Stmt) { 255 if (References.ParamSpace) { 256 isl::space ParamSpace = Access->getLatestAccessRelation().get_space(); 257 (*References.ParamSpace) = 258 References.ParamSpace->align_params(ParamSpace); 259 } 260 261 if (Access->isLatestArrayKind()) { 262 auto *BasePtr = Access->getLatestScopArrayInfo()->getBasePtr(); 263 if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr)) 264 if (Stmt->getParent()->contains(OpInst)) 265 continue; 266 267 References.Values.insert(BasePtr); 268 continue; 269 } 270 271 if (CreateScalarRefs) 272 References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access)); 273 } 274 } 275 276 /// Extract the out-of-scop values and SCEVs referenced from a set describing 277 /// a ScopStmt. 278 /// 279 /// This includes the SCEVUnknowns referenced by the SCEVs used in the 280 /// statement and the base pointers of the memory accesses. For scalar 281 /// statements we force the generation of alloca memory locations and list 282 /// these locations in the set of out-of-scop values as well. 283 /// 284 /// @param Set A set which references the ScopStmt we are interested in. 285 /// @param UserPtr A void pointer that can be casted to a SubtreeReferences 286 /// structure. 287 static void addReferencesFromStmtSet(isl::set Set, 288 struct SubtreeReferences *UserPtr) { 289 isl::id Id = Set.get_tuple_id(); 290 auto *Stmt = static_cast<ScopStmt *>(Id.get_user()); 291 addReferencesFromStmt(Stmt, UserPtr); 292 } 293 294 /// Extract the out-of-scop values and SCEVs referenced from a union set 295 /// referencing multiple ScopStmts. 296 /// 297 /// This includes the SCEVUnknowns referenced by the SCEVs used in the 298 /// statement and the base pointers of the memory accesses. For scalar 299 /// statements we force the generation of alloca memory locations and list 300 /// these locations in the set of out-of-scop values as well. 301 /// 302 /// @param USet A union set referencing the ScopStmts we are interested 303 /// in. 304 /// @param References The SubtreeReferences data structure through which 305 /// results are returned and further information is 306 /// provided. 307 static void 308 addReferencesFromStmtUnionSet(isl::union_set USet, 309 struct SubtreeReferences &References) { 310 311 for (isl::set Set : USet.get_set_list()) 312 addReferencesFromStmtSet(Set, &References); 313 } 314 315 isl::union_map 316 IslNodeBuilder::getScheduleForAstNode(const isl::ast_node &Node) { 317 return IslAstInfo::getSchedule(Node); 318 } 319 320 void IslNodeBuilder::getReferencesInSubtree(const isl::ast_node &For, 321 SetVector<Value *> &Values, 322 SetVector<const Loop *> &Loops) { 323 SetVector<const SCEV *> SCEVs; 324 struct SubtreeReferences References = { 325 LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr}; 326 327 for (const auto &I : IDToValue) 328 Values.insert(I.second); 329 330 // NOTE: this is populated in IslNodeBuilder::addParameters 331 for (const auto &I : OutsideLoopIterations) 332 Values.insert(cast<SCEVUnknown>(I.second)->getValue()); 333 334 isl::union_set Schedule = getScheduleForAstNode(For).domain(); 335 addReferencesFromStmtUnionSet(Schedule, References); 336 337 for (const SCEV *Expr : SCEVs) { 338 findValues(Expr, SE, Values); 339 findLoops(Expr, Loops); 340 } 341 342 Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); }); 343 344 /// Note: Code generation of induction variables of loops outside Scops 345 /// 346 /// Remove loops that contain the scop or that are part of the scop, as they 347 /// are considered local. This leaves only loops that are before the scop, but 348 /// do not contain the scop itself. 349 /// We ignore loops perfectly contained in the Scop because these are already 350 /// generated at `IslNodeBuilder::addParameters`. These `Loops` are loops 351 /// whose induction variables are referred to by the Scop, but the Scop is not 352 /// fully contained in these Loops. Since there can be many of these, 353 /// we choose to codegen these on-demand. 354 /// @see IslNodeBuilder::materializeNonScopLoopInductionVariable. 355 Loops.remove_if([this](const Loop *L) { 356 return S.contains(L) || L->contains(S.getEntry()); 357 }); 358 359 // Contains Values that may need to be replaced with other values 360 // due to replacements from the ValueMap. We should make sure 361 // that we return correctly remapped values. 362 // NOTE: this code path is tested by: 363 // 1. test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll 364 // 2. test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll 365 SetVector<Value *> ReplacedValues; 366 for (Value *V : Values) { 367 ReplacedValues.insert(getLatestValue(V)); 368 } 369 Values = ReplacedValues; 370 } 371 372 void IslNodeBuilder::updateValues(ValueMapT &NewValues) { 373 SmallPtrSet<Value *, 5> Inserted; 374 375 for (const auto &I : IDToValue) { 376 IDToValue[I.first] = NewValues[I.second]; 377 Inserted.insert(I.second); 378 } 379 380 for (const auto &I : NewValues) { 381 if (Inserted.count(I.first)) 382 continue; 383 384 ValueMap[I.first] = I.second; 385 } 386 } 387 388 Value *IslNodeBuilder::getLatestValue(Value *Original) const { 389 auto It = ValueMap.find(Original); 390 if (It == ValueMap.end()) 391 return Original; 392 return It->second; 393 } 394 395 void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, 396 std::vector<Value *> &IVS, 397 __isl_take isl_id *IteratorID, 398 __isl_take isl_union_map *Schedule) { 399 isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); 400 isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); 401 isl_id *Id = isl_ast_expr_get_id(StmtExpr); 402 isl_ast_expr_free(StmtExpr); 403 ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id); 404 std::vector<LoopToScevMapT> VLTS(IVS.size()); 405 406 isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain().release()); 407 Schedule = isl_union_map_intersect_domain(Schedule, Domain); 408 isl_map *S = isl_map_from_union_map(Schedule); 409 410 auto *NewAccesses = createNewAccesses(Stmt, User); 411 createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID); 412 VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses); 413 isl_id_to_ast_expr_free(NewAccesses); 414 isl_map_free(S); 415 isl_id_free(Id); 416 isl_ast_node_free(User); 417 } 418 419 void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) { 420 auto *Id = isl_ast_node_mark_get_id(Node); 421 auto Child = isl_ast_node_mark_get_node(Node); 422 isl_ast_node_free(Node); 423 // If a child node of a 'SIMD mark' is a loop that has a single iteration, 424 // it will be optimized away and we should skip it. 425 if (strcmp(isl_id_get_name(Id), "SIMD") == 0 && 426 isl_ast_node_get_type(Child) == isl_ast_node_for) { 427 bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY; 428 int VectorWidth = 429 getNumberOfIterations(isl::manage_copy(Child).as<isl::ast_node_for>()); 430 if (Vector && 1 < VectorWidth && VectorWidth <= 16) 431 createForVector(Child, VectorWidth); 432 else 433 createForSequential(isl::manage(Child).as<isl::ast_node_for>(), true); 434 isl_id_free(Id); 435 return; 436 } 437 438 BandAttr *ChildLoopAttr = getLoopAttr(isl::manage_copy(Id)); 439 BandAttr *AncestorLoopAttr; 440 if (ChildLoopAttr) { 441 // Save current LoopAttr environment to restore again when leaving this 442 // subtree. This means there was no loop between the ancestor LoopAttr and 443 // this mark, i.e. the ancestor LoopAttr did not directly mark a loop. This 444 // can happen e.g. if the AST build peeled or unrolled the loop. 445 AncestorLoopAttr = Annotator.getStagingAttrEnv(); 446 447 Annotator.getStagingAttrEnv() = ChildLoopAttr; 448 } 449 450 create(Child); 451 452 if (ChildLoopAttr) { 453 assert(Annotator.getStagingAttrEnv() == ChildLoopAttr && 454 "Nest must not overwrite loop attr environment"); 455 Annotator.getStagingAttrEnv() = AncestorLoopAttr; 456 } 457 458 isl_id_free(Id); 459 } 460 461 void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For, 462 int VectorWidth) { 463 isl_ast_node *Body = isl_ast_node_for_get_body(For); 464 isl_ast_expr *Init = isl_ast_node_for_get_init(For); 465 isl_ast_expr *Inc = isl_ast_node_for_get_inc(For); 466 isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For); 467 isl_id *IteratorID = isl_ast_expr_get_id(Iterator); 468 469 Value *ValueLB = ExprBuilder.create(Init); 470 Value *ValueInc = ExprBuilder.create(Inc); 471 472 Type *MaxType = ExprBuilder.getType(Iterator); 473 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); 474 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); 475 476 if (MaxType != ValueLB->getType()) 477 ValueLB = Builder.CreateSExt(ValueLB, MaxType); 478 if (MaxType != ValueInc->getType()) 479 ValueInc = Builder.CreateSExt(ValueInc, MaxType); 480 481 std::vector<Value *> IVS(VectorWidth); 482 IVS[0] = ValueLB; 483 484 for (int i = 1; i < VectorWidth; i++) 485 IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv"); 486 487 isl::union_map Schedule = getScheduleForAstNode(isl::manage_copy(For)); 488 assert(!Schedule.is_null() && 489 "For statement annotation does not contain its schedule"); 490 491 IDToValue[IteratorID] = ValueLB; 492 493 switch (isl_ast_node_get_type(Body)) { 494 case isl_ast_node_user: 495 createUserVector(Body, IVS, isl_id_copy(IteratorID), Schedule.copy()); 496 break; 497 case isl_ast_node_block: { 498 isl_ast_node_list *List = isl_ast_node_block_get_children(Body); 499 500 for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) 501 createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS, 502 isl_id_copy(IteratorID), Schedule.copy()); 503 504 isl_ast_node_free(Body); 505 isl_ast_node_list_free(List); 506 break; 507 } 508 default: 509 isl_ast_node_dump(Body); 510 llvm_unreachable("Unhandled isl_ast_node in vectorizer"); 511 } 512 513 IDToValue.erase(IDToValue.find(IteratorID)); 514 isl_id_free(IteratorID); 515 516 isl_ast_node_free(For); 517 isl_ast_expr_free(Iterator); 518 519 VectorLoops++; 520 } 521 522 /// Restore the initial ordering of dimensions of the band node 523 /// 524 /// In case the band node represents all the dimensions of the iteration 525 /// domain, recreate the band node to restore the initial ordering of the 526 /// dimensions. 527 /// 528 /// @param Node The band node to be modified. 529 /// @return The modified schedule node. 530 static bool IsLoopVectorizerDisabled(isl::ast_node_for Node) { 531 assert(isl_ast_node_get_type(Node.get()) == isl_ast_node_for); 532 isl::ast_node Body = Node.body(); 533 if (isl_ast_node_get_type(Body.get()) != isl_ast_node_mark) 534 return false; 535 536 isl::ast_node_mark BodyMark = Body.as<isl::ast_node_mark>(); 537 auto Id = BodyMark.id(); 538 if (strcmp(Id.get_name().c_str(), "Loop Vectorizer Disabled") == 0) 539 return true; 540 return false; 541 } 542 543 void IslNodeBuilder::createForSequential(isl::ast_node_for For, 544 bool MarkParallel) { 545 Value *ValueLB, *ValueUB, *ValueInc; 546 Type *MaxType; 547 BasicBlock *ExitBlock; 548 Value *IV; 549 CmpInst::Predicate Predicate; 550 551 bool LoopVectorizerDisabled = IsLoopVectorizerDisabled(For); 552 553 isl::ast_node Body = For.body(); 554 555 // isl_ast_node_for_is_degenerate(For) 556 // 557 // TODO: For degenerated loops we could generate a plain assignment. 558 // However, for now we just reuse the logic for normal loops, which will 559 // create a loop with a single iteration. 560 561 isl::ast_expr Init = For.init(); 562 isl::ast_expr Inc = For.inc(); 563 isl::ast_expr Iterator = For.iterator(); 564 isl::id IteratorID = Iterator.get_id(); 565 isl::ast_expr UB = getUpperBound(For, Predicate); 566 567 ValueLB = ExprBuilder.create(Init.release()); 568 ValueUB = ExprBuilder.create(UB.release()); 569 ValueInc = ExprBuilder.create(Inc.release()); 570 571 MaxType = ExprBuilder.getType(Iterator.get()); 572 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); 573 MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); 574 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); 575 576 if (MaxType != ValueLB->getType()) 577 ValueLB = Builder.CreateSExt(ValueLB, MaxType); 578 if (MaxType != ValueUB->getType()) 579 ValueUB = Builder.CreateSExt(ValueUB, MaxType); 580 if (MaxType != ValueInc->getType()) 581 ValueInc = Builder.CreateSExt(ValueInc, MaxType); 582 583 // If we can show that LB <Predicate> UB holds at least once, we can 584 // omit the GuardBB in front of the loop. 585 bool UseGuardBB = 586 !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB)); 587 IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock, 588 Predicate, &Annotator, MarkParallel, UseGuardBB, 589 LoopVectorizerDisabled); 590 IDToValue[IteratorID.get()] = IV; 591 592 create(Body.release()); 593 594 Annotator.popLoop(MarkParallel); 595 596 IDToValue.erase(IDToValue.find(IteratorID.get())); 597 598 Builder.SetInsertPoint(&ExitBlock->front()); 599 600 SequentialLoops++; 601 } 602 603 /// Remove the BBs contained in a (sub)function from the dominator tree. 604 /// 605 /// This function removes the basic blocks that are part of a subfunction from 606 /// the dominator tree. Specifically, when generating code it may happen that at 607 /// some point the code generation continues in a new sub-function (e.g., when 608 /// generating OpenMP code). The basic blocks that are created in this 609 /// sub-function are then still part of the dominator tree of the original 610 /// function, such that the dominator tree reaches over function boundaries. 611 /// This is not only incorrect, but also causes crashes. This function now 612 /// removes from the dominator tree all basic blocks that are dominated (and 613 /// consequently reachable) from the entry block of this (sub)function. 614 /// 615 /// FIXME: A LLVM (function or region) pass should not touch anything outside of 616 /// the function/region it runs on. Hence, the pure need for this function shows 617 /// that we do not comply to this rule. At the moment, this does not cause any 618 /// issues, but we should be aware that such issues may appear. Unfortunately 619 /// the current LLVM pass infrastructure does not allow to make Polly a module 620 /// or call-graph pass to solve this issue, as such a pass would not have access 621 /// to the per-function analyses passes needed by Polly. A future pass manager 622 /// infrastructure is supposed to enable such kind of access possibly allowing 623 /// us to create a cleaner solution here. 624 /// 625 /// FIXME: Instead of adding the dominance information and then dropping it 626 /// later on, we should try to just not add it in the first place. This requires 627 /// some careful testing to make sure this does not break in interaction with 628 /// the SCEVBuilder and SplitBlock which may rely on the dominator tree or 629 /// which may try to update it. 630 /// 631 /// @param F The function which contains the BBs to removed. 632 /// @param DT The dominator tree from which to remove the BBs. 633 static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) { 634 DomTreeNode *N = DT.getNode(&F->getEntryBlock()); 635 std::vector<BasicBlock *> Nodes; 636 637 // We can only remove an element from the dominator tree, if all its children 638 // have been removed. To ensure this we obtain the list of nodes to remove 639 // using a post-order tree traversal. 640 for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I) 641 Nodes.push_back(I->getBlock()); 642 643 for (BasicBlock *BB : Nodes) 644 DT.eraseNode(BB); 645 } 646 647 void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) { 648 isl_ast_node *Body; 649 isl_ast_expr *Init, *Inc, *Iterator, *UB; 650 isl_id *IteratorID; 651 Value *ValueLB, *ValueUB, *ValueInc; 652 Type *MaxType; 653 Value *IV; 654 CmpInst::Predicate Predicate; 655 656 // The preamble of parallel code interacts different than normal code with 657 // e.g., scalar initialization. Therefore, we ensure the parallel code is 658 // separated from the last basic block. 659 BasicBlock *ParBB = SplitBlock(Builder.GetInsertBlock(), 660 &*Builder.GetInsertPoint(), &DT, &LI); 661 ParBB->setName("polly.parallel.for"); 662 Builder.SetInsertPoint(&ParBB->front()); 663 664 Body = isl_ast_node_for_get_body(For); 665 Init = isl_ast_node_for_get_init(For); 666 Inc = isl_ast_node_for_get_inc(For); 667 Iterator = isl_ast_node_for_get_iterator(For); 668 IteratorID = isl_ast_expr_get_id(Iterator); 669 UB = getUpperBound(isl::manage_copy(For).as<isl::ast_node_for>(), Predicate) 670 .release(); 671 672 ValueLB = ExprBuilder.create(Init); 673 ValueUB = ExprBuilder.create(UB); 674 ValueInc = ExprBuilder.create(Inc); 675 676 // OpenMP always uses SLE. In case the isl generated AST uses a SLT 677 // expression, we need to adjust the loop bound by one. 678 if (Predicate == CmpInst::ICMP_SLT) 679 ValueUB = Builder.CreateAdd( 680 ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType())); 681 682 MaxType = ExprBuilder.getType(Iterator); 683 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); 684 MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); 685 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); 686 687 if (MaxType != ValueLB->getType()) 688 ValueLB = Builder.CreateSExt(ValueLB, MaxType); 689 if (MaxType != ValueUB->getType()) 690 ValueUB = Builder.CreateSExt(ValueUB, MaxType); 691 if (MaxType != ValueInc->getType()) 692 ValueInc = Builder.CreateSExt(ValueInc, MaxType); 693 694 BasicBlock::iterator LoopBody; 695 696 SetVector<Value *> SubtreeValues; 697 SetVector<const Loop *> Loops; 698 699 getReferencesInSubtree(isl::manage_copy(For), SubtreeValues, Loops); 700 701 // Create for all loops we depend on values that contain the current loop 702 // iteration. These values are necessary to generate code for SCEVs that 703 // depend on such loops. As a result we need to pass them to the subfunction. 704 // See [Code generation of induction variables of loops outside Scops] 705 for (const Loop *L : Loops) { 706 Value *LoopInductionVar = materializeNonScopLoopInductionVariable(L); 707 SubtreeValues.insert(LoopInductionVar); 708 } 709 710 ValueMapT NewValues; 711 712 std::unique_ptr<ParallelLoopGenerator> ParallelLoopGenPtr; 713 714 switch (PollyOmpBackend) { 715 case OpenMPBackend::GNU: 716 ParallelLoopGenPtr.reset( 717 new ParallelLoopGeneratorGOMP(Builder, LI, DT, DL)); 718 break; 719 case OpenMPBackend::LLVM: 720 ParallelLoopGenPtr.reset(new ParallelLoopGeneratorKMP(Builder, LI, DT, DL)); 721 break; 722 } 723 724 IV = ParallelLoopGenPtr->createParallelLoop( 725 ValueLB, ValueUB, ValueInc, SubtreeValues, NewValues, &LoopBody); 726 BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); 727 Builder.SetInsertPoint(&*LoopBody); 728 729 // Remember the parallel subfunction 730 ParallelSubfunctions.push_back(LoopBody->getFunction()); 731 732 // Save the current values. 733 auto ValueMapCopy = ValueMap; 734 IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue; 735 736 updateValues(NewValues); 737 IDToValue[IteratorID] = IV; 738 739 ValueMapT NewValuesReverse; 740 741 for (auto P : NewValues) 742 NewValuesReverse[P.second] = P.first; 743 744 Annotator.addAlternativeAliasBases(NewValuesReverse); 745 746 create(Body); 747 748 Annotator.resetAlternativeAliasBases(); 749 // Restore the original values. 750 ValueMap = ValueMapCopy; 751 IDToValue = IDToValueCopy; 752 753 Builder.SetInsertPoint(&*AfterLoop); 754 removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT); 755 756 for (const Loop *L : Loops) 757 OutsideLoopIterations.erase(L); 758 759 isl_ast_node_free(For); 760 isl_ast_expr_free(Iterator); 761 isl_id_free(IteratorID); 762 763 ParallelLoops++; 764 } 765 766 /// Return whether any of @p Node's statements contain partial accesses. 767 /// 768 /// Partial accesses are not supported by Polly's vector code generator. 769 static bool hasPartialAccesses(__isl_take isl_ast_node *Node) { 770 return isl_ast_node_foreach_descendant_top_down( 771 Node, 772 [](isl_ast_node *Node, void *User) -> isl_bool { 773 if (isl_ast_node_get_type(Node) != isl_ast_node_user) 774 return isl_bool_true; 775 776 isl::ast_expr Expr = 777 isl::manage(isl_ast_node_user_get_expr(Node)); 778 isl::ast_expr StmtExpr = Expr.get_op_arg(0); 779 isl::id Id = StmtExpr.get_id(); 780 781 ScopStmt *Stmt = 782 static_cast<ScopStmt *>(isl_id_get_user(Id.get())); 783 isl::set StmtDom = Stmt->getDomain(); 784 for (auto *MA : *Stmt) { 785 if (MA->isLatestPartialAccess()) 786 return isl_bool_error; 787 } 788 return isl_bool_true; 789 }, 790 nullptr) == isl_stat_error; 791 } 792 793 void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) { 794 bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY; 795 796 if (Vector && IslAstInfo::isInnermostParallel(isl::manage_copy(For)) && 797 !IslAstInfo::isReductionParallel(isl::manage_copy(For))) { 798 int VectorWidth = 799 getNumberOfIterations(isl::manage_copy(For).as<isl::ast_node_for>()); 800 if (1 < VectorWidth && VectorWidth <= 16 && !hasPartialAccesses(For)) { 801 createForVector(For, VectorWidth); 802 return; 803 } 804 } 805 806 if (IslAstInfo::isExecutedInParallel(isl::manage_copy(For))) { 807 createForParallel(For); 808 return; 809 } 810 bool Parallel = (IslAstInfo::isParallel(isl::manage_copy(For)) && 811 !IslAstInfo::isReductionParallel(isl::manage_copy(For))); 812 createForSequential(isl::manage(For).as<isl::ast_node_for>(), Parallel); 813 } 814 815 void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) { 816 isl_ast_expr *Cond = isl_ast_node_if_get_cond(If); 817 818 Function *F = Builder.GetInsertBlock()->getParent(); 819 LLVMContext &Context = F->getContext(); 820 821 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 822 &*Builder.GetInsertPoint(), &DT, &LI); 823 CondBB->setName("polly.cond"); 824 BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI); 825 MergeBB->setName("polly.merge"); 826 BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F); 827 BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F); 828 829 DT.addNewBlock(ThenBB, CondBB); 830 DT.addNewBlock(ElseBB, CondBB); 831 DT.changeImmediateDominator(MergeBB, CondBB); 832 833 Loop *L = LI.getLoopFor(CondBB); 834 if (L) { 835 L->addBasicBlockToLoop(ThenBB, LI); 836 L->addBasicBlockToLoop(ElseBB, LI); 837 } 838 839 CondBB->getTerminator()->eraseFromParent(); 840 841 Builder.SetInsertPoint(CondBB); 842 Value *Predicate = ExprBuilder.create(Cond); 843 Builder.CreateCondBr(Predicate, ThenBB, ElseBB); 844 Builder.SetInsertPoint(ThenBB); 845 Builder.CreateBr(MergeBB); 846 Builder.SetInsertPoint(ElseBB); 847 Builder.CreateBr(MergeBB); 848 Builder.SetInsertPoint(&ThenBB->front()); 849 850 create(isl_ast_node_if_get_then(If)); 851 852 Builder.SetInsertPoint(&ElseBB->front()); 853 854 if (isl_ast_node_if_has_else(If)) 855 create(isl_ast_node_if_get_else(If)); 856 857 Builder.SetInsertPoint(&MergeBB->front()); 858 859 isl_ast_node_free(If); 860 861 IfConditions++; 862 } 863 864 __isl_give isl_id_to_ast_expr * 865 IslNodeBuilder::createNewAccesses(ScopStmt *Stmt, 866 __isl_keep isl_ast_node *Node) { 867 isl::id_to_ast_expr NewAccesses = 868 isl::id_to_ast_expr::alloc(Stmt->getParent()->getIslCtx(), 0); 869 870 isl::ast_build Build = IslAstInfo::getBuild(isl::manage_copy(Node)); 871 assert(!Build.is_null() && "Could not obtain isl_ast_build from user node"); 872 Stmt->setAstBuild(Build); 873 874 for (auto *MA : *Stmt) { 875 if (!MA->hasNewAccessRelation()) { 876 if (PollyGenerateExpressions) { 877 if (!MA->isAffine()) 878 continue; 879 if (MA->getLatestScopArrayInfo()->getBasePtrOriginSAI()) 880 continue; 881 882 auto *BasePtr = 883 dyn_cast<Instruction>(MA->getLatestScopArrayInfo()->getBasePtr()); 884 if (BasePtr && Stmt->getParent()->getRegion().contains(BasePtr)) 885 continue; 886 } else { 887 continue; 888 } 889 } 890 assert(MA->isAffine() && 891 "Only affine memory accesses can be code generated"); 892 893 isl::union_map Schedule = Build.get_schedule(); 894 895 #ifndef NDEBUG 896 if (MA->isRead()) { 897 auto Dom = Stmt->getDomain().release(); 898 auto SchedDom = isl_set_from_union_set(Schedule.domain().release()); 899 auto AccDom = isl_map_domain(MA->getAccessRelation().release()); 900 Dom = isl_set_intersect_params(Dom, 901 Stmt->getParent()->getContext().release()); 902 SchedDom = isl_set_intersect_params( 903 SchedDom, Stmt->getParent()->getContext().release()); 904 assert(isl_set_is_subset(SchedDom, AccDom) && 905 "Access relation not defined on full schedule domain"); 906 assert(isl_set_is_subset(Dom, AccDom) && 907 "Access relation not defined on full domain"); 908 isl_set_free(AccDom); 909 isl_set_free(SchedDom); 910 isl_set_free(Dom); 911 } 912 #endif 913 914 isl::pw_multi_aff PWAccRel = MA->applyScheduleToAccessRelation(Schedule); 915 916 // isl cannot generate an index expression for access-nothing accesses. 917 isl::set AccDomain = PWAccRel.domain(); 918 isl::set Context = S.getContext(); 919 AccDomain = AccDomain.intersect_params(Context); 920 if (AccDomain.is_empty()) 921 continue; 922 923 isl::ast_expr AccessExpr = Build.access_from(PWAccRel); 924 NewAccesses = NewAccesses.set(MA->getId(), AccessExpr); 925 } 926 927 return NewAccesses.release(); 928 } 929 930 void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr *Expr, 931 ScopStmt *Stmt, LoopToScevMapT <S) { 932 assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op && 933 "Expression of type 'op' expected"); 934 assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call && 935 "Operation of type 'call' expected"); 936 for (int i = 0; i < isl_ast_expr_get_op_n_arg(Expr) - 1; ++i) { 937 isl_ast_expr *SubExpr; 938 Value *V; 939 940 SubExpr = isl_ast_expr_get_op_arg(Expr, i + 1); 941 V = ExprBuilder.create(SubExpr); 942 ScalarEvolution *SE = Stmt->getParent()->getSE(); 943 LTS[Stmt->getLoopForDimension(i)] = SE->getUnknown(V); 944 } 945 946 isl_ast_expr_free(Expr); 947 } 948 949 void IslNodeBuilder::createSubstitutionsVector( 950 __isl_take isl_ast_expr *Expr, ScopStmt *Stmt, 951 std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS, 952 __isl_take isl_id *IteratorID) { 953 int i = 0; 954 955 Value *OldValue = IDToValue[IteratorID]; 956 for (Value *IV : IVS) { 957 IDToValue[IteratorID] = IV; 958 createSubstitutions(isl_ast_expr_copy(Expr), Stmt, VLTS[i]); 959 i++; 960 } 961 962 IDToValue[IteratorID] = OldValue; 963 isl_id_free(IteratorID); 964 isl_ast_expr_free(Expr); 965 } 966 967 void IslNodeBuilder::generateCopyStmt( 968 ScopStmt *Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) { 969 assert(Stmt->size() == 2); 970 auto ReadAccess = Stmt->begin(); 971 auto WriteAccess = ReadAccess++; 972 assert((*ReadAccess)->isRead() && (*WriteAccess)->isMustWrite()); 973 assert((*ReadAccess)->getElementType() == (*WriteAccess)->getElementType() && 974 "Accesses use the same data type"); 975 assert((*ReadAccess)->isArrayKind() && (*WriteAccess)->isArrayKind()); 976 auto *AccessExpr = 977 isl_id_to_ast_expr_get(NewAccesses, (*ReadAccess)->getId().release()); 978 auto *LoadValue = ExprBuilder.create(AccessExpr); 979 AccessExpr = 980 isl_id_to_ast_expr_get(NewAccesses, (*WriteAccess)->getId().release()); 981 auto *StoreAddr = ExprBuilder.createAccessAddress(AccessExpr).first; 982 Builder.CreateStore(LoadValue, StoreAddr); 983 } 984 985 Value *IslNodeBuilder::materializeNonScopLoopInductionVariable(const Loop *L) { 986 assert(OutsideLoopIterations.find(L) == OutsideLoopIterations.end() && 987 "trying to materialize loop induction variable twice"); 988 const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), 989 SE.getUnknown(Builder.getInt64(1)), L, 990 SCEV::FlagAnyWrap); 991 Value *V = generateSCEV(OuterLIV); 992 OutsideLoopIterations[L] = SE.getUnknown(V); 993 return V; 994 } 995 996 void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) { 997 LoopToScevMapT LTS; 998 isl_id *Id; 999 ScopStmt *Stmt; 1000 1001 isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); 1002 isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); 1003 Id = isl_ast_expr_get_id(StmtExpr); 1004 isl_ast_expr_free(StmtExpr); 1005 1006 LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end()); 1007 1008 Stmt = (ScopStmt *)isl_id_get_user(Id); 1009 auto *NewAccesses = createNewAccesses(Stmt, User); 1010 if (Stmt->isCopyStmt()) { 1011 generateCopyStmt(Stmt, NewAccesses); 1012 isl_ast_expr_free(Expr); 1013 } else { 1014 createSubstitutions(Expr, Stmt, LTS); 1015 1016 if (Stmt->isBlockStmt()) 1017 BlockGen.copyStmt(*Stmt, LTS, NewAccesses); 1018 else 1019 RegionGen.copyStmt(*Stmt, LTS, NewAccesses); 1020 } 1021 1022 isl_id_to_ast_expr_free(NewAccesses); 1023 isl_ast_node_free(User); 1024 isl_id_free(Id); 1025 } 1026 1027 void IslNodeBuilder::createBlock(__isl_take isl_ast_node *Block) { 1028 isl_ast_node_list *List = isl_ast_node_block_get_children(Block); 1029 1030 for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) 1031 create(isl_ast_node_list_get_ast_node(List, i)); 1032 1033 isl_ast_node_free(Block); 1034 isl_ast_node_list_free(List); 1035 } 1036 1037 void IslNodeBuilder::create(__isl_take isl_ast_node *Node) { 1038 switch (isl_ast_node_get_type(Node)) { 1039 case isl_ast_node_error: 1040 llvm_unreachable("code generation error"); 1041 case isl_ast_node_mark: 1042 createMark(Node); 1043 return; 1044 case isl_ast_node_for: 1045 createFor(Node); 1046 return; 1047 case isl_ast_node_if: 1048 createIf(Node); 1049 return; 1050 case isl_ast_node_user: 1051 createUser(Node); 1052 return; 1053 case isl_ast_node_block: 1054 createBlock(Node); 1055 return; 1056 } 1057 1058 llvm_unreachable("Unknown isl_ast_node type"); 1059 } 1060 1061 bool IslNodeBuilder::materializeValue(isl_id *Id) { 1062 // If the Id is already mapped, skip it. 1063 if (!IDToValue.count(Id)) { 1064 auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id); 1065 Value *V = nullptr; 1066 1067 // Parameters could refer to invariant loads that need to be 1068 // preloaded before we can generate code for the parameter. Thus, 1069 // check if any value referred to in ParamSCEV is an invariant load 1070 // and if so make sure its equivalence class is preloaded. 1071 SetVector<Value *> Values; 1072 findValues(ParamSCEV, SE, Values); 1073 for (auto *Val : Values) { 1074 // Check if the value is an instruction in a dead block within the SCoP 1075 // and if so do not code generate it. 1076 if (auto *Inst = dyn_cast<Instruction>(Val)) { 1077 if (S.contains(Inst)) { 1078 bool IsDead = true; 1079 1080 // Check for "undef" loads first, then if there is a statement for 1081 // the parent of Inst and lastly if the parent of Inst has an empty 1082 // domain. In the first and last case the instruction is dead but if 1083 // there is a statement or the domain is not empty Inst is not dead. 1084 auto MemInst = MemAccInst::dyn_cast(Inst); 1085 auto Address = MemInst ? MemInst.getPointerOperand() : nullptr; 1086 if (Address && SE.getUnknown(UndefValue::get(Address->getType())) == 1087 SE.getPointerBase(SE.getSCEV(Address))) { 1088 } else if (S.getStmtFor(Inst)) { 1089 IsDead = false; 1090 } else { 1091 auto *Domain = S.getDomainConditions(Inst->getParent()).release(); 1092 IsDead = isl_set_is_empty(Domain); 1093 isl_set_free(Domain); 1094 } 1095 1096 if (IsDead) { 1097 V = UndefValue::get(ParamSCEV->getType()); 1098 break; 1099 } 1100 } 1101 } 1102 1103 if (auto *IAClass = S.lookupInvariantEquivClass(Val)) { 1104 // Check if this invariant access class is empty, hence if we never 1105 // actually added a loads instruction to it. In that case it has no 1106 // (meaningful) users and we should not try to code generate it. 1107 if (IAClass->InvariantAccesses.empty()) 1108 V = UndefValue::get(ParamSCEV->getType()); 1109 1110 if (!preloadInvariantEquivClass(*IAClass)) { 1111 isl_id_free(Id); 1112 return false; 1113 } 1114 } 1115 } 1116 1117 V = V ? V : generateSCEV(ParamSCEV); 1118 IDToValue[Id] = V; 1119 } 1120 1121 isl_id_free(Id); 1122 return true; 1123 } 1124 1125 bool IslNodeBuilder::materializeParameters(isl_set *Set) { 1126 for (unsigned i = 0, e = isl_set_dim(Set, isl_dim_param); i < e; ++i) { 1127 if (!isl_set_involves_dims(Set, isl_dim_param, i, 1)) 1128 continue; 1129 isl_id *Id = isl_set_get_dim_id(Set, isl_dim_param, i); 1130 if (!materializeValue(Id)) 1131 return false; 1132 } 1133 return true; 1134 } 1135 1136 bool IslNodeBuilder::materializeParameters() { 1137 for (const SCEV *Param : S.parameters()) { 1138 isl_id *Id = S.getIdForParam(Param).release(); 1139 if (!materializeValue(Id)) 1140 return false; 1141 } 1142 return true; 1143 } 1144 1145 Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange, 1146 isl_ast_build *Build, 1147 Instruction *AccInst) { 1148 isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange); 1149 isl_ast_expr *Access = 1150 isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); 1151 auto *Address = isl_ast_expr_address_of(Access); 1152 auto *AddressValue = ExprBuilder.create(Address); 1153 Value *PreloadVal; 1154 1155 // Correct the type as the SAI might have a different type than the user 1156 // expects, especially if the base pointer is a struct. 1157 Type *Ty = AccInst->getType(); 1158 1159 auto *Ptr = AddressValue; 1160 auto Name = Ptr->getName(); 1161 auto AS = Ptr->getType()->getPointerAddressSpace(); 1162 Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(AS), Name + ".cast"); 1163 PreloadVal = Builder.CreateLoad(Ty, Ptr, Name + ".load"); 1164 if (LoadInst *PreloadInst = dyn_cast<LoadInst>(PreloadVal)) 1165 PreloadInst->setAlignment(cast<LoadInst>(AccInst)->getAlign()); 1166 1167 // TODO: This is only a hot fix for SCoP sequences that use the same load 1168 // instruction contained and hoisted by one of the SCoPs. 1169 if (SE.isSCEVable(Ty)) 1170 SE.forgetValue(AccInst); 1171 1172 return PreloadVal; 1173 } 1174 1175 Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, 1176 isl_set *Domain) { 1177 isl_set *AccessRange = isl_map_range(MA.getAddressFunction().release()); 1178 AccessRange = isl_set_gist_params(AccessRange, S.getContext().release()); 1179 1180 if (!materializeParameters(AccessRange)) { 1181 isl_set_free(AccessRange); 1182 isl_set_free(Domain); 1183 return nullptr; 1184 } 1185 1186 auto *Build = 1187 isl_ast_build_from_context(isl_set_universe(S.getParamSpace().release())); 1188 isl_set *Universe = isl_set_universe(isl_set_get_space(Domain)); 1189 bool AlwaysExecuted = isl_set_is_equal(Domain, Universe); 1190 isl_set_free(Universe); 1191 1192 Instruction *AccInst = MA.getAccessInstruction(); 1193 Type *AccInstTy = AccInst->getType(); 1194 1195 Value *PreloadVal = nullptr; 1196 if (AlwaysExecuted) { 1197 PreloadVal = preloadUnconditionally(AccessRange, Build, AccInst); 1198 isl_ast_build_free(Build); 1199 isl_set_free(Domain); 1200 return PreloadVal; 1201 } 1202 1203 if (!materializeParameters(Domain)) { 1204 isl_ast_build_free(Build); 1205 isl_set_free(AccessRange); 1206 isl_set_free(Domain); 1207 return nullptr; 1208 } 1209 1210 isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); 1211 Domain = nullptr; 1212 1213 ExprBuilder.setTrackOverflow(true); 1214 Value *Cond = ExprBuilder.create(DomainCond); 1215 Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(), 1216 "polly.preload.cond.overflown"); 1217 Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result"); 1218 ExprBuilder.setTrackOverflow(false); 1219 1220 if (!Cond->getType()->isIntegerTy(1)) 1221 Cond = Builder.CreateIsNotNull(Cond); 1222 1223 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 1224 &*Builder.GetInsertPoint(), &DT, &LI); 1225 CondBB->setName("polly.preload.cond"); 1226 1227 BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI); 1228 MergeBB->setName("polly.preload.merge"); 1229 1230 Function *F = Builder.GetInsertBlock()->getParent(); 1231 LLVMContext &Context = F->getContext(); 1232 BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F); 1233 1234 DT.addNewBlock(ExecBB, CondBB); 1235 if (Loop *L = LI.getLoopFor(CondBB)) 1236 L->addBasicBlockToLoop(ExecBB, LI); 1237 1238 auto *CondBBTerminator = CondBB->getTerminator(); 1239 Builder.SetInsertPoint(CondBBTerminator); 1240 Builder.CreateCondBr(Cond, ExecBB, MergeBB); 1241 CondBBTerminator->eraseFromParent(); 1242 1243 Builder.SetInsertPoint(ExecBB); 1244 Builder.CreateBr(MergeBB); 1245 1246 Builder.SetInsertPoint(ExecBB->getTerminator()); 1247 Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInst); 1248 Builder.SetInsertPoint(MergeBB->getTerminator()); 1249 auto *MergePHI = Builder.CreatePHI( 1250 AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge"); 1251 PreloadVal = MergePHI; 1252 1253 if (!PreAccInst) { 1254 PreloadVal = nullptr; 1255 PreAccInst = UndefValue::get(AccInstTy); 1256 } 1257 1258 MergePHI->addIncoming(PreAccInst, ExecBB); 1259 MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB); 1260 1261 isl_ast_build_free(Build); 1262 return PreloadVal; 1263 } 1264 1265 bool IslNodeBuilder::preloadInvariantEquivClass( 1266 InvariantEquivClassTy &IAClass) { 1267 // For an equivalence class of invariant loads we pre-load the representing 1268 // element with the unified execution context. However, we have to map all 1269 // elements of the class to the one preloaded load as they are referenced 1270 // during the code generation and therefor need to be mapped. 1271 const MemoryAccessList &MAs = IAClass.InvariantAccesses; 1272 if (MAs.empty()) 1273 return true; 1274 1275 MemoryAccess *MA = MAs.front(); 1276 assert(MA->isArrayKind() && MA->isRead()); 1277 1278 // If the access function was already mapped, the preload of this equivalence 1279 // class was triggered earlier already and doesn't need to be done again. 1280 if (ValueMap.count(MA->getAccessInstruction())) 1281 return true; 1282 1283 // Check for recursion which can be caused by additional constraints, e.g., 1284 // non-finite loop constraints. In such a case we have to bail out and insert 1285 // a "false" runtime check that will cause the original code to be executed. 1286 auto PtrId = std::make_pair(IAClass.IdentifyingPointer, IAClass.AccessType); 1287 if (!PreloadedPtrs.insert(PtrId).second) 1288 return false; 1289 1290 // The execution context of the IAClass. 1291 isl::set &ExecutionCtx = IAClass.ExecutionContext; 1292 1293 // If the base pointer of this class is dependent on another one we have to 1294 // make sure it was preloaded already. 1295 auto *SAI = MA->getScopArrayInfo(); 1296 if (auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) { 1297 if (!preloadInvariantEquivClass(*BaseIAClass)) 1298 return false; 1299 1300 // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and 1301 // we need to refine the ExecutionCtx. 1302 isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext; 1303 ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx); 1304 } 1305 1306 // If the size of a dimension is dependent on another class, make sure it is 1307 // preloaded. 1308 for (unsigned i = 1, e = SAI->getNumberOfDimensions(); i < e; ++i) { 1309 const SCEV *Dim = SAI->getDimensionSize(i); 1310 SetVector<Value *> Values; 1311 findValues(Dim, SE, Values); 1312 for (auto *Val : Values) { 1313 if (auto *BaseIAClass = S.lookupInvariantEquivClass(Val)) { 1314 if (!preloadInvariantEquivClass(*BaseIAClass)) 1315 return false; 1316 1317 // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx 1318 // and we need to refine the ExecutionCtx. 1319 isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext; 1320 ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx); 1321 } 1322 } 1323 } 1324 1325 Instruction *AccInst = MA->getAccessInstruction(); 1326 Type *AccInstTy = AccInst->getType(); 1327 1328 Value *PreloadVal = preloadInvariantLoad(*MA, ExecutionCtx.copy()); 1329 if (!PreloadVal) 1330 return false; 1331 1332 for (const MemoryAccess *MA : MAs) { 1333 Instruction *MAAccInst = MA->getAccessInstruction(); 1334 assert(PreloadVal->getType() == MAAccInst->getType()); 1335 ValueMap[MAAccInst] = PreloadVal; 1336 } 1337 1338 if (SE.isSCEVable(AccInstTy)) { 1339 isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)).release(); 1340 if (ParamId) 1341 IDToValue[ParamId] = PreloadVal; 1342 isl_id_free(ParamId); 1343 } 1344 1345 BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock(); 1346 auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(), 1347 AccInst->getName() + ".preload.s2a", 1348 &*EntryBB->getFirstInsertionPt()); 1349 Builder.CreateStore(PreloadVal, Alloca); 1350 ValueMapT PreloadedPointer; 1351 PreloadedPointer[PreloadVal] = AccInst; 1352 Annotator.addAlternativeAliasBases(PreloadedPointer); 1353 1354 for (auto *DerivedSAI : SAI->getDerivedSAIs()) { 1355 Value *BasePtr = DerivedSAI->getBasePtr(); 1356 1357 for (const MemoryAccess *MA : MAs) { 1358 // As the derived SAI information is quite coarse, any load from the 1359 // current SAI could be the base pointer of the derived SAI, however we 1360 // should only change the base pointer of the derived SAI if we actually 1361 // preloaded it. 1362 if (BasePtr == MA->getOriginalBaseAddr()) { 1363 assert(BasePtr->getType() == PreloadVal->getType()); 1364 DerivedSAI->setBasePtr(PreloadVal); 1365 } 1366 1367 // For scalar derived SAIs we remap the alloca used for the derived value. 1368 if (BasePtr == MA->getAccessInstruction()) 1369 ScalarMap[DerivedSAI] = Alloca; 1370 } 1371 } 1372 1373 for (const MemoryAccess *MA : MAs) { 1374 Instruction *MAAccInst = MA->getAccessInstruction(); 1375 // Use the escape system to get the correct value to users outside the SCoP. 1376 BlockGenerator::EscapeUserVectorTy EscapeUsers; 1377 for (auto *U : MAAccInst->users()) 1378 if (Instruction *UI = dyn_cast<Instruction>(U)) 1379 if (!S.contains(UI)) 1380 EscapeUsers.push_back(UI); 1381 1382 if (EscapeUsers.empty()) 1383 continue; 1384 1385 EscapeMap[MA->getAccessInstruction()] = 1386 std::make_pair(Alloca, std::move(EscapeUsers)); 1387 } 1388 1389 return true; 1390 } 1391 1392 void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) { 1393 for (auto &SAI : S.arrays()) { 1394 if (SAI->getBasePtr()) 1395 continue; 1396 1397 assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) && 1398 "The size of the outermost dimension is used to declare newly " 1399 "created arrays that require memory allocation."); 1400 1401 Type *NewArrayType = nullptr; 1402 1403 // Get the size of the array = size(dim_1)*...*size(dim_n) 1404 uint64_t ArraySizeInt = 1; 1405 for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; i--) { 1406 auto *DimSize = SAI->getDimensionSize(i); 1407 unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize) 1408 ->getAPInt() 1409 .getLimitedValue(); 1410 1411 if (!NewArrayType) 1412 NewArrayType = SAI->getElementType(); 1413 1414 NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize); 1415 ArraySizeInt *= UnsignedDimSize; 1416 } 1417 1418 if (SAI->isOnHeap()) { 1419 LLVMContext &Ctx = NewArrayType->getContext(); 1420 1421 // Get the IntPtrTy from the Datalayout 1422 auto IntPtrTy = DL.getIntPtrType(Ctx); 1423 1424 // Get the size of the element type in bits 1425 unsigned Size = SAI->getElemSizeInBytes(); 1426 1427 // Insert the malloc call at polly.start 1428 auto InstIt = std::get<0>(StartExitBlocks)->getTerminator(); 1429 auto *CreatedArray = CallInst::CreateMalloc( 1430 &*InstIt, IntPtrTy, SAI->getElementType(), 1431 ConstantInt::get(Type::getInt64Ty(Ctx), Size), 1432 ConstantInt::get(Type::getInt64Ty(Ctx), ArraySizeInt), nullptr, 1433 SAI->getName()); 1434 1435 SAI->setBasePtr(CreatedArray); 1436 1437 // Insert the free call at polly.exiting 1438 CallInst::CreateFree(CreatedArray, 1439 std::get<1>(StartExitBlocks)->getTerminator()); 1440 } else { 1441 auto InstIt = Builder.GetInsertBlock() 1442 ->getParent() 1443 ->getEntryBlock() 1444 .getTerminator(); 1445 1446 auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(), 1447 SAI->getName(), &*InstIt); 1448 if (PollyTargetFirstLevelCacheLineSize) 1449 CreatedArray->setAlignment(Align(PollyTargetFirstLevelCacheLineSize)); 1450 SAI->setBasePtr(CreatedArray); 1451 } 1452 } 1453 } 1454 1455 bool IslNodeBuilder::preloadInvariantLoads() { 1456 auto &InvariantEquivClasses = S.getInvariantAccesses(); 1457 if (InvariantEquivClasses.empty()) 1458 return true; 1459 1460 BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(), 1461 &*Builder.GetInsertPoint(), &DT, &LI); 1462 PreLoadBB->setName("polly.preload.begin"); 1463 Builder.SetInsertPoint(&PreLoadBB->front()); 1464 1465 for (auto &IAClass : InvariantEquivClasses) 1466 if (!preloadInvariantEquivClass(IAClass)) 1467 return false; 1468 1469 return true; 1470 } 1471 1472 void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { 1473 // Materialize values for the parameters of the SCoP. 1474 materializeParameters(); 1475 1476 // Generate values for the current loop iteration for all surrounding loops. 1477 // 1478 // We may also reference loops outside of the scop which do not contain the 1479 // scop itself, but as the number of such scops may be arbitrarily large we do 1480 // not generate code for them here, but only at the point of code generation 1481 // where these values are needed. 1482 Loop *L = LI.getLoopFor(S.getEntry()); 1483 1484 while (L != nullptr && S.contains(L)) 1485 L = L->getParentLoop(); 1486 1487 while (L != nullptr) { 1488 materializeNonScopLoopInductionVariable(L); 1489 L = L->getParentLoop(); 1490 } 1491 1492 isl_set_free(Context); 1493 } 1494 1495 Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) { 1496 /// We pass the insert location of our Builder, as Polly ensures during IR 1497 /// generation that there is always a valid CFG into which instructions are 1498 /// inserted. As a result, the insertpoint is known to be always followed by a 1499 /// terminator instruction. This means the insert point may be specified by a 1500 /// terminator instruction, but it can never point to an ->end() iterator 1501 /// which does not have a corresponding instruction. Hence, dereferencing 1502 /// the insertpoint to obtain an instruction is known to be save. 1503 /// 1504 /// We also do not need to update the Builder here, as new instructions are 1505 /// always inserted _before_ the given InsertLocation. As a result, the 1506 /// insert location remains valid. 1507 assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() && 1508 "Insert location points after last valid instruction"); 1509 Instruction *InsertLocation = &*Builder.GetInsertPoint(); 1510 return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(), 1511 InsertLocation, &ValueMap, 1512 StartBlock->getSinglePredecessor()); 1513 } 1514 1515 /// The AST expression we generate to perform the run-time check assumes 1516 /// computations on integer types of infinite size. As we only use 64-bit 1517 /// arithmetic we check for overflows, in case of which we set the result 1518 /// of this run-time check to false to be conservatively correct, 1519 Value *IslNodeBuilder::createRTC(isl_ast_expr *Condition) { 1520 auto ExprBuilder = getExprBuilder(); 1521 1522 // In case the AST expression has integers larger than 64 bit, bail out. The 1523 // resulting LLVM-IR will contain operations on types that use more than 64 1524 // bits. These are -- in case wrapping intrinsics are used -- translated to 1525 // runtime library calls that are not available on all systems (e.g., Android) 1526 // and consequently will result in linker errors. 1527 if (ExprBuilder.hasLargeInts(isl::manage_copy(Condition))) { 1528 isl_ast_expr_free(Condition); 1529 return Builder.getFalse(); 1530 } 1531 1532 ExprBuilder.setTrackOverflow(true); 1533 Value *RTC = ExprBuilder.create(Condition); 1534 if (!RTC->getType()->isIntegerTy(1)) 1535 RTC = Builder.CreateIsNotNull(RTC); 1536 Value *OverflowHappened = 1537 Builder.CreateNot(ExprBuilder.getOverflowState(), "polly.rtc.overflown"); 1538 1539 if (PollyGenerateRTCPrint) { 1540 auto *F = Builder.GetInsertBlock()->getParent(); 1541 RuntimeDebugBuilder::createCPUPrinter( 1542 Builder, 1543 "F: " + F->getName().str() + " R: " + S.getRegion().getNameStr() + 1544 "RTC: ", 1545 RTC, " Overflow: ", OverflowHappened, 1546 "\n" 1547 " (0 failed, -1 succeeded)\n" 1548 " (if one or both are 0 falling back to original code, if both are -1 " 1549 "executing Polly code)\n"); 1550 } 1551 1552 RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result"); 1553 ExprBuilder.setTrackOverflow(false); 1554 1555 if (!isa<ConstantInt>(RTC)) 1556 VersionedScops++; 1557 1558 return RTC; 1559 } 1560