1 //===------ IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the IslNodeBuilder, a class to translate an isl AST into 11 // a LLVM-IR AST. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "polly/CodeGen/IslNodeBuilder.h" 16 #include "polly/CodeGen/BlockGenerators.h" 17 #include "polly/CodeGen/CodeGeneration.h" 18 #include "polly/CodeGen/IslAst.h" 19 #include "polly/CodeGen/IslExprBuilder.h" 20 #include "polly/CodeGen/LoopGenerators.h" 21 #include "polly/CodeGen/RuntimeDebugBuilder.h" 22 #include "polly/CodeGen/Utils.h" 23 #include "polly/Config/config.h" 24 #include "polly/DependenceInfo.h" 25 #include "polly/LinkAllPasses.h" 26 #include "polly/Options.h" 27 #include "polly/ScopInfo.h" 28 #include "polly/Support/GICHelper.h" 29 #include "polly/Support/SCEVValidator.h" 30 #include "polly/Support/ScopHelper.h" 31 #include "llvm/ADT/PostOrderIterator.h" 32 #include "llvm/ADT/SmallPtrSet.h" 33 #include "llvm/Analysis/LoopInfo.h" 34 #include "llvm/IR/DataLayout.h" 35 #include "llvm/IR/Module.h" 36 #include "llvm/IR/Verifier.h" 37 #include "llvm/Support/CommandLine.h" 38 #include "llvm/Support/Debug.h" 39 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 40 #include "isl/aff.h" 41 #include "isl/ast.h" 42 #include "isl/ast_build.h" 43 #include "isl/list.h" 44 #include "isl/map.h" 45 #include "isl/set.h" 46 #include "isl/union_map.h" 47 #include "isl/union_set.h" 48 49 using namespace polly; 50 using namespace llvm; 51 52 #define DEBUG_TYPE "polly-codegen" 53 54 STATISTIC(VersionedScops, "Number of SCoPs that required versioning."); 55 56 // The maximal number of dimensions we allow during invariant load construction. 57 // More complex access ranges will result in very high compile time and are also 58 // unlikely to result in good code. This value is very high and should only 59 // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006). 60 static int const MaxDimensionsInAccessRange = 9; 61 62 static cl::opt<bool> PollyGenerateRTCPrint( 63 "polly-codegen-emit-rtc-print", 64 cl::desc("Emit code that prints the runtime check result dynamically."), 65 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 66 67 // If this option is set we always use the isl AST generator to regenerate 68 // memory accesses. Without this option set we regenerate expressions using the 69 // original SCEV expressions and only generate new expressions in case the 70 // access relation has been changed and consequently must be regenerated. 
71 static cl::opt<bool> PollyGenerateExpressions(
72 "polly-codegen-generate-expressions",
73 cl::desc("Generate AST expressions for unmodified and modified accesses"),
74 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
75
76 static cl::opt<int> PollyTargetFirstLevelCacheLineSize(
77 "polly-target-first-level-cache-line-size",
78 cl::desc("The size of the first level cache line, specified in bytes."),
79 cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory));
80
81 __isl_give isl_ast_expr *
82 IslNodeBuilder::getUpperBound(__isl_keep isl_ast_node *For,
83 ICmpInst::Predicate &Predicate) {
84 isl_id *UBID, *IteratorID;
85 isl_ast_expr *Cond, *Iterator, *UB, *Arg0;
86 isl_ast_op_type Type;
87
88 Cond = isl_ast_node_for_get_cond(For);
89 Iterator = isl_ast_node_for_get_iterator(For);
90 isl_ast_expr_get_type(Cond);
91 assert(isl_ast_expr_get_type(Cond) == isl_ast_expr_op &&
92 "conditional expression is not an atomic upper bound");
93
94 Type = isl_ast_expr_get_op_type(Cond);
95
96 switch (Type) {
97 case isl_ast_op_le:
98 Predicate = ICmpInst::ICMP_SLE;
99 break;
100 case isl_ast_op_lt:
101 Predicate = ICmpInst::ICMP_SLT;
102 break;
103 default:
104 llvm_unreachable("Unexpected comparison type in loop condition");
105 }
106
107 Arg0 = isl_ast_expr_get_op_arg(Cond, 0);
108
109 assert(isl_ast_expr_get_type(Arg0) == isl_ast_expr_id &&
110 "conditional expression is not an atomic upper bound");
111
112 UBID = isl_ast_expr_get_id(Arg0);
113
114 assert(isl_ast_expr_get_type(Iterator) == isl_ast_expr_id &&
115 "Could not get the iterator");
116
117 IteratorID = isl_ast_expr_get_id(Iterator);
118
119 assert(UBID == IteratorID &&
120 "conditional expression is not an atomic upper bound");
121
122 UB = isl_ast_expr_get_op_arg(Cond, 1);
123
124 isl_ast_expr_free(Cond);
125 isl_ast_expr_free(Iterator);
126 isl_ast_expr_free(Arg0);
127 isl_id_free(IteratorID);
128 isl_id_free(UBID);
129
130 return UB;
131 }
132
133 /// Return true if the given isl_ast_expr is an integer expression and
134 /// @p Predicate holds for its value.
135 static bool checkIslAstExprInt(__isl_take isl_ast_expr *Expr,
136 isl_bool (*Predicate)(__isl_keep isl_val *)) {
137 if (isl_ast_expr_get_type(Expr) != isl_ast_expr_int) {
138 isl_ast_expr_free(Expr);
139 return false;
140 }
141 auto ExprVal = isl_ast_expr_get_val(Expr);
142 isl_ast_expr_free(Expr);
143 if (Predicate(ExprVal) != true) {
144 isl_val_free(ExprVal);
145 return false;
146 }
147 isl_val_free(ExprVal);
148 return true;
149 }
150
151 int IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) {
152 assert(isl_ast_node_get_type(For) == isl_ast_node_for);
153 auto Body = isl_ast_node_for_get_body(For);
154
155 // First, check if we can actually handle this code.
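// To illustrate (the statement name is made up for this example), a for-node
// whose iterations we can count has the shape
//
//   for (int c0 = 0; c0 <= 15; c0 += 1)
//     Stmt(c0);
//
// i.e., the body is a single user statement (or a block of user statements),
// the init expression is 0, the increment is 1, and the upper bound is an
// integer constant. For the inclusive bound above we return 15 + 1 = 16
// iterations; any other shape makes us return -1.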
156 switch (isl_ast_node_get_type(Body)) { 157 case isl_ast_node_user: 158 break; 159 case isl_ast_node_block: { 160 isl_ast_node_list *List = isl_ast_node_block_get_children(Body); 161 for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) { 162 isl_ast_node *Node = isl_ast_node_list_get_ast_node(List, i); 163 int Type = isl_ast_node_get_type(Node); 164 isl_ast_node_free(Node); 165 if (Type != isl_ast_node_user) { 166 isl_ast_node_list_free(List); 167 isl_ast_node_free(Body); 168 return -1; 169 } 170 } 171 isl_ast_node_list_free(List); 172 break; 173 } 174 default: 175 isl_ast_node_free(Body); 176 return -1; 177 } 178 isl_ast_node_free(Body); 179 180 auto Init = isl_ast_node_for_get_init(For); 181 if (!checkIslAstExprInt(Init, isl_val_is_zero)) 182 return -1; 183 auto Inc = isl_ast_node_for_get_inc(For); 184 if (!checkIslAstExprInt(Inc, isl_val_is_one)) 185 return -1; 186 CmpInst::Predicate Predicate; 187 auto UB = getUpperBound(For, Predicate); 188 if (isl_ast_expr_get_type(UB) != isl_ast_expr_int) { 189 isl_ast_expr_free(UB); 190 return -1; 191 } 192 auto UpVal = isl_ast_expr_get_val(UB); 193 isl_ast_expr_free(UB); 194 int NumberIterations = isl_val_get_num_si(UpVal); 195 isl_val_free(UpVal); 196 if (NumberIterations < 0) 197 return -1; 198 if (Predicate == CmpInst::ICMP_SLT) 199 return NumberIterations; 200 else 201 return NumberIterations + 1; 202 } 203 204 /// Extract the values and SCEVs needed to generate code for a block. 205 static int findReferencesInBlock(struct SubtreeReferences &References, 206 const ScopStmt *Stmt, const BasicBlock *BB) { 207 for (const Instruction &Inst : *BB) 208 for (Value *SrcVal : Inst.operands()) { 209 auto *Scope = References.LI.getLoopFor(BB); 210 if (canSynthesize(SrcVal, References.S, &References.SE, Scope)) { 211 References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope)); 212 continue; 213 } else if (Value *NewVal = References.GlobalMap.lookup(SrcVal)) 214 References.Values.insert(NewVal); 215 } 216 return 0; 217 } 218 219 isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr, 220 bool CreateScalarRefs) { 221 auto &References = *static_cast<struct SubtreeReferences *>(UserPtr); 222 223 if (Stmt->isBlockStmt()) 224 findReferencesInBlock(References, Stmt, Stmt->getBasicBlock()); 225 else { 226 assert(Stmt->isRegionStmt() && 227 "Stmt was neither block nor region statement"); 228 for (const BasicBlock *BB : Stmt->getRegion()->blocks()) 229 findReferencesInBlock(References, Stmt, BB); 230 } 231 232 for (auto &Access : *Stmt) { 233 if (Access->isArrayKind()) { 234 auto *BasePtr = Access->getScopArrayInfo()->getBasePtr(); 235 if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr)) 236 if (Stmt->getParent()->contains(OpInst)) 237 continue; 238 239 References.Values.insert(BasePtr); 240 continue; 241 } 242 243 if (CreateScalarRefs) 244 References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access)); 245 } 246 247 return isl_stat_ok; 248 } 249 250 /// Extract the out-of-scop values and SCEVs referenced from a set describing 251 /// a ScopStmt. 252 /// 253 /// This includes the SCEVUnknowns referenced by the SCEVs used in the 254 /// statement and the base pointers of the memory accesses. For scalar 255 /// statements we force the generation of alloca memory locations and list 256 /// these locations in the set of out-of-scop values as well. 257 /// 258 /// @param Set A set which references the ScopStmt we are interested in. 
259 /// @param UserPtr A void pointer that can be casted to a SubtreeReferences 260 /// structure. 261 static isl_stat addReferencesFromStmtSet(__isl_take isl_set *Set, 262 void *UserPtr) { 263 isl_id *Id = isl_set_get_tuple_id(Set); 264 auto *Stmt = static_cast<const ScopStmt *>(isl_id_get_user(Id)); 265 isl_id_free(Id); 266 isl_set_free(Set); 267 return addReferencesFromStmt(Stmt, UserPtr); 268 } 269 270 /// Extract the out-of-scop values and SCEVs referenced from a union set 271 /// referencing multiple ScopStmts. 272 /// 273 /// This includes the SCEVUnknowns referenced by the SCEVs used in the 274 /// statement and the base pointers of the memory accesses. For scalar 275 /// statements we force the generation of alloca memory locations and list 276 /// these locations in the set of out-of-scop values as well. 277 /// 278 /// @param USet A union set referencing the ScopStmts we are interested 279 /// in. 280 /// @param References The SubtreeReferences data structure through which 281 /// results are returned and further information is 282 /// provided. 283 static void 284 addReferencesFromStmtUnionSet(isl_union_set *USet, 285 struct SubtreeReferences &References) { 286 isl_union_set_foreach_set(USet, addReferencesFromStmtSet, &References); 287 isl_union_set_free(USet); 288 } 289 290 __isl_give isl_union_map * 291 IslNodeBuilder::getScheduleForAstNode(__isl_keep isl_ast_node *For) { 292 return IslAstInfo::getSchedule(For); 293 } 294 295 void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For, 296 SetVector<Value *> &Values, 297 SetVector<const Loop *> &Loops) { 298 299 SetVector<const SCEV *> SCEVs; 300 struct SubtreeReferences References = { 301 LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator()}; 302 303 for (const auto &I : IDToValue) 304 Values.insert(I.second); 305 306 for (const auto &I : OutsideLoopIterations) 307 Values.insert(cast<SCEVUnknown>(I.second)->getValue()); 308 309 isl_union_set *Schedule = isl_union_map_domain(getScheduleForAstNode(For)); 310 addReferencesFromStmtUnionSet(Schedule, References); 311 312 for (const SCEV *Expr : SCEVs) { 313 findValues(Expr, SE, Values); 314 findLoops(Expr, Loops); 315 } 316 317 Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); }); 318 319 /// Remove loops that contain the scop or that are part of the scop, as they 320 /// are considered local. This leaves only loops that are before the scop, but 321 /// do not contain the scop itself. 
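/// For illustration: if the scop is nested inside loops L1 and L2, both L1 and
/// L2 are dropped here because they contain the scop; loops that are entirely
/// part of the scop are dropped as well. What remains are loops that run
/// before the scop but are still referenced by SCEVs used in the subtree.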
322 Loops.remove_if([this](const Loop *L) { 323 return S.contains(L) || L->contains(S.getEntry()); 324 }); 325 } 326 327 void IslNodeBuilder::updateValues(ValueMapT &NewValues) { 328 SmallPtrSet<Value *, 5> Inserted; 329 330 for (const auto &I : IDToValue) { 331 IDToValue[I.first] = NewValues[I.second]; 332 Inserted.insert(I.second); 333 } 334 335 for (const auto &I : NewValues) { 336 if (Inserted.count(I.first)) 337 continue; 338 339 ValueMap[I.first] = I.second; 340 } 341 } 342 343 void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, 344 std::vector<Value *> &IVS, 345 __isl_take isl_id *IteratorID, 346 __isl_take isl_union_map *Schedule) { 347 isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); 348 isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); 349 isl_id *Id = isl_ast_expr_get_id(StmtExpr); 350 isl_ast_expr_free(StmtExpr); 351 ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id); 352 std::vector<LoopToScevMapT> VLTS(IVS.size()); 353 354 isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain()); 355 Schedule = isl_union_map_intersect_domain(Schedule, Domain); 356 isl_map *S = isl_map_from_union_map(Schedule); 357 358 auto *NewAccesses = createNewAccesses(Stmt, User); 359 createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID); 360 VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses); 361 isl_id_to_ast_expr_free(NewAccesses); 362 isl_map_free(S); 363 isl_id_free(Id); 364 isl_ast_node_free(User); 365 } 366 367 void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) { 368 auto *Id = isl_ast_node_mark_get_id(Node); 369 auto Child = isl_ast_node_mark_get_node(Node); 370 isl_ast_node_free(Node); 371 // If a child node of a 'SIMD mark' is a loop that has a single iteration, 372 // it will be optimized away and we should skip it. 
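// Sketch of the handling below (the printed AST is illustrative): for a mark
// such as
//
//   // SIMD
//   for (int c1 = 0; c1 <= 3; c1 += 1)
//     Stmt(c0, c1);
//
// we try to emit a vector loop of width 4, i.e., the constant trip count,
// provided the Polly vectorizer is enabled, and fall back to a sequential loop
// if the trip count is unknown, 1, or larger than 16. A mark named
// "Inter iteration alias-free" merely registers its base pointer with the
// annotator before we recurse into the child node.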
373 if (!strcmp(isl_id_get_name(Id), "SIMD") && 374 isl_ast_node_get_type(Child) == isl_ast_node_for) { 375 bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY; 376 int VectorWidth = getNumberOfIterations(Child); 377 if (Vector && 1 < VectorWidth && VectorWidth <= 16) 378 createForVector(Child, VectorWidth); 379 else 380 createForSequential(Child, true); 381 isl_id_free(Id); 382 return; 383 } 384 if (!strcmp(isl_id_get_name(Id), "Inter iteration alias-free")) { 385 auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id)); 386 Annotator.addInterIterationAliasFreeBasePtr(BasePtr); 387 } 388 create(Child); 389 isl_id_free(Id); 390 } 391 392 void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For, 393 int VectorWidth) { 394 isl_ast_node *Body = isl_ast_node_for_get_body(For); 395 isl_ast_expr *Init = isl_ast_node_for_get_init(For); 396 isl_ast_expr *Inc = isl_ast_node_for_get_inc(For); 397 isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For); 398 isl_id *IteratorID = isl_ast_expr_get_id(Iterator); 399 400 Value *ValueLB = ExprBuilder.create(Init); 401 Value *ValueInc = ExprBuilder.create(Inc); 402 403 Type *MaxType = ExprBuilder.getType(Iterator); 404 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); 405 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); 406 407 if (MaxType != ValueLB->getType()) 408 ValueLB = Builder.CreateSExt(ValueLB, MaxType); 409 if (MaxType != ValueInc->getType()) 410 ValueInc = Builder.CreateSExt(ValueInc, MaxType); 411 412 std::vector<Value *> IVS(VectorWidth); 413 IVS[0] = ValueLB; 414 415 for (int i = 1; i < VectorWidth; i++) 416 IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv"); 417 418 isl_union_map *Schedule = getScheduleForAstNode(For); 419 assert(Schedule && "For statement annotation does not contain its schedule"); 420 421 IDToValue[IteratorID] = ValueLB; 422 423 switch (isl_ast_node_get_type(Body)) { 424 case isl_ast_node_user: 425 createUserVector(Body, IVS, isl_id_copy(IteratorID), 426 isl_union_map_copy(Schedule)); 427 break; 428 case isl_ast_node_block: { 429 isl_ast_node_list *List = isl_ast_node_block_get_children(Body); 430 431 for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) 432 createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS, 433 isl_id_copy(IteratorID), isl_union_map_copy(Schedule)); 434 435 isl_ast_node_free(Body); 436 isl_ast_node_list_free(List); 437 break; 438 } 439 default: 440 isl_ast_node_dump(Body); 441 llvm_unreachable("Unhandled isl_ast_node in vectorizer"); 442 } 443 444 IDToValue.erase(IDToValue.find(IteratorID)); 445 isl_id_free(IteratorID); 446 isl_union_map_free(Schedule); 447 448 isl_ast_node_free(For); 449 isl_ast_expr_free(Iterator); 450 } 451 452 void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For, 453 bool KnownParallel) { 454 isl_ast_node *Body; 455 isl_ast_expr *Init, *Inc, *Iterator, *UB; 456 isl_id *IteratorID; 457 Value *ValueLB, *ValueUB, *ValueInc; 458 Type *MaxType; 459 BasicBlock *ExitBlock; 460 Value *IV; 461 CmpInst::Predicate Predicate; 462 bool Parallel; 463 464 Parallel = KnownParallel || (IslAstInfo::isParallel(For) && 465 !IslAstInfo::isReductionParallel(For)); 466 467 Body = isl_ast_node_for_get_body(For); 468 469 // isl_ast_node_for_is_degenerate(For) 470 // 471 // TODO: For degenerated loops we could generate a plain assignment. 472 // However, for now we just reuse the logic for normal loops, which will 473 // create a loop with a single iteration. 
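// The code below emits, roughly, the following structure (block names are
// illustrative; the guard is omitted whenever ScalarEvolution can prove that
// LB <Predicate> UB already holds):
//
//   guard:     br (LB <Predicate> UB), preheader, exit
//   preheader: br header
//   header:    IV = phi [LB, preheader], [IV.next, header]
//              <code generated for Body>
//              IV.next = IV + Inc
//              br (IV.next <Predicate> UB), header, exit
//   exit:      <code generation continues here>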
474
475 Init = isl_ast_node_for_get_init(For);
476 Inc = isl_ast_node_for_get_inc(For);
477 Iterator = isl_ast_node_for_get_iterator(For);
478 IteratorID = isl_ast_expr_get_id(Iterator);
479 UB = getUpperBound(For, Predicate);
480
481 ValueLB = ExprBuilder.create(Init);
482 ValueUB = ExprBuilder.create(UB);
483 ValueInc = ExprBuilder.create(Inc);
484
485 MaxType = ExprBuilder.getType(Iterator);
486 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
487 MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
488 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
489
490 if (MaxType != ValueLB->getType())
491 ValueLB = Builder.CreateSExt(ValueLB, MaxType);
492 if (MaxType != ValueUB->getType())
493 ValueUB = Builder.CreateSExt(ValueUB, MaxType);
494 if (MaxType != ValueInc->getType())
495 ValueInc = Builder.CreateSExt(ValueInc, MaxType);
496
497 // If we can show that LB <Predicate> UB holds at least once, we can
498 // omit the GuardBB in front of the loop.
499 bool UseGuardBB =
500 !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB));
501 IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock,
502 Predicate, &Annotator, Parallel, UseGuardBB);
503 IDToValue[IteratorID] = IV;
504
505 create(Body);
506
507 Annotator.popLoop(Parallel);
508
509 IDToValue.erase(IDToValue.find(IteratorID));
510
511 Builder.SetInsertPoint(&ExitBlock->front());
512
513 isl_ast_node_free(For);
514 isl_ast_expr_free(Iterator);
515 isl_id_free(IteratorID);
516 }
517
518 /// Remove the BBs contained in a (sub)function from the dominator tree.
519 ///
520 /// This function removes the basic blocks that are part of a subfunction from
521 /// the dominator tree. Specifically, when generating code it may happen that at
522 /// some point the code generation continues in a new sub-function (e.g., when
523 /// generating OpenMP code). The basic blocks that are created in this
524 /// sub-function are then still part of the dominator tree of the original
525 /// function, such that the dominator tree reaches over function boundaries.
526 /// This is not only incorrect, but also causes crashes. This function now
527 /// removes from the dominator tree all basic blocks that are dominated by (and
528 /// consequently reachable from) the entry block of this (sub)function.
529 ///
530 /// FIXME: An LLVM (function or region) pass should not touch anything outside
531 /// of the function/region it runs on. Hence, the mere need for this function
532 /// shows that we do not comply with this rule. At the moment, this does not
533 /// cause any issues, but we should be aware that such issues may appear.
534 /// Unfortunately the current LLVM pass infrastructure does not allow making
535 /// Polly a module or call-graph pass to solve this issue, as such a pass would
536 /// not have access to the per-function analysis passes needed by Polly. A
537 /// future pass manager infrastructure is expected to enable such access,
538 /// possibly allowing us to create a cleaner solution here.
539 ///
540 /// FIXME: Instead of adding the dominance information and then dropping it
541 /// later on, we should try to not add it in the first place. This requires
542 /// some careful testing to make sure this does not break in interaction with
543 /// the SCEVBuilder and SplitBlock, which may rely on the dominator tree or
544 /// which may try to update it.
545 ///
546 /// @param F The function which contains the BBs to be removed.
547 /// @param DT The dominator tree from which to remove the BBs.
548 static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) {
549 DomTreeNode *N = DT.getNode(&F->getEntryBlock());
550 std::vector<BasicBlock *> Nodes;
551
552 // We can only remove an element from the dominator tree if all its children
553 // have been removed. To ensure this, we obtain the list of nodes to remove
554 // using a post-order tree traversal.
555 for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I)
556 Nodes.push_back(I->getBlock());
557
558 for (BasicBlock *BB : Nodes)
559 DT.eraseNode(BB);
560 }
561
562 void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
563 isl_ast_node *Body;
564 isl_ast_expr *Init, *Inc, *Iterator, *UB;
565 isl_id *IteratorID;
566 Value *ValueLB, *ValueUB, *ValueInc;
567 Type *MaxType;
568 Value *IV;
569 CmpInst::Predicate Predicate;
570
571 // The preamble of parallel code interacts differently with, e.g., scalar
572 // initialization than normal code does. Therefore, we ensure the parallel
573 // code is separated from the last basic block.
574 BasicBlock *ParBB = SplitBlock(Builder.GetInsertBlock(),
575 &*Builder.GetInsertPoint(), &DT, &LI);
576 ParBB->setName("polly.parallel.for");
577 Builder.SetInsertPoint(&ParBB->front());
578
579 Body = isl_ast_node_for_get_body(For);
580 Init = isl_ast_node_for_get_init(For);
581 Inc = isl_ast_node_for_get_inc(For);
582 Iterator = isl_ast_node_for_get_iterator(For);
583 IteratorID = isl_ast_expr_get_id(Iterator);
584 UB = getUpperBound(For, Predicate);
585
586 ValueLB = ExprBuilder.create(Init);
587 ValueUB = ExprBuilder.create(UB);
588 ValueInc = ExprBuilder.create(Inc);
589
590 // OpenMP always uses SLE. In case the isl-generated AST uses an SLT
591 // expression, we need to adjust the loop bound by one.
592 if (Predicate == CmpInst::ICMP_SLT)
593 ValueUB = Builder.CreateAdd(
594 ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType()));
595
596 MaxType = ExprBuilder.getType(Iterator);
597 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
598 MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
599 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
600
601 if (MaxType != ValueLB->getType())
602 ValueLB = Builder.CreateSExt(ValueLB, MaxType);
603 if (MaxType != ValueUB->getType())
604 ValueUB = Builder.CreateSExt(ValueUB, MaxType);
605 if (MaxType != ValueInc->getType())
606 ValueInc = Builder.CreateSExt(ValueInc, MaxType);
607
608 BasicBlock::iterator LoopBody;
609
610 SetVector<Value *> SubtreeValues;
611 SetVector<const Loop *> Loops;
612
613 getReferencesInSubtree(For, SubtreeValues, Loops);
614
615 // For all loops we depend on, create values that hold the current loop
616 // iteration. These values are necessary to generate code for SCEVs that
617 // depend on such loops, so we need to pass them to the subfunction.
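// Concretely, for each such loop L we expand the canonical induction variable
// {0,+,1}<L>, i.e., an add recurrence that starts at 0 and is incremented by
// 1 on every iteration of L, and record the resulting value so that SCEVs
// referring to L can be rewritten in terms of it inside the subfunction.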
618 for (const Loop *L : Loops) { 619 const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), 620 SE.getUnknown(Builder.getInt64(1)), 621 L, SCEV::FlagAnyWrap); 622 Value *V = generateSCEV(OuterLIV); 623 OutsideLoopIterations[L] = SE.getUnknown(V); 624 SubtreeValues.insert(V); 625 } 626 627 ValueMapT NewValues; 628 ParallelLoopGenerator ParallelLoopGen(Builder, LI, DT, DL); 629 630 IV = ParallelLoopGen.createParallelLoop(ValueLB, ValueUB, ValueInc, 631 SubtreeValues, NewValues, &LoopBody); 632 BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); 633 Builder.SetInsertPoint(&*LoopBody); 634 635 // Remember the parallel subfunction 636 ParallelSubfunctions.push_back(LoopBody->getFunction()); 637 638 // Save the current values. 639 auto ValueMapCopy = ValueMap; 640 IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue; 641 642 updateValues(NewValues); 643 IDToValue[IteratorID] = IV; 644 645 ValueMapT NewValuesReverse; 646 647 for (auto P : NewValues) 648 NewValuesReverse[P.second] = P.first; 649 650 Annotator.addAlternativeAliasBases(NewValuesReverse); 651 652 create(Body); 653 654 Annotator.resetAlternativeAliasBases(); 655 // Restore the original values. 656 ValueMap = ValueMapCopy; 657 IDToValue = IDToValueCopy; 658 659 Builder.SetInsertPoint(&*AfterLoop); 660 removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT); 661 662 for (const Loop *L : Loops) 663 OutsideLoopIterations.erase(L); 664 665 isl_ast_node_free(For); 666 isl_ast_expr_free(Iterator); 667 isl_id_free(IteratorID); 668 } 669 670 void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) { 671 bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY; 672 673 if (Vector && IslAstInfo::isInnermostParallel(For) && 674 !IslAstInfo::isReductionParallel(For)) { 675 int VectorWidth = getNumberOfIterations(For); 676 if (1 < VectorWidth && VectorWidth <= 16) { 677 createForVector(For, VectorWidth); 678 return; 679 } 680 } 681 682 if (IslAstInfo::isExecutedInParallel(For)) { 683 createForParallel(For); 684 return; 685 } 686 createForSequential(For, false); 687 } 688 689 void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) { 690 isl_ast_expr *Cond = isl_ast_node_if_get_cond(If); 691 692 Function *F = Builder.GetInsertBlock()->getParent(); 693 LLVMContext &Context = F->getContext(); 694 695 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 696 &*Builder.GetInsertPoint(), &DT, &LI); 697 CondBB->setName("polly.cond"); 698 BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI); 699 MergeBB->setName("polly.merge"); 700 BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F); 701 BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F); 702 703 DT.addNewBlock(ThenBB, CondBB); 704 DT.addNewBlock(ElseBB, CondBB); 705 DT.changeImmediateDominator(MergeBB, CondBB); 706 707 Loop *L = LI.getLoopFor(CondBB); 708 if (L) { 709 L->addBasicBlockToLoop(ThenBB, LI); 710 L->addBasicBlockToLoop(ElseBB, LI); 711 } 712 713 CondBB->getTerminator()->eraseFromParent(); 714 715 Builder.SetInsertPoint(CondBB); 716 Value *Predicate = ExprBuilder.create(Cond); 717 Builder.CreateCondBr(Predicate, ThenBB, ElseBB); 718 Builder.SetInsertPoint(ThenBB); 719 Builder.CreateBr(MergeBB); 720 Builder.SetInsertPoint(ElseBB); 721 Builder.CreateBr(MergeBB); 722 Builder.SetInsertPoint(&ThenBB->front()); 723 724 create(isl_ast_node_if_get_then(If)); 725 726 Builder.SetInsertPoint(&ElseBB->front()); 727 728 if (isl_ast_node_if_has_else(If)) 729 create(isl_ast_node_if_get_else(If)); 730 731 
Builder.SetInsertPoint(&MergeBB->front()); 732 733 isl_ast_node_free(If); 734 } 735 736 __isl_give isl_id_to_ast_expr * 737 IslNodeBuilder::createNewAccesses(ScopStmt *Stmt, 738 __isl_keep isl_ast_node *Node) { 739 isl_id_to_ast_expr *NewAccesses = 740 isl_id_to_ast_expr_alloc(Stmt->getParent()->getIslCtx(), 0); 741 742 auto *Build = IslAstInfo::getBuild(Node); 743 assert(Build && "Could not obtain isl_ast_build from user node"); 744 Stmt->setAstBuild(Build); 745 746 for (auto *MA : *Stmt) { 747 if (!MA->hasNewAccessRelation()) { 748 if (PollyGenerateExpressions) { 749 if (!MA->isAffine()) 750 continue; 751 if (MA->getLatestScopArrayInfo()->getBasePtrOriginSAI()) 752 continue; 753 754 auto *BasePtr = 755 dyn_cast<Instruction>(MA->getLatestScopArrayInfo()->getBasePtr()); 756 if (BasePtr && Stmt->getParent()->getRegion().contains(BasePtr)) 757 continue; 758 } else { 759 continue; 760 } 761 } 762 assert(MA->isAffine() && 763 "Only affine memory accesses can be code generated"); 764 765 auto Schedule = isl_ast_build_get_schedule(Build); 766 767 #ifndef NDEBUG 768 auto Dom = Stmt->getDomain(); 769 auto SchedDom = isl_set_from_union_set( 770 isl_union_map_domain(isl_union_map_copy(Schedule))); 771 auto AccDom = isl_map_domain(MA->getAccessRelation()); 772 Dom = isl_set_intersect_params(Dom, Stmt->getParent()->getContext()); 773 SchedDom = 774 isl_set_intersect_params(SchedDom, Stmt->getParent()->getContext()); 775 assert(isl_set_is_subset(SchedDom, AccDom) && 776 "Access relation not defined on full schedule domain"); 777 assert(isl_set_is_subset(Dom, AccDom) && 778 "Access relation not defined on full domain"); 779 isl_set_free(AccDom); 780 isl_set_free(SchedDom); 781 isl_set_free(Dom); 782 #endif 783 784 auto PWAccRel = MA->applyScheduleToAccessRelation(Schedule); 785 786 auto AccessExpr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); 787 NewAccesses = isl_id_to_ast_expr_set(NewAccesses, MA->getId(), AccessExpr); 788 } 789 790 return NewAccesses; 791 } 792 793 void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr *Expr, 794 ScopStmt *Stmt, LoopToScevMapT <S) { 795 assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op && 796 "Expression of type 'op' expected"); 797 assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call && 798 "Opertation of type 'call' expected"); 799 for (int i = 0; i < isl_ast_expr_get_op_n_arg(Expr) - 1; ++i) { 800 isl_ast_expr *SubExpr; 801 Value *V; 802 803 SubExpr = isl_ast_expr_get_op_arg(Expr, i + 1); 804 V = ExprBuilder.create(SubExpr); 805 ScalarEvolution *SE = Stmt->getParent()->getSE(); 806 LTS[Stmt->getLoopForDimension(i)] = SE->getUnknown(V); 807 } 808 809 isl_ast_expr_free(Expr); 810 } 811 812 void IslNodeBuilder::createSubstitutionsVector( 813 __isl_take isl_ast_expr *Expr, ScopStmt *Stmt, 814 std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS, 815 __isl_take isl_id *IteratorID) { 816 int i = 0; 817 818 Value *OldValue = IDToValue[IteratorID]; 819 for (Value *IV : IVS) { 820 IDToValue[IteratorID] = IV; 821 createSubstitutions(isl_ast_expr_copy(Expr), Stmt, VLTS[i]); 822 i++; 823 } 824 825 IDToValue[IteratorID] = OldValue; 826 isl_id_free(IteratorID); 827 isl_ast_expr_free(Expr); 828 } 829 830 void IslNodeBuilder::generateCopyStmt( 831 ScopStmt *Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) { 832 assert(Stmt->size() == 2); 833 auto ReadAccess = Stmt->begin(); 834 auto WriteAccess = ReadAccess++; 835 assert((*ReadAccess)->isRead() && (*WriteAccess)->isMustWrite()); 836 assert((*ReadAccess)->getElementType() == 
(*WriteAccess)->getElementType() && 837 "Accesses use the same data type"); 838 assert((*ReadAccess)->isArrayKind() && (*WriteAccess)->isArrayKind()); 839 auto *AccessExpr = 840 isl_id_to_ast_expr_get(NewAccesses, (*ReadAccess)->getId()); 841 auto *LoadValue = ExprBuilder.create(AccessExpr); 842 AccessExpr = isl_id_to_ast_expr_get(NewAccesses, (*WriteAccess)->getId()); 843 auto *StoreAddr = ExprBuilder.createAccessAddress(AccessExpr); 844 Builder.CreateStore(LoadValue, StoreAddr); 845 } 846 847 void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) { 848 LoopToScevMapT LTS; 849 isl_id *Id; 850 ScopStmt *Stmt; 851 852 isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); 853 isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); 854 Id = isl_ast_expr_get_id(StmtExpr); 855 isl_ast_expr_free(StmtExpr); 856 857 LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end()); 858 859 Stmt = (ScopStmt *)isl_id_get_user(Id); 860 auto *NewAccesses = createNewAccesses(Stmt, User); 861 if (Stmt->isCopyStmt()) { 862 generateCopyStmt(Stmt, NewAccesses); 863 isl_ast_expr_free(Expr); 864 } else { 865 createSubstitutions(Expr, Stmt, LTS); 866 867 if (Stmt->isBlockStmt()) 868 BlockGen.copyStmt(*Stmt, LTS, NewAccesses); 869 else 870 RegionGen.copyStmt(*Stmt, LTS, NewAccesses); 871 } 872 873 isl_id_to_ast_expr_free(NewAccesses); 874 isl_ast_node_free(User); 875 isl_id_free(Id); 876 } 877 878 void IslNodeBuilder::createBlock(__isl_take isl_ast_node *Block) { 879 isl_ast_node_list *List = isl_ast_node_block_get_children(Block); 880 881 for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) 882 create(isl_ast_node_list_get_ast_node(List, i)); 883 884 isl_ast_node_free(Block); 885 isl_ast_node_list_free(List); 886 } 887 888 void IslNodeBuilder::create(__isl_take isl_ast_node *Node) { 889 switch (isl_ast_node_get_type(Node)) { 890 case isl_ast_node_error: 891 llvm_unreachable("code generation error"); 892 case isl_ast_node_mark: 893 createMark(Node); 894 return; 895 case isl_ast_node_for: 896 createFor(Node); 897 return; 898 case isl_ast_node_if: 899 createIf(Node); 900 return; 901 case isl_ast_node_user: 902 createUser(Node); 903 return; 904 case isl_ast_node_block: 905 createBlock(Node); 906 return; 907 } 908 909 llvm_unreachable("Unknown isl_ast_node type"); 910 } 911 912 bool IslNodeBuilder::materializeValue(isl_id *Id) { 913 // If the Id is already mapped, skip it. 914 if (!IDToValue.count(Id)) { 915 auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id); 916 Value *V = nullptr; 917 918 // Parameters could refere to invariant loads that need to be 919 // preloaded before we can generate code for the parameter. Thus, 920 // check if any value refered to in ParamSCEV is an invariant load 921 // and if so make sure its equivalence class is preloaded. 922 SetVector<Value *> Values; 923 findValues(ParamSCEV, SE, Values); 924 for (auto *Val : Values) { 925 926 // Check if the value is an instruction in a dead block within the SCoP 927 // and if so do not code generate it. 928 if (auto *Inst = dyn_cast<Instruction>(Val)) { 929 if (S.contains(Inst)) { 930 bool IsDead = true; 931 932 // Check for "undef" loads first, then if there is a statement for 933 // the parent of Inst and lastly if the parent of Inst has an empty 934 // domain. In the first and last case the instruction is dead but if 935 // there is a statement or the domain is not empty Inst is not dead. 936 auto MemInst = MemAccInst::dyn_cast(Inst); 937 auto Address = MemInst ? 
MemInst.getPointerOperand() : nullptr; 938 if (Address && SE.getUnknown(UndefValue::get(Address->getType())) == 939 SE.getPointerBase(SE.getSCEV(Address))) { 940 } else if (S.getStmtFor(Inst)) { 941 IsDead = false; 942 } else { 943 auto *Domain = S.getDomainConditions(Inst->getParent()); 944 IsDead = isl_set_is_empty(Domain); 945 isl_set_free(Domain); 946 } 947 948 if (IsDead) { 949 V = UndefValue::get(ParamSCEV->getType()); 950 break; 951 } 952 } 953 } 954 955 if (auto *IAClass = S.lookupInvariantEquivClass(Val)) { 956 957 // Check if this invariant access class is empty, hence if we never 958 // actually added a loads instruction to it. In that case it has no 959 // (meaningful) users and we should not try to code generate it. 960 if (IAClass->InvariantAccesses.empty()) 961 V = UndefValue::get(ParamSCEV->getType()); 962 963 if (!preloadInvariantEquivClass(*IAClass)) { 964 isl_id_free(Id); 965 return false; 966 } 967 } 968 } 969 970 V = V ? V : generateSCEV(ParamSCEV); 971 IDToValue[Id] = V; 972 } 973 974 isl_id_free(Id); 975 return true; 976 } 977 978 bool IslNodeBuilder::materializeParameters(isl_set *Set) { 979 for (unsigned i = 0, e = isl_set_dim(Set, isl_dim_param); i < e; ++i) { 980 if (!isl_set_involves_dims(Set, isl_dim_param, i, 1)) 981 continue; 982 isl_id *Id = isl_set_get_dim_id(Set, isl_dim_param, i); 983 if (!materializeValue(Id)) 984 return false; 985 } 986 return true; 987 } 988 989 bool IslNodeBuilder::materializeParameters() { 990 for (const SCEV *Param : S.parameters()) { 991 isl_id *Id = S.getIdForParam(Param); 992 if (!materializeValue(Id)) 993 return false; 994 } 995 return true; 996 } 997 998 /// Add the number of dimensions in @p BS to @p U. 999 static isl_stat countTotalDims(__isl_take isl_basic_set *BS, void *U) { 1000 unsigned *NumTotalDim = static_cast<unsigned *>(U); 1001 *NumTotalDim += isl_basic_set_total_dim(BS); 1002 isl_basic_set_free(BS); 1003 return isl_stat_ok; 1004 } 1005 1006 Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange, 1007 isl_ast_build *Build, 1008 Instruction *AccInst) { 1009 1010 // TODO: This check could be performed in the ScopInfo already. 1011 unsigned NumTotalDim = 0; 1012 isl_set_foreach_basic_set(AccessRange, countTotalDims, &NumTotalDim); 1013 if (NumTotalDim > MaxDimensionsInAccessRange) { 1014 isl_set_free(AccessRange); 1015 return nullptr; 1016 } 1017 1018 isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange); 1019 isl_ast_expr *Access = 1020 isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); 1021 auto *Address = isl_ast_expr_address_of(Access); 1022 auto *AddressValue = ExprBuilder.create(Address); 1023 Value *PreloadVal; 1024 1025 // Correct the type as the SAI might have a different type than the user 1026 // expects, especially if the base pointer is a struct. 1027 Type *Ty = AccInst->getType(); 1028 1029 auto *Ptr = AddressValue; 1030 auto Name = Ptr->getName(); 1031 Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(), Name + ".cast"); 1032 PreloadVal = Builder.CreateLoad(Ptr, Name + ".load"); 1033 if (LoadInst *PreloadInst = dyn_cast<LoadInst>(PreloadVal)) 1034 PreloadInst->setAlignment(dyn_cast<LoadInst>(AccInst)->getAlignment()); 1035 1036 // TODO: This is only a hot fix for SCoP sequences that use the same load 1037 // instruction contained and hoisted by one of the SCoPs. 
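// Forgetting AccInst makes ScalarEvolution drop whatever it has cached for
// the instruction, so later queries do not return expressions that still
// refer to the original (now hoisted) load.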
1038 if (SE.isSCEVable(Ty)) 1039 SE.forgetValue(AccInst); 1040 1041 return PreloadVal; 1042 } 1043 1044 Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, 1045 isl_set *Domain) { 1046 1047 isl_set *AccessRange = isl_map_range(MA.getAddressFunction()); 1048 AccessRange = isl_set_gist_params(AccessRange, S.getContext()); 1049 1050 if (!materializeParameters(AccessRange)) { 1051 isl_set_free(AccessRange); 1052 isl_set_free(Domain); 1053 return nullptr; 1054 } 1055 1056 auto *Build = isl_ast_build_from_context(isl_set_universe(S.getParamSpace())); 1057 isl_set *Universe = isl_set_universe(isl_set_get_space(Domain)); 1058 bool AlwaysExecuted = isl_set_is_equal(Domain, Universe); 1059 isl_set_free(Universe); 1060 1061 Instruction *AccInst = MA.getAccessInstruction(); 1062 Type *AccInstTy = AccInst->getType(); 1063 1064 Value *PreloadVal = nullptr; 1065 if (AlwaysExecuted) { 1066 PreloadVal = preloadUnconditionally(AccessRange, Build, AccInst); 1067 isl_ast_build_free(Build); 1068 isl_set_free(Domain); 1069 return PreloadVal; 1070 } 1071 1072 if (!materializeParameters(Domain)) { 1073 isl_ast_build_free(Build); 1074 isl_set_free(AccessRange); 1075 isl_set_free(Domain); 1076 return nullptr; 1077 } 1078 1079 isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); 1080 Domain = nullptr; 1081 1082 ExprBuilder.setTrackOverflow(true); 1083 Value *Cond = ExprBuilder.create(DomainCond); 1084 Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(), 1085 "polly.preload.cond.overflown"); 1086 Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result"); 1087 ExprBuilder.setTrackOverflow(false); 1088 1089 if (!Cond->getType()->isIntegerTy(1)) 1090 Cond = Builder.CreateIsNotNull(Cond); 1091 1092 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 1093 &*Builder.GetInsertPoint(), &DT, &LI); 1094 CondBB->setName("polly.preload.cond"); 1095 1096 BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI); 1097 MergeBB->setName("polly.preload.merge"); 1098 1099 Function *F = Builder.GetInsertBlock()->getParent(); 1100 LLVMContext &Context = F->getContext(); 1101 BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F); 1102 1103 DT.addNewBlock(ExecBB, CondBB); 1104 if (Loop *L = LI.getLoopFor(CondBB)) 1105 L->addBasicBlockToLoop(ExecBB, LI); 1106 1107 auto *CondBBTerminator = CondBB->getTerminator(); 1108 Builder.SetInsertPoint(CondBBTerminator); 1109 Builder.CreateCondBr(Cond, ExecBB, MergeBB); 1110 CondBBTerminator->eraseFromParent(); 1111 1112 Builder.SetInsertPoint(ExecBB); 1113 Builder.CreateBr(MergeBB); 1114 1115 Builder.SetInsertPoint(ExecBB->getTerminator()); 1116 Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInst); 1117 Builder.SetInsertPoint(MergeBB->getTerminator()); 1118 auto *MergePHI = Builder.CreatePHI( 1119 AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge"); 1120 PreloadVal = MergePHI; 1121 1122 if (!PreAccInst) { 1123 PreloadVal = nullptr; 1124 PreAccInst = UndefValue::get(AccInstTy); 1125 } 1126 1127 MergePHI->addIncoming(PreAccInst, ExecBB); 1128 MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB); 1129 1130 isl_ast_build_free(Build); 1131 return PreloadVal; 1132 } 1133 1134 bool IslNodeBuilder::preloadInvariantEquivClass( 1135 InvariantEquivClassTy &IAClass) { 1136 // For an equivalence class of invariant loads we pre-load the representing 1137 // element with the unified execution context. 
However, we have to map all
1138 // elements of the class to the single preloaded load, as they are referenced
1139 // during code generation and therefore need to be mapped.
1140 const MemoryAccessList &MAs = IAClass.InvariantAccesses;
1141 if (MAs.empty())
1142 return true;
1143
1144 MemoryAccess *MA = MAs.front();
1145 assert(MA->isArrayKind() && MA->isRead());
1146
1147 // If the access function was already mapped, the preload of this equivalence
1148 // class was already triggered earlier and doesn't need to be done again.
1149 if (ValueMap.count(MA->getAccessInstruction()))
1150 return true;
1151
1152 // Check for recursion which can be caused by additional constraints, e.g.,
1153 // non-finite loop constraints. In such a case we have to bail out and insert
1154 // a "false" runtime check that will cause the original code to be executed.
1155 auto PtrId = std::make_pair(IAClass.IdentifyingPointer, IAClass.AccessType);
1156 if (!PreloadedPtrs.insert(PtrId).second)
1157 return false;
1158
1159 // The execution context of the IAClass.
1160 isl_set *&ExecutionCtx = IAClass.ExecutionContext;
1161
1162 // If the base pointer of this class is dependent on another one we have to
1163 // make sure it was preloaded already.
1164 auto *SAI = MA->getScopArrayInfo();
1165 if (auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) {
1166 if (!preloadInvariantEquivClass(*BaseIAClass))
1167 return false;
1168
1169 // Preloading the BaseIAClass may have adjusted its execution context, so
1170 // we need to refine our ExecutionCtx accordingly.
1171 isl_set *BaseExecutionCtx = isl_set_copy(BaseIAClass->ExecutionContext);
1172 ExecutionCtx = isl_set_intersect(ExecutionCtx, BaseExecutionCtx);
1173 }
1174
1175 // If the size of a dimension is dependent on another class, make sure it is
1176 // preloaded.
1177 for (unsigned i = 1, e = SAI->getNumberOfDimensions(); i < e; ++i) {
1178 const SCEV *Dim = SAI->getDimensionSize(i);
1179 SetVector<Value *> Values;
1180 findValues(Dim, SE, Values);
1181 for (auto *Val : Values) {
1182 if (auto *BaseIAClass = S.lookupInvariantEquivClass(Val)) {
1183 if (!preloadInvariantEquivClass(*BaseIAClass))
1184 return false;
1185
1186 // Preloading the BaseIAClass may have adjusted its execution context,
1187 // so we need to refine our ExecutionCtx accordingly.
1188 isl_set *BaseExecutionCtx = isl_set_copy(BaseIAClass->ExecutionContext); 1189 ExecutionCtx = isl_set_intersect(ExecutionCtx, BaseExecutionCtx); 1190 } 1191 } 1192 } 1193 1194 Instruction *AccInst = MA->getAccessInstruction(); 1195 Type *AccInstTy = AccInst->getType(); 1196 1197 Value *PreloadVal = preloadInvariantLoad(*MA, isl_set_copy(ExecutionCtx)); 1198 if (!PreloadVal) 1199 return false; 1200 1201 for (const MemoryAccess *MA : MAs) { 1202 Instruction *MAAccInst = MA->getAccessInstruction(); 1203 assert(PreloadVal->getType() == MAAccInst->getType()); 1204 ValueMap[MAAccInst] = PreloadVal; 1205 } 1206 1207 if (SE.isSCEVable(AccInstTy)) { 1208 isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)); 1209 if (ParamId) 1210 IDToValue[ParamId] = PreloadVal; 1211 isl_id_free(ParamId); 1212 } 1213 1214 BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock(); 1215 auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(), 1216 AccInst->getName() + ".preload.s2a"); 1217 Alloca->insertBefore(&*EntryBB->getFirstInsertionPt()); 1218 Builder.CreateStore(PreloadVal, Alloca); 1219 ValueMapT PreloadedPointer; 1220 PreloadedPointer[PreloadVal] = AccInst; 1221 Annotator.addAlternativeAliasBases(PreloadedPointer); 1222 1223 for (auto *DerivedSAI : SAI->getDerivedSAIs()) { 1224 Value *BasePtr = DerivedSAI->getBasePtr(); 1225 1226 for (const MemoryAccess *MA : MAs) { 1227 // As the derived SAI information is quite coarse, any load from the 1228 // current SAI could be the base pointer of the derived SAI, however we 1229 // should only change the base pointer of the derived SAI if we actually 1230 // preloaded it. 1231 if (BasePtr == MA->getBaseAddr()) { 1232 assert(BasePtr->getType() == PreloadVal->getType()); 1233 DerivedSAI->setBasePtr(PreloadVal); 1234 } 1235 1236 // For scalar derived SAIs we remap the alloca used for the derived value. 1237 if (BasePtr == MA->getAccessInstruction()) 1238 ScalarMap[DerivedSAI] = Alloca; 1239 } 1240 } 1241 1242 for (const MemoryAccess *MA : MAs) { 1243 1244 Instruction *MAAccInst = MA->getAccessInstruction(); 1245 // Use the escape system to get the correct value to users outside the SCoP. 
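// Illustrative example: if the invariant load also feeds an instruction that
// executes after the scop, that user is collected below; the block generator
// later uses the recorded ".preload.s2a" alloca to make the preloaded value
// available to such users.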
1246 BlockGenerator::EscapeUserVectorTy EscapeUsers; 1247 for (auto *U : MAAccInst->users()) 1248 if (Instruction *UI = dyn_cast<Instruction>(U)) 1249 if (!S.contains(UI)) 1250 EscapeUsers.push_back(UI); 1251 1252 if (EscapeUsers.empty()) 1253 continue; 1254 1255 EscapeMap[MA->getAccessInstruction()] = 1256 std::make_pair(Alloca, std::move(EscapeUsers)); 1257 } 1258 1259 return true; 1260 } 1261 1262 void IslNodeBuilder::allocateNewArrays() { 1263 for (auto &SAI : S.arrays()) { 1264 if (SAI->getBasePtr()) 1265 continue; 1266 1267 assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) && 1268 "The size of the outermost dimension is used to declare newly " 1269 "created arrays that require memory allocation."); 1270 1271 Type *NewArrayType = nullptr; 1272 for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; i--) { 1273 auto *DimSize = SAI->getDimensionSize(i); 1274 unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize) 1275 ->getAPInt() 1276 .getLimitedValue(); 1277 1278 if (!NewArrayType) 1279 NewArrayType = SAI->getElementType(); 1280 1281 NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize); 1282 } 1283 1284 auto InstIt = 1285 Builder.GetInsertBlock()->getParent()->getEntryBlock().getTerminator(); 1286 auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(), 1287 SAI->getName(), &*InstIt); 1288 CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize); 1289 SAI->setBasePtr(CreatedArray); 1290 } 1291 } 1292 1293 bool IslNodeBuilder::preloadInvariantLoads() { 1294 1295 auto &InvariantEquivClasses = S.getInvariantAccesses(); 1296 if (InvariantEquivClasses.empty()) 1297 return true; 1298 1299 BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(), 1300 &*Builder.GetInsertPoint(), &DT, &LI); 1301 PreLoadBB->setName("polly.preload.begin"); 1302 Builder.SetInsertPoint(&PreLoadBB->front()); 1303 1304 for (auto &IAClass : InvariantEquivClasses) 1305 if (!preloadInvariantEquivClass(IAClass)) 1306 return false; 1307 1308 return true; 1309 } 1310 1311 void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { 1312 // Materialize values for the parameters of the SCoP. 1313 materializeParameters(); 1314 1315 // Generate values for the current loop iteration for all surrounding loops. 1316 // 1317 // We may also reference loops outside of the scop which do not contain the 1318 // scop itself, but as the number of such scops may be arbitrarily large we do 1319 // not generate code for them here, but only at the point of code generation 1320 // where these values are needed. 1321 Loop *L = LI.getLoopFor(S.getEntry()); 1322 1323 while (L != nullptr && S.contains(L)) 1324 L = L->getParentLoop(); 1325 1326 while (L != nullptr) { 1327 const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), 1328 SE.getUnknown(Builder.getInt64(1)), 1329 L, SCEV::FlagAnyWrap); 1330 Value *V = generateSCEV(OuterLIV); 1331 OutsideLoopIterations[L] = SE.getUnknown(V); 1332 L = L->getParentLoop(); 1333 } 1334 1335 isl_set_free(Context); 1336 } 1337 1338 Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) { 1339 /// We pass the insert location of our Builder, as Polly ensures during IR 1340 /// generation that there is always a valid CFG into which instructions are 1341 /// inserted. As a result, the insertpoint is known to be always followed by a 1342 /// terminator instruction. 
This means the insert point may be specified by a
1343 /// terminator instruction, but it can never point to an ->end() iterator,
1344 /// which does not have a corresponding instruction. Hence, dereferencing
1345 /// the insert point to obtain an instruction is known to be safe.
1346 ///
1347 /// We also do not need to update the Builder here, as new instructions are
1348 /// always inserted _before_ the given InsertLocation. As a result, the
1349 /// insert location remains valid.
1350 assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() &&
1351 "Insert location points after last valid instruction");
1352 Instruction *InsertLocation = &*Builder.GetInsertPoint();
1353 return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(),
1354 InsertLocation, &ValueMap,
1355 StartBlock->getSinglePredecessor());
1356 }
1357
1358 /// The AST expression we generate to perform the run-time check assumes
1359 /// computations on integer types of infinite size. As we only use 64-bit
1360 /// arithmetic, we check for overflows, in case of which we set the result
1361 /// of this run-time check to false to be conservatively correct.
1362 Value *IslNodeBuilder::createRTC(isl_ast_expr *Condition) {
1363 auto ExprBuilder = getExprBuilder();
1364 ExprBuilder.setTrackOverflow(true);
1365 Value *RTC = ExprBuilder.create(Condition);
1366 if (!RTC->getType()->isIntegerTy(1))
1367 RTC = Builder.CreateIsNotNull(RTC);
1368 Value *OverflowHappened =
1369 Builder.CreateNot(ExprBuilder.getOverflowState(), "polly.rtc.overflown");
1370
1371 if (PollyGenerateRTCPrint) {
1372 auto *F = Builder.GetInsertBlock()->getParent();
1373 RuntimeDebugBuilder::createCPUPrinter(
1374 Builder,
1375 "F: " + F->getName().str() + " R: " + S.getRegion().getNameStr() +
1376 " __RTC: ",
1377 RTC, " Overflow: ", OverflowHappened, "\n");
1378 }
1379
1380 RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result");
1381 ExprBuilder.setTrackOverflow(false);
1382
1383 if (!isa<ConstantInt>(RTC))
1384 VersionedScops++;
1385
1386 return RTC;
1387 }
1388