1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the BlockGenerator and VectorBlockGenerator classes, 11 // which generate sequential code and vectorized code for a polyhedral 12 // statement, respectively. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "polly/ScopInfo.h" 17 #include "polly/CodeGen/BlockGenerators.h" 18 #include "polly/CodeGen/CodeGeneration.h" 19 #include "polly/CodeGen/IslExprBuilder.h" 20 #include "polly/Options.h" 21 #include "polly/Support/GICHelper.h" 22 #include "polly/Support/SCEVValidator.h" 23 #include "polly/Support/ScopHelper.h" 24 #include "llvm/Analysis/LoopInfo.h" 25 #include "llvm/Analysis/RegionInfo.h" 26 #include "llvm/Analysis/ScalarEvolution.h" 27 #include "llvm/Analysis/ScalarEvolutionExpander.h" 28 #include "llvm/IR/IntrinsicInst.h" 29 #include "llvm/IR/Module.h" 30 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 31 #include "isl/aff.h" 32 #include "isl/ast.h" 33 #include "isl/ast_build.h" 34 #include "isl/set.h" 35 #include <deque> 36 37 using namespace llvm; 38 using namespace polly; 39 40 static cl::opt<bool> Aligned("enable-polly-aligned", 41 cl::desc("Assumed aligned memory accesses."), 42 cl::Hidden, cl::init(false), cl::ZeroOrMore, 43 cl::cat(PollyCategory)); 44 45 bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI, 46 ScalarEvolution *SE, const Region *R) { 47 if (!I || !SE->isSCEVable(I->getType())) 48 return false; 49 50 if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I))) 51 if (!isa<SCEVCouldNotCompute>(Scev)) 52 if (!hasScalarDepsInsideRegion(Scev, R)) 53 return true; 54 55 return false; 56 } 57 58 bool polly::isIgnoredIntrinsic(const Value *V) { 59 if (auto *IT = dyn_cast<IntrinsicInst>(V)) { 60 switch (IT->getIntrinsicID()) { 61 // Lifetime markers are supported/ignored. 62 case llvm::Intrinsic::lifetime_start: 63 case llvm::Intrinsic::lifetime_end: 64 // Invariant markers are supported/ignored. 65 case llvm::Intrinsic::invariant_start: 66 case llvm::Intrinsic::invariant_end: 67 // Some misc annotations are supported/ignored. 68 case llvm::Intrinsic::var_annotation: 69 case llvm::Intrinsic::ptr_annotation: 70 case llvm::Intrinsic::annotation: 71 case llvm::Intrinsic::donothing: 72 case llvm::Intrinsic::assume: 73 case llvm::Intrinsic::expect: 74 return true; 75 default: 76 break; 77 } 78 } 79 return false; 80 } 81 82 BlockGenerator::BlockGenerator(PollyIRBuilder &B, LoopInfo &LI, 83 ScalarEvolution &SE, DominatorTree &DT, 84 IslExprBuilder *ExprBuilder) 85 : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT) {} 86 87 Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old, 88 ValueMapT &BBMap, ValueMapT &GlobalMap, 89 LoopToScevMapT <S, Loop *L) const { 90 // We assume constants never change. 91 // This avoids map lookups for many calls to this function. 92 if (isa<Constant>(Old)) 93 return const_cast<Value *>(Old); 94 95 if (Value *New = GlobalMap.lookup(Old)) { 96 if (Old->getType()->getScalarSizeInBits() < 97 New->getType()->getScalarSizeInBits()) 98 New = Builder.CreateTruncOrBitCast(New, Old->getType()); 99 100 return New; 101 } 102 103 if (Value *New = BBMap.lookup(Old)) 104 return New; 105 106 if (SE.isSCEVable(Old->getType())) 107 if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) { 108 if (!isa<SCEVCouldNotCompute>(Scev)) { 109 const SCEV *NewScev = apply(Scev, LTS, SE); 110 ValueToValueMap VTV; 111 VTV.insert(BBMap.begin(), BBMap.end()); 112 VTV.insert(GlobalMap.begin(), GlobalMap.end()); 113 NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV); 114 SCEVExpander Expander(SE, Stmt.getParent() 115 ->getRegion() 116 .getEntry() 117 ->getParent() 118 ->getParent() 119 ->getDataLayout(), 120 "polly"); 121 Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(), 122 Builder.GetInsertPoint()); 123 124 BBMap[Old] = Expanded; 125 return Expanded; 126 } 127 } 128 129 // A scop-constant value defined by a global or a function parameter. 130 if (isa<GlobalValue>(Old) || isa<Argument>(Old)) 131 return const_cast<Value *>(Old); 132 133 // A scop-constant value defined by an instruction executed outside the scop. 134 if (const Instruction *Inst = dyn_cast<Instruction>(Old)) 135 if (!Stmt.getParent()->getRegion().contains(Inst->getParent())) 136 return const_cast<Value *>(Old); 137 138 // The scalar dependence is neither available nor SCEVCodegenable. 139 llvm_unreachable("Unexpected scalar dependence in region!"); 140 return nullptr; 141 } 142 143 void BlockGenerator::copyInstScalar(ScopStmt &Stmt, const Instruction *Inst, 144 ValueMapT &BBMap, ValueMapT &GlobalMap, 145 LoopToScevMapT <S) { 146 // We do not generate debug intrinsics as we did not investigate how to 147 // copy them correctly. At the current state, they just crash the code 148 // generation as the meta-data operands are not correctly copied. 149 if (isa<DbgInfoIntrinsic>(Inst)) 150 return; 151 152 Instruction *NewInst = Inst->clone(); 153 154 // Replace old operands with the new ones. 155 for (Value *OldOperand : Inst->operands()) { 156 Value *NewOperand = getNewValue(Stmt, OldOperand, BBMap, GlobalMap, LTS, 157 getLoopForInst(Inst)); 158 159 if (!NewOperand) { 160 assert(!isa<StoreInst>(NewInst) && 161 "Store instructions are always needed!"); 162 delete NewInst; 163 return; 164 } 165 166 NewInst->replaceUsesOfWith(OldOperand, NewOperand); 167 } 168 169 Builder.Insert(NewInst); 170 BBMap[Inst] = NewInst; 171 172 if (!NewInst->getType()->isVoidTy()) 173 NewInst->setName("p_" + Inst->getName()); 174 } 175 176 Value *BlockGenerator::getNewAccessOperand(ScopStmt &Stmt, 177 const MemoryAccess &MA) { 178 isl_pw_multi_aff *PWAccRel; 179 isl_union_map *Schedule; 180 isl_ast_expr *Expr; 181 isl_ast_build *Build = Stmt.getAstBuild(); 182 183 assert(ExprBuilder && Build && 184 "Cannot generate new value without IslExprBuilder!"); 185 186 Schedule = isl_ast_build_get_schedule(Build); 187 PWAccRel = MA.applyScheduleToAccessRelation(Schedule); 188 189 Expr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); 190 Expr = isl_ast_expr_address_of(Expr); 191 192 return ExprBuilder->create(Expr); 193 } 194 195 Value *BlockGenerator::generateLocationAccessed( 196 ScopStmt &Stmt, const Instruction *Inst, const Value *Pointer, 197 ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT <S) { 198 const MemoryAccess &MA = Stmt.getAccessFor(Inst); 199 200 Value *NewPointer; 201 if (MA.hasNewAccessRelation()) 202 NewPointer = getNewAccessOperand(Stmt, MA); 203 else 204 NewPointer = 205 getNewValue(Stmt, Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst)); 206 207 return NewPointer; 208 } 209 210 Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) { 211 return LI.getLoopFor(Inst->getParent()); 212 } 213 214 Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, const LoadInst *Load, 215 ValueMapT &BBMap, 216 ValueMapT &GlobalMap, 217 LoopToScevMapT <S) { 218 const Value *Pointer = Load->getPointerOperand(); 219 Value *NewPointer = 220 generateLocationAccessed(Stmt, Load, Pointer, BBMap, GlobalMap, LTS); 221 Value *ScalarLoad = Builder.CreateAlignedLoad( 222 NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_"); 223 return ScalarLoad; 224 } 225 226 Value *BlockGenerator::generateScalarStore(ScopStmt &Stmt, 227 const StoreInst *Store, 228 ValueMapT &BBMap, 229 ValueMapT &GlobalMap, 230 LoopToScevMapT <S) { 231 const Value *Pointer = Store->getPointerOperand(); 232 Value *NewPointer = 233 generateLocationAccessed(Stmt, Store, Pointer, BBMap, GlobalMap, LTS); 234 Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap, 235 GlobalMap, LTS, getLoopForInst(Store)); 236 237 Value *NewStore = Builder.CreateAlignedStore(ValueOperand, NewPointer, 238 Store->getAlignment()); 239 return NewStore; 240 } 241 242 void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst, 243 ValueMapT &BBMap, ValueMapT &GlobalMap, 244 LoopToScevMapT <S) { 245 // Terminator instructions control the control flow. They are explicitly 246 // expressed in the clast and do not need to be copied. 247 if (Inst->isTerminator()) 248 return; 249 250 if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) 251 return; 252 253 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 254 Value *NewLoad = generateScalarLoad(Stmt, Load, BBMap, GlobalMap, LTS); 255 // Compute NewLoad before its insertion in BBMap to make the insertion 256 // deterministic. 257 BBMap[Load] = NewLoad; 258 return; 259 } 260 261 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 262 Value *NewStore = generateScalarStore(Stmt, Store, BBMap, GlobalMap, LTS); 263 // Compute NewStore before its insertion in BBMap to make the insertion 264 // deterministic. 265 BBMap[Store] = NewStore; 266 return; 267 } 268 269 // Skip some special intrinsics for which we do not adjust the semantics to 270 // the new schedule. All others are handled like every other instruction. 271 if (auto *IT = dyn_cast<IntrinsicInst>(Inst)) { 272 switch (IT->getIntrinsicID()) { 273 // Lifetime markers are ignored. 274 case llvm::Intrinsic::lifetime_start: 275 case llvm::Intrinsic::lifetime_end: 276 // Invariant markers are ignored. 277 case llvm::Intrinsic::invariant_start: 278 case llvm::Intrinsic::invariant_end: 279 // Some misc annotations are ignored. 280 case llvm::Intrinsic::var_annotation: 281 case llvm::Intrinsic::ptr_annotation: 282 case llvm::Intrinsic::annotation: 283 case llvm::Intrinsic::donothing: 284 case llvm::Intrinsic::assume: 285 case llvm::Intrinsic::expect: 286 return; 287 default: 288 // Other intrinsics are copied. 289 break; 290 } 291 } 292 293 copyInstScalar(Stmt, Inst, BBMap, GlobalMap, LTS); 294 } 295 296 void BlockGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, 297 LoopToScevMapT <S) { 298 assert(Stmt.isBlockStmt() && 299 "Only block statements can be copied by the block generator"); 300 301 ValueMapT BBMap; 302 303 BasicBlock *BB = Stmt.getBasicBlock(); 304 copyBB(Stmt, BB, BBMap, GlobalMap, LTS); 305 } 306 307 BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) { 308 BasicBlock *CopyBB = 309 SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); 310 CopyBB->setName("polly.stmt." + BB->getName()); 311 return CopyBB; 312 } 313 314 BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, 315 ValueMapT &BBMap, ValueMapT &GlobalMap, 316 LoopToScevMapT <S) { 317 BasicBlock *CopyBB = splitBB(BB); 318 copyBB(Stmt, BB, CopyBB, BBMap, GlobalMap, LTS); 319 return CopyBB; 320 } 321 322 void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB, 323 ValueMapT &BBMap, ValueMapT &GlobalMap, 324 LoopToScevMapT <S) { 325 Builder.SetInsertPoint(CopyBB->begin()); 326 for (Instruction &Inst : *BB) 327 copyInstruction(Stmt, &Inst, BBMap, GlobalMap, LTS); 328 } 329 330 VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen, 331 VectorValueMapT &GlobalMaps, 332 std::vector<LoopToScevMapT> &VLTS, 333 isl_map *Schedule) 334 : BlockGenerator(BlockGen), GlobalMaps(GlobalMaps), VLTS(VLTS), 335 Schedule(Schedule) { 336 assert(GlobalMaps.size() > 1 && "Only one vector lane found"); 337 assert(Schedule && "No statement domain provided"); 338 } 339 340 Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, const Value *Old, 341 ValueMapT &VectorMap, 342 VectorValueMapT &ScalarMaps, 343 Loop *L) { 344 if (Value *NewValue = VectorMap.lookup(Old)) 345 return NewValue; 346 347 int Width = getVectorWidth(); 348 349 Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width)); 350 351 for (int Lane = 0; Lane < Width; Lane++) 352 Vector = Builder.CreateInsertElement( 353 Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], GlobalMaps[Lane], 354 VLTS[Lane], L), 355 Builder.getInt32(Lane)); 356 357 VectorMap[Old] = Vector; 358 359 return Vector; 360 } 361 362 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) { 363 PointerType *PointerTy = dyn_cast<PointerType>(Val->getType()); 364 assert(PointerTy && "PointerType expected"); 365 366 Type *ScalarType = PointerTy->getElementType(); 367 VectorType *VectorType = VectorType::get(ScalarType, Width); 368 369 return PointerType::getUnqual(VectorType); 370 } 371 372 Value *VectorBlockGenerator::generateStrideOneLoad( 373 ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps, 374 bool NegativeStride = false) { 375 unsigned VectorWidth = getVectorWidth(); 376 const Value *Pointer = Load->getPointerOperand(); 377 Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth); 378 unsigned Offset = NegativeStride ? VectorWidth - 1 : 0; 379 380 Value *NewPointer = nullptr; 381 NewPointer = generateLocationAccessed(Stmt, Load, Pointer, ScalarMaps[Offset], 382 GlobalMaps[Offset], VLTS[Offset]); 383 Value *VectorPtr = 384 Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); 385 LoadInst *VecLoad = 386 Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full"); 387 if (!Aligned) 388 VecLoad->setAlignment(8); 389 390 if (NegativeStride) { 391 SmallVector<Constant *, 16> Indices; 392 for (int i = VectorWidth - 1; i >= 0; i--) 393 Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i)); 394 Constant *SV = llvm::ConstantVector::get(Indices); 395 Value *RevVecLoad = Builder.CreateShuffleVector( 396 VecLoad, VecLoad, SV, Load->getName() + "_reverse"); 397 return RevVecLoad; 398 } 399 400 return VecLoad; 401 } 402 403 Value *VectorBlockGenerator::generateStrideZeroLoad(ScopStmt &Stmt, 404 const LoadInst *Load, 405 ValueMapT &BBMap) { 406 const Value *Pointer = Load->getPointerOperand(); 407 Type *VectorPtrType = getVectorPtrTy(Pointer, 1); 408 Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap, 409 GlobalMaps[0], VLTS[0]); 410 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 411 Load->getName() + "_p_vec_p"); 412 LoadInst *ScalarLoad = 413 Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one"); 414 415 if (!Aligned) 416 ScalarLoad->setAlignment(8); 417 418 Constant *SplatVector = Constant::getNullValue( 419 VectorType::get(Builder.getInt32Ty(), getVectorWidth())); 420 421 Value *VectorLoad = Builder.CreateShuffleVector( 422 ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat"); 423 return VectorLoad; 424 } 425 426 Value *VectorBlockGenerator::generateUnknownStrideLoad( 427 ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps) { 428 int VectorWidth = getVectorWidth(); 429 const Value *Pointer = Load->getPointerOperand(); 430 VectorType *VectorType = VectorType::get( 431 dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth); 432 433 Value *Vector = UndefValue::get(VectorType); 434 435 for (int i = 0; i < VectorWidth; i++) { 436 Value *NewPointer = generateLocationAccessed( 437 Stmt, Load, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]); 438 Value *ScalarLoad = 439 Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_"); 440 Vector = Builder.CreateInsertElement( 441 Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_"); 442 } 443 444 return Vector; 445 } 446 447 void VectorBlockGenerator::generateLoad(ScopStmt &Stmt, const LoadInst *Load, 448 ValueMapT &VectorMap, 449 VectorValueMapT &ScalarMaps) { 450 if (!VectorType::isValidElementType(Load->getType())) { 451 for (int i = 0; i < getVectorWidth(); i++) 452 ScalarMaps[i][Load] = 453 generateScalarLoad(Stmt, Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]); 454 return; 455 } 456 457 const MemoryAccess &Access = Stmt.getAccessFor(Load); 458 459 // Make sure we have scalar values available to access the pointer to 460 // the data location. 461 extractScalarValues(Load, VectorMap, ScalarMaps); 462 463 Value *NewLoad; 464 if (Access.isStrideZero(isl_map_copy(Schedule))) 465 NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0]); 466 else if (Access.isStrideOne(isl_map_copy(Schedule))) 467 NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps); 468 else if (Access.isStrideX(isl_map_copy(Schedule), -1)) 469 NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, true); 470 else 471 NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps); 472 473 VectorMap[Load] = NewLoad; 474 } 475 476 void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, 477 const UnaryInstruction *Inst, 478 ValueMapT &VectorMap, 479 VectorValueMapT &ScalarMaps) { 480 int VectorWidth = getVectorWidth(); 481 Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap, 482 ScalarMaps, getLoopForInst(Inst)); 483 484 assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction"); 485 486 const CastInst *Cast = dyn_cast<CastInst>(Inst); 487 VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth); 488 VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType); 489 } 490 491 void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, 492 const BinaryOperator *Inst, 493 ValueMapT &VectorMap, 494 VectorValueMapT &ScalarMaps) { 495 Loop *L = getLoopForInst(Inst); 496 Value *OpZero = Inst->getOperand(0); 497 Value *OpOne = Inst->getOperand(1); 498 499 Value *NewOpZero, *NewOpOne; 500 NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L); 501 NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L); 502 503 Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne, 504 Inst->getName() + "p_vec"); 505 VectorMap[Inst] = NewInst; 506 } 507 508 void VectorBlockGenerator::copyStore(ScopStmt &Stmt, const StoreInst *Store, 509 ValueMapT &VectorMap, 510 VectorValueMapT &ScalarMaps) { 511 const MemoryAccess &Access = Stmt.getAccessFor(Store); 512 513 const Value *Pointer = Store->getPointerOperand(); 514 Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap, 515 ScalarMaps, getLoopForInst(Store)); 516 517 // Make sure we have scalar values available to access the pointer to 518 // the data location. 519 extractScalarValues(Store, VectorMap, ScalarMaps); 520 521 if (Access.isStrideOne(isl_map_copy(Schedule))) { 522 Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth()); 523 Value *NewPointer = generateLocationAccessed( 524 Stmt, Store, Pointer, ScalarMaps[0], GlobalMaps[0], VLTS[0]); 525 526 Value *VectorPtr = 527 Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); 528 StoreInst *Store = Builder.CreateStore(Vector, VectorPtr); 529 530 if (!Aligned) 531 Store->setAlignment(8); 532 } else { 533 for (unsigned i = 0; i < ScalarMaps.size(); i++) { 534 Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i)); 535 Value *NewPointer = generateLocationAccessed( 536 Stmt, Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]); 537 Builder.CreateStore(Scalar, NewPointer); 538 } 539 } 540 } 541 542 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst, 543 ValueMapT &VectorMap) { 544 for (Value *Operand : Inst->operands()) 545 if (VectorMap.count(Operand)) 546 return true; 547 return false; 548 } 549 550 bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst, 551 ValueMapT &VectorMap, 552 VectorValueMapT &ScalarMaps) { 553 bool HasVectorOperand = false; 554 int VectorWidth = getVectorWidth(); 555 556 for (Value *Operand : Inst->operands()) { 557 ValueMapT::iterator VecOp = VectorMap.find(Operand); 558 559 if (VecOp == VectorMap.end()) 560 continue; 561 562 HasVectorOperand = true; 563 Value *NewVector = VecOp->second; 564 565 for (int i = 0; i < VectorWidth; ++i) { 566 ValueMapT &SM = ScalarMaps[i]; 567 568 // If there is one scalar extracted, all scalar elements should have 569 // already been extracted by the code here. So no need to check for the 570 // existance of all of them. 571 if (SM.count(Operand)) 572 break; 573 574 SM[Operand] = 575 Builder.CreateExtractElement(NewVector, Builder.getInt32(i)); 576 } 577 } 578 579 return HasVectorOperand; 580 } 581 582 void VectorBlockGenerator::copyInstScalarized(ScopStmt &Stmt, 583 const Instruction *Inst, 584 ValueMapT &VectorMap, 585 VectorValueMapT &ScalarMaps) { 586 bool HasVectorOperand; 587 int VectorWidth = getVectorWidth(); 588 589 HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps); 590 591 for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) 592 BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane], 593 GlobalMaps[VectorLane], VLTS[VectorLane]); 594 595 if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand) 596 return; 597 598 // Make the result available as vector value. 599 VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth); 600 Value *Vector = UndefValue::get(VectorType); 601 602 for (int i = 0; i < VectorWidth; i++) 603 Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst], 604 Builder.getInt32(i)); 605 606 VectorMap[Inst] = Vector; 607 } 608 609 int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); } 610 611 void VectorBlockGenerator::copyInstruction(ScopStmt &Stmt, 612 const Instruction *Inst, 613 ValueMapT &VectorMap, 614 VectorValueMapT &ScalarMaps) { 615 // Terminator instructions control the control flow. They are explicitly 616 // expressed in the clast and do not need to be copied. 617 if (Inst->isTerminator()) 618 return; 619 620 if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) 621 return; 622 623 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 624 generateLoad(Stmt, Load, VectorMap, ScalarMaps); 625 return; 626 } 627 628 if (hasVectorOperands(Inst, VectorMap)) { 629 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 630 copyStore(Stmt, Store, VectorMap, ScalarMaps); 631 return; 632 } 633 634 if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) { 635 copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps); 636 return; 637 } 638 639 if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) { 640 copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps); 641 return; 642 } 643 644 // Falltrough: We generate scalar instructions, if we don't know how to 645 // generate vector code. 646 } 647 648 copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps); 649 } 650 651 void VectorBlockGenerator::copyStmt(ScopStmt &Stmt) { 652 assert(Stmt.isBlockStmt() && "TODO: Only block statements can be copied by " 653 "the vector block generator"); 654 655 BasicBlock *BB = Stmt.getBasicBlock(); 656 BasicBlock *CopyBB = 657 SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); 658 CopyBB->setName("polly.stmt." + BB->getName()); 659 Builder.SetInsertPoint(CopyBB->begin()); 660 661 // Create two maps that store the mapping from the original instructions of 662 // the old basic block to their copies in the new basic block. Those maps 663 // are basic block local. 664 // 665 // As vector code generation is supported there is one map for scalar values 666 // and one for vector values. 667 // 668 // In case we just do scalar code generation, the vectorMap is not used and 669 // the scalarMap has just one dimension, which contains the mapping. 670 // 671 // In case vector code generation is done, an instruction may either appear 672 // in the vector map once (as it is calculating >vectorwidth< values at a 673 // time. Or (if the values are calculated using scalar operations), it 674 // appears once in every dimension of the scalarMap. 675 VectorValueMapT ScalarBlockMap(getVectorWidth()); 676 ValueMapT VectorBlockMap; 677 678 for (Instruction &Inst : *BB) 679 copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap); 680 } 681 682 BasicBlock *RegionGenerator::repairDominance( 683 BasicBlock *BB, BasicBlock *BBCopy, 684 DenseMap<BasicBlock *, BasicBlock *> &BlockMap) { 685 686 BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock(); 687 BasicBlock *BBCopyIDom = BlockMap.lookup(BBIDom); 688 689 if (BBCopyIDom) 690 DT.changeImmediateDominator(BBCopy, BBCopyIDom); 691 692 return BBCopyIDom; 693 } 694 695 void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, 696 LoopToScevMapT <S) { 697 assert(Stmt.isRegionStmt() && 698 "Only region statements can be copied by the block generator"); 699 700 // The region represented by the statement. 701 Region *R = Stmt.getRegion(); 702 703 // The "BBMaps" for the whole region. 704 DenseMap<BasicBlock *, ValueMapT> RegionMaps; 705 706 // A map from old to new blocks in the region 707 DenseMap<BasicBlock *, BasicBlock *> BlockMap; 708 709 // Iterate over all blocks in the region in a breadth-first search. 710 std::deque<BasicBlock *> Blocks; 711 SmallPtrSet<BasicBlock *, 8> SeenBlocks; 712 Blocks.push_back(R->getEntry()); 713 SeenBlocks.insert(R->getEntry()); 714 715 while (!Blocks.empty()) { 716 BasicBlock *BB = Blocks.front(); 717 Blocks.pop_front(); 718 719 // First split the block and update dominance information. 720 BasicBlock *BBCopy = splitBB(BB); 721 BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy, BlockMap); 722 723 // Get the mapping for this block and initialize it with the mapping 724 // available at its immediate dominator (in the new region). 725 ValueMapT &RegionMap = RegionMaps[BBCopy]; 726 RegionMap = RegionMaps[BBCopyIDom]; 727 728 // Copy the block with the BlockGenerator. 729 copyBB(Stmt, BB, BBCopy, RegionMap, GlobalMap, LTS); 730 731 // And continue with new successors inside the region. 732 for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++) 733 if (R->contains(*SI) && SeenBlocks.insert(*SI).second) 734 Blocks.push_back(*SI); 735 736 // In order to remap PHI nodes we store also basic block mappings. 737 BlockMap[BB] = BBCopy; 738 } 739 740 // Now create a new dedicated region exit block and add it to the region map. 741 BasicBlock *ExitBBCopy = 742 SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); 743 ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".as.exit"); 744 BlockMap[R->getExit()] = ExitBBCopy; 745 746 repairDominance(R->getExit(), ExitBBCopy, BlockMap); 747 748 // As the block generator doesn't handle control flow we need to add the 749 // region control flow by hand after all blocks have been copied. 750 for (BasicBlock *BB : SeenBlocks) { 751 752 BranchInst *BI = cast<BranchInst>(BB->getTerminator()); 753 754 BasicBlock *BBCopy = BlockMap[BB]; 755 Instruction *BICopy = BBCopy->getTerminator(); 756 757 ValueMapT &RegionMap = RegionMaps[BBCopy]; 758 RegionMap.insert(BlockMap.begin(), BlockMap.end()); 759 760 Builder.SetInsertPoint(BBCopy); 761 copyInstScalar(Stmt, BI, RegionMap, GlobalMap, LTS); 762 BICopy->eraseFromParent(); 763 } 764 765 // Reset the old insert point for the build. 766 Builder.SetInsertPoint(ExitBBCopy->begin()); 767 } 768