1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the BlockGenerator and VectorBlockGenerator classes, 11 // which generate sequential code and vectorized code for a polyhedral 12 // statement, respectively. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "polly/ScopInfo.h" 17 #include "isl/aff.h" 18 #include "isl/ast.h" 19 #include "isl/ast_build.h" 20 #include "isl/set.h" 21 #include "polly/CodeGen/BlockGenerators.h" 22 #include "polly/CodeGen/CodeGeneration.h" 23 #include "polly/CodeGen/IslExprBuilder.h" 24 #include "polly/Options.h" 25 #include "polly/Support/GICHelper.h" 26 #include "polly/Support/SCEVValidator.h" 27 #include "polly/Support/ScopHelper.h" 28 #include "llvm/Analysis/LoopInfo.h" 29 #include "llvm/Analysis/ScalarEvolution.h" 30 #include "llvm/Analysis/ScalarEvolutionExpander.h" 31 #include "llvm/IR/IntrinsicInst.h" 32 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 33 34 using namespace llvm; 35 using namespace polly; 36 37 static cl::opt<bool> Aligned("enable-polly-aligned", 38 cl::desc("Assumed aligned memory accesses."), 39 cl::Hidden, cl::init(false), cl::ZeroOrMore, 40 cl::cat(PollyCategory)); 41 42 bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI, 43 ScalarEvolution *SE, const Region *R) { 44 if (!I || !SE->isSCEVable(I->getType())) 45 return false; 46 47 if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I))) 48 if (!isa<SCEVCouldNotCompute>(Scev)) 49 if (!hasScalarDepsInsideRegion(Scev, R)) 50 return true; 51 52 return false; 53 } 54 55 bool polly::isIgnoredIntrinsic(const Value *V) { 56 if (auto *IT = dyn_cast<IntrinsicInst>(V)) { 57 switch (IT->getIntrinsicID()) { 58 // Lifetime markers are supported/ignored. 59 case llvm::Intrinsic::lifetime_start: 60 case llvm::Intrinsic::lifetime_end: 61 // Invariant markers are supported/ignored. 62 case llvm::Intrinsic::invariant_start: 63 case llvm::Intrinsic::invariant_end: 64 // Some misc annotations are supported/ignored. 65 case llvm::Intrinsic::var_annotation: 66 case llvm::Intrinsic::ptr_annotation: 67 case llvm::Intrinsic::annotation: 68 case llvm::Intrinsic::donothing: 69 case llvm::Intrinsic::assume: 70 case llvm::Intrinsic::expect: 71 return true; 72 default: 73 break; 74 } 75 } 76 return false; 77 } 78 79 BlockGenerator::BlockGenerator(PollyIRBuilder &B, LoopInfo &LI, 80 ScalarEvolution &SE, DominatorTree &DT, 81 IslExprBuilder *ExprBuilder) 82 : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT) {} 83 84 Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old, 85 ValueMapT &BBMap, ValueMapT &GlobalMap, 86 LoopToScevMapT <S, Loop *L) const { 87 // We assume constants never change. 88 // This avoids map lookups for many calls to this function. 89 if (isa<Constant>(Old)) 90 return const_cast<Value *>(Old); 91 92 if (Value *New = GlobalMap.lookup(Old)) { 93 if (Old->getType()->getScalarSizeInBits() < 94 New->getType()->getScalarSizeInBits()) 95 New = Builder.CreateTruncOrBitCast(New, Old->getType()); 96 97 return New; 98 } 99 100 if (Value *New = BBMap.lookup(Old)) 101 return New; 102 103 if (SE.isSCEVable(Old->getType())) 104 if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) { 105 if (!isa<SCEVCouldNotCompute>(Scev)) { 106 const SCEV *NewScev = apply(Scev, LTS, SE); 107 ValueToValueMap VTV; 108 VTV.insert(BBMap.begin(), BBMap.end()); 109 VTV.insert(GlobalMap.begin(), GlobalMap.end()); 110 NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV); 111 SCEVExpander Expander(SE, "polly"); 112 Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(), 113 Builder.GetInsertPoint()); 114 115 BBMap[Old] = Expanded; 116 return Expanded; 117 } 118 } 119 120 // A scop-constant value defined by a global or a function parameter. 121 if (isa<GlobalValue>(Old) || isa<Argument>(Old)) 122 return const_cast<Value *>(Old); 123 124 // A scop-constant value defined by an instruction executed outside the scop. 125 if (const Instruction *Inst = dyn_cast<Instruction>(Old)) 126 if (!Stmt.getParent()->getRegion().contains(Inst->getParent())) 127 return const_cast<Value *>(Old); 128 129 // The scalar dependence is neither available nor SCEVCodegenable. 130 llvm_unreachable("Unexpected scalar dependence in region!"); 131 return nullptr; 132 } 133 134 void BlockGenerator::copyInstScalar(ScopStmt &Stmt, const Instruction *Inst, 135 ValueMapT &BBMap, ValueMapT &GlobalMap, 136 LoopToScevMapT <S) { 137 // We do not generate debug intrinsics as we did not investigate how to 138 // copy them correctly. At the current state, they just crash the code 139 // generation as the meta-data operands are not correctly copied. 140 if (isa<DbgInfoIntrinsic>(Inst)) 141 return; 142 143 Instruction *NewInst = Inst->clone(); 144 145 // Replace old operands with the new ones. 146 for (Value *OldOperand : Inst->operands()) { 147 Value *NewOperand = getNewValue(Stmt, OldOperand, BBMap, GlobalMap, LTS, 148 getLoopForInst(Inst)); 149 150 if (!NewOperand) { 151 assert(!isa<StoreInst>(NewInst) && 152 "Store instructions are always needed!"); 153 delete NewInst; 154 return; 155 } 156 157 NewInst->replaceUsesOfWith(OldOperand, NewOperand); 158 } 159 160 Builder.Insert(NewInst); 161 BBMap[Inst] = NewInst; 162 163 if (!NewInst->getType()->isVoidTy()) 164 NewInst->setName("p_" + Inst->getName()); 165 } 166 167 Value *BlockGenerator::getNewAccessOperand(ScopStmt &Stmt, 168 const MemoryAccess &MA) { 169 isl_pw_multi_aff *PWAccRel; 170 isl_union_map *Schedule; 171 isl_ast_expr *Expr; 172 isl_ast_build *Build = Stmt.getAstBuild(); 173 174 assert(ExprBuilder && Build && 175 "Cannot generate new value without IslExprBuilder!"); 176 177 Schedule = isl_ast_build_get_schedule(Build); 178 PWAccRel = MA.applyScheduleToAccessRelation(Schedule); 179 180 Expr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); 181 Expr = isl_ast_expr_address_of(Expr); 182 183 return ExprBuilder->create(Expr); 184 } 185 186 Value *BlockGenerator::generateLocationAccessed( 187 ScopStmt &Stmt, const Instruction *Inst, const Value *Pointer, 188 ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT <S) { 189 const MemoryAccess &MA = Stmt.getAccessFor(Inst); 190 191 Value *NewPointer; 192 if (MA.hasNewAccessRelation()) 193 NewPointer = getNewAccessOperand(Stmt, MA); 194 else 195 NewPointer = 196 getNewValue(Stmt, Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst)); 197 198 return NewPointer; 199 } 200 201 Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) { 202 return LI.getLoopFor(Inst->getParent()); 203 } 204 205 Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, const LoadInst *Load, 206 ValueMapT &BBMap, 207 ValueMapT &GlobalMap, 208 LoopToScevMapT <S) { 209 const Value *Pointer = Load->getPointerOperand(); 210 Value *NewPointer = 211 generateLocationAccessed(Stmt, Load, Pointer, BBMap, GlobalMap, LTS); 212 Value *ScalarLoad = Builder.CreateAlignedLoad( 213 NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_"); 214 return ScalarLoad; 215 } 216 217 Value *BlockGenerator::generateScalarStore(ScopStmt &Stmt, 218 const StoreInst *Store, 219 ValueMapT &BBMap, 220 ValueMapT &GlobalMap, 221 LoopToScevMapT <S) { 222 const Value *Pointer = Store->getPointerOperand(); 223 Value *NewPointer = 224 generateLocationAccessed(Stmt, Store, Pointer, BBMap, GlobalMap, LTS); 225 Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap, 226 GlobalMap, LTS, getLoopForInst(Store)); 227 228 Value *NewStore = Builder.CreateAlignedStore(ValueOperand, NewPointer, 229 Store->getAlignment()); 230 return NewStore; 231 } 232 233 void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst, 234 ValueMapT &BBMap, ValueMapT &GlobalMap, 235 LoopToScevMapT <S) { 236 // Terminator instructions control the control flow. They are explicitly 237 // expressed in the clast and do not need to be copied. 238 if (Inst->isTerminator()) 239 return; 240 241 if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) 242 return; 243 244 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 245 Value *NewLoad = generateScalarLoad(Stmt, Load, BBMap, GlobalMap, LTS); 246 // Compute NewLoad before its insertion in BBMap to make the insertion 247 // deterministic. 248 BBMap[Load] = NewLoad; 249 return; 250 } 251 252 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 253 Value *NewStore = generateScalarStore(Stmt, Store, BBMap, GlobalMap, LTS); 254 // Compute NewStore before its insertion in BBMap to make the insertion 255 // deterministic. 256 BBMap[Store] = NewStore; 257 return; 258 } 259 260 // Skip some special intrinsics for which we do not adjust the semantics to 261 // the new schedule. All others are handled like every other instruction. 262 if (auto *IT = dyn_cast<IntrinsicInst>(Inst)) { 263 switch (IT->getIntrinsicID()) { 264 // Lifetime markers are ignored. 265 case llvm::Intrinsic::lifetime_start: 266 case llvm::Intrinsic::lifetime_end: 267 // Invariant markers are ignored. 268 case llvm::Intrinsic::invariant_start: 269 case llvm::Intrinsic::invariant_end: 270 // Some misc annotations are ignored. 271 case llvm::Intrinsic::var_annotation: 272 case llvm::Intrinsic::ptr_annotation: 273 case llvm::Intrinsic::annotation: 274 case llvm::Intrinsic::donothing: 275 case llvm::Intrinsic::assume: 276 case llvm::Intrinsic::expect: 277 return; 278 default: 279 // Other intrinsics are copied. 280 break; 281 } 282 } 283 284 copyInstScalar(Stmt, Inst, BBMap, GlobalMap, LTS); 285 } 286 287 void BlockGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, 288 LoopToScevMapT <S) { 289 assert(Stmt.isBlockStmt() && 290 "Only block statements can be copied by the block generator"); 291 292 ValueMapT BBMap; 293 294 BasicBlock *BB = Stmt.getBasicBlock(); 295 copyBB(Stmt, BB, BBMap, GlobalMap, LTS); 296 } 297 298 BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, 299 ValueMapT &BBMap, ValueMapT &GlobalMap, 300 LoopToScevMapT <S) { 301 BasicBlock *CopyBB = 302 SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); 303 CopyBB->setName("polly.stmt." + BB->getName()); 304 Builder.SetInsertPoint(CopyBB->begin()); 305 306 for (Instruction &Inst : *BB) 307 copyInstruction(Stmt, &Inst, BBMap, GlobalMap, LTS); 308 309 return CopyBB; 310 } 311 312 VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen, 313 VectorValueMapT &GlobalMaps, 314 std::vector<LoopToScevMapT> &VLTS, 315 isl_map *Schedule) 316 : BlockGenerator(BlockGen), GlobalMaps(GlobalMaps), VLTS(VLTS), 317 Schedule(Schedule) { 318 assert(GlobalMaps.size() > 1 && "Only one vector lane found"); 319 assert(Schedule && "No statement domain provided"); 320 } 321 322 Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, const Value *Old, 323 ValueMapT &VectorMap, 324 VectorValueMapT &ScalarMaps, 325 Loop *L) { 326 if (Value *NewValue = VectorMap.lookup(Old)) 327 return NewValue; 328 329 int Width = getVectorWidth(); 330 331 Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width)); 332 333 for (int Lane = 0; Lane < Width; Lane++) 334 Vector = Builder.CreateInsertElement( 335 Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], GlobalMaps[Lane], 336 VLTS[Lane], L), 337 Builder.getInt32(Lane)); 338 339 VectorMap[Old] = Vector; 340 341 return Vector; 342 } 343 344 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) { 345 PointerType *PointerTy = dyn_cast<PointerType>(Val->getType()); 346 assert(PointerTy && "PointerType expected"); 347 348 Type *ScalarType = PointerTy->getElementType(); 349 VectorType *VectorType = VectorType::get(ScalarType, Width); 350 351 return PointerType::getUnqual(VectorType); 352 } 353 354 Value *VectorBlockGenerator::generateStrideOneLoad( 355 ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps, 356 bool NegativeStride = false) { 357 unsigned VectorWidth = getVectorWidth(); 358 const Value *Pointer = Load->getPointerOperand(); 359 Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth); 360 unsigned Offset = NegativeStride ? VectorWidth - 1 : 0; 361 362 Value *NewPointer = nullptr; 363 NewPointer = generateLocationAccessed(Stmt, Load, Pointer, ScalarMaps[Offset], 364 GlobalMaps[Offset], VLTS[Offset]); 365 Value *VectorPtr = 366 Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); 367 LoadInst *VecLoad = 368 Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full"); 369 if (!Aligned) 370 VecLoad->setAlignment(8); 371 372 if (NegativeStride) { 373 SmallVector<Constant *, 16> Indices; 374 for (int i = VectorWidth - 1; i >= 0; i--) 375 Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i)); 376 Constant *SV = llvm::ConstantVector::get(Indices); 377 Value *RevVecLoad = Builder.CreateShuffleVector( 378 VecLoad, VecLoad, SV, Load->getName() + "_reverse"); 379 return RevVecLoad; 380 } 381 382 return VecLoad; 383 } 384 385 Value *VectorBlockGenerator::generateStrideZeroLoad(ScopStmt &Stmt, 386 const LoadInst *Load, 387 ValueMapT &BBMap) { 388 const Value *Pointer = Load->getPointerOperand(); 389 Type *VectorPtrType = getVectorPtrTy(Pointer, 1); 390 Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap, 391 GlobalMaps[0], VLTS[0]); 392 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 393 Load->getName() + "_p_vec_p"); 394 LoadInst *ScalarLoad = 395 Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one"); 396 397 if (!Aligned) 398 ScalarLoad->setAlignment(8); 399 400 Constant *SplatVector = Constant::getNullValue( 401 VectorType::get(Builder.getInt32Ty(), getVectorWidth())); 402 403 Value *VectorLoad = Builder.CreateShuffleVector( 404 ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat"); 405 return VectorLoad; 406 } 407 408 Value *VectorBlockGenerator::generateUnknownStrideLoad( 409 ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps) { 410 int VectorWidth = getVectorWidth(); 411 const Value *Pointer = Load->getPointerOperand(); 412 VectorType *VectorType = VectorType::get( 413 dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth); 414 415 Value *Vector = UndefValue::get(VectorType); 416 417 for (int i = 0; i < VectorWidth; i++) { 418 Value *NewPointer = generateLocationAccessed( 419 Stmt, Load, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]); 420 Value *ScalarLoad = 421 Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_"); 422 Vector = Builder.CreateInsertElement( 423 Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_"); 424 } 425 426 return Vector; 427 } 428 429 void VectorBlockGenerator::generateLoad(ScopStmt &Stmt, const LoadInst *Load, 430 ValueMapT &VectorMap, 431 VectorValueMapT &ScalarMaps) { 432 if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL || 433 !VectorType::isValidElementType(Load->getType())) { 434 for (int i = 0; i < getVectorWidth(); i++) 435 ScalarMaps[i][Load] = 436 generateScalarLoad(Stmt, Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]); 437 return; 438 } 439 440 const MemoryAccess &Access = Stmt.getAccessFor(Load); 441 442 // Make sure we have scalar values available to access the pointer to 443 // the data location. 444 extractScalarValues(Load, VectorMap, ScalarMaps); 445 446 Value *NewLoad; 447 if (Access.isStrideZero(isl_map_copy(Schedule))) 448 NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0]); 449 else if (Access.isStrideOne(isl_map_copy(Schedule))) 450 NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps); 451 else if (Access.isStrideX(isl_map_copy(Schedule), -1)) 452 NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, true); 453 else 454 NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps); 455 456 VectorMap[Load] = NewLoad; 457 } 458 459 void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, 460 const UnaryInstruction *Inst, 461 ValueMapT &VectorMap, 462 VectorValueMapT &ScalarMaps) { 463 int VectorWidth = getVectorWidth(); 464 Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap, 465 ScalarMaps, getLoopForInst(Inst)); 466 467 assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction"); 468 469 const CastInst *Cast = dyn_cast<CastInst>(Inst); 470 VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth); 471 VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType); 472 } 473 474 void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, 475 const BinaryOperator *Inst, 476 ValueMapT &VectorMap, 477 VectorValueMapT &ScalarMaps) { 478 Loop *L = getLoopForInst(Inst); 479 Value *OpZero = Inst->getOperand(0); 480 Value *OpOne = Inst->getOperand(1); 481 482 Value *NewOpZero, *NewOpOne; 483 NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L); 484 NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L); 485 486 Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne, 487 Inst->getName() + "p_vec"); 488 VectorMap[Inst] = NewInst; 489 } 490 491 void VectorBlockGenerator::copyStore(ScopStmt &Stmt, const StoreInst *Store, 492 ValueMapT &VectorMap, 493 VectorValueMapT &ScalarMaps) { 494 const MemoryAccess &Access = Stmt.getAccessFor(Store); 495 496 const Value *Pointer = Store->getPointerOperand(); 497 Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap, 498 ScalarMaps, getLoopForInst(Store)); 499 500 // Make sure we have scalar values available to access the pointer to 501 // the data location. 502 extractScalarValues(Store, VectorMap, ScalarMaps); 503 504 if (Access.isStrideOne(isl_map_copy(Schedule))) { 505 Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth()); 506 Value *NewPointer = generateLocationAccessed( 507 Stmt, Store, Pointer, ScalarMaps[0], GlobalMaps[0], VLTS[0]); 508 509 Value *VectorPtr = 510 Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); 511 StoreInst *Store = Builder.CreateStore(Vector, VectorPtr); 512 513 if (!Aligned) 514 Store->setAlignment(8); 515 } else { 516 for (unsigned i = 0; i < ScalarMaps.size(); i++) { 517 Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i)); 518 Value *NewPointer = generateLocationAccessed( 519 Stmt, Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]); 520 Builder.CreateStore(Scalar, NewPointer); 521 } 522 } 523 } 524 525 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst, 526 ValueMapT &VectorMap) { 527 for (Value *Operand : Inst->operands()) 528 if (VectorMap.count(Operand)) 529 return true; 530 return false; 531 } 532 533 bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst, 534 ValueMapT &VectorMap, 535 VectorValueMapT &ScalarMaps) { 536 bool HasVectorOperand = false; 537 int VectorWidth = getVectorWidth(); 538 539 for (Value *Operand : Inst->operands()) { 540 ValueMapT::iterator VecOp = VectorMap.find(Operand); 541 542 if (VecOp == VectorMap.end()) 543 continue; 544 545 HasVectorOperand = true; 546 Value *NewVector = VecOp->second; 547 548 for (int i = 0; i < VectorWidth; ++i) { 549 ValueMapT &SM = ScalarMaps[i]; 550 551 // If there is one scalar extracted, all scalar elements should have 552 // already been extracted by the code here. So no need to check for the 553 // existance of all of them. 554 if (SM.count(Operand)) 555 break; 556 557 SM[Operand] = 558 Builder.CreateExtractElement(NewVector, Builder.getInt32(i)); 559 } 560 } 561 562 return HasVectorOperand; 563 } 564 565 void VectorBlockGenerator::copyInstScalarized(ScopStmt &Stmt, 566 const Instruction *Inst, 567 ValueMapT &VectorMap, 568 VectorValueMapT &ScalarMaps) { 569 bool HasVectorOperand; 570 int VectorWidth = getVectorWidth(); 571 572 HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps); 573 574 for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) 575 BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane], 576 GlobalMaps[VectorLane], VLTS[VectorLane]); 577 578 if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand) 579 return; 580 581 // Make the result available as vector value. 582 VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth); 583 Value *Vector = UndefValue::get(VectorType); 584 585 for (int i = 0; i < VectorWidth; i++) 586 Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst], 587 Builder.getInt32(i)); 588 589 VectorMap[Inst] = Vector; 590 } 591 592 int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); } 593 594 void VectorBlockGenerator::copyInstruction(ScopStmt &Stmt, 595 const Instruction *Inst, 596 ValueMapT &VectorMap, 597 VectorValueMapT &ScalarMaps) { 598 // Terminator instructions control the control flow. They are explicitly 599 // expressed in the clast and do not need to be copied. 600 if (Inst->isTerminator()) 601 return; 602 603 if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) 604 return; 605 606 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 607 generateLoad(Stmt, Load, VectorMap, ScalarMaps); 608 return; 609 } 610 611 if (hasVectorOperands(Inst, VectorMap)) { 612 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 613 copyStore(Stmt, Store, VectorMap, ScalarMaps); 614 return; 615 } 616 617 if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) { 618 copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps); 619 return; 620 } 621 622 if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) { 623 copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps); 624 return; 625 } 626 627 // Falltrough: We generate scalar instructions, if we don't know how to 628 // generate vector code. 629 } 630 631 copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps); 632 } 633 634 void VectorBlockGenerator::copyStmt(ScopStmt &Stmt) { 635 assert(Stmt.isBlockStmt() && "TODO: Only block statements can be copied by " 636 "the vector block generator"); 637 638 BasicBlock *BB = Stmt.getBasicBlock(); 639 BasicBlock *CopyBB = 640 SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); 641 CopyBB->setName("polly.stmt." + BB->getName()); 642 Builder.SetInsertPoint(CopyBB->begin()); 643 644 // Create two maps that store the mapping from the original instructions of 645 // the old basic block to their copies in the new basic block. Those maps 646 // are basic block local. 647 // 648 // As vector code generation is supported there is one map for scalar values 649 // and one for vector values. 650 // 651 // In case we just do scalar code generation, the vectorMap is not used and 652 // the scalarMap has just one dimension, which contains the mapping. 653 // 654 // In case vector code generation is done, an instruction may either appear 655 // in the vector map once (as it is calculating >vectorwidth< values at a 656 // time. Or (if the values are calculated using scalar operations), it 657 // appears once in every dimension of the scalarMap. 658 VectorValueMapT ScalarBlockMap(getVectorWidth()); 659 ValueMapT VectorBlockMap; 660 661 for (Instruction &Inst : *BB) 662 copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap); 663 } 664 665 void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, 666 LoopToScevMapT <S) { 667 assert(Stmt.isRegionStmt() && 668 "Only region statements can be copied by the block generator"); 669 670 // The region represented by the statement. 671 Region *R = Stmt.getRegion(); 672 673 // The "BBMap" for the whole region. 674 ValueMapT RegionMap; 675 676 // Iterate over all blocks in the region in a breadth-first search. 677 std::deque<BasicBlock *> Blocks; 678 SmallPtrSet<BasicBlock *, 8> SeenBlocks; 679 Blocks.push_back(R->getEntry()); 680 SeenBlocks.insert(R->getEntry()); 681 682 while (!Blocks.empty()) { 683 BasicBlock *BB = Blocks.front(); 684 Blocks.pop_front(); 685 686 // Copy the block with the BlockGenerator. 687 BasicBlock *BBCopy = copyBB(Stmt, BB, RegionMap, GlobalMap, LTS); 688 689 // And continue with new successors inside the region. 690 for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++) 691 if (R->contains(*SI) && SeenBlocks.insert(*SI).second) 692 Blocks.push_back(*SI); 693 694 // In order to remap PHI nodes we store also basic block mappings. 695 RegionMap[BB] = BBCopy; 696 } 697 698 // Now create a new dedicated region exit block and add it to the region map. 699 BasicBlock *RegionExit = 700 SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); 701 RegionExit->setName("polly.stmt." + R->getExit()->getName() + ".pre"); 702 RegionMap[R->getExit()] = RegionExit; 703 704 // As the block generator doesn't handle control flow we need to add the 705 // region control flow by hand after all blocks have been copied. 706 for (BasicBlock *BB : SeenBlocks) { 707 708 BranchInst *BI = cast<BranchInst>(BB->getTerminator()); 709 710 BasicBlock *BBCopy = cast<BasicBlock>(RegionMap[BB]); 711 Instruction *BICopy = BBCopy->getTerminator(); 712 713 Builder.SetInsertPoint(BBCopy); 714 copyInstScalar(Stmt, BI, RegionMap, GlobalMap, LTS); 715 BICopy->eraseFromParent(); 716 } 717 718 // Reset the old insert point for the build. 719 Builder.SetInsertPoint(RegionExit->begin()); 720 } 721