1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the BlockGenerator and VectorBlockGenerator classes, 11 // which generate sequential code and vectorized code for a polyhedral 12 // statement, respectively. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "polly/ScopInfo.h" 17 #include "polly/CodeGen/CodeGeneration.h" 18 #include "polly/CodeGen/BlockGenerators.h" 19 #include "polly/Support/GICHelper.h" 20 #include "polly/Support/SCEVValidator.h" 21 #include "polly/Support/ScopHelper.h" 22 23 #include "llvm/Analysis/LoopInfo.h" 24 #include "llvm/Analysis/ScalarEvolution.h" 25 #include "llvm/Analysis/ScalarEvolutionExpander.h" 26 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 27 #include "llvm/Support/CommandLine.h" 28 29 #include "isl/aff.h" 30 #include "isl/set.h" 31 32 using namespace llvm; 33 using namespace polly; 34 35 static cl::opt<bool> 36 Aligned("enable-polly-aligned", cl::desc("Assumed aligned memory accesses."), 37 cl::Hidden, cl::value_desc("OpenMP code generation enabled if true"), 38 cl::init(false), cl::ZeroOrMore); 39 40 static cl::opt<bool, true> 41 SCEVCodegenF("polly-codegen-scev", cl::desc("Use SCEV based code generation."), 42 cl::Hidden, cl::location(SCEVCodegen), cl::init(false), 43 cl::ZeroOrMore); 44 45 bool polly::SCEVCodegen; 46 47 bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI, 48 ScalarEvolution *SE, const Region *R) { 49 if (SCEVCodegen) { 50 if (!I || !SE->isSCEVable(I->getType())) 51 return false; 52 53 if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I))) 54 if (!isa<SCEVCouldNotCompute>(Scev)) 55 if (!hasScalarDepsInsideRegion(Scev, R)) 56 return true; 57 58 return false; 59 } 60 61 Loop *L = LI->getLoopFor(I->getParent()); 62 return L && I == L->getCanonicalInductionVariable(); 63 } 64 65 66 // Helper class to generate memory location. 67 namespace { 68 class IslGenerator { 69 public: 70 IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) 71 : Builder(Builder), IVS(IVS) {} 72 Value *generateIslInt(__isl_take isl_int Int); 73 Value *generateIslAff(__isl_take isl_aff *Aff); 74 Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff); 75 76 private: 77 typedef struct { 78 Value *Result; 79 class IslGenerator *Generator; 80 } IslGenInfo; 81 82 IRBuilder<> &Builder; 83 std::vector<Value *> &IVS; 84 static int mergeIslAffValues(__isl_take isl_set *Set, __isl_take isl_aff *Aff, 85 void *User); 86 }; 87 } 88 89 Value *IslGenerator::generateIslInt(isl_int Int) { 90 mpz_t IntMPZ; 91 mpz_init(IntMPZ); 92 isl_int_get_gmp(Int, IntMPZ); 93 Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ)); 94 mpz_clear(IntMPZ); 95 return IntValue; 96 } 97 98 Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) { 99 Value *Result; 100 Value *ConstValue; 101 isl_int ConstIsl; 102 103 isl_int_init(ConstIsl); 104 isl_aff_get_constant(Aff, &ConstIsl); 105 ConstValue = generateIslInt(ConstIsl); 106 Type *Ty = Builder.getInt64Ty(); 107 108 // FIXME: We should give the constant and coefficients the right type. Here 109 // we force it into i64. 110 Result = Builder.CreateSExtOrBitCast(ConstValue, Ty); 111 112 unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in); 113 114 assert((IVS.size() == NbInputDims) && 115 "The Dimension of Induction Variables must match the dimension of the " 116 "affine space."); 117 118 isl_int CoefficientIsl; 119 isl_int_init(CoefficientIsl); 120 121 for (unsigned int i = 0; i < NbInputDims; ++i) { 122 Value *CoefficientValue; 123 isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl); 124 125 if (isl_int_is_zero(CoefficientIsl)) 126 continue; 127 128 CoefficientValue = generateIslInt(CoefficientIsl); 129 CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true); 130 Value *IV = Builder.CreateIntCast(IVS[i], Ty, true); 131 Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff"); 132 Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff"); 133 } 134 135 isl_int_clear(CoefficientIsl); 136 isl_int_clear(ConstIsl); 137 isl_aff_free(Aff); 138 139 return Result; 140 } 141 142 int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set, 143 __isl_take isl_aff *Aff, void *User) { 144 IslGenInfo *GenInfo = (IslGenInfo *)User; 145 146 assert((GenInfo->Result == NULL) && 147 "Result is already set. Currently only single isl_aff is supported"); 148 assert(isl_set_plain_is_universe(Set) && 149 "Code generation failed because the set is not universe"); 150 151 GenInfo->Result = GenInfo->Generator->generateIslAff(Aff); 152 153 isl_set_free(Set); 154 return 0; 155 } 156 157 Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) { 158 IslGenInfo User; 159 User.Result = NULL; 160 User.Generator = this; 161 isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User); 162 assert(User.Result && "Code generation for isl_pw_aff failed"); 163 164 isl_pw_aff_free(PwAff); 165 return User.Result; 166 } 167 168 BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P) 169 : Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) { 170 } 171 172 Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap, 173 ValueMapT &GlobalMap, LoopToScevMapT <S) { 174 // We assume constants never change. 175 // This avoids map lookups for many calls to this function. 176 if (isa<Constant>(Old)) 177 return const_cast<Value *>(Old); 178 179 if (GlobalMap.count(Old)) { 180 Value *New = GlobalMap[Old]; 181 182 if (Old->getType()->getScalarSizeInBits() < 183 New->getType()->getScalarSizeInBits()) 184 New = Builder.CreateTruncOrBitCast(New, Old->getType()); 185 186 return New; 187 } 188 189 if (BBMap.count(Old)) { 190 return BBMap[Old]; 191 } 192 193 if (SCEVCodegen && SE.isSCEVable(Old->getType())) 194 if (const SCEV *Scev = SE.getSCEV(const_cast<Value *>(Old))) 195 if (!isa<SCEVCouldNotCompute>(Scev)) { 196 const SCEV *NewScev = apply(Scev, LTS, SE); 197 ValueToValueMap VTV; 198 VTV.insert(BBMap.begin(), BBMap.end()); 199 VTV.insert(GlobalMap.begin(), GlobalMap.end()); 200 NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV); 201 SCEVExpander Expander(SE, "polly"); 202 Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(), 203 Builder.GetInsertPoint()); 204 205 BBMap[Old] = Expanded; 206 return Expanded; 207 } 208 209 if (const Instruction *Inst = dyn_cast<Instruction>(Old)) { 210 (void) Inst; 211 assert(!Statement.getParent()->getRegion().contains(Inst->getParent()) && 212 "unexpected scalar dependence in region"); 213 } 214 215 // Everything else is probably a scop-constant value defined as global, 216 // function parameter or an instruction not within the scop. 217 return const_cast<Value *>(Old); 218 } 219 220 void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap, 221 ValueMapT &GlobalMap, LoopToScevMapT <S) { 222 Instruction *NewInst = Inst->clone(); 223 224 // Replace old operands with the new ones. 225 for (Instruction::const_op_iterator OI = Inst->op_begin(), 226 OE = Inst->op_end(); 227 OI != OE; ++OI) { 228 Value *OldOperand = *OI; 229 Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap, LTS); 230 231 if (!NewOperand) { 232 assert(!isa<StoreInst>(NewInst) && 233 "Store instructions are always needed!"); 234 delete NewInst; 235 return; 236 } 237 238 NewInst->replaceUsesOfWith(OldOperand, NewOperand); 239 } 240 241 Builder.Insert(NewInst); 242 BBMap[Inst] = NewInst; 243 244 if (!NewInst->getType()->isVoidTy()) 245 NewInst->setName("p_" + Inst->getName()); 246 } 247 248 std::vector<Value *> BlockGenerator::getMemoryAccessIndex( 249 __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap, 250 ValueMapT &GlobalMap, LoopToScevMapT <S) { 251 252 assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) && 253 "Only single dimensional access functions supported"); 254 255 std::vector<Value *> IVS; 256 for (unsigned i = 0; i < Statement.getNumIterators(); ++i) { 257 const Value *OriginalIV = Statement.getInductionVariableForDimension(i); 258 Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap, LTS); 259 IVS.push_back(NewIV); 260 } 261 262 isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0); 263 IslGenerator IslGen(Builder, IVS); 264 Value *OffsetValue = IslGen.generateIslPwAff(PwAff); 265 266 Type *Ty = Builder.getInt64Ty(); 267 OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true); 268 269 std::vector<Value *> IndexArray; 270 Value *NullValue = Constant::getNullValue(Ty); 271 IndexArray.push_back(NullValue); 272 IndexArray.push_back(OffsetValue); 273 return IndexArray; 274 } 275 276 Value *BlockGenerator::getNewAccessOperand( 277 __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, ValueMapT &BBMap, 278 ValueMapT &GlobalMap, LoopToScevMapT <S) { 279 std::vector<Value *> IndexArray = getMemoryAccessIndex( 280 NewAccessRelation, BaseAddress, BBMap, GlobalMap, LTS); 281 Value *NewOperand = 282 Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_"); 283 return NewOperand; 284 } 285 286 Value *BlockGenerator::generateLocationAccessed( 287 const Instruction *Inst, const Value *Pointer, ValueMapT &BBMap, 288 ValueMapT &GlobalMap, LoopToScevMapT <S) { 289 MemoryAccess &Access = Statement.getAccessFor(Inst); 290 isl_map *CurrentAccessRelation = Access.getAccessRelation(); 291 isl_map *NewAccessRelation = Access.getNewAccessRelation(); 292 293 assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) && 294 "Current and new access function use different spaces"); 295 296 Value *NewPointer; 297 298 if (!NewAccessRelation) { 299 NewPointer = getNewValue(Pointer, BBMap, GlobalMap, LTS); 300 } else { 301 Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr()); 302 NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap, 303 GlobalMap, LTS); 304 } 305 306 isl_map_free(CurrentAccessRelation); 307 isl_map_free(NewAccessRelation); 308 return NewPointer; 309 } 310 311 Value * 312 BlockGenerator::generateScalarLoad(const LoadInst *Load, ValueMapT &BBMap, 313 ValueMapT &GlobalMap, LoopToScevMapT <S) { 314 const Value *Pointer = Load->getPointerOperand(); 315 const Instruction *Inst = dyn_cast<Instruction>(Load); 316 Value *NewPointer = 317 generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap, LTS); 318 Value *ScalarLoad = 319 Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_"); 320 return ScalarLoad; 321 } 322 323 Value * 324 BlockGenerator::generateScalarStore(const StoreInst *Store, ValueMapT &BBMap, 325 ValueMapT &GlobalMap, LoopToScevMapT <S) { 326 const Value *Pointer = Store->getPointerOperand(); 327 Value *NewPointer = 328 generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS); 329 Value *ValueOperand = 330 getNewValue(Store->getValueOperand(), BBMap, GlobalMap, LTS); 331 332 return Builder.CreateStore(ValueOperand, NewPointer); 333 } 334 335 void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap, 336 ValueMapT &GlobalMap, 337 LoopToScevMapT <S) { 338 // Terminator instructions control the control flow. They are explicitly 339 // expressed in the clast and do not need to be copied. 340 if (Inst->isTerminator()) 341 return; 342 343 if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE, 344 &Statement.getParent()->getRegion())) 345 return; 346 347 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 348 BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap, LTS); 349 return; 350 } 351 352 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 353 BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap, LTS); 354 return; 355 } 356 357 copyInstScalar(Inst, BBMap, GlobalMap, LTS); 358 } 359 360 void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT <S) { 361 BasicBlock *BB = Statement.getBasicBlock(); 362 BasicBlock *CopyBB = 363 SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P); 364 CopyBB->setName("polly.stmt." + BB->getName()); 365 Builder.SetInsertPoint(CopyBB->begin()); 366 367 ValueMapT BBMap; 368 369 for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; 370 ++II) 371 copyInstruction(II, BBMap, GlobalMap, LTS); 372 } 373 374 VectorBlockGenerator::VectorBlockGenerator( 375 IRBuilder<> &B, VectorValueMapT &GlobalMaps, 376 std::vector<LoopToScevMapT> &VLTS, ScopStmt &Stmt, 377 __isl_keep isl_map *Schedule, Pass *P) 378 : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), VLTS(VLTS), 379 Schedule(Schedule) { 380 assert(GlobalMaps.size() > 1 && "Only one vector lane found"); 381 assert(Schedule && "No statement domain provided"); 382 } 383 384 Value *VectorBlockGenerator::getVectorValue( 385 const Value *Old, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) { 386 if (VectorMap.count(Old)) 387 return VectorMap[Old]; 388 389 int Width = getVectorWidth(); 390 391 Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width)); 392 393 for (int Lane = 0; Lane < Width; Lane++) 394 Vector = Builder.CreateInsertElement( 395 Vector, 396 getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane]), 397 Builder.getInt32(Lane)); 398 399 VectorMap[Old] = Vector; 400 401 return Vector; 402 } 403 404 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) { 405 PointerType *PointerTy = dyn_cast<PointerType>(Val->getType()); 406 assert(PointerTy && "PointerType expected"); 407 408 Type *ScalarType = PointerTy->getElementType(); 409 VectorType *VectorType = VectorType::get(ScalarType, Width); 410 411 return PointerType::getUnqual(VectorType); 412 } 413 414 Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load, 415 ValueMapT &BBMap) { 416 const Value *Pointer = Load->getPointerOperand(); 417 Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth()); 418 Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0]); 419 Value *VectorPtr = 420 Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); 421 LoadInst *VecLoad = 422 Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full"); 423 if (!Aligned) 424 VecLoad->setAlignment(8); 425 426 return VecLoad; 427 } 428 429 Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load, 430 ValueMapT &BBMap) { 431 const Value *Pointer = Load->getPointerOperand(); 432 Type *VectorPtrType = getVectorPtrTy(Pointer, 1); 433 Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0]); 434 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 435 Load->getName() + "_p_vec_p"); 436 LoadInst *ScalarLoad = 437 Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one"); 438 439 if (!Aligned) 440 ScalarLoad->setAlignment(8); 441 442 Constant *SplatVector = Constant::getNullValue( 443 VectorType::get(Builder.getInt32Ty(), getVectorWidth())); 444 445 Value *VectorLoad = Builder.CreateShuffleVector( 446 ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat"); 447 return VectorLoad; 448 } 449 450 Value *VectorBlockGenerator::generateUnknownStrideLoad( 451 const LoadInst *Load, VectorValueMapT &ScalarMaps) { 452 int VectorWidth = getVectorWidth(); 453 const Value *Pointer = Load->getPointerOperand(); 454 VectorType *VectorType = VectorType::get( 455 dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth); 456 457 Value *Vector = UndefValue::get(VectorType); 458 459 for (int i = 0; i < VectorWidth; i++) { 460 Value *NewPointer = 461 getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]); 462 Value *ScalarLoad = 463 Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_"); 464 Vector = Builder.CreateInsertElement( 465 Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_"); 466 } 467 468 return Vector; 469 } 470 471 void VectorBlockGenerator::generateLoad( 472 const LoadInst *Load, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) { 473 if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL || 474 !VectorType::isValidElementType(Load->getType())) { 475 for (int i = 0; i < getVectorWidth(); i++) 476 ScalarMaps[i][Load] = 477 generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]); 478 return; 479 } 480 481 MemoryAccess &Access = Statement.getAccessFor(Load); 482 483 Value *NewLoad; 484 if (Access.isStrideZero(isl_map_copy(Schedule))) 485 NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]); 486 else if (Access.isStrideOne(isl_map_copy(Schedule))) 487 NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]); 488 else 489 NewLoad = generateUnknownStrideLoad(Load, ScalarMaps); 490 491 VectorMap[Load] = NewLoad; 492 } 493 494 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst, 495 ValueMapT &VectorMap, 496 VectorValueMapT &ScalarMaps) { 497 int VectorWidth = getVectorWidth(); 498 Value *NewOperand = 499 getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps); 500 501 assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction"); 502 503 const CastInst *Cast = dyn_cast<CastInst>(Inst); 504 VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth); 505 VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType); 506 } 507 508 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst, 509 ValueMapT &VectorMap, 510 VectorValueMapT &ScalarMaps) { 511 Value *OpZero = Inst->getOperand(0); 512 Value *OpOne = Inst->getOperand(1); 513 514 Value *NewOpZero, *NewOpOne; 515 NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps); 516 NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps); 517 518 Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne, 519 Inst->getName() + "p_vec"); 520 VectorMap[Inst] = NewInst; 521 } 522 523 void VectorBlockGenerator::copyStore( 524 const StoreInst *Store, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) { 525 int VectorWidth = getVectorWidth(); 526 527 MemoryAccess &Access = Statement.getAccessFor(Store); 528 529 const Value *Pointer = Store->getPointerOperand(); 530 Value *Vector = 531 getVectorValue(Store->getValueOperand(), VectorMap, ScalarMaps); 532 533 if (Access.isStrideOne(isl_map_copy(Schedule))) { 534 Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth); 535 Value *NewPointer = 536 getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0], VLTS[0]); 537 538 Value *VectorPtr = 539 Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); 540 StoreInst *Store = Builder.CreateStore(Vector, VectorPtr); 541 542 if (!Aligned) 543 Store->setAlignment(8); 544 } else { 545 for (unsigned i = 0; i < ScalarMaps.size(); i++) { 546 Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i)); 547 Value *NewPointer = 548 getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]); 549 Builder.CreateStore(Scalar, NewPointer); 550 } 551 } 552 } 553 554 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst, 555 ValueMapT &VectorMap) { 556 for (Instruction::const_op_iterator OI = Inst->op_begin(), 557 OE = Inst->op_end(); 558 OI != OE; ++OI) 559 if (VectorMap.count(*OI)) 560 return true; 561 return false; 562 } 563 564 bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst, 565 ValueMapT &VectorMap, 566 VectorValueMapT &ScalarMaps) { 567 bool HasVectorOperand = false; 568 int VectorWidth = getVectorWidth(); 569 570 for (Instruction::const_op_iterator OI = Inst->op_begin(), 571 OE = Inst->op_end(); 572 OI != OE; ++OI) { 573 ValueMapT::iterator VecOp = VectorMap.find(*OI); 574 575 if (VecOp == VectorMap.end()) 576 continue; 577 578 HasVectorOperand = true; 579 Value *NewVector = VecOp->second; 580 581 for (int i = 0; i < VectorWidth; ++i) { 582 ValueMapT &SM = ScalarMaps[i]; 583 584 // If there is one scalar extracted, all scalar elements should have 585 // already been extracted by the code here. So no need to check for the 586 // existance of all of them. 587 if (SM.count(*OI)) 588 break; 589 590 SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i)); 591 } 592 } 593 594 return HasVectorOperand; 595 } 596 597 void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst, 598 ValueMapT &VectorMap, 599 VectorValueMapT &ScalarMaps) { 600 bool HasVectorOperand; 601 int VectorWidth = getVectorWidth(); 602 603 HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps); 604 605 for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) 606 copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane], 607 VLTS[VectorLane]); 608 609 if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand) 610 return; 611 612 // Make the result available as vector value. 613 VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth); 614 Value *Vector = UndefValue::get(VectorType); 615 616 for (int i = 0; i < VectorWidth; i++) 617 Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst], 618 Builder.getInt32(i)); 619 620 VectorMap[Inst] = Vector; 621 } 622 623 int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); } 624 625 void VectorBlockGenerator::copyInstruction(const Instruction *Inst, 626 ValueMapT &VectorMap, 627 VectorValueMapT &ScalarMaps) { 628 // Terminator instructions control the control flow. They are explicitly 629 // expressed in the clast and do not need to be copied. 630 if (Inst->isTerminator()) 631 return; 632 633 if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE, 634 &Statement.getParent()->getRegion())) 635 return; 636 637 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 638 generateLoad(Load, VectorMap, ScalarMaps); 639 return; 640 } 641 642 if (hasVectorOperands(Inst, VectorMap)) { 643 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 644 copyStore(Store, VectorMap, ScalarMaps); 645 return; 646 } 647 648 if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) { 649 copyUnaryInst(Unary, VectorMap, ScalarMaps); 650 return; 651 } 652 653 if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) { 654 copyBinaryInst(Binary, VectorMap, ScalarMaps); 655 return; 656 } 657 658 // Falltrough: We generate scalar instructions, if we don't know how to 659 // generate vector code. 660 } 661 662 copyInstScalarized(Inst, VectorMap, ScalarMaps); 663 } 664 665 void VectorBlockGenerator::copyBB() { 666 BasicBlock *BB = Statement.getBasicBlock(); 667 BasicBlock *CopyBB = 668 SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P); 669 CopyBB->setName("polly.stmt." + BB->getName()); 670 Builder.SetInsertPoint(CopyBB->begin()); 671 672 // Create two maps that store the mapping from the original instructions of 673 // the old basic block to their copies in the new basic block. Those maps 674 // are basic block local. 675 // 676 // As vector code generation is supported there is one map for scalar values 677 // and one for vector values. 678 // 679 // In case we just do scalar code generation, the vectorMap is not used and 680 // the scalarMap has just one dimension, which contains the mapping. 681 // 682 // In case vector code generation is done, an instruction may either appear 683 // in the vector map once (as it is calculating >vectorwidth< values at a 684 // time. Or (if the values are calculated using scalar operations), it 685 // appears once in every dimension of the scalarMap. 686 VectorValueMapT ScalarBlockMap(getVectorWidth()); 687 ValueMapT VectorBlockMap; 688 689 for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; 690 ++II) 691 copyInstruction(II, VectorBlockMap, ScalarBlockMap); 692 } 693