//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "isl/aff.h"
#include "isl/set.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/Options.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool>
Aligned("enable-polly-aligned", cl::desc("Assume aligned memory accesses."),
        cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool, true>
SCEVCodegenF("polly-codegen-scev", cl::desc("Use SCEV based code generation."),
             cl::Hidden, cl::location(SCEVCodegen), cl::init(false),
             cl::ZeroOrMore, cl::cat(PollyCategory));

bool polly::SCEVCodegen;

bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
                          ScalarEvolution *SE, const Region *R) {
  if (SCEVCodegen) {
    if (!I || !SE->isSCEVable(I->getType()))
      return false;

    if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
      if (!isa<SCEVCouldNotCompute>(Scev))
        if (!hasScalarDepsInsideRegion(Scev, R))
          return true;

    return false;
  }

  Loop *L = LI->getLoopFor(I->getParent());
  return L && I == L->getCanonicalInductionVariable() && R->contains(L);
}

// Helper class to generate memory locations.
namespace {
class IslGenerator {
public:
  IslGenerator(PollyIRBuilder &Builder, std::vector<Value *> &IVS)
      : Builder(Builder), IVS(IVS) {}
  Value *generateIslVal(__isl_take isl_val *Val);
  Value *generateIslAff(__isl_take isl_aff *Aff);
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  PollyIRBuilder &Builder;
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set, __isl_take isl_aff *Aff,
                               void *User);
};
}

Value *IslGenerator::generateIslVal(__isl_take isl_val *Val) {
  Value *IntValue = Builder.getInt(APIntFromVal(Val));
  return IntValue;
}
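// Evaluate an affine function at the current values of the induction
// variables: Result = c_0 + sum_i (c_i * IV_i), skipping zero coefficients.
// Both the constant and the coefficients are currently forced to i64 (see
// the FIXME below).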
Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_val *Val;

  Val = isl_aff_get_constant_val(Aff);
  ConstValue = generateIslVal(Val);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and coefficients the right type. Here
  // we force it into i64.
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) &&
         "The dimension of the induction variables must match the dimension "
         "of the affine space.");

  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    Val = isl_aff_get_coefficient_val(Aff, isl_dim_in, i);

    if (isl_val_is_zero(Val)) {
      isl_val_free(Val);
      continue;
    }

    CoefficientValue = generateIslVal(Val);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  isl_aff_free(Aff);

  return Result;
}

int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
                                    __isl_take isl_aff *Aff, void *User) {
  IslGenInfo *GenInfo = (IslGenInfo *)User;

  assert((GenInfo->Result == nullptr) &&
         "Result is already set. Currently only a single isl_aff is supported");
  assert(isl_set_plain_is_universe(Set) &&
         "Code generation failed because the set is not the universe");

  GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);

  isl_set_free(Set);
  return 0;
}

Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
  IslGenInfo User;
  User.Result = nullptr;
  User.Generator = this;
  isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
  assert(User.Result && "Code generation for isl_pw_aff failed");

  isl_pw_aff_free(PwAff);
  return User.Result;
}

BlockGenerator::BlockGenerator(PollyIRBuilder &B, ScopStmt &Stmt, Pass *P)
    : Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {
}
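// Check whether a new version of 'Old' is already available without creating
// any new code. The lookup order is: constants (returned unchanged), the
// GlobalMap (truncating if the mapped value is wider than the original),
// scop-constant values (globals, function arguments, and instructions defined
// outside the scop, which stay valid unchanged), and finally the block-local
// BBMap. Returns nullptr if no version is available yet.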
Value *BlockGenerator::lookupAvailableValue(const Value *Old, ValueMapT &BBMap,
                                            ValueMapT &GlobalMap) const {
  // We assume constants never change. This avoids map lookups for many calls
  // to this function.
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (Value *New = GlobalMap.lookup(Old)) {
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  // Otherwise it is probably a scop-constant value defined as a global, a
  // function parameter, or an instruction not within the scop.
  if (isa<GlobalValue>(Old) || isa<Argument>(Old))
    return const_cast<Value *>(Old);

  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (!Statement.getParent()->getRegion().contains(Inst->getParent()))
      return const_cast<Value *>(Old);

  if (Value *New = BBMap.lookup(Old))
    return New;

  return nullptr;
}

Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap, LoopToScevMapT &LTS,
                                   Loop *L) {
  if (Value *New = lookupAvailableValue(Old, BBMap, GlobalMap))
    return New;

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = apply(Scev, LTS, SE);
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  // At this point the scalar dependence is neither available nor
  // synthesizable from SCEV; this should never happen in the current code
  // generator.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}
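// Create a scalar copy of 'Inst' in the generated block, remapping every
// operand through getNewValue. If an operand cannot be remapped, the cloned
// instruction is discarded; only stores must never be dropped (see the
// assert in the operand loop).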
void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. At the current state, they just crash the code
  // generation as the metadata operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand =
        getNewValue(OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

std::vector<Value *> BlockGenerator::getMemoryAccessIndex(
    __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap,
    ValueMapT &GlobalMap, LoopToScevMapT &LTS, Loop *L) {
  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
         "Only single dimensional access functions supported");

  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap, LTS, L);
    IVS.push_back(NewIV);
  }

  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  std::vector<Value *> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}

Value *BlockGenerator::getNewAccessOperand(
    __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, ValueMapT &BBMap,
    ValueMapT &GlobalMap, LoopToScevMapT &LTS, Loop *L) {
  std::vector<Value *> IndexArray = getMemoryAccessIndex(
      NewAccessRelation, BaseAddress, BBMap, GlobalMap, LTS, L);
  Value *NewOperand =
      Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_");
  return NewOperand;
}

Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
                                                const Value *Pointer,
                                                ValueMapT &BBMap,
                                                ValueMapT &GlobalMap,
                                                LoopToScevMapT &LTS) {
  const MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) &&
         "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer =
        getNewValue(Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));
  } else {
    Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr());
    NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap,
                                     GlobalMap, LTS, getLoopForInst(Inst));
  }

  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}

Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
  return P->getAnalysis<LoopInfo>().getLoopFor(Inst->getParent());
}
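// Generate a scalar load for the statement copy: remap the pointer operand
// (or compute it from a new access relation) and emit a load from the
// resulting address.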
Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap,
                                          LoopToScevMapT &LTS) {
  const Value *Pointer = Load->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Load, Pointer, BBMap, GlobalMap, LTS);
  Value *ScalarLoad =
      Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap,
                                           LoopToScevMapT &LTS) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS);
  Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap,
                                    LTS, getLoopForInst(Store));

  return Builder.CreateStore(ValueOperand, NewPointer);
}

void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
                                     ValueMapT &GlobalMap,
                                     LoopToScevMapT &LTS) {
  // Terminator instructions define the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
                    &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateScalarLoad(Load, BBMap, GlobalMap, LTS);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    Value *NewStore = generateScalarStore(Store, BBMap, GlobalMap, LTS);
    // Compute NewStore before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Store] = NewStore;
    return;
  }

  copyInstScalar(Inst, BBMap, GlobalMap, LTS);
}

void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, BBMap, GlobalMap, LTS);
}
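// The vector block generator keeps one value map and one loop-to-SCEV map per
// vector lane; the vector width is the number of these per-lane maps, i.e.
// the number of statement instances generated together.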
VectorBlockGenerator::VectorBlockGenerator(PollyIRBuilder &B,
                                           VectorValueMapT &GlobalMaps,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           ScopStmt &Stmt,
                                           __isl_keep isl_map *Schedule,
                                           Pass *P)
    : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), VLTS(VLTS),
      Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement domain provided");
}

Value *VectorBlockGenerator::getVectorValue(const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector,
        getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}
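// Load a vector from a consecutive memory region: cast the remapped scalar
// pointer to a vector pointer and emit a single wide load. For a negative
// (minus one) stride, the pointer of the last lane is used and the loaded
// vector is reversed with a shuffle.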
Value *
VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                            VectorValueMapT &ScalarMaps,
                                            bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = nullptr;
  NewPointer = getNewValue(Pointer, ScalarMaps[Offset], GlobalMaps[Offset],
                           VLTS[Offset], getLoopForInst(Load));
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

Value *
VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
                                                VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
                                    VLTS[i], getLoopForInst(Load));
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}
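// Generate the load for all vector lanes. The memory access is classified by
// its stride under the given schedule: stride zero yields one scalar load
// splatted across the vector, stride one a single wide load, stride minus
// one a wide load plus reversal, and any other stride one scalar load per
// lane.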
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  const MemoryAccess &Access = Statement.getAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps, true);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps,
                                     getLoopForInst(Inst));

  assert(isa<CastInst>(Inst) && "Cannot generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForInst(Inst);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}
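// Generate the store for all vector lanes. A stride-one access is written
// with a single wide store through a vector pointer; for any other stride
// the individual elements are extracted and stored one lane at a time.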
void VectorBlockGenerator::copyStore(const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();

  const MemoryAccess &Access = Statement.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForInst(Store));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0],
                                    VLTS[0], getLoopForInst(Store));

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *NewStore = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      NewStore->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
                                      VLTS[i], getLoopForInst(Store));
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI)
    if (VectorMap.count(*OI))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar element has been extracted, all of them have already
      // been extracted by this code, so there is no need to check for the
      // existence of the remaining ones.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane],
                   VLTS[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
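// Copy one instruction into the vectorized statement. Loads get dedicated
// stride-aware handling; stores, casts, and binary operators with at least
// one vector operand are vectorized directly; everything else is
// scalarized, one copy per lane.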
void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions define the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
                    &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}

void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar
  // values and one for vector values.
  //
  // In case we just do scalar code generation, the vector map is not used and
  // the scalar map has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (if it computes <vectorwidth> values at a time) or
  // once in every dimension of the scalar map (if its values are calculated
  // using scalar operations).
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}