1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the BlockGenerator and VectorBlockGenerator classes, 11 // which generate sequential code and vectorized code for a polyhedral 12 // statement, respectively. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "polly/ScopInfo.h" 17 #include "isl/aff.h" 18 #include "isl/set.h" 19 #include "polly/CodeGen/BlockGenerators.h" 20 #include "polly/CodeGen/CodeGeneration.h" 21 #include "polly/Options.h" 22 #include "polly/Support/GICHelper.h" 23 #include "polly/Support/SCEVValidator.h" 24 #include "polly/Support/ScopHelper.h" 25 #include "llvm/Analysis/LoopInfo.h" 26 #include "llvm/Analysis/ScalarEvolution.h" 27 #include "llvm/Analysis/ScalarEvolutionExpander.h" 28 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 29 30 using namespace llvm; 31 using namespace polly; 32 33 static cl::opt<bool> 34 Aligned("enable-polly-aligned", cl::desc("Assumed aligned memory accesses."), 35 cl::Hidden, cl::value_desc("OpenMP code generation enabled if true"), 36 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 37 38 static cl::opt<bool, true> 39 SCEVCodegenF("polly-codegen-scev", cl::desc("Use SCEV based code generation."), 40 cl::Hidden, cl::location(SCEVCodegen), cl::init(false), 41 cl::ZeroOrMore, cl::cat(PollyCategory)); 42 43 bool polly::SCEVCodegen; 44 45 bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI, 46 ScalarEvolution *SE, const Region *R) { 47 if (SCEVCodegen) { 48 if (!I || !SE->isSCEVable(I->getType())) 49 return false; 50 51 if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I))) 52 if (!isa<SCEVCouldNotCompute>(Scev)) 53 if (!hasScalarDepsInsideRegion(Scev, R)) 54 return true; 55 56 return false; 57 } 58 59 Loop *L = LI->getLoopFor(I->getParent()); 60 return L && I == L->getCanonicalInductionVariable() && R->contains(L); 61 } 62 63 // Helper class to generate memory location. 64 namespace { 65 class IslGenerator { 66 public: 67 IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) 68 : Builder(Builder), IVS(IVS) {} 69 Value *generateIslVal(__isl_take isl_val *Val); 70 Value *generateIslAff(__isl_take isl_aff *Aff); 71 Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff); 72 73 private: 74 typedef struct { 75 Value *Result; 76 class IslGenerator *Generator; 77 } IslGenInfo; 78 79 IRBuilder<> &Builder; 80 std::vector<Value *> &IVS; 81 static int mergeIslAffValues(__isl_take isl_set *Set, __isl_take isl_aff *Aff, 82 void *User); 83 }; 84 } 85 86 Value *IslGenerator::generateIslVal(__isl_take isl_val *Val) { 87 Value *IntValue = Builder.getInt(APIntFromVal(Val)); 88 return IntValue; 89 } 90 91 Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) { 92 Value *Result; 93 Value *ConstValue; 94 isl_val *Val; 95 96 Val = isl_aff_get_constant_val(Aff); 97 ConstValue = generateIslVal(Val); 98 Type *Ty = Builder.getInt64Ty(); 99 100 // FIXME: We should give the constant and coefficients the right type. Here 101 // we force it into i64. 102 Result = Builder.CreateSExtOrBitCast(ConstValue, Ty); 103 104 unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in); 105 106 assert((IVS.size() == NbInputDims) && 107 "The Dimension of Induction Variables must match the dimension of the " 108 "affine space."); 109 110 for (unsigned int i = 0; i < NbInputDims; ++i) { 111 Value *CoefficientValue; 112 Val = isl_aff_get_coefficient_val(Aff, isl_dim_in, i); 113 114 if (isl_val_is_zero(Val)) { 115 isl_val_free(Val); 116 continue; 117 } 118 119 CoefficientValue = generateIslVal(Val); 120 CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true); 121 Value *IV = Builder.CreateIntCast(IVS[i], Ty, true); 122 Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff"); 123 Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff"); 124 } 125 126 isl_aff_free(Aff); 127 128 return Result; 129 } 130 131 int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set, 132 __isl_take isl_aff *Aff, void *User) { 133 IslGenInfo *GenInfo = (IslGenInfo *)User; 134 135 assert((GenInfo->Result == NULL) && 136 "Result is already set. Currently only single isl_aff is supported"); 137 assert(isl_set_plain_is_universe(Set) && 138 "Code generation failed because the set is not universe"); 139 140 GenInfo->Result = GenInfo->Generator->generateIslAff(Aff); 141 142 isl_set_free(Set); 143 return 0; 144 } 145 146 Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) { 147 IslGenInfo User; 148 User.Result = NULL; 149 User.Generator = this; 150 isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User); 151 assert(User.Result && "Code generation for isl_pw_aff failed"); 152 153 isl_pw_aff_free(PwAff); 154 return User.Result; 155 } 156 157 BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P) 158 : Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) { 159 } 160 161 Value *BlockGenerator::lookupAvailableValue(const Value *Old, ValueMapT &BBMap, 162 ValueMapT &GlobalMap) const { 163 // We assume constants never change. 164 // This avoids map lookups for many calls to this function. 165 if (isa<Constant>(Old)) 166 return const_cast<Value *>(Old); 167 168 if (Value *New = GlobalMap.lookup(Old)) { 169 if (Old->getType()->getScalarSizeInBits() < 170 New->getType()->getScalarSizeInBits()) 171 New = Builder.CreateTruncOrBitCast(New, Old->getType()); 172 173 return New; 174 } 175 176 // Or it is probably a scop-constant value defined as global, function 177 // parameter or an instruction not within the scop. 178 if (isa<GlobalValue>(Old) || isa<Argument>(Old)) 179 return const_cast<Value *>(Old); 180 181 if (const Instruction *Inst = dyn_cast<Instruction>(Old)) 182 if (!Statement.getParent()->getRegion().contains(Inst->getParent())) 183 return const_cast<Value *>(Old); 184 185 if (Value *New = BBMap.lookup(Old)) 186 return New; 187 188 return NULL; 189 } 190 191 Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap, 192 ValueMapT &GlobalMap, LoopToScevMapT <S, 193 Loop *L) { 194 if (Value *New = lookupAvailableValue(Old, BBMap, GlobalMap)) 195 return New; 196 197 if (SCEVCodegen && SE.isSCEVable(Old->getType())) 198 if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) { 199 if (!isa<SCEVCouldNotCompute>(Scev)) { 200 const SCEV *NewScev = apply(Scev, LTS, SE); 201 ValueToValueMap VTV; 202 VTV.insert(BBMap.begin(), BBMap.end()); 203 VTV.insert(GlobalMap.begin(), GlobalMap.end()); 204 NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV); 205 SCEVExpander Expander(SE, "polly"); 206 Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(), 207 Builder.GetInsertPoint()); 208 209 BBMap[Old] = Expanded; 210 return Expanded; 211 } 212 } 213 214 // Now the scalar dependence is neither available nor SCEVCodegenable, this 215 // should never happen in the current code generator. 216 llvm_unreachable("Unexpected scalar dependence in region!"); 217 return NULL; 218 } 219 220 void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap, 221 ValueMapT &GlobalMap, LoopToScevMapT <S) { 222 Instruction *NewInst = Inst->clone(); 223 224 // Replace old operands with the new ones. 225 for (Instruction::const_op_iterator OI = Inst->op_begin(), 226 OE = Inst->op_end(); 227 OI != OE; ++OI) { 228 Value *OldOperand = *OI; 229 Value *NewOperand = 230 getNewValue(OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst)); 231 232 if (!NewOperand) { 233 assert(!isa<StoreInst>(NewInst) && 234 "Store instructions are always needed!"); 235 delete NewInst; 236 return; 237 } 238 239 NewInst->replaceUsesOfWith(OldOperand, NewOperand); 240 } 241 242 Builder.Insert(NewInst); 243 BBMap[Inst] = NewInst; 244 245 if (!NewInst->getType()->isVoidTy()) 246 NewInst->setName("p_" + Inst->getName()); 247 } 248 249 std::vector<Value *> BlockGenerator::getMemoryAccessIndex( 250 __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap, 251 ValueMapT &GlobalMap, LoopToScevMapT <S, Loop *L) { 252 253 assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) && 254 "Only single dimensional access functions supported"); 255 256 std::vector<Value *> IVS; 257 for (unsigned i = 0; i < Statement.getNumIterators(); ++i) { 258 const Value *OriginalIV = Statement.getInductionVariableForDimension(i); 259 Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap, LTS, L); 260 IVS.push_back(NewIV); 261 } 262 263 isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0); 264 IslGenerator IslGen(Builder, IVS); 265 Value *OffsetValue = IslGen.generateIslPwAff(PwAff); 266 267 Type *Ty = Builder.getInt64Ty(); 268 OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true); 269 270 std::vector<Value *> IndexArray; 271 Value *NullValue = Constant::getNullValue(Ty); 272 IndexArray.push_back(NullValue); 273 IndexArray.push_back(OffsetValue); 274 return IndexArray; 275 } 276 277 Value *BlockGenerator::getNewAccessOperand( 278 __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, ValueMapT &BBMap, 279 ValueMapT &GlobalMap, LoopToScevMapT <S, Loop *L) { 280 std::vector<Value *> IndexArray = getMemoryAccessIndex( 281 NewAccessRelation, BaseAddress, BBMap, GlobalMap, LTS, L); 282 Value *NewOperand = 283 Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_"); 284 return NewOperand; 285 } 286 287 Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst, 288 const Value *Pointer, 289 ValueMapT &BBMap, 290 ValueMapT &GlobalMap, 291 LoopToScevMapT <S) { 292 const MemoryAccess &Access = Statement.getAccessFor(Inst); 293 isl_map *CurrentAccessRelation = Access.getAccessRelation(); 294 isl_map *NewAccessRelation = Access.getNewAccessRelation(); 295 296 assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) && 297 "Current and new access function use different spaces"); 298 299 Value *NewPointer; 300 301 if (!NewAccessRelation) { 302 NewPointer = 303 getNewValue(Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst)); 304 } else { 305 Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr()); 306 NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap, 307 GlobalMap, LTS, getLoopForInst(Inst)); 308 } 309 310 isl_map_free(CurrentAccessRelation); 311 isl_map_free(NewAccessRelation); 312 return NewPointer; 313 } 314 315 Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) { 316 return P->getAnalysis<LoopInfo>().getLoopFor(Inst->getParent()); 317 } 318 319 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load, 320 ValueMapT &BBMap, 321 ValueMapT &GlobalMap, 322 LoopToScevMapT <S) { 323 const Value *Pointer = Load->getPointerOperand(); 324 const Instruction *Inst = dyn_cast<Instruction>(Load); 325 Value *NewPointer = 326 generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap, LTS); 327 Value *ScalarLoad = 328 Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_"); 329 return ScalarLoad; 330 } 331 332 Value *BlockGenerator::generateScalarStore(const StoreInst *Store, 333 ValueMapT &BBMap, 334 ValueMapT &GlobalMap, 335 LoopToScevMapT <S) { 336 const Value *Pointer = Store->getPointerOperand(); 337 Value *NewPointer = 338 generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS); 339 Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap, 340 LTS, getLoopForInst(Store)); 341 342 return Builder.CreateStore(ValueOperand, NewPointer); 343 } 344 345 void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap, 346 ValueMapT &GlobalMap, 347 LoopToScevMapT <S) { 348 // Terminator instructions control the control flow. They are explicitly 349 // expressed in the clast and do not need to be copied. 350 if (Inst->isTerminator()) 351 return; 352 353 if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE, 354 &Statement.getParent()->getRegion())) 355 return; 356 357 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 358 Value *NewLoad = generateScalarLoad(Load, BBMap, GlobalMap, LTS); 359 // Compute NewLoad before its insertion in BBMap to make the insertion 360 // deterministic. 361 BBMap[Load] = NewLoad; 362 return; 363 } 364 365 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 366 Value *NewStore = generateScalarStore(Store, BBMap, GlobalMap, LTS); 367 // Compute NewStore before its insertion in BBMap to make the insertion 368 // deterministic. 369 BBMap[Store] = NewStore; 370 return; 371 } 372 373 copyInstScalar(Inst, BBMap, GlobalMap, LTS); 374 } 375 376 void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT <S) { 377 BasicBlock *BB = Statement.getBasicBlock(); 378 BasicBlock *CopyBB = 379 SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P); 380 CopyBB->setName("polly.stmt." + BB->getName()); 381 Builder.SetInsertPoint(CopyBB->begin()); 382 383 ValueMapT BBMap; 384 385 for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; 386 ++II) 387 copyInstruction(II, BBMap, GlobalMap, LTS); 388 } 389 390 VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B, 391 VectorValueMapT &GlobalMaps, 392 std::vector<LoopToScevMapT> &VLTS, 393 ScopStmt &Stmt, 394 __isl_keep isl_map *Schedule, 395 Pass *P) 396 : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), VLTS(VLTS), 397 Schedule(Schedule) { 398 assert(GlobalMaps.size() > 1 && "Only one vector lane found"); 399 assert(Schedule && "No statement domain provided"); 400 } 401 402 Value *VectorBlockGenerator::getVectorValue(const Value *Old, 403 ValueMapT &VectorMap, 404 VectorValueMapT &ScalarMaps, 405 Loop *L) { 406 if (Value *NewValue = VectorMap.lookup(Old)) 407 return NewValue; 408 409 int Width = getVectorWidth(); 410 411 Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width)); 412 413 for (int Lane = 0; Lane < Width; Lane++) 414 Vector = Builder.CreateInsertElement( 415 Vector, 416 getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L), 417 Builder.getInt32(Lane)); 418 419 VectorMap[Old] = Vector; 420 421 return Vector; 422 } 423 424 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) { 425 PointerType *PointerTy = dyn_cast<PointerType>(Val->getType()); 426 assert(PointerTy && "PointerType expected"); 427 428 Type *ScalarType = PointerTy->getElementType(); 429 VectorType *VectorType = VectorType::get(ScalarType, Width); 430 431 return PointerType::getUnqual(VectorType); 432 } 433 434 Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load, 435 ValueMapT &BBMap) { 436 const Value *Pointer = Load->getPointerOperand(); 437 Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth()); 438 Value *NewPointer = 439 getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load)); 440 Value *VectorPtr = 441 Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); 442 LoadInst *VecLoad = 443 Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full"); 444 if (!Aligned) 445 VecLoad->setAlignment(8); 446 447 return VecLoad; 448 } 449 450 Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load, 451 ValueMapT &BBMap) { 452 const Value *Pointer = Load->getPointerOperand(); 453 Type *VectorPtrType = getVectorPtrTy(Pointer, 1); 454 Value *NewPointer = 455 getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load)); 456 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 457 Load->getName() + "_p_vec_p"); 458 LoadInst *ScalarLoad = 459 Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one"); 460 461 if (!Aligned) 462 ScalarLoad->setAlignment(8); 463 464 Constant *SplatVector = Constant::getNullValue( 465 VectorType::get(Builder.getInt32Ty(), getVectorWidth())); 466 467 Value *VectorLoad = Builder.CreateShuffleVector( 468 ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat"); 469 return VectorLoad; 470 } 471 472 Value * 473 VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load, 474 VectorValueMapT &ScalarMaps) { 475 int VectorWidth = getVectorWidth(); 476 const Value *Pointer = Load->getPointerOperand(); 477 VectorType *VectorType = VectorType::get( 478 dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth); 479 480 Value *Vector = UndefValue::get(VectorType); 481 482 for (int i = 0; i < VectorWidth; i++) { 483 Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i], 484 VLTS[i], getLoopForInst(Load)); 485 Value *ScalarLoad = 486 Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_"); 487 Vector = Builder.CreateInsertElement( 488 Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_"); 489 } 490 491 return Vector; 492 } 493 494 void VectorBlockGenerator::generateLoad(const LoadInst *Load, 495 ValueMapT &VectorMap, 496 VectorValueMapT &ScalarMaps) { 497 if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL || 498 !VectorType::isValidElementType(Load->getType())) { 499 for (int i = 0; i < getVectorWidth(); i++) 500 ScalarMaps[i][Load] = 501 generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]); 502 return; 503 } 504 505 const MemoryAccess &Access = Statement.getAccessFor(Load); 506 507 Value *NewLoad; 508 if (Access.isStrideZero(isl_map_copy(Schedule))) 509 NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]); 510 else if (Access.isStrideOne(isl_map_copy(Schedule))) 511 NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]); 512 else 513 NewLoad = generateUnknownStrideLoad(Load, ScalarMaps); 514 515 VectorMap[Load] = NewLoad; 516 } 517 518 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst, 519 ValueMapT &VectorMap, 520 VectorValueMapT &ScalarMaps) { 521 int VectorWidth = getVectorWidth(); 522 Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps, 523 getLoopForInst(Inst)); 524 525 assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction"); 526 527 const CastInst *Cast = dyn_cast<CastInst>(Inst); 528 VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth); 529 VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType); 530 } 531 532 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst, 533 ValueMapT &VectorMap, 534 VectorValueMapT &ScalarMaps) { 535 Loop *L = getLoopForInst(Inst); 536 Value *OpZero = Inst->getOperand(0); 537 Value *OpOne = Inst->getOperand(1); 538 539 Value *NewOpZero, *NewOpOne; 540 NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps, L); 541 NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps, L); 542 543 Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne, 544 Inst->getName() + "p_vec"); 545 VectorMap[Inst] = NewInst; 546 } 547 548 void VectorBlockGenerator::copyStore(const StoreInst *Store, 549 ValueMapT &VectorMap, 550 VectorValueMapT &ScalarMaps) { 551 int VectorWidth = getVectorWidth(); 552 553 const MemoryAccess &Access = Statement.getAccessFor(Store); 554 555 const Value *Pointer = Store->getPointerOperand(); 556 Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap, 557 ScalarMaps, getLoopForInst(Store)); 558 559 if (Access.isStrideOne(isl_map_copy(Schedule))) { 560 Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth); 561 Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0], 562 VLTS[0], getLoopForInst(Store)); 563 564 Value *VectorPtr = 565 Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); 566 StoreInst *Store = Builder.CreateStore(Vector, VectorPtr); 567 568 if (!Aligned) 569 Store->setAlignment(8); 570 } else { 571 for (unsigned i = 0; i < ScalarMaps.size(); i++) { 572 Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i)); 573 Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i], 574 VLTS[i], getLoopForInst(Store)); 575 Builder.CreateStore(Scalar, NewPointer); 576 } 577 } 578 } 579 580 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst, 581 ValueMapT &VectorMap) { 582 for (Instruction::const_op_iterator OI = Inst->op_begin(), 583 OE = Inst->op_end(); 584 OI != OE; ++OI) 585 if (VectorMap.count(*OI)) 586 return true; 587 return false; 588 } 589 590 bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst, 591 ValueMapT &VectorMap, 592 VectorValueMapT &ScalarMaps) { 593 bool HasVectorOperand = false; 594 int VectorWidth = getVectorWidth(); 595 596 for (Instruction::const_op_iterator OI = Inst->op_begin(), 597 OE = Inst->op_end(); 598 OI != OE; ++OI) { 599 ValueMapT::iterator VecOp = VectorMap.find(*OI); 600 601 if (VecOp == VectorMap.end()) 602 continue; 603 604 HasVectorOperand = true; 605 Value *NewVector = VecOp->second; 606 607 for (int i = 0; i < VectorWidth; ++i) { 608 ValueMapT &SM = ScalarMaps[i]; 609 610 // If there is one scalar extracted, all scalar elements should have 611 // already been extracted by the code here. So no need to check for the 612 // existance of all of them. 613 if (SM.count(*OI)) 614 break; 615 616 SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i)); 617 } 618 } 619 620 return HasVectorOperand; 621 } 622 623 void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst, 624 ValueMapT &VectorMap, 625 VectorValueMapT &ScalarMaps) { 626 bool HasVectorOperand; 627 int VectorWidth = getVectorWidth(); 628 629 HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps); 630 631 for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) 632 copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane], 633 VLTS[VectorLane]); 634 635 if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand) 636 return; 637 638 // Make the result available as vector value. 639 VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth); 640 Value *Vector = UndefValue::get(VectorType); 641 642 for (int i = 0; i < VectorWidth; i++) 643 Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst], 644 Builder.getInt32(i)); 645 646 VectorMap[Inst] = Vector; 647 } 648 649 int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); } 650 651 void VectorBlockGenerator::copyInstruction(const Instruction *Inst, 652 ValueMapT &VectorMap, 653 VectorValueMapT &ScalarMaps) { 654 // Terminator instructions control the control flow. They are explicitly 655 // expressed in the clast and do not need to be copied. 656 if (Inst->isTerminator()) 657 return; 658 659 if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE, 660 &Statement.getParent()->getRegion())) 661 return; 662 663 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 664 generateLoad(Load, VectorMap, ScalarMaps); 665 return; 666 } 667 668 if (hasVectorOperands(Inst, VectorMap)) { 669 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 670 copyStore(Store, VectorMap, ScalarMaps); 671 return; 672 } 673 674 if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) { 675 copyUnaryInst(Unary, VectorMap, ScalarMaps); 676 return; 677 } 678 679 if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) { 680 copyBinaryInst(Binary, VectorMap, ScalarMaps); 681 return; 682 } 683 684 // Falltrough: We generate scalar instructions, if we don't know how to 685 // generate vector code. 686 } 687 688 copyInstScalarized(Inst, VectorMap, ScalarMaps); 689 } 690 691 void VectorBlockGenerator::copyBB() { 692 BasicBlock *BB = Statement.getBasicBlock(); 693 BasicBlock *CopyBB = 694 SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P); 695 CopyBB->setName("polly.stmt." + BB->getName()); 696 Builder.SetInsertPoint(CopyBB->begin()); 697 698 // Create two maps that store the mapping from the original instructions of 699 // the old basic block to their copies in the new basic block. Those maps 700 // are basic block local. 701 // 702 // As vector code generation is supported there is one map for scalar values 703 // and one for vector values. 704 // 705 // In case we just do scalar code generation, the vectorMap is not used and 706 // the scalarMap has just one dimension, which contains the mapping. 707 // 708 // In case vector code generation is done, an instruction may either appear 709 // in the vector map once (as it is calculating >vectorwidth< values at a 710 // time. Or (if the values are calculated using scalar operations), it 711 // appears once in every dimension of the scalarMap. 712 VectorValueMapT ScalarBlockMap(getVectorWidth()); 713 ValueMapT VectorBlockMap; 714 715 for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; 716 ++II) 717 copyInstruction(II, VectorBlockMap, ScalarBlockMap); 718 } 719