1 //===- Scalarizer.cpp - Scalarize vector operations -----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass converts vector operations into scalar operations, in order 11 // to expose optimization opportunities on the individual scalar operations. 12 // It is mainly intended for targets that do not have vector units, but it 13 // may also be useful for revectorizing code to different vector widths. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/Twine.h" 19 #include "llvm/Analysis/VectorUtils.h" 20 #include "llvm/IR/Argument.h" 21 #include "llvm/IR/BasicBlock.h" 22 #include "llvm/IR/Constants.h" 23 #include "llvm/IR/DataLayout.h" 24 #include "llvm/IR/DerivedTypes.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/IR/IRBuilder.h" 27 #include "llvm/IR/InstVisitor.h" 28 #include "llvm/IR/InstrTypes.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Intrinsics.h" 32 #include "llvm/IR/LLVMContext.h" 33 #include "llvm/IR/Module.h" 34 #include "llvm/IR/Type.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/Pass.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/Options.h" 40 #include "llvm/Transforms/Scalar.h" 41 #include <cassert> 42 #include <cstdint> 43 #include <iterator> 44 #include <map> 45 #include <utility> 46 47 using namespace llvm; 48 49 #define DEBUG_TYPE "scalarizer" 50 51 namespace { 52 53 // Used to store the scattered form of a vector. 54 using ValueVector = SmallVector<Value *, 8>; 55 56 // Used to map a vector Value to its scattered form. We use std::map 57 // because we want iterators to persist across insertion and because the 58 // values are relatively large. 59 using ScatterMap = std::map<Value *, ValueVector>; 60 61 // Lists Instructions that have been replaced with scalar implementations, 62 // along with a pointer to their scattered forms. 63 using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>; 64 65 // Provides a very limited vector-like interface for lazily accessing one 66 // component of a scattered vector or vector pointer. 67 class Scatterer { 68 public: 69 Scatterer() = default; 70 71 // Scatter V into Size components. If new instructions are needed, 72 // insert them before BBI in BB. If Cache is nonnull, use it to cache 73 // the results. 74 Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, 75 ValueVector *cachePtr = nullptr); 76 77 // Return component I, creating a new Value for it if necessary. 78 Value *operator[](unsigned I); 79 80 // Return the number of components. 81 unsigned size() const { return Size; } 82 83 private: 84 BasicBlock *BB; 85 BasicBlock::iterator BBI; 86 Value *V; 87 ValueVector *CachePtr; 88 PointerType *PtrTy; 89 ValueVector Tmp; 90 unsigned Size; 91 }; 92 93 // FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp 94 // called Name that compares X and Y in the same way as FCI. 95 struct FCmpSplitter { 96 FCmpSplitter(FCmpInst &fci) : FCI(fci) {} 97 98 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, 99 const Twine &Name) const { 100 return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name); 101 } 102 103 FCmpInst &FCI; 104 }; 105 106 // ICmpSpliiter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp 107 // called Name that compares X and Y in the same way as ICI. 108 struct ICmpSplitter { 109 ICmpSplitter(ICmpInst &ici) : ICI(ici) {} 110 111 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, 112 const Twine &Name) const { 113 return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name); 114 } 115 116 ICmpInst &ICI; 117 }; 118 119 // BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create 120 // a binary operator like BO called Name with operands X and Y. 121 struct BinarySplitter { 122 BinarySplitter(BinaryOperator &bo) : BO(bo) {} 123 124 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, 125 const Twine &Name) const { 126 return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name); 127 } 128 129 BinaryOperator &BO; 130 }; 131 132 // Information about a load or store that we're scalarizing. 133 struct VectorLayout { 134 VectorLayout() = default; 135 136 // Return the alignment of element I. 137 uint64_t getElemAlign(unsigned I) { 138 return MinAlign(VecAlign, I * ElemSize); 139 } 140 141 // The type of the vector. 142 VectorType *VecTy = nullptr; 143 144 // The type of each element. 145 Type *ElemTy = nullptr; 146 147 // The alignment of the vector. 148 uint64_t VecAlign = 0; 149 150 // The size of each element. 151 uint64_t ElemSize = 0; 152 }; 153 154 class Scalarizer : public FunctionPass, 155 public InstVisitor<Scalarizer, bool> { 156 public: 157 static char ID; 158 159 Scalarizer() : FunctionPass(ID) { 160 initializeScalarizerPass(*PassRegistry::getPassRegistry()); 161 } 162 163 bool doInitialization(Module &M) override; 164 bool runOnFunction(Function &F) override; 165 166 // InstVisitor methods. They return true if the instruction was scalarized, 167 // false if nothing changed. 168 bool visitInstruction(Instruction &I) { return false; } 169 bool visitSelectInst(SelectInst &SI); 170 bool visitICmpInst(ICmpInst &ICI); 171 bool visitFCmpInst(FCmpInst &FCI); 172 bool visitBinaryOperator(BinaryOperator &BO); 173 bool visitGetElementPtrInst(GetElementPtrInst &GEPI); 174 bool visitCastInst(CastInst &CI); 175 bool visitBitCastInst(BitCastInst &BCI); 176 bool visitShuffleVectorInst(ShuffleVectorInst &SVI); 177 bool visitPHINode(PHINode &PHI); 178 bool visitLoadInst(LoadInst &LI); 179 bool visitStoreInst(StoreInst &SI); 180 bool visitCallInst(CallInst &ICI); 181 182 static void registerOptions() { 183 // This is disabled by default because having separate loads and stores 184 // makes it more likely that the -combiner-alias-analysis limits will be 185 // reached. 186 OptionRegistry::registerOption<bool, Scalarizer, 187 &Scalarizer::ScalarizeLoadStore>( 188 "scalarize-load-store", 189 "Allow the scalarizer pass to scalarize loads and store", false); 190 } 191 192 private: 193 Scatterer scatter(Instruction *Point, Value *V); 194 void gather(Instruction *Op, const ValueVector &CV); 195 bool canTransferMetadata(unsigned Kind); 196 void transferMetadata(Instruction *Op, const ValueVector &CV); 197 bool getVectorLayout(Type *Ty, unsigned Alignment, VectorLayout &Layout, 198 const DataLayout &DL); 199 bool finish(); 200 201 template<typename T> bool splitBinary(Instruction &, const T &); 202 203 bool splitCall(CallInst &CI); 204 205 ScatterMap Scattered; 206 GatherList Gathered; 207 unsigned ParallelLoopAccessMDKind; 208 bool ScalarizeLoadStore; 209 }; 210 211 } // end anonymous namespace 212 213 char Scalarizer::ID = 0; 214 215 INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer", 216 "Scalarize vector operations", false, false) 217 218 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, 219 ValueVector *cachePtr) 220 : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) { 221 Type *Ty = V->getType(); 222 PtrTy = dyn_cast<PointerType>(Ty); 223 if (PtrTy) 224 Ty = PtrTy->getElementType(); 225 Size = Ty->getVectorNumElements(); 226 if (!CachePtr) 227 Tmp.resize(Size, nullptr); 228 else if (CachePtr->empty()) 229 CachePtr->resize(Size, nullptr); 230 else 231 assert(Size == CachePtr->size() && "Inconsistent vector sizes"); 232 } 233 234 // Return component I, creating a new Value for it if necessary. 235 Value *Scatterer::operator[](unsigned I) { 236 ValueVector &CV = (CachePtr ? *CachePtr : Tmp); 237 // Try to reuse a previous value. 238 if (CV[I]) 239 return CV[I]; 240 IRBuilder<> Builder(BB, BBI); 241 if (PtrTy) { 242 if (!CV[0]) { 243 Type *Ty = 244 PointerType::get(PtrTy->getElementType()->getVectorElementType(), 245 PtrTy->getAddressSpace()); 246 CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0"); 247 } 248 if (I != 0) 249 CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I, 250 V->getName() + ".i" + Twine(I)); 251 } else { 252 // Search through a chain of InsertElementInsts looking for element I. 253 // Record other elements in the cache. The new V is still suitable 254 // for all uncached indices. 255 while (true) { 256 InsertElementInst *Insert = dyn_cast<InsertElementInst>(V); 257 if (!Insert) 258 break; 259 ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2)); 260 if (!Idx) 261 break; 262 unsigned J = Idx->getZExtValue(); 263 V = Insert->getOperand(0); 264 if (I == J) { 265 CV[J] = Insert->getOperand(1); 266 return CV[J]; 267 } else if (!CV[J]) { 268 // Only cache the first entry we find for each index we're not actively 269 // searching for. This prevents us from going too far up the chain and 270 // caching incorrect entries. 271 CV[J] = Insert->getOperand(1); 272 } 273 } 274 CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I), 275 V->getName() + ".i" + Twine(I)); 276 } 277 return CV[I]; 278 } 279 280 bool Scalarizer::doInitialization(Module &M) { 281 ParallelLoopAccessMDKind = 282 M.getContext().getMDKindID("llvm.mem.parallel_loop_access"); 283 ScalarizeLoadStore = 284 M.getContext().getOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>(); 285 return false; 286 } 287 288 bool Scalarizer::runOnFunction(Function &F) { 289 if (skipFunction(F)) 290 return false; 291 assert(Gathered.empty() && Scattered.empty()); 292 for (BasicBlock &BB : F) { 293 for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) { 294 Instruction *I = &*II; 295 bool Done = visit(I); 296 ++II; 297 if (Done && I->getType()->isVoidTy()) 298 I->eraseFromParent(); 299 } 300 } 301 return finish(); 302 } 303 304 // Return a scattered form of V that can be accessed by Point. V must be a 305 // vector or a pointer to a vector. 306 Scatterer Scalarizer::scatter(Instruction *Point, Value *V) { 307 if (Argument *VArg = dyn_cast<Argument>(V)) { 308 // Put the scattered form of arguments in the entry block, 309 // so that it can be used everywhere. 310 Function *F = VArg->getParent(); 311 BasicBlock *BB = &F->getEntryBlock(); 312 return Scatterer(BB, BB->begin(), V, &Scattered[V]); 313 } 314 if (Instruction *VOp = dyn_cast<Instruction>(V)) { 315 // Put the scattered form of an instruction directly after the 316 // instruction. 317 BasicBlock *BB = VOp->getParent(); 318 return Scatterer(BB, std::next(BasicBlock::iterator(VOp)), 319 V, &Scattered[V]); 320 } 321 // In the fallback case, just put the scattered before Point and 322 // keep the result local to Point. 323 return Scatterer(Point->getParent(), Point->getIterator(), V); 324 } 325 326 // Replace Op with the gathered form of the components in CV. Defer the 327 // deletion of Op and creation of the gathered form to the end of the pass, 328 // so that we can avoid creating the gathered form if all uses of Op are 329 // replaced with uses of CV. 330 void Scalarizer::gather(Instruction *Op, const ValueVector &CV) { 331 // Since we're not deleting Op yet, stub out its operands, so that it 332 // doesn't make anything live unnecessarily. 333 for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) 334 Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType())); 335 336 transferMetadata(Op, CV); 337 338 // If we already have a scattered form of Op (created from ExtractElements 339 // of Op itself), replace them with the new form. 340 ValueVector &SV = Scattered[Op]; 341 if (!SV.empty()) { 342 for (unsigned I = 0, E = SV.size(); I != E; ++I) { 343 Value *V = SV[I]; 344 if (V == nullptr) 345 continue; 346 347 Instruction *Old = cast<Instruction>(V); 348 CV[I]->takeName(Old); 349 Old->replaceAllUsesWith(CV[I]); 350 Old->eraseFromParent(); 351 } 352 } 353 SV = CV; 354 Gathered.push_back(GatherList::value_type(Op, &SV)); 355 } 356 357 // Return true if it is safe to transfer the given metadata tag from 358 // vector to scalar instructions. 359 bool Scalarizer::canTransferMetadata(unsigned Tag) { 360 return (Tag == LLVMContext::MD_tbaa 361 || Tag == LLVMContext::MD_fpmath 362 || Tag == LLVMContext::MD_tbaa_struct 363 || Tag == LLVMContext::MD_invariant_load 364 || Tag == LLVMContext::MD_alias_scope 365 || Tag == LLVMContext::MD_noalias 366 || Tag == ParallelLoopAccessMDKind); 367 } 368 369 // Transfer metadata from Op to the instructions in CV if it is known 370 // to be safe to do so. 371 void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) { 372 SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; 373 Op->getAllMetadataOtherThanDebugLoc(MDs); 374 for (unsigned I = 0, E = CV.size(); I != E; ++I) { 375 if (Instruction *New = dyn_cast<Instruction>(CV[I])) { 376 for (const auto &MD : MDs) 377 if (canTransferMetadata(MD.first)) 378 New->setMetadata(MD.first, MD.second); 379 if (Op->getDebugLoc() && !New->getDebugLoc()) 380 New->setDebugLoc(Op->getDebugLoc()); 381 } 382 } 383 } 384 385 // Try to fill in Layout from Ty, returning true on success. Alignment is 386 // the alignment of the vector, or 0 if the ABI default should be used. 387 bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment, 388 VectorLayout &Layout, const DataLayout &DL) { 389 // Make sure we're dealing with a vector. 390 Layout.VecTy = dyn_cast<VectorType>(Ty); 391 if (!Layout.VecTy) 392 return false; 393 394 // Check that we're dealing with full-byte elements. 395 Layout.ElemTy = Layout.VecTy->getElementType(); 396 if (DL.getTypeSizeInBits(Layout.ElemTy) != 397 DL.getTypeStoreSizeInBits(Layout.ElemTy)) 398 return false; 399 400 if (Alignment) 401 Layout.VecAlign = Alignment; 402 else 403 Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy); 404 Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy); 405 return true; 406 } 407 408 // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name) 409 // to create an instruction like I with operands X and Y and name Name. 410 template<typename Splitter> 411 bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { 412 VectorType *VT = dyn_cast<VectorType>(I.getType()); 413 if (!VT) 414 return false; 415 416 unsigned NumElems = VT->getNumElements(); 417 IRBuilder<> Builder(&I); 418 Scatterer Op0 = scatter(&I, I.getOperand(0)); 419 Scatterer Op1 = scatter(&I, I.getOperand(1)); 420 assert(Op0.size() == NumElems && "Mismatched binary operation"); 421 assert(Op1.size() == NumElems && "Mismatched binary operation"); 422 ValueVector Res; 423 Res.resize(NumElems); 424 for (unsigned Elem = 0; Elem < NumElems; ++Elem) 425 Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem], 426 I.getName() + ".i" + Twine(Elem)); 427 gather(&I, Res); 428 return true; 429 } 430 431 static bool isTriviallyScalariable(Intrinsic::ID ID) { 432 return isTriviallyVectorizable(ID); 433 } 434 435 // All of the current scalarizable intrinsics only have one mangled type. 436 static Function *getScalarIntrinsicDeclaration(Module *M, 437 Intrinsic::ID ID, 438 VectorType *Ty) { 439 return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() }); 440 } 441 442 /// If a call to a vector typed intrinsic function, split into a scalar call per 443 /// element if possible for the intrinsic. 444 bool Scalarizer::splitCall(CallInst &CI) { 445 VectorType *VT = dyn_cast<VectorType>(CI.getType()); 446 if (!VT) 447 return false; 448 449 Function *F = CI.getCalledFunction(); 450 if (!F) 451 return false; 452 453 Intrinsic::ID ID = F->getIntrinsicID(); 454 if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID)) 455 return false; 456 457 unsigned NumElems = VT->getNumElements(); 458 unsigned NumArgs = CI.getNumArgOperands(); 459 460 ValueVector ScalarOperands(NumArgs); 461 SmallVector<Scatterer, 8> Scattered(NumArgs); 462 463 Scattered.resize(NumArgs); 464 465 // Assumes that any vector type has the same number of elements as the return 466 // vector type, which is true for all current intrinsics. 467 for (unsigned I = 0; I != NumArgs; ++I) { 468 Value *OpI = CI.getOperand(I); 469 if (OpI->getType()->isVectorTy()) { 470 Scattered[I] = scatter(&CI, OpI); 471 assert(Scattered[I].size() == NumElems && "mismatched call operands"); 472 } else { 473 ScalarOperands[I] = OpI; 474 } 475 } 476 477 ValueVector Res(NumElems); 478 ValueVector ScalarCallOps(NumArgs); 479 480 Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT); 481 IRBuilder<> Builder(&CI); 482 483 // Perform actual scalarization, taking care to preserve any scalar operands. 484 for (unsigned Elem = 0; Elem < NumElems; ++Elem) { 485 ScalarCallOps.clear(); 486 487 for (unsigned J = 0; J != NumArgs; ++J) { 488 if (hasVectorInstrinsicScalarOpd(ID, J)) 489 ScalarCallOps.push_back(ScalarOperands[J]); 490 else 491 ScalarCallOps.push_back(Scattered[J][Elem]); 492 } 493 494 Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps, 495 CI.getName() + ".i" + Twine(Elem)); 496 } 497 498 gather(&CI, Res); 499 return true; 500 } 501 502 bool Scalarizer::visitSelectInst(SelectInst &SI) { 503 VectorType *VT = dyn_cast<VectorType>(SI.getType()); 504 if (!VT) 505 return false; 506 507 unsigned NumElems = VT->getNumElements(); 508 IRBuilder<> Builder(&SI); 509 Scatterer Op1 = scatter(&SI, SI.getOperand(1)); 510 Scatterer Op2 = scatter(&SI, SI.getOperand(2)); 511 assert(Op1.size() == NumElems && "Mismatched select"); 512 assert(Op2.size() == NumElems && "Mismatched select"); 513 ValueVector Res; 514 Res.resize(NumElems); 515 516 if (SI.getOperand(0)->getType()->isVectorTy()) { 517 Scatterer Op0 = scatter(&SI, SI.getOperand(0)); 518 assert(Op0.size() == NumElems && "Mismatched select"); 519 for (unsigned I = 0; I < NumElems; ++I) 520 Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I], 521 SI.getName() + ".i" + Twine(I)); 522 } else { 523 Value *Op0 = SI.getOperand(0); 524 for (unsigned I = 0; I < NumElems; ++I) 525 Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I], 526 SI.getName() + ".i" + Twine(I)); 527 } 528 gather(&SI, Res); 529 return true; 530 } 531 532 bool Scalarizer::visitICmpInst(ICmpInst &ICI) { 533 return splitBinary(ICI, ICmpSplitter(ICI)); 534 } 535 536 bool Scalarizer::visitFCmpInst(FCmpInst &FCI) { 537 return splitBinary(FCI, FCmpSplitter(FCI)); 538 } 539 540 bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) { 541 return splitBinary(BO, BinarySplitter(BO)); 542 } 543 544 bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { 545 VectorType *VT = dyn_cast<VectorType>(GEPI.getType()); 546 if (!VT) 547 return false; 548 549 IRBuilder<> Builder(&GEPI); 550 unsigned NumElems = VT->getNumElements(); 551 unsigned NumIndices = GEPI.getNumIndices(); 552 553 // The base pointer might be scalar even if it's a vector GEP. In those cases, 554 // splat the pointer into a vector value, and scatter that vector. 555 Value *Op0 = GEPI.getOperand(0); 556 if (!Op0->getType()->isVectorTy()) 557 Op0 = Builder.CreateVectorSplat(NumElems, Op0); 558 Scatterer Base = scatter(&GEPI, Op0); 559 560 SmallVector<Scatterer, 8> Ops; 561 Ops.resize(NumIndices); 562 for (unsigned I = 0; I < NumIndices; ++I) { 563 Value *Op = GEPI.getOperand(I + 1); 564 565 // The indices might be scalars even if it's a vector GEP. In those cases, 566 // splat the scalar into a vector value, and scatter that vector. 567 if (!Op->getType()->isVectorTy()) 568 Op = Builder.CreateVectorSplat(NumElems, Op); 569 570 Ops[I] = scatter(&GEPI, Op); 571 } 572 573 ValueVector Res; 574 Res.resize(NumElems); 575 for (unsigned I = 0; I < NumElems; ++I) { 576 SmallVector<Value *, 8> Indices; 577 Indices.resize(NumIndices); 578 for (unsigned J = 0; J < NumIndices; ++J) 579 Indices[J] = Ops[J][I]; 580 Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices, 581 GEPI.getName() + ".i" + Twine(I)); 582 if (GEPI.isInBounds()) 583 if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I])) 584 NewGEPI->setIsInBounds(); 585 } 586 gather(&GEPI, Res); 587 return true; 588 } 589 590 bool Scalarizer::visitCastInst(CastInst &CI) { 591 VectorType *VT = dyn_cast<VectorType>(CI.getDestTy()); 592 if (!VT) 593 return false; 594 595 unsigned NumElems = VT->getNumElements(); 596 IRBuilder<> Builder(&CI); 597 Scatterer Op0 = scatter(&CI, CI.getOperand(0)); 598 assert(Op0.size() == NumElems && "Mismatched cast"); 599 ValueVector Res; 600 Res.resize(NumElems); 601 for (unsigned I = 0; I < NumElems; ++I) 602 Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(), 603 CI.getName() + ".i" + Twine(I)); 604 gather(&CI, Res); 605 return true; 606 } 607 608 bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { 609 VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy()); 610 VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy()); 611 if (!DstVT || !SrcVT) 612 return false; 613 614 unsigned DstNumElems = DstVT->getNumElements(); 615 unsigned SrcNumElems = SrcVT->getNumElements(); 616 IRBuilder<> Builder(&BCI); 617 Scatterer Op0 = scatter(&BCI, BCI.getOperand(0)); 618 ValueVector Res; 619 Res.resize(DstNumElems); 620 621 if (DstNumElems == SrcNumElems) { 622 for (unsigned I = 0; I < DstNumElems; ++I) 623 Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(), 624 BCI.getName() + ".i" + Twine(I)); 625 } else if (DstNumElems > SrcNumElems) { 626 // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the 627 // individual elements to the destination. 628 unsigned FanOut = DstNumElems / SrcNumElems; 629 Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut); 630 unsigned ResI = 0; 631 for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) { 632 Value *V = Op0[Op0I]; 633 Instruction *VI; 634 // Look through any existing bitcasts before converting to <N x t2>. 635 // In the best case, the resulting conversion might be a no-op. 636 while ((VI = dyn_cast<Instruction>(V)) && 637 VI->getOpcode() == Instruction::BitCast) 638 V = VI->getOperand(0); 639 V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast"); 640 Scatterer Mid = scatter(&BCI, V); 641 for (unsigned MidI = 0; MidI < FanOut; ++MidI) 642 Res[ResI++] = Mid[MidI]; 643 } 644 } else { 645 // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2. 646 unsigned FanIn = SrcNumElems / DstNumElems; 647 Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn); 648 unsigned Op0I = 0; 649 for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) { 650 Value *V = UndefValue::get(MidTy); 651 for (unsigned MidI = 0; MidI < FanIn; ++MidI) 652 V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI), 653 BCI.getName() + ".i" + Twine(ResI) 654 + ".upto" + Twine(MidI)); 655 Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(), 656 BCI.getName() + ".i" + Twine(ResI)); 657 } 658 } 659 gather(&BCI, Res); 660 return true; 661 } 662 663 bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 664 VectorType *VT = dyn_cast<VectorType>(SVI.getType()); 665 if (!VT) 666 return false; 667 668 unsigned NumElems = VT->getNumElements(); 669 Scatterer Op0 = scatter(&SVI, SVI.getOperand(0)); 670 Scatterer Op1 = scatter(&SVI, SVI.getOperand(1)); 671 ValueVector Res; 672 Res.resize(NumElems); 673 674 for (unsigned I = 0; I < NumElems; ++I) { 675 int Selector = SVI.getMaskValue(I); 676 if (Selector < 0) 677 Res[I] = UndefValue::get(VT->getElementType()); 678 else if (unsigned(Selector) < Op0.size()) 679 Res[I] = Op0[Selector]; 680 else 681 Res[I] = Op1[Selector - Op0.size()]; 682 } 683 gather(&SVI, Res); 684 return true; 685 } 686 687 bool Scalarizer::visitPHINode(PHINode &PHI) { 688 VectorType *VT = dyn_cast<VectorType>(PHI.getType()); 689 if (!VT) 690 return false; 691 692 unsigned NumElems = VT->getNumElements(); 693 IRBuilder<> Builder(&PHI); 694 ValueVector Res; 695 Res.resize(NumElems); 696 697 unsigned NumOps = PHI.getNumOperands(); 698 for (unsigned I = 0; I < NumElems; ++I) 699 Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps, 700 PHI.getName() + ".i" + Twine(I)); 701 702 for (unsigned I = 0; I < NumOps; ++I) { 703 Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I)); 704 BasicBlock *IncomingBlock = PHI.getIncomingBlock(I); 705 for (unsigned J = 0; J < NumElems; ++J) 706 cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock); 707 } 708 gather(&PHI, Res); 709 return true; 710 } 711 712 bool Scalarizer::visitLoadInst(LoadInst &LI) { 713 if (!ScalarizeLoadStore) 714 return false; 715 if (!LI.isSimple()) 716 return false; 717 718 VectorLayout Layout; 719 if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout, 720 LI.getModule()->getDataLayout())) 721 return false; 722 723 unsigned NumElems = Layout.VecTy->getNumElements(); 724 IRBuilder<> Builder(&LI); 725 Scatterer Ptr = scatter(&LI, LI.getPointerOperand()); 726 ValueVector Res; 727 Res.resize(NumElems); 728 729 for (unsigned I = 0; I < NumElems; ++I) 730 Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I), 731 LI.getName() + ".i" + Twine(I)); 732 gather(&LI, Res); 733 return true; 734 } 735 736 bool Scalarizer::visitStoreInst(StoreInst &SI) { 737 if (!ScalarizeLoadStore) 738 return false; 739 if (!SI.isSimple()) 740 return false; 741 742 VectorLayout Layout; 743 Value *FullValue = SI.getValueOperand(); 744 if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout, 745 SI.getModule()->getDataLayout())) 746 return false; 747 748 unsigned NumElems = Layout.VecTy->getNumElements(); 749 IRBuilder<> Builder(&SI); 750 Scatterer Ptr = scatter(&SI, SI.getPointerOperand()); 751 Scatterer Val = scatter(&SI, FullValue); 752 753 ValueVector Stores; 754 Stores.resize(NumElems); 755 for (unsigned I = 0; I < NumElems; ++I) { 756 unsigned Align = Layout.getElemAlign(I); 757 Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align); 758 } 759 transferMetadata(&SI, Stores); 760 return true; 761 } 762 763 bool Scalarizer::visitCallInst(CallInst &CI) { 764 return splitCall(CI); 765 } 766 767 // Delete the instructions that we scalarized. If a full vector result 768 // is still needed, recreate it using InsertElements. 769 bool Scalarizer::finish() { 770 // The presence of data in Gathered or Scattered indicates changes 771 // made to the Function. 772 if (Gathered.empty() && Scattered.empty()) 773 return false; 774 for (const auto &GMI : Gathered) { 775 Instruction *Op = GMI.first; 776 ValueVector &CV = *GMI.second; 777 if (!Op->use_empty()) { 778 // The value is still needed, so recreate it using a series of 779 // InsertElements. 780 Type *Ty = Op->getType(); 781 Value *Res = UndefValue::get(Ty); 782 BasicBlock *BB = Op->getParent(); 783 unsigned Count = Ty->getVectorNumElements(); 784 IRBuilder<> Builder(Op); 785 if (isa<PHINode>(Op)) 786 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 787 for (unsigned I = 0; I < Count; ++I) 788 Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I), 789 Op->getName() + ".upto" + Twine(I)); 790 Res->takeName(Op); 791 Op->replaceAllUsesWith(Res); 792 } 793 Op->eraseFromParent(); 794 } 795 Gathered.clear(); 796 Scattered.clear(); 797 return true; 798 } 799 800 FunctionPass *llvm::createScalarizerPass() { 801 return new Scalarizer(); 802 } 803