//===------- VectorCombine.cpp - Optimize partial vector operations -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass optimizes scalar/vector interactions using target cost models. The
// transforms implemented here may not fit in traditional loop-based or SLP
// vectorization passes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/VectorCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "vector-combine"
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

class VectorCombine {
public:
  VectorCombine(Function &F, const TargetTransformInfo &TTI,
                const DominatorTree &DT)
      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT) {}

  bool run();

private:
  Function &F;
  IRBuilder<> Builder;
  const TargetTransformInfo &TTI;
  const DominatorTree &DT;

  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             unsigned Opcode,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                     Instruction &I);
  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                       Instruction &I);
  bool foldExtractExtract(Instruction &I);
  bool foldBitcastShuf(Instruction &I);
  bool scalarizeBinopOrCmp(Instruction &I);
};

static void replaceValue(Value &Old, Value &New) {
  Old.replaceAllUsesWith(&New);
  New.takeName(&Old);
}

/// Compare the relative costs of 2 extracts followed by scalar operation vs.
/// vector operation(s) followed by extract. Return true if the existing
/// instructions are cheaper than a vector alternative. Otherwise, return false
/// and if one of the extracts should be transformed to a shufflevector, set
/// \p ConvertToShuffle to that extract instruction.
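///
/// As a rough sketch of the comparison (assuming one-use extracts from two
/// different vectors at the same index):
///   OldCost = Extract0Cost + Extract1Cost + ScalarOpCost
///   NewCost = VectorOpCost + CheapExtractCost
/// A shuffle cost is added to NewCost when the extract indexes differ.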
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          unsigned Opcode,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
         isa<ConstantInt>(Ext1->getOperand(1)) &&
         "Expected constant extract indexes");
  Type *ScalarTy = Ext0->getType();
  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
  int ScalarOpCost, VectorOpCost;

  // Get cost estimates for scalar and vector versions of the operation.
  bool IsBinOp = Instruction::isBinaryOp(Opcode);
  if (IsBinOp) {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  } else {
    assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
           "Expected a compare");
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
                                          CmpInst::makeCmpResultType(ScalarTy));
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
                                          CmpInst::makeCmpResultType(VecTy));
  }

  // Get cost estimates for the extract elements. These costs will factor into
  // both sequences.
  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();

  int Extract0Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
  int Extract1Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);

  // A more expensive extract will always be replaced by a splat shuffle.
  // For example, if Ext0 is more expensive:
  //   opcode (extelt V0, Ext0), (ext V1, Ext1) -->
  //   extelt (opcode (splat V0, Ext0), V1), Ext1
  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
  // check the cost of creating a broadcast shuffle and shuffling both
  // operands to element 0.
  int CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  // Extra uses of the extracts mean that we include those costs in the
  // vector total because those instructions will not be eliminated.
  int OldCost, NewCost;
  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
    // Handle a special case. If the 2 extracts are identical, adjust the
    // formulas to account for that. The extra use charge allows for either the
    // CSE'd pattern or an unoptimized form with identical values:
    //   opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    // Handle the general case. Each extract is actually a different value:
    //   opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }

  if (Ext0Index == Ext1Index) {
    // If the extract indexes are identical, no shuffle is needed.
    ConvertToShuffle = nullptr;
  } else {
    if (IsBinOp && DisableBinopExtractShuffle)
      return true;

    // If we are extracting from 2 different indexes, then one operand must be
    // shuffled before performing the vector operation. The shuffle mask is
    // undefined except for 1 lane that is being translated to the remaining
    // extraction lane. Therefore, it is a splat shuffle. Ex:
    //   ShufMask = { undef, undef, 0, undef }
    // TODO: The cost model has an option for a "broadcast" shuffle
    // (splat-from-element-0), but no option for a more general splat.
    NewCost +=
        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);

    // The more expensive extract will be replaced by a shuffle. If the costs
    // are equal and there is a preferred extract index, shuffle the opposite
    // operand. Otherwise, replace the extract with the higher index.
    if (Extract0Cost > Extract1Cost)
      ConvertToShuffle = Ext0;
    else if (Extract1Cost > Extract0Cost)
      ConvertToShuffle = Ext1;
    else if (PreferredExtractIndex == Ext0Index)
      ConvertToShuffle = Ext1;
    else if (PreferredExtractIndex == Ext1Index)
      ConvertToShuffle = Ext0;
    else
      ConvertToShuffle = Ext0Index > Ext1Index ? Ext0 : Ext1;
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization.
  // Codegen can reverse this transform (scalarize) if it was not profitable.
  return OldCost < NewCost;
}

/// Create a shuffle that translates (shifts) 1 element from the input vector
/// to a new element location.
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilder<> &Builder) {
  // The shuffle mask is undefined except for 1 lane that is being translated
  // to the new element index. Example for OldIndex == 2 and NewIndex == 0:
  //   ShufMask = { 2, undef, undef, undef }
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
  ShufMask[NewIndex] = OldIndex;
  Value *Undef = UndefValue::get(VecTy);
  return Builder.CreateShuffleVector(Vec, Undef, ShufMask, "shift");
}

/// Given an extract element instruction with constant index operand, shuffle
/// the source vector (shift the scalar element) to a NewIndex for extraction.
/// Return null if the input can be constant folded, so that we are not creating
/// unnecessary instructions.
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
                                            unsigned NewIndex,
                                            IRBuilder<> &Builder) {
  // If the extract can be constant-folded, this code is unsimplified. Defer
  // to other passes to handle that.
  Value *X = ExtElt->getVectorOperand();
  Value *C = ExtElt->getIndexOperand();
  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
  if (isa<Constant>(X))
    return nullptr;

  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
                                   NewIndex, Builder);
  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
}

/// Try to reduce extract element costs by converting scalar compares to vector
/// compares followed by extract.
/// cmp (ext0 V0, C), (ext1 V1, C)
void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
                                  ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<CmpInst>(&I) && "Expected a compare");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
  ++NumVecCmp;
  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
  Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
  replaceValue(I, *NewExt);
}

/// Try to reduce extract element costs by converting scalar binops to vector
/// binops followed by extract.
/// bo (ext0 V0, C), (ext1 V1, C)
void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
                                    ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
  ++NumVecBO;
  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
  Value *VecBO =
      Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);

  // All IR flags are safe to back-propagate because any potential poison
  // created in unused vector elements is discarded by the extract.
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);

  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
  replaceValue(I, *NewExt);
}

/// Match an instruction with extracted vector operands.
bool VectorCombine::foldExtractExtract(Instruction &I) {
  // It is not safe to transform things like div, urem, etc. because we may
  // create undefined behavior when executing those on unknown vector elements.
  if (!isSafeToSpeculativelyExecute(&I))
    return false;

  Instruction *I0, *I1;
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
      !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
    return false;

  Value *V0, *V1;
  uint64_t C0, C1;
  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
      !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
      V0->getType() != V1->getType())
    return false;

  // If the scalar value 'I' is going to be re-inserted into a vector, then try
  // to create an extract to that same element. The extract/insert can be
  // reduced to a "select shuffle".
  // TODO: If we add a larger pattern match that starts from an insert, this
  // probably becomes unnecessary.
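  // For example (hypothetical IR), if the only use of this binop is
  // "insertelement <4 x i32> %dst, i32 %b, i32 3", then InsertIndex becomes 3
  // and the cost model below prefers to keep the extraction at lane 3.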
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  uint64_t InsertIndex = std::numeric_limits<uint64_t>::max();
  if (I.hasOneUse())
    match(I.user_back(),
          m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));

  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), ExtractToChange,
                            InsertIndex))
    return false;

  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    ExtractElementInst *NewExtract =
        translateExtract(ExtractToChange, CheapExtractIdx, Builder);
    if (!NewExtract)
      return false;
    if (ExtractToChange == Ext0)
      Ext0 = NewExtract;
    else
      Ext1 = NewExtract;
  }

  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I);
  else
    foldExtExtBinop(Ext0, Ext1, I);

  return true;
}

/// If this is a bitcast of a shuffle, try to bitcast the source vector to the
/// destination type followed by shuffle. This can enable further transforms by
/// moving bitcasts or shuffles together.
bool VectorCombine::foldBitcastShuf(Instruction &I) {
  Value *V;
  ArrayRef<int> Mask;
  if (!match(&I, m_BitCast(
                     m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
    return false;

  // Disallow non-vector casts and length-changing shuffles.
  // TODO: We could allow any shuffle.
  auto *DestTy = dyn_cast<VectorType>(I.getType());
  auto *SrcTy = cast<VectorType>(V->getType());
  if (!DestTy || I.getOperand(0)->getType() != SrcTy)
    return false;

  // The new shuffle must not cost more than the old shuffle. The bitcast is
  // moved ahead of the shuffle, so assume that it has the same cost as before.
  if (TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy) >
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))
    return false;

  unsigned DestNumElts = DestTy->getNumElements();
  unsigned SrcNumElts = SrcTy->getNumElements();
  SmallVector<int, 16> NewMask;
  if (SrcNumElts <= DestNumElts) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can
    // always be expanded to the equivalent form choosing narrower elements.
    assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestNumElts / SrcNumElts;
    narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
  } else {
    // The bitcast is from narrow elements to wide elements. The shuffle mask
    // must choose consecutive elements to allow casting first.
    assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcNumElts / DestNumElts;
    if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
      return false;
  }
  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
  ++NumShufOfBitcast;
  Value *CastV = Builder.CreateBitCast(V, DestTy);
  Value *Shuf =
      Builder.CreateShuffleVector(CastV, UndefValue::get(DestTy), NewMask);
  replaceValue(I, *Shuf);
  return true;
}

/// Match a vector binop or compare instruction with at least one inserted
/// scalar operand and convert to scalar binop/cmp followed by insertelement.
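///
/// A sketch of the intended rewrite, using hypothetical IR (and subject to
/// the cost checks below):
///   %v = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %x, i32 1
///   %r = add <4 x i32> %v, <i32 10, i32 20, i32 30, i32 40>
/// becomes
///   %s = add i32 %x, 20
///   %r = insertelement <4 x i32> <i32 11, i32 22, i32 33, i32 44>, i32 %s, i32 1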
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  Value *Ins0, *Ins1;
  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
      !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
    return false;

  // Do not convert the vector condition of a vector select into a scalar
  // condition. That may cause problems for codegen because of differences in
  // boolean formats and register-file transfers.
  // TODO: Can we account for that in the cost model?
  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
  if (IsCmp)
    for (User *U : I.users())
      if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
        return false;

  // Match against one or both scalar values being inserted into constant
  // vectors:
  //   vec_op VecC0, (inselt VecC1, V1, Index)
  //   vec_op (inselt VecC0, V0, Index), VecC1
  //   vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
  // TODO: Deal with mismatched index constants and variable indexes?
  Constant *VecC0 = nullptr, *VecC1 = nullptr;
  Value *V0 = nullptr, *V1 = nullptr;
  uint64_t Index0 = 0, Index1 = 0;
  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
                               m_ConstantInt(Index0))) &&
      !match(Ins0, m_Constant(VecC0)))
    return false;
  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
                               m_ConstantInt(Index1))) &&
      !match(Ins1, m_Constant(VecC1)))
    return false;

  bool IsConst0 = !V0;
  bool IsConst1 = !V1;
  if (IsConst0 && IsConst1)
    return false;
  if (!IsConst0 && !IsConst1 && Index0 != Index1)
    return false;

  // Bail for single insertion if it is a load.
  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
  auto *I0 = dyn_cast_or_null<Instruction>(V0);
  auto *I1 = dyn_cast_or_null<Instruction>(V1);
  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
      (IsConst1 && I0 && I0->mayReadFromMemory()))
    return false;

  uint64_t Index = IsConst0 ? Index1 : Index0;
  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
  Type *VecTy = I.getType();
  assert(VecTy->isVectorTy() &&
         (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
         (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
          ScalarTy->isPointerTy()) &&
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  int ScalarOpCost, VectorOpCost;
  if (IsCmp) {
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
  } else {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  }

  // Get cost estimate for the insert element. This cost will factor into
  // both sequences.
  int InsertCost =
      TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
  int OldCost = (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) +
                VectorOpCost;
  int NewCost = ScalarOpCost + InsertCost +
                (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
                (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);

  // We want to scalarize unless the vector variant actually has lower cost.
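  // For example (with hypothetical TTI costs), if exactly one operand is a
  // constant vector and the insert has one use, this reduces to comparing
  // InsertCost + VectorOpCost against ScalarOpCost + InsertCost, so we
  // scalarize whenever ScalarOpCost <= VectorOpCost.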
  if (OldCost < NewCost)
    return false;

  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
  //   inselt NewVecC, (scalar_op V0, V1), Index
  if (IsCmp)
    ++NumScalarCmp;
  else
    ++NumScalarBO;

  // For constant cases, extract the scalar element; this should constant fold.
  if (IsConst0)
    V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
  if (IsConst1)
    V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));

  Value *Scalar =
      IsCmp ? Builder.CreateCmp(Pred, V0, V1)
            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);

  Scalar->setName(I.getName() + ".scalar");

  // All IR flags are safe to back-propagate. There is no potential for extra
  // poison to be created by the scalar instruction.
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);

  // Fold the vector constants in the original vectors into a new base vector.
  Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
                            : ConstantExpr::get(Opcode, VecC0, VecC1);
  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
  replaceValue(I, *Insert);
  return true;
}

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Do not delete instructions under here, as that would invalidate the
    // iterator.
    // Walk the block forwards to enable simple iterative chains of transforms.
    // TODO: It could be more efficient to remove dead instructions
    // iteratively in this loop rather than waiting until the end.
    for (Instruction &I : BB) {
      if (isa<DbgInfoIntrinsic>(I))
        continue;
      Builder.SetInsertPoint(&I);
      MadeChange |= foldExtractExtract(I);
      MadeChange |= foldBitcastShuf(I);
      MadeChange |= scalarizeBinopOrCmp(I);
    }
  }

  // We're done with transforms, so remove dead instructions.
  if (MadeChange)
    for (BasicBlock &BB : F)
      SimplifyInstructionsInBlock(&BB);

  return MadeChange;
}

// Pass manager boilerplate below here.

namespace {
class VectorCombineLegacyPass : public FunctionPass {
public:
  static char ID;
  VectorCombineLegacyPass() : FunctionPass(ID) {
    initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<AAResultsWrapperPass>();
    AU.addPreserved<BasicAAWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    VectorCombine Combiner(F, TTI, DT);
    return Combiner.run();
  }
};
} // namespace

char VectorCombineLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
                      "Optimize scalar/vector ops", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
                    "Optimize scalar/vector ops", false, false)
Pass *llvm::createVectorCombinePass() {
  return new VectorCombineLegacyPass();
}

PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  VectorCombine Combiner(F, TTI, DT);
  if (!Combiner.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  PA.preserve<AAManager>();
  PA.preserve<BasicAA>();
  return PA;
}