//===------- VectorCombine.cpp - Optimize partial vector operations -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass optimizes scalar/vector interactions using target cost models. The
// transforms implemented here may not fit in traditional loop-based or SLP
// vectorization passes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/VectorCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "vector-combine"
STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();

namespace {
class VectorCombine {
public:
  VectorCombine(Function &F, const TargetTransformInfo &TTI,
                const DominatorTree &DT, AAResults &AA)
      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA) {}

  bool run();

private:
  Function &F;
  IRBuilder<> Builder;
  const TargetTransformInfo &TTI;
  const DominatorTree &DT;
  AAResults &AA;

  bool vectorizeLoadInsert(Instruction &I);
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             unsigned Opcode,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                     Instruction &I);
  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                       Instruction &I);
  bool foldExtractExtract(Instruction &I);
  bool foldBitcastShuf(Instruction &I);
  bool scalarizeBinopOrCmp(Instruction &I);
  bool foldExtractedCmps(Instruction &I);
  bool foldSingleElementStore(Instruction &I);
  bool scalarizeLoadExtract(Instruction &I);
};
} // namespace

static void replaceValue(Value &Old, Value &New) {
  Old.replaceAllUsesWith(&New);
  New.takeName(&Old);
}

bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  // Match insert into fixed vector of scalar value.
  // TODO: Handle non-zero insert index.
  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
  Value *Scalar;
  if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
      !Scalar->hasOneUse())
    return false;

  // Optionally match an extract from another vector.
  Value *X;
  bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
  if (!HasExtract)
    X = Scalar;

  // Match source value as load of scalar or vector.
  // Do not vectorize scalar load (widening) if atomic/volatile or under
  // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
  // or create data races non-existent in the source.
  auto *Load = dyn_cast<LoadInst>(X);
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
      mustSuppressSpeculation(*Load))
    return false;

  const DataLayout &DL = I.getModule()->getDataLayout();
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");

  // If original AS != Load's AS, we can't bitcast the original pointer and have
  // to use Load's operand instead. Ideally we would want to strip pointer casts
  // without changing AS, but there's no API to do that ATM.
  unsigned AS = Load->getPointerAddressSpace();
  if (AS != SrcPtr->getType()->getPointerAddressSpace())
    SrcPtr = Load->getPointerOperand();

  // We are potentially transforming byte-sized (8-bit) memory accesses, so make
  // sure we have all of our type-based constraints in place for this target.
  Type *ScalarTy = Scalar->getType();
  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
      ScalarSize % 8 != 0)
    return false;

  // Check safety of replacing the scalar load with a larger vector load.
  // We use minimal alignment (maximum flexibility) because we only care about
  // the dereferenceable region. When calculating cost and creating a new op,
  // we may use a larger value based on alignment attributes.
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  Align Alignment = Load->getAlign();
  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
    // It is not safe to load directly from the pointer, but we can still peek
    // through gep offsets and check if it is safe to load from a base address
    // with updated alignment. If it is, we can shuffle the element(s) into
    // place after loading.
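    // For example (illustrative IR, not taken from a test): a scalar load
    // addressed through
    //   getelementptr inbounds <8 x i16>, <8 x i16>* %base, i64 0, i64 2
    // may not be widenable starting at the gep itself, but it can still be
    // safe to load MinVecNumElts elements starting at %base and then shuffle
    // element 2 down into lane 0.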
    unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
    APInt Offset(OffsetBitWidth, 0);
    SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);

    // We want to shuffle the result down from a high element of a vector, so
    // the offset must be positive.
    if (Offset.isNegative())
      return false;

    // The offset must be a multiple of the scalar element to shuffle cleanly
    // in the element's size.
    uint64_t ScalarSizeInBytes = ScalarSize / 8;
    if (Offset.urem(ScalarSizeInBytes) != 0)
      return false;

    // If we load MinVecNumElts, will our target element still be loaded?
    OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
    if (OffsetEltIndex >= MinVecNumElts)
      return false;

    if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
      return false;

    // Update alignment with offset value. Note that the offset could be negated
    // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
    // negation does not change the result of the alignment calculation.
    Alignment = commonAlignment(Alignment, Offset.getZExtValue());
  }

  // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
  // Use the greater of the alignment on the load or its source pointer.
  Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
  Type *LoadTy = Load->getType();
  InstructionCost OldCost =
      TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
  APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
  OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
                                          /* Insert */ true, HasExtract);

  // New pattern: load VecPtr
  InstructionCost NewCost =
      TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
  // Optionally, we are shuffling the loaded vector element(s) into place.
  // For the mask set everything but element 0 to undef to prevent poison from
  // propagating from the extra loaded memory. This will also optionally
  // shrink/grow the vector from the loaded size to the output size.
  // We assume this operation has no cost in codegen if there was no offset.
  // Note that we could use freeze to avoid poison problems, but then we might
  // still need a shuffle to change the vector size.
  unsigned OutputNumElts = Ty->getNumElements();
  SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  if (OffsetEltIndex)
    NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy, Mask);

  // We can aggressively convert to the vector form because the backend can
  // invert this transform if it does not result in a performance win.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // It is safe and potentially profitable to load a vector directly:
  // inselt undef, load Scalar, 0 --> load VecPtr
  IRBuilder<> Builder(Load);
  Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
  Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
  VecLd = Builder.CreateShuffleVector(VecLd, Mask);

  replaceValue(I, *VecLd);
  ++NumVecLoad;
  return true;
}

/// Determine which, if any, of the inputs should be replaced by a shuffle
/// followed by extract from a different index.
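/// For example (illustrative), given
///   %e0 = extractelement <4 x i32> %x, i32 0
///   %e1 = extractelement <4 x i32> %y, i32 2
/// the extracts read different lanes, so one source vector must be shuffled
/// before a single vector op + extract can replace the scalar op; the returned
/// extract is the one whose source should be shuffled (the more expensive of
/// the two).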
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
    unsigned PreferredExtractIndex = InvalidIndex) const {
  assert(isa<ConstantInt>(Ext0->getIndexOperand()) &&
         isa<ConstantInt>(Ext1->getIndexOperand()) &&
         "Expected constant extract indexes");

  unsigned Index0 = cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue();
  unsigned Index1 = cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue();

  // If the extract indexes are identical, no shuffle is needed.
  if (Index0 == Index1)
    return nullptr;

  Type *VecTy = Ext0->getVectorOperand()->getType();
  assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
  InstructionCost Cost0 =
      TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
  InstructionCost Cost1 =
      TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);

  // If both costs are invalid, no shuffle is needed.
  if (!Cost0.isValid() && !Cost1.isValid())
    return nullptr;

  // We are extracting from 2 different indexes, so one operand must be shuffled
  // before performing a vector operation and/or extract. The more expensive
  // extract will be replaced by a shuffle.
  if (Cost0 > Cost1)
    return Ext0;
  if (Cost1 > Cost0)
    return Ext1;

  // If the costs are equal and there is a preferred extract index, shuffle the
  // opposite operand.
  if (PreferredExtractIndex == Index0)
    return Ext1;
  if (PreferredExtractIndex == Index1)
    return Ext0;

  // Otherwise, replace the extract with the higher index.
  return Index0 > Index1 ? Ext0 : Ext1;
}

/// Compare the relative costs of 2 extracts followed by scalar operation vs.
/// vector operation(s) followed by extract. Return true if the existing
/// instructions are cheaper than a vector alternative. Otherwise, return false
/// and if one of the extracts should be transformed to a shufflevector, set
/// \p ConvertToShuffle to that extract instruction.
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          unsigned Opcode,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
         isa<ConstantInt>(Ext1->getOperand(1)) &&
         "Expected constant extract indexes");
  Type *ScalarTy = Ext0->getType();
  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
  InstructionCost ScalarOpCost, VectorOpCost;

  // Get cost estimates for scalar and vector versions of the operation.
  bool IsBinOp = Instruction::isBinaryOp(Opcode);
  if (IsBinOp) {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  } else {
    assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
           "Expected a compare");
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
                                          CmpInst::makeCmpResultType(ScalarTy));
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
                                          CmpInst::makeCmpResultType(VecTy));
  }

  // Get cost estimates for the extract elements. These costs will factor into
  // both sequences.
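  // For example (illustrative): for "add (extelt %x, 0), (extelt %x, 0)", the
  // old cost is one extract plus one scalar add, while the new cost is one
  // vector add plus one extract (plus one more extract if the original extract
  // has extra uses and cannot be removed).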
  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();

  InstructionCost Extract0Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
  InstructionCost Extract1Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);

  // A more expensive extract will always be replaced by a splat shuffle.
  // For example, if Ext0 is more expensive:
  // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
  // extelt (opcode (splat V0, Ext0), V1), Ext1
  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
  // check the cost of creating a broadcast shuffle and shuffling both
  // operands to element 0.
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  // Extra uses of the extracts mean that we include those costs in the
  // vector total because those instructions will not be eliminated.
  InstructionCost OldCost, NewCost;
  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
    // Handle a special case. If the 2 extracts are identical, adjust the
    // formulas to account for that. The extra use charge allows for either the
    // CSE'd pattern or an unoptimized form with identical values:
    // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    // Handle the general case. Each extract is actually a different value:
    // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }

  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    if (IsBinOp && DisableBinopExtractShuffle)
      return true;

    // If we are extracting from 2 different indexes, then one operand must be
    // shuffled before performing the vector operation. The shuffle mask is
    // undefined except for 1 lane that is being translated to the remaining
    // extraction lane. Therefore, it is a splat shuffle. Ex:
    // ShufMask = { undef, undef, 0, undef }
    // TODO: The cost model has an option for a "broadcast" shuffle
    // (splat-from-element-0), but no option for a more general splat.
    NewCost +=
        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization.
  // Codegen can reverse this transform (scalarize) if it was not profitable.
  return OldCost < NewCost;
}

/// Create a shuffle that translates (shifts) 1 element from the input vector
/// to a new element location.
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilder<> &Builder) {
  // The shuffle mask is undefined except for 1 lane that is being translated
  // to the new element index. Example for OldIndex == 2 and NewIndex == 0:
  // ShufMask = { 2, undef, undef, undef }
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}

/// Given an extract element instruction with constant index operand, shuffle
/// the source vector (shift the scalar element) to a NewIndex for extraction.
/// Return null if the input can be constant folded, so that we are not creating
/// unnecessary instructions.
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
                                            unsigned NewIndex,
                                            IRBuilder<> &Builder) {
  // If the extract can be constant-folded, this code is unsimplified. Defer
  // to other passes to handle that.
  Value *X = ExtElt->getVectorOperand();
  Value *C = ExtElt->getIndexOperand();
  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
  if (isa<Constant>(X))
    return nullptr;

  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
                                   NewIndex, Builder);
  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
}

/// Try to reduce extract element costs by converting scalar compares to vector
/// compares followed by extract.
/// cmp (ext0 V0, C), (ext1 V1, C)
void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
                                  ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<CmpInst>(&I) && "Expected a compare");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
  ++NumVecCmp;
  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
  Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
  replaceValue(I, *NewExt);
}

/// Try to reduce extract element costs by converting scalar binops to vector
/// binops followed by extract.
/// bo (ext0 V0, C), (ext1 V1, C)
void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
                                    ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
  ++NumVecBO;
  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
  Value *VecBO =
      Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);

  // All IR flags are safe to back-propagate because any potential poison
  // created in unused vector elements is discarded by the extract.
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);

  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
  replaceValue(I, *NewExt);
}

/// Match an instruction with extracted vector operands.
bool VectorCombine::foldExtractExtract(Instruction &I) {
  // It is not safe to transform things like div, urem, etc. because we may
  // create undefined behavior when executing those on unknown vector elements.
  if (!isSafeToSpeculativelyExecute(&I))
    return false;

  Instruction *I0, *I1;
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
      !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
    return false;

  Value *V0, *V1;
  uint64_t C0, C1;
  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
      !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
      V0->getType() != V1->getType())
    return false;

  // If the scalar value 'I' is going to be re-inserted into a vector, then try
  // to create an extract to that same element. The extract/insert can be
  // reduced to a "select shuffle".
  // TODO: If we add a larger pattern match that starts from an insert, this
  // probably becomes unnecessary.
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  uint64_t InsertIndex = InvalidIndex;
  if (I.hasOneUse())
    match(I.user_back(),
          m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));

  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), ExtractToChange,
                            InsertIndex))
    return false;

  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    ExtractElementInst *NewExtract =
        translateExtract(ExtractToChange, CheapExtractIdx, Builder);
    if (!NewExtract)
      return false;
    if (ExtractToChange == Ext0)
      Ext0 = NewExtract;
    else
      Ext1 = NewExtract;
  }

  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I);
  else
    foldExtExtBinop(Ext0, Ext1, I);

  return true;
}

/// If this is a bitcast of a shuffle, try to bitcast the source vector to the
/// destination type followed by shuffle. This can enable further transforms by
/// moving bitcasts or shuffles together.
bool VectorCombine::foldBitcastShuf(Instruction &I) {
  Value *V;
  ArrayRef<int> Mask;
  if (!match(&I, m_BitCast(
                     m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
    return false;

  // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
  // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
  // mask for scalable type is a splat or not.
  // 2) Disallow non-vector casts and length-changing shuffles.
  // TODO: We could allow any shuffle.
  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
  auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
  if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
    return false;

  unsigned DestNumElts = DestTy->getNumElements();
  unsigned SrcNumElts = SrcTy->getNumElements();
  SmallVector<int, 16> NewMask;
  if (SrcNumElts <= DestNumElts) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can
    // always be expanded to the equivalent form choosing narrower elements.
    assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestNumElts / SrcNumElts;
    narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
  } else {
    // The bitcast is from narrow elements to wide elements. The shuffle mask
    // must choose consecutive elements to allow casting first.
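    // For example (illustrative): with SrcTy = <8 x i8> and DestTy = <2 x i32>,
    // a mask of { 4, 5, 6, 7, 0, 1, 2, 3 } widens to { 1, 0 }, but a mask such
    // as { 1, 2, 3, 4, ... } cannot be widened because its groups of 4 narrow
    // elements are not aligned, consecutive runs.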
    assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcNumElts / DestNumElts;
    if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
      return false;
  }

  // The new shuffle must not cost more than the old shuffle. The bitcast is
  // moved ahead of the shuffle, so assume that it has the same cost as before.
  InstructionCost DestCost = TTI.getShuffleCost(
      TargetTransformInfo::SK_PermuteSingleSrc, DestTy, NewMask);
  InstructionCost SrcCost =
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy, Mask);
  if (DestCost > SrcCost || !DestCost.isValid())
    return false;

  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
  ++NumShufOfBitcast;
  Value *CastV = Builder.CreateBitCast(V, DestTy);
  Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
  replaceValue(I, *Shuf);
  return true;
}

/// Match a vector binop or compare instruction with at least one inserted
/// scalar operand and convert to scalar binop/cmp followed by insertelement.
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  Value *Ins0, *Ins1;
  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
      !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
    return false;

  // Do not convert the vector condition of a vector select into a scalar
  // condition. That may cause problems for codegen because of differences in
  // boolean formats and register-file transfers.
  // TODO: Can we account for that in the cost model?
  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
  if (IsCmp)
    for (User *U : I.users())
      if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
        return false;

  // Match against one or both scalar values being inserted into constant
  // vectors:
  // vec_op VecC0, (inselt VecC1, V1, Index)
  // vec_op (inselt VecC0, V0, Index), VecC1
  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
  // TODO: Deal with mismatched index constants and variable indexes?
  Constant *VecC0 = nullptr, *VecC1 = nullptr;
  Value *V0 = nullptr, *V1 = nullptr;
  uint64_t Index0 = 0, Index1 = 0;
  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
                               m_ConstantInt(Index0))) &&
      !match(Ins0, m_Constant(VecC0)))
    return false;
  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
                               m_ConstantInt(Index1))) &&
      !match(Ins1, m_Constant(VecC1)))
    return false;

  bool IsConst0 = !V0;
  bool IsConst1 = !V1;
  if (IsConst0 && IsConst1)
    return false;
  if (!IsConst0 && !IsConst1 && Index0 != Index1)
    return false;

  // Bail for single insertion if it is a load.
  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
  auto *I0 = dyn_cast_or_null<Instruction>(V0);
  auto *I1 = dyn_cast_or_null<Instruction>(V1);
  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
      (IsConst1 && I0 && I0->mayReadFromMemory()))
    return false;

  uint64_t Index = IsConst0 ? Index1 : Index0;
  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
  Type *VecTy = I.getType();
  assert(VecTy->isVectorTy() &&
         (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
         (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
          ScalarTy->isPointerTy()) &&
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  InstructionCost ScalarOpCost, VectorOpCost;
  if (IsCmp) {
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
  } else {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  }

  // Get cost estimate for the insert element. This cost will factor into
  // both sequences.
  InstructionCost InsertCost =
      TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
  InstructionCost OldCost =
      (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
  InstructionCost NewCost = ScalarOpCost + InsertCost +
                            (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
                            (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);

  // We want to scalarize unless the vector variant actually has lower cost.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
  // inselt NewVecC, (scalar_op V0, V1), Index
  if (IsCmp)
    ++NumScalarCmp;
  else
    ++NumScalarBO;

  // For constant cases, extract the scalar element, this should constant fold.
  if (IsConst0)
    V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
  if (IsConst1)
    V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));

  Value *Scalar =
      IsCmp ? Builder.CreateCmp(Pred, V0, V1)
            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);

  Scalar->setName(I.getName() + ".scalar");

  // All IR flags are safe to back-propagate. There is no potential for extra
  // poison to be created by the scalar instruction.
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);

  // Fold the vector constants in the original vectors into a new base vector.
  Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
                            : ConstantExpr::get(Opcode, VecC0, VecC1);
  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
  replaceValue(I, *Insert);
  return true;
}

/// Try to combine a scalar binop + 2 scalar compares of extracted elements of
/// a vector into vector operations followed by extract. Note: The SLP pass
/// may miss this pattern because of implementation problems.
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  // We are looking for a scalar binop of booleans.
  // binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
    return false;

  // The compare predicates should match, and each compare should have a
  // constant operand.
  // TODO: Relax the one-use constraints.
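  // An illustrative example of the pattern being matched (not from a test):
  //   %e0 = extractelement <4 x i32> %x, i32 0
  //   %e1 = extractelement <4 x i32> %x, i32 1
  //   %c0 = icmp sgt i32 %e0, 42
  //   %c1 = icmp sgt i32 %e1, 7
  //   %r  = and i1 %c0, %c1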
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  Instruction *I0, *I1;
  Constant *C0, *C1;
  CmpInst::Predicate P0, P1;
  if (!match(B0, m_OneUse(m_Cmp(P0, m_Instruction(I0), m_Constant(C0)))) ||
      !match(B1, m_OneUse(m_Cmp(P1, m_Instruction(I1), m_Constant(C1)))) ||
      P0 != P1)
    return false;

  // The compare operands must be extracts of the same vector with constant
  // extract indexes.
  // TODO: Relax the one-use constraints.
  Value *X;
  uint64_t Index0, Index1;
  if (!match(I0, m_OneUse(m_ExtractElt(m_Value(X), m_ConstantInt(Index0)))) ||
      !match(I1, m_OneUse(m_ExtractElt(m_Specific(X), m_ConstantInt(Index1)))))
    return false;

  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
  if (!ConvertToShuf)
    return false;

  // The original scalar pattern is:
  // binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
  CmpInst::Predicate Pred = P0;
  unsigned CmpOpcode = CmpInst::isFPPredicate(Pred) ? Instruction::FCmp
                                                    : Instruction::ICmp;
  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
  if (!VecTy)
    return false;

  InstructionCost OldCost =
      TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
  OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
  OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType()) * 2;
  OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());

  // The proposed vector pattern is:
  // vcmp = cmp Pred X, VecC
  // ext (binop vNi1 vcmp, (shuffle vcmp, Index1)), Index0
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
  InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
  ShufMask[CheapIndex] = ExpensiveIndex;
  NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
                                ShufMask);
  NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
  NewCost += TTI.getVectorInstrCost(Ext0->getOpcode(), CmpTy, CheapIndex);

  // Aggressively form vector ops if the cost is equal because the transform
  // may enable further optimization.
  // Codegen can reverse this transform (scalarize) if it was not profitable.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // Create a vector constant from the 2 scalar constants.
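  // Lanes other than Index0 and Index1 are never extracted from the result, so
  // leaving them undef is fine; only the two compared lanes of VCmp feed the
  // final extract.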
  SmallVector<Constant *, 32> CmpC(VecTy->getNumElements(),
                                   UndefValue::get(VecTy->getElementType()));
  CmpC[Index0] = C0;
  CmpC[Index1] = C1;
  Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));

  Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
  Value *VecLogic = Builder.CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
                                        VCmp, Shuf);
  Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
  replaceValue(I, *NewExt);
  ++NumVecCmpBO;
  return true;
}

// Check if memory loc modified between two instrs in the same BB
static bool isMemModifiedBetween(BasicBlock::iterator Begin,
                                 BasicBlock::iterator End,
                                 const MemoryLocation &Loc, AAResults &AA) {
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
    return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
           ++NumScanned > MaxInstrsToScan;
  });
}

/// Check if it is legal to scalarize a memory access to \p VecTy at index \p
/// Idx. \p Idx must access a valid vector element.
static bool canScalarizeAccess(FixedVectorType *VecTy, ConstantInt *Idx) {
  return Idx->getValue().ult(VecTy->getNumElements());
}

// Combine patterns like:
//   %0 = load <4 x i32>, <4 x i32>* %a
//   %1 = insertelement <4 x i32> %0, i32 %b, i32 1
//   store <4 x i32> %1, <4 x i32>* %a
// to:
//   %0 = bitcast <4 x i32>* %a to i32*
//   %1 = getelementptr inbounds i32, i32* %0, i64 0, i64 1
//   store i32 %b, i32* %1
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  StoreInst *SI = dyn_cast<StoreInst>(&I);
  if (!SI || !SI->isSimple() ||
      !isa<FixedVectorType>(SI->getValueOperand()->getType()))
    return false;

  // TODO: Combine more complicated patterns (multiple insert) by referencing
  // TargetTransformInfo.
  Instruction *Source;
  Value *NewElement;
  ConstantInt *Idx;
  if (!match(SI->getValueOperand(),
             m_InsertElt(m_Instruction(Source), m_Value(NewElement),
                         m_ConstantInt(Idx))))
    return false;

  if (auto *Load = dyn_cast<LoadInst>(Source)) {
    auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
    const DataLayout &DL = I.getModule()->getDataLayout();
    Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
    // Don't optimize for atomic/volatile load or store. Ensure memory is not
    // modified between, vector type matches store size, and index is inbounds.
    if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
        !DL.typeSizeEqualsStoreSize(Load->getType()) ||
        !canScalarizeAccess(VecTy, Idx) ||
        SrcAddr != SI->getPointerOperand()->stripPointerCasts() ||
        isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
                             MemoryLocation::get(SI), AA))
      return false;

    Value *GEP = GetElementPtrInst::CreateInBounds(
        SI->getPointerOperand(), {ConstantInt::get(Idx->getType(), 0), Idx});
    Builder.Insert(GEP);
    StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
    NSI->copyMetadata(*SI);
    if (SI->getAlign() < NSI->getAlign())
      NSI->setAlignment(SI->getAlign());
    replaceValue(I, *NSI);
    // We need to erase the original store manually.
    I.eraseFromParent();
    return true;
  }

  return false;
}

/// Try to scalarize vector loads feeding extractelement instructions.
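/// For example (illustrative):
///   %v = load <4 x i32>, <4 x i32>* %p
///   %e = extractelement <4 x i32> %v, i32 1
/// becomes
///   %gep = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 0, i32 1
///   %e = load i32, i32* %gep
/// when every user of the load is such an extract and the scalar loads are
/// cheaper per the cost model.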
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  Value *Ptr;
  ConstantInt *Idx;
  if (!match(&I, m_ExtractElt(m_Load(m_Value(Ptr)), m_ConstantInt(Idx))))
    return false;

  auto *LI = cast<LoadInst>(I.getOperand(0));
  const DataLayout &DL = I.getModule()->getDataLayout();
  if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(LI->getType()))
    return false;

  auto *FixedVT = dyn_cast<FixedVectorType>(LI->getType());
  if (!FixedVT)
    return false;

  if (!canScalarizeAccess(FixedVT, Idx))
    return false;

  InstructionCost OriginalCost = TTI.getMemoryOpCost(
      Instruction::Load, LI->getType(), Align(LI->getAlignment()),
      LI->getPointerAddressSpace());
  InstructionCost ScalarizedCost = 0;

  Instruction *LastCheckedInst = LI;
  unsigned NumInstChecked = 0;
  // Check if all users of the load are extracts with no memory modifications
  // between the load and the extract. Compute the cost of both the original
  // code and the scalarized version.
  for (User *U : LI->users()) {
    auto *UI = dyn_cast<ExtractElementInst>(U);
    if (!UI || UI->getParent() != LI->getParent())
      return false;

    // Check if any instruction between the load and the extract may modify
    // memory.
    if (LastCheckedInst->comesBefore(UI)) {
      for (Instruction &I :
           make_range(std::next(LI->getIterator()), UI->getIterator())) {
        // Bail out if we reached the check limit or the instruction may write
        // to memory.
        if (NumInstChecked == MaxInstrsToScan || I.mayWriteToMemory())
          return false;
        NumInstChecked++;
      }
    }

    if (!LastCheckedInst)
      LastCheckedInst = UI;
    else if (LastCheckedInst->comesBefore(UI))
      LastCheckedInst = UI;

    auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
    OriginalCost +=
        TTI.getVectorInstrCost(Instruction::ExtractElement, LI->getType(),
                               Index ? Index->getZExtValue() : -1);
    ScalarizedCost +=
        TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
                            Align(1), LI->getPointerAddressSpace());
    ScalarizedCost += TTI.getAddressComputationCost(FixedVT->getElementType());
  }

  if (ScalarizedCost >= OriginalCost)
    return false;

  // Replace extracts with narrow scalar loads.
  for (User *U : LI->users()) {
    auto *EI = cast<ExtractElementInst>(U);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(EI);
    Value *GEP = Builder.CreateInBoundsGEP(
        FixedVT, Ptr, {Builder.getInt32(0), EI->getOperand(1)});
    auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
        FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));

    // Set the alignment for the new load. For index 0, we can use the original
    // alignment. Otherwise choose the common alignment of the load's align and
    // the alignment for the scalar type.
    auto *ConstIdx = dyn_cast<ConstantInt>(EI->getOperand(1));
    if (ConstIdx && ConstIdx->isNullValue())
      NewLoad->setAlignment(LI->getAlign());
    else
      NewLoad->setAlignment(commonAlignment(
          DL.getABITypeAlign(NewLoad->getType()), LI->getAlign()));
    replaceValue(*EI, *NewLoad);
  }

  return true;
}

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;

  // Don't attempt vectorization if the target does not support vectors.
  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
    return false;

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Use early increment range so that we can erase instructions in loop.
    for (Instruction &I : make_early_inc_range(BB)) {
      if (isa<DbgInfoIntrinsic>(I))
        continue;
      Builder.SetInsertPoint(&I);
      MadeChange |= vectorizeLoadInsert(I);
      MadeChange |= foldExtractExtract(I);
      MadeChange |= foldBitcastShuf(I);
      MadeChange |= scalarizeBinopOrCmp(I);
      MadeChange |= foldExtractedCmps(I);
      MadeChange |= scalarizeLoadExtract(I);
      MadeChange |= foldSingleElementStore(I);
    }
  }

  // We're done with transforms, so remove dead instructions.
  if (MadeChange)
    for (BasicBlock &BB : F)
      SimplifyInstructionsInBlock(&BB);

  return MadeChange;
}

// Pass manager boilerplate below here.

namespace {
class VectorCombineLegacyPass : public FunctionPass {
public:
  static char ID;
  VectorCombineLegacyPass() : FunctionPass(ID) {
    initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.setPreservesCFG();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<AAResultsWrapperPass>();
    AU.addPreserved<BasicAAWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    VectorCombine Combiner(F, TTI, DT, AA);
    return Combiner.run();
  }
};
} // namespace

char VectorCombineLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
                      "Optimize scalar/vector ops", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
                    "Optimize scalar/vector ops", false, false)
Pass *llvm::createVectorCombinePass() {
  return new VectorCombineLegacyPass();
}

PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  AAResults &AA = FAM.getResult<AAManager>(F);
  VectorCombine Combiner(F, TTI, DT, AA);
  if (!Combiner.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}