1 //===- InstCombineVectorOps.cpp -------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements instcombine for ExtractElement, InsertElement and 11 // ShuffleVector. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "InstCombineInternal.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/Analysis/InstructionSimplify.h" 18 #include "llvm/Analysis/VectorUtils.h" 19 #include "llvm/IR/PatternMatch.h" 20 using namespace llvm; 21 using namespace PatternMatch; 22 23 #define DEBUG_TYPE "instcombine" 24 25 /// Return true if the value is cheaper to scalarize than it is to leave as a 26 /// vector operation. isConstant indicates whether we're extracting one known 27 /// element. If false we're extracting a variable index. 28 static bool cheapToScalarize(Value *V, bool isConstant) { 29 if (Constant *C = dyn_cast<Constant>(V)) { 30 if (isConstant) return true; 31 32 // If all elts are the same, we can extract it and use any of the values. 33 if (Constant *Op0 = C->getAggregateElement(0U)) { 34 for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; 35 ++i) 36 if (C->getAggregateElement(i) != Op0) 37 return false; 38 return true; 39 } 40 } 41 Instruction *I = dyn_cast<Instruction>(V); 42 if (!I) return false; 43 44 // Insert element gets simplified to the inserted element or is deleted if 45 // this is constant idx extract element and its a constant idx insertelt. 46 if (I->getOpcode() == Instruction::InsertElement && isConstant && 47 isa<ConstantInt>(I->getOperand(2))) 48 return true; 49 if (I->getOpcode() == Instruction::Load && I->hasOneUse()) 50 return true; 51 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) 52 if (BO->hasOneUse() && 53 (cheapToScalarize(BO->getOperand(0), isConstant) || 54 cheapToScalarize(BO->getOperand(1), isConstant))) 55 return true; 56 if (CmpInst *CI = dyn_cast<CmpInst>(I)) 57 if (CI->hasOneUse() && 58 (cheapToScalarize(CI->getOperand(0), isConstant) || 59 cheapToScalarize(CI->getOperand(1), isConstant))) 60 return true; 61 62 return false; 63 } 64 65 // If we have a PHI node with a vector type that has only 2 uses: feed 66 // itself and be an operand of extractelement at a constant location, 67 // try to replace the PHI of the vector type with a PHI of a scalar type. 68 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { 69 // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. 70 if (!PN->hasNUses(2)) 71 return nullptr; 72 73 // If so, it's known at this point that one operand is PHI and the other is 74 // an extractelement node. Find the PHI user that is not the extractelement 75 // node. 76 auto iu = PN->user_begin(); 77 Instruction *PHIUser = dyn_cast<Instruction>(*iu); 78 if (PHIUser == cast<Instruction>(&EI)) 79 PHIUser = cast<Instruction>(*(++iu)); 80 81 // Verify that this PHI user has one use, which is the PHI itself, 82 // and that it is a binary operation which is cheap to scalarize. 83 // otherwise return NULL. 84 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || 85 !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true)) 86 return nullptr; 87 88 // Create a scalar PHI node that will replace the vector PHI node 89 // just before the current PHI node. 90 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith( 91 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN)); 92 // Scalarize each PHI operand. 93 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { 94 Value *PHIInVal = PN->getIncomingValue(i); 95 BasicBlock *inBB = PN->getIncomingBlock(i); 96 Value *Elt = EI.getIndexOperand(); 97 // If the operand is the PHI induction variable: 98 if (PHIInVal == PHIUser) { 99 // Scalarize the binary operation. Its first operand is the 100 // scalar PHI, and the second operand is extracted from the other 101 // vector operand. 102 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); 103 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; 104 Value *Op = InsertNewInstWith( 105 ExtractElementInst::Create(B0->getOperand(opId), Elt, 106 B0->getOperand(opId)->getName() + ".Elt"), 107 *B0); 108 Value *newPHIUser = InsertNewInstWith( 109 BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0); 110 scalarPHI->addIncoming(newPHIUser, inBB); 111 } else { 112 // Scalarize PHI input: 113 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, ""); 114 // Insert the new instruction into the predecessor basic block. 115 Instruction *pos = dyn_cast<Instruction>(PHIInVal); 116 BasicBlock::iterator InsertPos; 117 if (pos && !isa<PHINode>(pos)) { 118 InsertPos = ++pos->getIterator(); 119 } else { 120 InsertPos = inBB->getFirstInsertionPt(); 121 } 122 123 InsertNewInstWith(newEI, *InsertPos); 124 125 scalarPHI->addIncoming(newEI, inBB); 126 } 127 } 128 return ReplaceInstUsesWith(EI, scalarPHI); 129 } 130 131 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { 132 if (Value *V = SimplifyExtractElementInst( 133 EI.getVectorOperand(), EI.getIndexOperand(), DL, TLI, DT, AC)) 134 return ReplaceInstUsesWith(EI, V); 135 136 // If vector val is constant with all elements the same, replace EI with 137 // that element. We handle a known element # below. 138 if (Constant *C = dyn_cast<Constant>(EI.getOperand(0))) 139 if (cheapToScalarize(C, false)) 140 return ReplaceInstUsesWith(EI, C->getAggregateElement(0U)); 141 142 // If extracting a specified index from the vector, see if we can recursively 143 // find a previously computed scalar that was inserted into the vector. 144 if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { 145 unsigned IndexVal = IdxC->getZExtValue(); 146 unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); 147 148 // InstSimplify handles cases where the index is invalid. 149 assert(IndexVal < VectorWidth); 150 151 // This instruction only demands the single element from the input vector. 152 // If the input vector has a single use, simplify it based on this use 153 // property. 154 if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { 155 APInt UndefElts(VectorWidth, 0); 156 APInt DemandedMask(VectorWidth, 0); 157 DemandedMask.setBit(IndexVal); 158 if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask, 159 UndefElts)) { 160 EI.setOperand(0, V); 161 return &EI; 162 } 163 } 164 165 // If the this extractelement is directly using a bitcast from a vector of 166 // the same number of elements, see if we can find the source element from 167 // it. In this case, we will end up needing to bitcast the scalars. 168 if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { 169 if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) 170 if (VT->getNumElements() == VectorWidth) 171 if (Value *Elt = findScalarElement(BCI->getOperand(0), IndexVal)) 172 return new BitCastInst(Elt, EI.getType()); 173 } 174 175 // If there's a vector PHI feeding a scalar use through this extractelement 176 // instruction, try to scalarize the PHI. 177 if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) { 178 Instruction *scalarPHI = scalarizePHI(EI, PN); 179 if (scalarPHI) 180 return scalarPHI; 181 } 182 } 183 184 if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { 185 // Push extractelement into predecessor operation if legal and 186 // profitable to do so 187 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { 188 if (I->hasOneUse() && 189 cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { 190 Value *newEI0 = 191 Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), 192 EI.getName()+".lhs"); 193 Value *newEI1 = 194 Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), 195 EI.getName()+".rhs"); 196 return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); 197 } 198 } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { 199 // Extracting the inserted element? 200 if (IE->getOperand(2) == EI.getOperand(1)) 201 return ReplaceInstUsesWith(EI, IE->getOperand(1)); 202 // If the inserted and extracted elements are constants, they must not 203 // be the same value, extract from the pre-inserted value instead. 204 if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { 205 Worklist.AddValue(EI.getOperand(0)); 206 EI.setOperand(0, IE->getOperand(0)); 207 return &EI; 208 } 209 } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) { 210 // If this is extracting an element from a shufflevector, figure out where 211 // it came from and extract from the appropriate input element instead. 212 if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { 213 int SrcIdx = SVI->getMaskValue(Elt->getZExtValue()); 214 Value *Src; 215 unsigned LHSWidth = 216 SVI->getOperand(0)->getType()->getVectorNumElements(); 217 218 if (SrcIdx < 0) 219 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 220 if (SrcIdx < (int)LHSWidth) 221 Src = SVI->getOperand(0); 222 else { 223 SrcIdx -= LHSWidth; 224 Src = SVI->getOperand(1); 225 } 226 Type *Int32Ty = Type::getInt32Ty(EI.getContext()); 227 return ExtractElementInst::Create(Src, 228 ConstantInt::get(Int32Ty, 229 SrcIdx, false)); 230 } 231 } else if (CastInst *CI = dyn_cast<CastInst>(I)) { 232 // Canonicalize extractelement(cast) -> cast(extractelement) 233 // bitcasts can change the number of vector elements and they cost nothing 234 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { 235 Value *EE = Builder->CreateExtractElement(CI->getOperand(0), 236 EI.getIndexOperand()); 237 Worklist.AddValue(EE); 238 return CastInst::Create(CI->getOpcode(), EE, EI.getType()); 239 } 240 } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) { 241 if (SI->hasOneUse()) { 242 // TODO: For a select on vectors, it might be useful to do this if it 243 // has multiple extractelement uses. For vector select, that seems to 244 // fight the vectorizer. 245 246 // If we are extracting an element from a vector select or a select on 247 // vectors, a select on the scalars extracted from the vector arguments. 248 Value *TrueVal = SI->getTrueValue(); 249 Value *FalseVal = SI->getFalseValue(); 250 251 Value *Cond = SI->getCondition(); 252 if (Cond->getType()->isVectorTy()) { 253 Cond = Builder->CreateExtractElement(Cond, 254 EI.getIndexOperand(), 255 Cond->getName() + ".elt"); 256 } 257 258 Value *V1Elem 259 = Builder->CreateExtractElement(TrueVal, 260 EI.getIndexOperand(), 261 TrueVal->getName() + ".elt"); 262 263 Value *V2Elem 264 = Builder->CreateExtractElement(FalseVal, 265 EI.getIndexOperand(), 266 FalseVal->getName() + ".elt"); 267 return SelectInst::Create(Cond, 268 V1Elem, 269 V2Elem, 270 SI->getName() + ".elt"); 271 } 272 } 273 } 274 return nullptr; 275 } 276 277 /// If V is a shuffle of values that ONLY returns elements from either LHS or 278 /// RHS, return the shuffle mask and true. Otherwise, return false. 279 static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, 280 SmallVectorImpl<Constant*> &Mask) { 281 assert(LHS->getType() == RHS->getType() && 282 "Invalid CollectSingleShuffleElements"); 283 unsigned NumElts = V->getType()->getVectorNumElements(); 284 285 if (isa<UndefValue>(V)) { 286 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 287 return true; 288 } 289 290 if (V == LHS) { 291 for (unsigned i = 0; i != NumElts; ++i) 292 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 293 return true; 294 } 295 296 if (V == RHS) { 297 for (unsigned i = 0; i != NumElts; ++i) 298 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), 299 i+NumElts)); 300 return true; 301 } 302 303 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 304 // If this is an insert of an extract from some other vector, include it. 305 Value *VecOp = IEI->getOperand(0); 306 Value *ScalarOp = IEI->getOperand(1); 307 Value *IdxOp = IEI->getOperand(2); 308 309 if (!isa<ConstantInt>(IdxOp)) 310 return false; 311 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 312 313 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. 314 // We can handle this if the vector we are inserting into is 315 // transitively ok. 316 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 317 // If so, update the mask to reflect the inserted undef. 318 Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); 319 return true; 320 } 321 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ 322 if (isa<ConstantInt>(EI->getOperand(1))) { 323 unsigned ExtractedIdx = 324 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 325 unsigned NumLHSElts = LHS->getType()->getVectorNumElements(); 326 327 // This must be extracting from either LHS or RHS. 328 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { 329 // We can handle this if the vector we are inserting into is 330 // transitively ok. 331 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 332 // If so, update the mask to reflect the inserted value. 333 if (EI->getOperand(0) == LHS) { 334 Mask[InsertedIdx % NumElts] = 335 ConstantInt::get(Type::getInt32Ty(V->getContext()), 336 ExtractedIdx); 337 } else { 338 assert(EI->getOperand(0) == RHS); 339 Mask[InsertedIdx % NumElts] = 340 ConstantInt::get(Type::getInt32Ty(V->getContext()), 341 ExtractedIdx + NumLHSElts); 342 } 343 return true; 344 } 345 } 346 } 347 } 348 } 349 350 return false; 351 } 352 353 354 /// We are building a shuffle to create V, which is a sequence of insertelement, 355 /// extractelement pairs. If PermittedRHS is set, then we must either use it or 356 /// not rely on the second vector source. Return a std::pair containing the 357 /// left and right vectors of the proposed shuffle (or 0), and set the Mask 358 /// parameter as required. 359 /// 360 /// Note: we intentionally don't try to fold earlier shuffles since they have 361 /// often been chosen carefully to be efficiently implementable on the target. 362 typedef std::pair<Value *, Value *> ShuffleOps; 363 364 static ShuffleOps collectShuffleElements(Value *V, 365 SmallVectorImpl<Constant *> &Mask, 366 Value *PermittedRHS) { 367 assert(V->getType()->isVectorTy() && "Invalid shuffle!"); 368 unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); 369 370 if (isa<UndefValue>(V)) { 371 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 372 return std::make_pair( 373 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); 374 } 375 376 if (isa<ConstantAggregateZero>(V)) { 377 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); 378 return std::make_pair(V, nullptr); 379 } 380 381 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 382 // If this is an insert of an extract from some other vector, include it. 383 Value *VecOp = IEI->getOperand(0); 384 Value *ScalarOp = IEI->getOperand(1); 385 Value *IdxOp = IEI->getOperand(2); 386 387 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 388 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 389 unsigned ExtractedIdx = 390 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 391 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 392 393 // Either the extracted from or inserted into vector must be RHSVec, 394 // otherwise we'd end up with a shuffle of three inputs. 395 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { 396 Value *RHS = EI->getOperand(0); 397 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS); 398 assert(LR.second == nullptr || LR.second == RHS); 399 400 if (LR.first->getType() != RHS->getType()) { 401 // We tried our best, but we can't find anything compatible with RHS 402 // further up the chain. Return a trivial shuffle. 403 for (unsigned i = 0; i < NumElts; ++i) 404 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i); 405 return std::make_pair(V, nullptr); 406 } 407 408 unsigned NumLHSElts = RHS->getType()->getVectorNumElements(); 409 Mask[InsertedIdx % NumElts] = 410 ConstantInt::get(Type::getInt32Ty(V->getContext()), 411 NumLHSElts+ExtractedIdx); 412 return std::make_pair(LR.first, RHS); 413 } 414 415 if (VecOp == PermittedRHS) { 416 // We've gone as far as we can: anything on the other side of the 417 // extractelement will already have been converted into a shuffle. 418 unsigned NumLHSElts = 419 EI->getOperand(0)->getType()->getVectorNumElements(); 420 for (unsigned i = 0; i != NumElts; ++i) 421 Mask.push_back(ConstantInt::get( 422 Type::getInt32Ty(V->getContext()), 423 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i)); 424 return std::make_pair(EI->getOperand(0), PermittedRHS); 425 } 426 427 // If this insertelement is a chain that comes from exactly these two 428 // vectors, return the vector and the effective shuffle. 429 if (EI->getOperand(0)->getType() == PermittedRHS->getType() && 430 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, 431 Mask)) 432 return std::make_pair(EI->getOperand(0), PermittedRHS); 433 } 434 } 435 } 436 437 // Otherwise, can't do anything fancy. Return an identity vector. 438 for (unsigned i = 0; i != NumElts; ++i) 439 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 440 return std::make_pair(V, nullptr); 441 } 442 443 /// Try to find redundant insertvalue instructions, like the following ones: 444 /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0 445 /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0 446 /// Here the second instruction inserts values at the same indices, as the 447 /// first one, making the first one redundant. 448 /// It should be transformed to: 449 /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0 450 Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) { 451 bool IsRedundant = false; 452 ArrayRef<unsigned int> FirstIndices = I.getIndices(); 453 454 // If there is a chain of insertvalue instructions (each of them except the 455 // last one has only one use and it's another insertvalue insn from this 456 // chain), check if any of the 'children' uses the same indices as the first 457 // instruction. In this case, the first one is redundant. 458 Value *V = &I; 459 unsigned Depth = 0; 460 while (V->hasOneUse() && Depth < 10) { 461 User *U = V->user_back(); 462 auto UserInsInst = dyn_cast<InsertValueInst>(U); 463 if (!UserInsInst || U->getOperand(0) != V) 464 break; 465 if (UserInsInst->getIndices() == FirstIndices) { 466 IsRedundant = true; 467 break; 468 } 469 V = UserInsInst; 470 Depth++; 471 } 472 473 if (IsRedundant) 474 return ReplaceInstUsesWith(I, I.getOperand(0)); 475 return nullptr; 476 } 477 478 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { 479 Value *VecOp = IE.getOperand(0); 480 Value *ScalarOp = IE.getOperand(1); 481 Value *IdxOp = IE.getOperand(2); 482 483 // Inserting an undef or into an undefined place, remove this. 484 if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) 485 ReplaceInstUsesWith(IE, VecOp); 486 487 // If the inserted element was extracted from some other vector, and if the 488 // indexes are constant, try to turn this into a shufflevector operation. 489 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 490 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 491 unsigned NumInsertVectorElts = IE.getType()->getNumElements(); 492 unsigned NumExtractVectorElts = 493 EI->getOperand(0)->getType()->getVectorNumElements(); 494 unsigned ExtractedIdx = 495 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 496 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 497 498 if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract. 499 return ReplaceInstUsesWith(IE, VecOp); 500 501 if (InsertedIdx >= NumInsertVectorElts) // Out of range insert. 502 return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); 503 504 // If we are extracting a value from a vector, then inserting it right 505 // back into the same place, just use the input vector. 506 if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) 507 return ReplaceInstUsesWith(IE, VecOp); 508 509 // If this insertelement isn't used by some other insertelement, turn it 510 // (and any insertelements it points to), into one big shuffle. 511 if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) { 512 SmallVector<Constant*, 16> Mask; 513 ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr); 514 515 // The proposed shuffle may be trivial, in which case we shouldn't 516 // perform the combine. 517 if (LR.first != &IE && LR.second != &IE) { 518 // We now have a shuffle of LHS, RHS, Mask. 519 if (LR.second == nullptr) 520 LR.second = UndefValue::get(LR.first->getType()); 521 return new ShuffleVectorInst(LR.first, LR.second, 522 ConstantVector::get(Mask)); 523 } 524 } 525 } 526 } 527 528 unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); 529 APInt UndefElts(VWidth, 0); 530 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 531 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { 532 if (V != &IE) 533 return ReplaceInstUsesWith(IE, V); 534 return &IE; 535 } 536 537 return nullptr; 538 } 539 540 /// Return true if we can evaluate the specified expression tree if the vector 541 /// elements were shuffled in a different order. 542 static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask, 543 unsigned Depth = 5) { 544 // We can always reorder the elements of a constant. 545 if (isa<Constant>(V)) 546 return true; 547 548 // We won't reorder vector arguments. No IPO here. 549 Instruction *I = dyn_cast<Instruction>(V); 550 if (!I) return false; 551 552 // Two users may expect different orders of the elements. Don't try it. 553 if (!I->hasOneUse()) 554 return false; 555 556 if (Depth == 0) return false; 557 558 switch (I->getOpcode()) { 559 case Instruction::Add: 560 case Instruction::FAdd: 561 case Instruction::Sub: 562 case Instruction::FSub: 563 case Instruction::Mul: 564 case Instruction::FMul: 565 case Instruction::UDiv: 566 case Instruction::SDiv: 567 case Instruction::FDiv: 568 case Instruction::URem: 569 case Instruction::SRem: 570 case Instruction::FRem: 571 case Instruction::Shl: 572 case Instruction::LShr: 573 case Instruction::AShr: 574 case Instruction::And: 575 case Instruction::Or: 576 case Instruction::Xor: 577 case Instruction::ICmp: 578 case Instruction::FCmp: 579 case Instruction::Trunc: 580 case Instruction::ZExt: 581 case Instruction::SExt: 582 case Instruction::FPToUI: 583 case Instruction::FPToSI: 584 case Instruction::UIToFP: 585 case Instruction::SIToFP: 586 case Instruction::FPTrunc: 587 case Instruction::FPExt: 588 case Instruction::GetElementPtr: { 589 for (Value *Operand : I->operands()) { 590 if (!CanEvaluateShuffled(Operand, Mask, Depth-1)) 591 return false; 592 } 593 return true; 594 } 595 case Instruction::InsertElement: { 596 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); 597 if (!CI) return false; 598 int ElementNumber = CI->getLimitedValue(); 599 600 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' 601 // can't put an element into multiple indices. 602 bool SeenOnce = false; 603 for (int i = 0, e = Mask.size(); i != e; ++i) { 604 if (Mask[i] == ElementNumber) { 605 if (SeenOnce) 606 return false; 607 SeenOnce = true; 608 } 609 } 610 return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1); 611 } 612 } 613 return false; 614 } 615 616 /// Rebuild a new instruction just like 'I' but with the new operands given. 617 /// In the event of type mismatch, the type of the operands is correct. 618 static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) { 619 // We don't want to use the IRBuilder here because we want the replacement 620 // instructions to appear next to 'I', not the builder's insertion point. 621 switch (I->getOpcode()) { 622 case Instruction::Add: 623 case Instruction::FAdd: 624 case Instruction::Sub: 625 case Instruction::FSub: 626 case Instruction::Mul: 627 case Instruction::FMul: 628 case Instruction::UDiv: 629 case Instruction::SDiv: 630 case Instruction::FDiv: 631 case Instruction::URem: 632 case Instruction::SRem: 633 case Instruction::FRem: 634 case Instruction::Shl: 635 case Instruction::LShr: 636 case Instruction::AShr: 637 case Instruction::And: 638 case Instruction::Or: 639 case Instruction::Xor: { 640 BinaryOperator *BO = cast<BinaryOperator>(I); 641 assert(NewOps.size() == 2 && "binary operator with #ops != 2"); 642 BinaryOperator *New = 643 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(), 644 NewOps[0], NewOps[1], "", BO); 645 if (isa<OverflowingBinaryOperator>(BO)) { 646 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); 647 New->setHasNoSignedWrap(BO->hasNoSignedWrap()); 648 } 649 if (isa<PossiblyExactOperator>(BO)) { 650 New->setIsExact(BO->isExact()); 651 } 652 if (isa<FPMathOperator>(BO)) 653 New->copyFastMathFlags(I); 654 return New; 655 } 656 case Instruction::ICmp: 657 assert(NewOps.size() == 2 && "icmp with #ops != 2"); 658 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(), 659 NewOps[0], NewOps[1]); 660 case Instruction::FCmp: 661 assert(NewOps.size() == 2 && "fcmp with #ops != 2"); 662 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(), 663 NewOps[0], NewOps[1]); 664 case Instruction::Trunc: 665 case Instruction::ZExt: 666 case Instruction::SExt: 667 case Instruction::FPToUI: 668 case Instruction::FPToSI: 669 case Instruction::UIToFP: 670 case Instruction::SIToFP: 671 case Instruction::FPTrunc: 672 case Instruction::FPExt: { 673 // It's possible that the mask has a different number of elements from 674 // the original cast. We recompute the destination type to match the mask. 675 Type *DestTy = 676 VectorType::get(I->getType()->getScalarType(), 677 NewOps[0]->getType()->getVectorNumElements()); 678 assert(NewOps.size() == 1 && "cast with #ops != 1"); 679 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy, 680 "", I); 681 } 682 case Instruction::GetElementPtr: { 683 Value *Ptr = NewOps[0]; 684 ArrayRef<Value*> Idx = NewOps.slice(1); 685 GetElementPtrInst *GEP = GetElementPtrInst::Create( 686 cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I); 687 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); 688 return GEP; 689 } 690 } 691 llvm_unreachable("failed to rebuild vector instructions"); 692 } 693 694 Value * 695 InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { 696 // Mask.size() does not need to be equal to the number of vector elements. 697 698 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); 699 if (isa<UndefValue>(V)) { 700 return UndefValue::get(VectorType::get(V->getType()->getScalarType(), 701 Mask.size())); 702 } 703 if (isa<ConstantAggregateZero>(V)) { 704 return ConstantAggregateZero::get( 705 VectorType::get(V->getType()->getScalarType(), 706 Mask.size())); 707 } 708 if (Constant *C = dyn_cast<Constant>(V)) { 709 SmallVector<Constant *, 16> MaskValues; 710 for (int i = 0, e = Mask.size(); i != e; ++i) { 711 if (Mask[i] == -1) 712 MaskValues.push_back(UndefValue::get(Builder->getInt32Ty())); 713 else 714 MaskValues.push_back(Builder->getInt32(Mask[i])); 715 } 716 return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), 717 ConstantVector::get(MaskValues)); 718 } 719 720 Instruction *I = cast<Instruction>(V); 721 switch (I->getOpcode()) { 722 case Instruction::Add: 723 case Instruction::FAdd: 724 case Instruction::Sub: 725 case Instruction::FSub: 726 case Instruction::Mul: 727 case Instruction::FMul: 728 case Instruction::UDiv: 729 case Instruction::SDiv: 730 case Instruction::FDiv: 731 case Instruction::URem: 732 case Instruction::SRem: 733 case Instruction::FRem: 734 case Instruction::Shl: 735 case Instruction::LShr: 736 case Instruction::AShr: 737 case Instruction::And: 738 case Instruction::Or: 739 case Instruction::Xor: 740 case Instruction::ICmp: 741 case Instruction::FCmp: 742 case Instruction::Trunc: 743 case Instruction::ZExt: 744 case Instruction::SExt: 745 case Instruction::FPToUI: 746 case Instruction::FPToSI: 747 case Instruction::UIToFP: 748 case Instruction::SIToFP: 749 case Instruction::FPTrunc: 750 case Instruction::FPExt: 751 case Instruction::Select: 752 case Instruction::GetElementPtr: { 753 SmallVector<Value*, 8> NewOps; 754 bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements()); 755 for (int i = 0, e = I->getNumOperands(); i != e; ++i) { 756 Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask); 757 NewOps.push_back(V); 758 NeedsRebuild |= (V != I->getOperand(i)); 759 } 760 if (NeedsRebuild) { 761 return buildNew(I, NewOps); 762 } 763 return I; 764 } 765 case Instruction::InsertElement: { 766 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue(); 767 768 // The insertelement was inserting at Element. Figure out which element 769 // that becomes after shuffling. The answer is guaranteed to be unique 770 // by CanEvaluateShuffled. 771 bool Found = false; 772 int Index = 0; 773 for (int e = Mask.size(); Index != e; ++Index) { 774 if (Mask[Index] == Element) { 775 Found = true; 776 break; 777 } 778 } 779 780 // If element is not in Mask, no need to handle the operand 1 (element to 781 // be inserted). Just evaluate values in operand 0 according to Mask. 782 if (!Found) 783 return EvaluateInDifferentElementOrder(I->getOperand(0), Mask); 784 785 Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask); 786 return InsertElementInst::Create(V, I->getOperand(1), 787 Builder->getInt32(Index), "", I); 788 } 789 } 790 llvm_unreachable("failed to reorder elements of vector instruction!"); 791 } 792 793 static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask, 794 bool &isLHSID, bool &isRHSID) { 795 isLHSID = isRHSID = true; 796 797 for (unsigned i = 0, e = Mask.size(); i != e; ++i) { 798 if (Mask[i] < 0) continue; // Ignore undef values. 799 // Is this an identity shuffle of the LHS value? 800 isLHSID &= (Mask[i] == (int)i); 801 802 // Is this an identity shuffle of the RHS value? 803 isRHSID &= (Mask[i]-e == i); 804 } 805 } 806 807 // Returns true if the shuffle is extracting a contiguous range of values from 808 // LHS, for example: 809 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 810 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP| 811 // Shuffles to: |EE|FF|GG|HH| 812 // +--+--+--+--+ 813 static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, 814 SmallVector<int, 16> &Mask) { 815 unsigned LHSElems = 816 cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements(); 817 unsigned MaskElems = Mask.size(); 818 unsigned BegIdx = Mask.front(); 819 unsigned EndIdx = Mask.back(); 820 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1) 821 return false; 822 for (unsigned I = 0; I != MaskElems; ++I) 823 if (static_cast<unsigned>(Mask[I]) != BegIdx + I) 824 return false; 825 return true; 826 } 827 828 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 829 Value *LHS = SVI.getOperand(0); 830 Value *RHS = SVI.getOperand(1); 831 SmallVector<int, 16> Mask = SVI.getShuffleMask(); 832 Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); 833 834 bool MadeChange = false; 835 836 // Undefined shuffle mask -> undefined value. 837 if (isa<UndefValue>(SVI.getOperand(2))) 838 return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); 839 840 unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); 841 842 APInt UndefElts(VWidth, 0); 843 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 844 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { 845 if (V != &SVI) 846 return ReplaceInstUsesWith(SVI, V); 847 LHS = SVI.getOperand(0); 848 RHS = SVI.getOperand(1); 849 MadeChange = true; 850 } 851 852 unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements(); 853 854 // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') 855 // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). 856 if (LHS == RHS || isa<UndefValue>(LHS)) { 857 if (isa<UndefValue>(LHS) && LHS == RHS) { 858 // shuffle(undef,undef,mask) -> undef. 859 Value *Result = (VWidth == LHSWidth) 860 ? LHS : UndefValue::get(SVI.getType()); 861 return ReplaceInstUsesWith(SVI, Result); 862 } 863 864 // Remap any references to RHS to use LHS. 865 SmallVector<Constant*, 16> Elts; 866 for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) { 867 if (Mask[i] < 0) { 868 Elts.push_back(UndefValue::get(Int32Ty)); 869 continue; 870 } 871 872 if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || 873 (Mask[i] < (int)e && isa<UndefValue>(LHS))) { 874 Mask[i] = -1; // Turn into undef. 875 Elts.push_back(UndefValue::get(Int32Ty)); 876 } else { 877 Mask[i] = Mask[i] % e; // Force to LHS. 878 Elts.push_back(ConstantInt::get(Int32Ty, Mask[i])); 879 } 880 } 881 SVI.setOperand(0, SVI.getOperand(1)); 882 SVI.setOperand(1, UndefValue::get(RHS->getType())); 883 SVI.setOperand(2, ConstantVector::get(Elts)); 884 LHS = SVI.getOperand(0); 885 RHS = SVI.getOperand(1); 886 MadeChange = true; 887 } 888 889 if (VWidth == LHSWidth) { 890 // Analyze the shuffle, are the LHS or RHS and identity shuffles? 891 bool isLHSID, isRHSID; 892 recognizeIdentityMask(Mask, isLHSID, isRHSID); 893 894 // Eliminate identity shuffles. 895 if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); 896 if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); 897 } 898 899 if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) { 900 Value *V = EvaluateInDifferentElementOrder(LHS, Mask); 901 return ReplaceInstUsesWith(SVI, V); 902 } 903 904 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to 905 // a non-vector type. We can instead bitcast the original vector followed by 906 // an extract of the desired element: 907 // 908 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, 909 // <4 x i32> <i32 0, i32 1, i32 2, i32 3> 910 // %1 = bitcast <4 x i8> %sroa to i32 911 // Becomes: 912 // %bc = bitcast <16 x i8> %in to <4 x i32> 913 // %ext = extractelement <4 x i32> %bc, i32 0 914 // 915 // If the shuffle is extracting a contiguous range of values from the input 916 // vector then each use which is a bitcast of the extracted size can be 917 // replaced. This will work if the vector types are compatible, and the begin 918 // index is aligned to a value in the casted vector type. If the begin index 919 // isn't aligned then we can shuffle the original vector (keeping the same 920 // vector type) before extracting. 921 // 922 // This code will bail out if the target type is fundamentally incompatible 923 // with vectors of the source type. 924 // 925 // Example of <16 x i8>, target type i32: 926 // Index range [4,8): v-----------v Will work. 927 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 928 // <16 x i8>: | | | | | | | | | | | | | | | | | 929 // <4 x i32>: | | | | | 930 // +-----------+-----------+-----------+-----------+ 931 // Index range [6,10): ^-----------^ Needs an extra shuffle. 932 // Target type i40: ^--------------^ Won't work, bail. 933 if (isShuffleExtractingFromLHS(SVI, Mask)) { 934 Value *V = LHS; 935 unsigned MaskElems = Mask.size(); 936 unsigned BegIdx = Mask.front(); 937 VectorType *SrcTy = cast<VectorType>(V->getType()); 938 unsigned VecBitWidth = SrcTy->getBitWidth(); 939 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType()); 940 assert(SrcElemBitWidth && "vector elements must have a bitwidth"); 941 unsigned SrcNumElems = SrcTy->getNumElements(); 942 SmallVector<BitCastInst *, 8> BCs; 943 DenseMap<Type *, Value *> NewBCs; 944 for (User *U : SVI.users()) 945 if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) 946 if (!BC->use_empty()) 947 // Only visit bitcasts that weren't previously handled. 948 BCs.push_back(BC); 949 for (BitCastInst *BC : BCs) { 950 Type *TgtTy = BC->getDestTy(); 951 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy); 952 if (!TgtElemBitWidth) 953 continue; 954 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth; 955 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth; 956 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth); 957 if (!VecBitWidthsEqual) 958 continue; 959 if (!VectorType::isValidElementType(TgtTy)) 960 continue; 961 VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems); 962 if (!BegIsAligned) { 963 // Shuffle the input so [0,NumElements) contains the output, and 964 // [NumElems,SrcNumElems) is undef. 965 SmallVector<Constant *, 16> ShuffleMask(SrcNumElems, 966 UndefValue::get(Int32Ty)); 967 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) 968 ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx); 969 V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()), 970 ConstantVector::get(ShuffleMask), 971 SVI.getName() + ".extract"); 972 BegIdx = 0; 973 } 974 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth; 975 assert(SrcElemsPerTgtElem); 976 BegIdx /= SrcElemsPerTgtElem; 977 bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end(); 978 auto *NewBC = 979 BCAlreadyExists 980 ? NewBCs[CastSrcTy] 981 : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); 982 if (!BCAlreadyExists) 983 NewBCs[CastSrcTy] = NewBC; 984 auto *Ext = Builder->CreateExtractElement( 985 NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract"); 986 // The shufflevector isn't being replaced: the bitcast that used it 987 // is. InstCombine will visit the newly-created instructions. 988 ReplaceInstUsesWith(*BC, Ext); 989 MadeChange = true; 990 } 991 } 992 993 // If the LHS is a shufflevector itself, see if we can combine it with this 994 // one without producing an unusual shuffle. 995 // Cases that might be simplified: 996 // 1. 997 // x1=shuffle(v1,v2,mask1) 998 // x=shuffle(x1,undef,mask) 999 // ==> 1000 // x=shuffle(v1,undef,newMask) 1001 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 1002 // 2. 1003 // x1=shuffle(v1,undef,mask1) 1004 // x=shuffle(x1,x2,mask) 1005 // where v1.size() == mask1.size() 1006 // ==> 1007 // x=shuffle(v1,x2,newMask) 1008 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] 1009 // 3. 1010 // x2=shuffle(v2,undef,mask2) 1011 // x=shuffle(x1,x2,mask) 1012 // where v2.size() == mask2.size() 1013 // ==> 1014 // x=shuffle(x1,v2,newMask) 1015 // newMask[i] = (mask[i] < x1.size()) 1016 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() 1017 // 4. 1018 // x1=shuffle(v1,undef,mask1) 1019 // x2=shuffle(v2,undef,mask2) 1020 // x=shuffle(x1,x2,mask) 1021 // where v1.size() == v2.size() 1022 // ==> 1023 // x=shuffle(v1,v2,newMask) 1024 // newMask[i] = (mask[i] < x1.size()) 1025 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() 1026 // 1027 // Here we are really conservative: 1028 // we are absolutely afraid of producing a shuffle mask not in the input 1029 // program, because the code gen may not be smart enough to turn a merged 1030 // shuffle into two specific shuffles: it may produce worse code. As such, 1031 // we only merge two shuffles if the result is either a splat or one of the 1032 // input shuffle masks. In this case, merging the shuffles just removes 1033 // one instruction, which we know is safe. This is good for things like 1034 // turning: (splat(splat)) -> splat, or 1035 // merge(V[0..n], V[n+1..2n]) -> V[0..2n] 1036 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); 1037 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); 1038 if (LHSShuffle) 1039 if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) 1040 LHSShuffle = nullptr; 1041 if (RHSShuffle) 1042 if (!isa<UndefValue>(RHSShuffle->getOperand(1))) 1043 RHSShuffle = nullptr; 1044 if (!LHSShuffle && !RHSShuffle) 1045 return MadeChange ? &SVI : nullptr; 1046 1047 Value* LHSOp0 = nullptr; 1048 Value* LHSOp1 = nullptr; 1049 Value* RHSOp0 = nullptr; 1050 unsigned LHSOp0Width = 0; 1051 unsigned RHSOp0Width = 0; 1052 if (LHSShuffle) { 1053 LHSOp0 = LHSShuffle->getOperand(0); 1054 LHSOp1 = LHSShuffle->getOperand(1); 1055 LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements(); 1056 } 1057 if (RHSShuffle) { 1058 RHSOp0 = RHSShuffle->getOperand(0); 1059 RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements(); 1060 } 1061 Value* newLHS = LHS; 1062 Value* newRHS = RHS; 1063 if (LHSShuffle) { 1064 // case 1 1065 if (isa<UndefValue>(RHS)) { 1066 newLHS = LHSOp0; 1067 newRHS = LHSOp1; 1068 } 1069 // case 2 or 4 1070 else if (LHSOp0Width == LHSWidth) { 1071 newLHS = LHSOp0; 1072 } 1073 } 1074 // case 3 or 4 1075 if (RHSShuffle && RHSOp0Width == LHSWidth) { 1076 newRHS = RHSOp0; 1077 } 1078 // case 4 1079 if (LHSOp0 == RHSOp0) { 1080 newLHS = LHSOp0; 1081 newRHS = nullptr; 1082 } 1083 1084 if (newLHS == LHS && newRHS == RHS) 1085 return MadeChange ? &SVI : nullptr; 1086 1087 SmallVector<int, 16> LHSMask; 1088 SmallVector<int, 16> RHSMask; 1089 if (newLHS != LHS) 1090 LHSMask = LHSShuffle->getShuffleMask(); 1091 if (RHSShuffle && newRHS != RHS) 1092 RHSMask = RHSShuffle->getShuffleMask(); 1093 1094 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; 1095 SmallVector<int, 16> newMask; 1096 bool isSplat = true; 1097 int SplatElt = -1; 1098 // Create a new mask for the new ShuffleVectorInst so that the new 1099 // ShuffleVectorInst is equivalent to the original one. 1100 for (unsigned i = 0; i < VWidth; ++i) { 1101 int eltMask; 1102 if (Mask[i] < 0) { 1103 // This element is an undef value. 1104 eltMask = -1; 1105 } else if (Mask[i] < (int)LHSWidth) { 1106 // This element is from left hand side vector operand. 1107 // 1108 // If LHS is going to be replaced (case 1, 2, or 4), calculate the 1109 // new mask value for the element. 1110 if (newLHS != LHS) { 1111 eltMask = LHSMask[Mask[i]]; 1112 // If the value selected is an undef value, explicitly specify it 1113 // with a -1 mask value. 1114 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) 1115 eltMask = -1; 1116 } else 1117 eltMask = Mask[i]; 1118 } else { 1119 // This element is from right hand side vector operand 1120 // 1121 // If the value selected is an undef value, explicitly specify it 1122 // with a -1 mask value. (case 1) 1123 if (isa<UndefValue>(RHS)) 1124 eltMask = -1; 1125 // If RHS is going to be replaced (case 3 or 4), calculate the 1126 // new mask value for the element. 1127 else if (newRHS != RHS) { 1128 eltMask = RHSMask[Mask[i]-LHSWidth]; 1129 // If the value selected is an undef value, explicitly specify it 1130 // with a -1 mask value. 1131 if (eltMask >= (int)RHSOp0Width) { 1132 assert(isa<UndefValue>(RHSShuffle->getOperand(1)) 1133 && "should have been check above"); 1134 eltMask = -1; 1135 } 1136 } else 1137 eltMask = Mask[i]-LHSWidth; 1138 1139 // If LHS's width is changed, shift the mask value accordingly. 1140 // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any 1141 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. 1142 // If newRHS == newLHS, we want to remap any references from newRHS to 1143 // newLHS so that we can properly identify splats that may occur due to 1144 // obfuscation across the two vectors. 1145 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS) 1146 eltMask += newLHSWidth; 1147 } 1148 1149 // Check if this could still be a splat. 1150 if (eltMask >= 0) { 1151 if (SplatElt >= 0 && SplatElt != eltMask) 1152 isSplat = false; 1153 SplatElt = eltMask; 1154 } 1155 1156 newMask.push_back(eltMask); 1157 } 1158 1159 // If the result mask is equal to one of the original shuffle masks, 1160 // or is a splat, do the replacement. 1161 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { 1162 SmallVector<Constant*, 16> Elts; 1163 for (unsigned i = 0, e = newMask.size(); i != e; ++i) { 1164 if (newMask[i] < 0) { 1165 Elts.push_back(UndefValue::get(Int32Ty)); 1166 } else { 1167 Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); 1168 } 1169 } 1170 if (!newRHS) 1171 newRHS = UndefValue::get(newLHS->getType()); 1172 return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); 1173 } 1174 1175 // If the result mask is an identity, replace uses of this instruction with 1176 // corresponding argument. 1177 bool isLHSID, isRHSID; 1178 recognizeIdentityMask(newMask, isLHSID, isRHSID); 1179 if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS); 1180 if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS); 1181 1182 return MadeChange ? &SVI : nullptr; 1183 } 1184