1 //===- InstCombineVectorOps.cpp -------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements instcombine for ExtractElement, InsertElement and 11 // ShuffleVector. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "InstCombineInternal.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/Analysis/InstructionSimplify.h" 18 #include "llvm/Analysis/VectorUtils.h" 19 #include "llvm/IR/PatternMatch.h" 20 using namespace llvm; 21 using namespace PatternMatch; 22 23 #define DEBUG_TYPE "instcombine" 24 25 /// Return true if the value is cheaper to scalarize than it is to leave as a 26 /// vector operation. isConstant indicates whether we're extracting one known 27 /// element. If false we're extracting a variable index. 28 static bool cheapToScalarize(Value *V, bool isConstant) { 29 if (Constant *C = dyn_cast<Constant>(V)) { 30 if (isConstant) return true; 31 32 // If all elts are the same, we can extract it and use any of the values. 33 if (Constant *Op0 = C->getAggregateElement(0U)) { 34 for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; 35 ++i) 36 if (C->getAggregateElement(i) != Op0) 37 return false; 38 return true; 39 } 40 } 41 Instruction *I = dyn_cast<Instruction>(V); 42 if (!I) return false; 43 44 // Insert element gets simplified to the inserted element or is deleted if 45 // this is constant idx extract element and its a constant idx insertelt. 46 if (I->getOpcode() == Instruction::InsertElement && isConstant && 47 isa<ConstantInt>(I->getOperand(2))) 48 return true; 49 if (I->getOpcode() == Instruction::Load && I->hasOneUse()) 50 return true; 51 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) 52 if (BO->hasOneUse() && 53 (cheapToScalarize(BO->getOperand(0), isConstant) || 54 cheapToScalarize(BO->getOperand(1), isConstant))) 55 return true; 56 if (CmpInst *CI = dyn_cast<CmpInst>(I)) 57 if (CI->hasOneUse() && 58 (cheapToScalarize(CI->getOperand(0), isConstant) || 59 cheapToScalarize(CI->getOperand(1), isConstant))) 60 return true; 61 62 return false; 63 } 64 65 // If we have a PHI node with a vector type that has only 2 uses: feed 66 // itself and be an operand of extractelement at a constant location, 67 // try to replace the PHI of the vector type with a PHI of a scalar type. 68 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { 69 // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. 70 if (!PN->hasNUses(2)) 71 return nullptr; 72 73 // If so, it's known at this point that one operand is PHI and the other is 74 // an extractelement node. Find the PHI user that is not the extractelement 75 // node. 76 auto iu = PN->user_begin(); 77 Instruction *PHIUser = dyn_cast<Instruction>(*iu); 78 if (PHIUser == cast<Instruction>(&EI)) 79 PHIUser = cast<Instruction>(*(++iu)); 80 81 // Verify that this PHI user has one use, which is the PHI itself, 82 // and that it is a binary operation which is cheap to scalarize. 83 // otherwise return NULL. 84 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || 85 !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true)) 86 return nullptr; 87 88 // Create a scalar PHI node that will replace the vector PHI node 89 // just before the current PHI node. 90 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith( 91 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN)); 92 // Scalarize each PHI operand. 93 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { 94 Value *PHIInVal = PN->getIncomingValue(i); 95 BasicBlock *inBB = PN->getIncomingBlock(i); 96 Value *Elt = EI.getIndexOperand(); 97 // If the operand is the PHI induction variable: 98 if (PHIInVal == PHIUser) { 99 // Scalarize the binary operation. Its first operand is the 100 // scalar PHI, and the second operand is extracted from the other 101 // vector operand. 102 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); 103 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; 104 Value *Op = InsertNewInstWith( 105 ExtractElementInst::Create(B0->getOperand(opId), Elt, 106 B0->getOperand(opId)->getName() + ".Elt"), 107 *B0); 108 Value *newPHIUser = InsertNewInstWith( 109 BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0); 110 scalarPHI->addIncoming(newPHIUser, inBB); 111 } else { 112 // Scalarize PHI input: 113 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, ""); 114 // Insert the new instruction into the predecessor basic block. 115 Instruction *pos = dyn_cast<Instruction>(PHIInVal); 116 BasicBlock::iterator InsertPos; 117 if (pos && !isa<PHINode>(pos)) { 118 InsertPos = ++pos->getIterator(); 119 } else { 120 InsertPos = inBB->getFirstInsertionPt(); 121 } 122 123 InsertNewInstWith(newEI, *InsertPos); 124 125 scalarPHI->addIncoming(newEI, inBB); 126 } 127 } 128 return ReplaceInstUsesWith(EI, scalarPHI); 129 } 130 131 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { 132 if (Value *V = SimplifyExtractElementInst( 133 EI.getVectorOperand(), EI.getIndexOperand(), DL, TLI, DT, AC)) 134 return ReplaceInstUsesWith(EI, V); 135 136 // If vector val is constant with all elements the same, replace EI with 137 // that element. We handle a known element # below. 138 if (Constant *C = dyn_cast<Constant>(EI.getOperand(0))) 139 if (cheapToScalarize(C, false)) 140 return ReplaceInstUsesWith(EI, C->getAggregateElement(0U)); 141 142 // If extracting a specified index from the vector, see if we can recursively 143 // find a previously computed scalar that was inserted into the vector. 144 if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { 145 unsigned IndexVal = IdxC->getZExtValue(); 146 unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); 147 148 // InstSimplify handles cases where the index is invalid. 149 assert(IndexVal < VectorWidth); 150 151 // This instruction only demands the single element from the input vector. 152 // If the input vector has a single use, simplify it based on this use 153 // property. 154 if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { 155 APInt UndefElts(VectorWidth, 0); 156 APInt DemandedMask(VectorWidth, 0); 157 DemandedMask.setBit(IndexVal); 158 if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask, 159 UndefElts)) { 160 EI.setOperand(0, V); 161 return &EI; 162 } 163 } 164 165 // If this extractelement is directly using a bitcast from a vector of 166 // the same number of elements, see if we can find the source element from 167 // it. In this case, we will end up needing to bitcast the scalars. 168 if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { 169 if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) 170 if (VT->getNumElements() == VectorWidth) 171 if (Value *Elt = findScalarElement(BCI->getOperand(0), IndexVal)) 172 return new BitCastInst(Elt, EI.getType()); 173 } 174 175 // If there's a vector PHI feeding a scalar use through this extractelement 176 // instruction, try to scalarize the PHI. 177 if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) { 178 Instruction *scalarPHI = scalarizePHI(EI, PN); 179 if (scalarPHI) 180 return scalarPHI; 181 } 182 } 183 184 if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { 185 // Push extractelement into predecessor operation if legal and 186 // profitable to do so. 187 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { 188 if (I->hasOneUse() && 189 cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { 190 Value *newEI0 = 191 Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), 192 EI.getName()+".lhs"); 193 Value *newEI1 = 194 Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), 195 EI.getName()+".rhs"); 196 return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); 197 } 198 } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { 199 // Extracting the inserted element? 200 if (IE->getOperand(2) == EI.getOperand(1)) 201 return ReplaceInstUsesWith(EI, IE->getOperand(1)); 202 // If the inserted and extracted elements are constants, they must not 203 // be the same value, extract from the pre-inserted value instead. 204 if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { 205 Worklist.AddValue(EI.getOperand(0)); 206 EI.setOperand(0, IE->getOperand(0)); 207 return &EI; 208 } 209 } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) { 210 // If this is extracting an element from a shufflevector, figure out where 211 // it came from and extract from the appropriate input element instead. 212 if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { 213 int SrcIdx = SVI->getMaskValue(Elt->getZExtValue()); 214 Value *Src; 215 unsigned LHSWidth = 216 SVI->getOperand(0)->getType()->getVectorNumElements(); 217 218 if (SrcIdx < 0) 219 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 220 if (SrcIdx < (int)LHSWidth) 221 Src = SVI->getOperand(0); 222 else { 223 SrcIdx -= LHSWidth; 224 Src = SVI->getOperand(1); 225 } 226 Type *Int32Ty = Type::getInt32Ty(EI.getContext()); 227 return ExtractElementInst::Create(Src, 228 ConstantInt::get(Int32Ty, 229 SrcIdx, false)); 230 } 231 } else if (CastInst *CI = dyn_cast<CastInst>(I)) { 232 // Canonicalize extractelement(cast) -> cast(extractelement). 233 // Bitcasts can change the number of vector elements, and they cost 234 // nothing. 235 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { 236 Value *EE = Builder->CreateExtractElement(CI->getOperand(0), 237 EI.getIndexOperand()); 238 Worklist.AddValue(EE); 239 return CastInst::Create(CI->getOpcode(), EE, EI.getType()); 240 } 241 } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) { 242 if (SI->hasOneUse()) { 243 // TODO: For a select on vectors, it might be useful to do this if it 244 // has multiple extractelement uses. For vector select, that seems to 245 // fight the vectorizer. 246 247 // If we are extracting an element from a vector select or a select on 248 // vectors, create a select on the scalars extracted from the vector 249 // arguments. 250 Value *TrueVal = SI->getTrueValue(); 251 Value *FalseVal = SI->getFalseValue(); 252 253 Value *Cond = SI->getCondition(); 254 if (Cond->getType()->isVectorTy()) { 255 Cond = Builder->CreateExtractElement(Cond, 256 EI.getIndexOperand(), 257 Cond->getName() + ".elt"); 258 } 259 260 Value *V1Elem 261 = Builder->CreateExtractElement(TrueVal, 262 EI.getIndexOperand(), 263 TrueVal->getName() + ".elt"); 264 265 Value *V2Elem 266 = Builder->CreateExtractElement(FalseVal, 267 EI.getIndexOperand(), 268 FalseVal->getName() + ".elt"); 269 return SelectInst::Create(Cond, 270 V1Elem, 271 V2Elem, 272 SI->getName() + ".elt"); 273 } 274 } 275 } 276 return nullptr; 277 } 278 279 /// If V is a shuffle of values that ONLY returns elements from either LHS or 280 /// RHS, return the shuffle mask and true. Otherwise, return false. 281 static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, 282 SmallVectorImpl<Constant*> &Mask) { 283 assert(LHS->getType() == RHS->getType() && 284 "Invalid CollectSingleShuffleElements"); 285 unsigned NumElts = V->getType()->getVectorNumElements(); 286 287 if (isa<UndefValue>(V)) { 288 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 289 return true; 290 } 291 292 if (V == LHS) { 293 for (unsigned i = 0; i != NumElts; ++i) 294 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 295 return true; 296 } 297 298 if (V == RHS) { 299 for (unsigned i = 0; i != NumElts; ++i) 300 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), 301 i+NumElts)); 302 return true; 303 } 304 305 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 306 // If this is an insert of an extract from some other vector, include it. 307 Value *VecOp = IEI->getOperand(0); 308 Value *ScalarOp = IEI->getOperand(1); 309 Value *IdxOp = IEI->getOperand(2); 310 311 if (!isa<ConstantInt>(IdxOp)) 312 return false; 313 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 314 315 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. 316 // We can handle this if the vector we are inserting into is 317 // transitively ok. 318 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 319 // If so, update the mask to reflect the inserted undef. 320 Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); 321 return true; 322 } 323 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ 324 if (isa<ConstantInt>(EI->getOperand(1))) { 325 unsigned ExtractedIdx = 326 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 327 unsigned NumLHSElts = LHS->getType()->getVectorNumElements(); 328 329 // This must be extracting from either LHS or RHS. 330 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { 331 // We can handle this if the vector we are inserting into is 332 // transitively ok. 333 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 334 // If so, update the mask to reflect the inserted value. 335 if (EI->getOperand(0) == LHS) { 336 Mask[InsertedIdx % NumElts] = 337 ConstantInt::get(Type::getInt32Ty(V->getContext()), 338 ExtractedIdx); 339 } else { 340 assert(EI->getOperand(0) == RHS); 341 Mask[InsertedIdx % NumElts] = 342 ConstantInt::get(Type::getInt32Ty(V->getContext()), 343 ExtractedIdx + NumLHSElts); 344 } 345 return true; 346 } 347 } 348 } 349 } 350 } 351 352 return false; 353 } 354 355 /// If we have insertion into a vector that is wider than the vector that we 356 /// are extracting from, try to widen the source vector to allow a single 357 /// shufflevector to replace one or more insert/extract pairs. 358 static void replaceExtractElements(InsertElementInst *InsElt, 359 ExtractElementInst *ExtElt, 360 InstCombiner &IC) { 361 VectorType *InsVecType = InsElt->getType(); 362 VectorType *ExtVecType = ExtElt->getVectorOperandType(); 363 unsigned NumInsElts = InsVecType->getVectorNumElements(); 364 unsigned NumExtElts = ExtVecType->getVectorNumElements(); 365 366 // The inserted-to vector must be wider than the extracted-from vector. 367 if (InsVecType->getElementType() != ExtVecType->getElementType() || 368 NumExtElts >= NumInsElts) 369 return; 370 371 // Create a shuffle mask to widen the extended-from vector using undefined 372 // values. The mask selects all of the values of the original vector followed 373 // by as many undefined values as needed to create a vector of the same length 374 // as the inserted-to vector. 375 SmallVector<Constant *, 16> ExtendMask; 376 IntegerType *IntType = Type::getInt32Ty(InsElt->getContext()); 377 for (unsigned i = 0; i < NumExtElts; ++i) 378 ExtendMask.push_back(ConstantInt::get(IntType, i)); 379 for (unsigned i = NumExtElts; i < NumInsElts; ++i) 380 ExtendMask.push_back(UndefValue::get(IntType)); 381 382 Value *ExtVecOp = ExtElt->getVectorOperand(); 383 auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), 384 ConstantVector::get(ExtendMask)); 385 386 // Insert the new shuffle after the vector operand of the extract is defined 387 // (as long as it's not a PHI) or at the start of the basic block of the 388 // extract, so any subsequent extracts in the same basic block can use it. 389 // TODO: Insert before the earliest ExtractElementInst that is replaced. 390 auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp); 391 if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) 392 WideVec->insertAfter(ExtVecOpInst); 393 else 394 IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt()); 395 396 // Replace extracts from the original narrow vector with extracts from the new 397 // wide vector. 398 for (User *U : ExtVecOp->users()) { 399 ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U); 400 if (!OldExt || OldExt->getParent() != WideVec->getParent()) 401 continue; 402 auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); 403 NewExt->insertAfter(WideVec); 404 IC.ReplaceInstUsesWith(*OldExt, NewExt); 405 } 406 } 407 408 /// We are building a shuffle to create V, which is a sequence of insertelement, 409 /// extractelement pairs. If PermittedRHS is set, then we must either use it or 410 /// not rely on the second vector source. Return a std::pair containing the 411 /// left and right vectors of the proposed shuffle (or 0), and set the Mask 412 /// parameter as required. 413 /// 414 /// Note: we intentionally don't try to fold earlier shuffles since they have 415 /// often been chosen carefully to be efficiently implementable on the target. 416 typedef std::pair<Value *, Value *> ShuffleOps; 417 418 static ShuffleOps collectShuffleElements(Value *V, 419 SmallVectorImpl<Constant *> &Mask, 420 Value *PermittedRHS, 421 InstCombiner &IC) { 422 assert(V->getType()->isVectorTy() && "Invalid shuffle!"); 423 unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); 424 425 if (isa<UndefValue>(V)) { 426 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 427 return std::make_pair( 428 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); 429 } 430 431 if (isa<ConstantAggregateZero>(V)) { 432 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); 433 return std::make_pair(V, nullptr); 434 } 435 436 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 437 // If this is an insert of an extract from some other vector, include it. 438 Value *VecOp = IEI->getOperand(0); 439 Value *ScalarOp = IEI->getOperand(1); 440 Value *IdxOp = IEI->getOperand(2); 441 442 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 443 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 444 unsigned ExtractedIdx = 445 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 446 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 447 448 // Either the extracted from or inserted into vector must be RHSVec, 449 // otherwise we'd end up with a shuffle of three inputs. 450 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { 451 Value *RHS = EI->getOperand(0); 452 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC); 453 assert(LR.second == nullptr || LR.second == RHS); 454 455 if (LR.first->getType() != RHS->getType()) { 456 // Although we are giving up for now, see if we can create extracts 457 // that match the inserts for another round of combining. 458 replaceExtractElements(IEI, EI, IC); 459 460 // We tried our best, but we can't find anything compatible with RHS 461 // further up the chain. Return a trivial shuffle. 462 for (unsigned i = 0; i < NumElts; ++i) 463 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i); 464 return std::make_pair(V, nullptr); 465 } 466 467 unsigned NumLHSElts = RHS->getType()->getVectorNumElements(); 468 Mask[InsertedIdx % NumElts] = 469 ConstantInt::get(Type::getInt32Ty(V->getContext()), 470 NumLHSElts+ExtractedIdx); 471 return std::make_pair(LR.first, RHS); 472 } 473 474 if (VecOp == PermittedRHS) { 475 // We've gone as far as we can: anything on the other side of the 476 // extractelement will already have been converted into a shuffle. 477 unsigned NumLHSElts = 478 EI->getOperand(0)->getType()->getVectorNumElements(); 479 for (unsigned i = 0; i != NumElts; ++i) 480 Mask.push_back(ConstantInt::get( 481 Type::getInt32Ty(V->getContext()), 482 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i)); 483 return std::make_pair(EI->getOperand(0), PermittedRHS); 484 } 485 486 // If this insertelement is a chain that comes from exactly these two 487 // vectors, return the vector and the effective shuffle. 488 if (EI->getOperand(0)->getType() == PermittedRHS->getType() && 489 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, 490 Mask)) 491 return std::make_pair(EI->getOperand(0), PermittedRHS); 492 } 493 } 494 } 495 496 // Otherwise, we can't do anything fancy. Return an identity vector. 497 for (unsigned i = 0; i != NumElts; ++i) 498 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 499 return std::make_pair(V, nullptr); 500 } 501 502 /// Try to find redundant insertvalue instructions, like the following ones: 503 /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0 504 /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0 505 /// Here the second instruction inserts values at the same indices, as the 506 /// first one, making the first one redundant. 507 /// It should be transformed to: 508 /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0 509 Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) { 510 bool IsRedundant = false; 511 ArrayRef<unsigned int> FirstIndices = I.getIndices(); 512 513 // If there is a chain of insertvalue instructions (each of them except the 514 // last one has only one use and it's another insertvalue insn from this 515 // chain), check if any of the 'children' uses the same indices as the first 516 // instruction. In this case, the first one is redundant. 517 Value *V = &I; 518 unsigned Depth = 0; 519 while (V->hasOneUse() && Depth < 10) { 520 User *U = V->user_back(); 521 auto UserInsInst = dyn_cast<InsertValueInst>(U); 522 if (!UserInsInst || U->getOperand(0) != V) 523 break; 524 if (UserInsInst->getIndices() == FirstIndices) { 525 IsRedundant = true; 526 break; 527 } 528 V = UserInsInst; 529 Depth++; 530 } 531 532 if (IsRedundant) 533 return ReplaceInstUsesWith(I, I.getOperand(0)); 534 return nullptr; 535 } 536 537 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { 538 Value *VecOp = IE.getOperand(0); 539 Value *ScalarOp = IE.getOperand(1); 540 Value *IdxOp = IE.getOperand(2); 541 542 // Inserting an undef or into an undefined place, remove this. 543 if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) 544 ReplaceInstUsesWith(IE, VecOp); 545 546 // If the inserted element was extracted from some other vector, and if the 547 // indexes are constant, try to turn this into a shufflevector operation. 548 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 549 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 550 unsigned NumInsertVectorElts = IE.getType()->getNumElements(); 551 unsigned NumExtractVectorElts = 552 EI->getOperand(0)->getType()->getVectorNumElements(); 553 unsigned ExtractedIdx = 554 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 555 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 556 557 if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract. 558 return ReplaceInstUsesWith(IE, VecOp); 559 560 if (InsertedIdx >= NumInsertVectorElts) // Out of range insert. 561 return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); 562 563 // If we are extracting a value from a vector, then inserting it right 564 // back into the same place, just use the input vector. 565 if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) 566 return ReplaceInstUsesWith(IE, VecOp); 567 568 // If this insertelement isn't used by some other insertelement, turn it 569 // (and any insertelements it points to), into one big shuffle. 570 if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) { 571 SmallVector<Constant*, 16> Mask; 572 ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this); 573 574 // The proposed shuffle may be trivial, in which case we shouldn't 575 // perform the combine. 576 if (LR.first != &IE && LR.second != &IE) { 577 // We now have a shuffle of LHS, RHS, Mask. 578 if (LR.second == nullptr) 579 LR.second = UndefValue::get(LR.first->getType()); 580 return new ShuffleVectorInst(LR.first, LR.second, 581 ConstantVector::get(Mask)); 582 } 583 } 584 } 585 } 586 587 unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); 588 APInt UndefElts(VWidth, 0); 589 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 590 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { 591 if (V != &IE) 592 return ReplaceInstUsesWith(IE, V); 593 return &IE; 594 } 595 596 return nullptr; 597 } 598 599 /// Return true if we can evaluate the specified expression tree if the vector 600 /// elements were shuffled in a different order. 601 static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask, 602 unsigned Depth = 5) { 603 // We can always reorder the elements of a constant. 604 if (isa<Constant>(V)) 605 return true; 606 607 // We won't reorder vector arguments. No IPO here. 608 Instruction *I = dyn_cast<Instruction>(V); 609 if (!I) return false; 610 611 // Two users may expect different orders of the elements. Don't try it. 612 if (!I->hasOneUse()) 613 return false; 614 615 if (Depth == 0) return false; 616 617 switch (I->getOpcode()) { 618 case Instruction::Add: 619 case Instruction::FAdd: 620 case Instruction::Sub: 621 case Instruction::FSub: 622 case Instruction::Mul: 623 case Instruction::FMul: 624 case Instruction::UDiv: 625 case Instruction::SDiv: 626 case Instruction::FDiv: 627 case Instruction::URem: 628 case Instruction::SRem: 629 case Instruction::FRem: 630 case Instruction::Shl: 631 case Instruction::LShr: 632 case Instruction::AShr: 633 case Instruction::And: 634 case Instruction::Or: 635 case Instruction::Xor: 636 case Instruction::ICmp: 637 case Instruction::FCmp: 638 case Instruction::Trunc: 639 case Instruction::ZExt: 640 case Instruction::SExt: 641 case Instruction::FPToUI: 642 case Instruction::FPToSI: 643 case Instruction::UIToFP: 644 case Instruction::SIToFP: 645 case Instruction::FPTrunc: 646 case Instruction::FPExt: 647 case Instruction::GetElementPtr: { 648 for (Value *Operand : I->operands()) { 649 if (!CanEvaluateShuffled(Operand, Mask, Depth-1)) 650 return false; 651 } 652 return true; 653 } 654 case Instruction::InsertElement: { 655 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); 656 if (!CI) return false; 657 int ElementNumber = CI->getLimitedValue(); 658 659 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' 660 // can't put an element into multiple indices. 661 bool SeenOnce = false; 662 for (int i = 0, e = Mask.size(); i != e; ++i) { 663 if (Mask[i] == ElementNumber) { 664 if (SeenOnce) 665 return false; 666 SeenOnce = true; 667 } 668 } 669 return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1); 670 } 671 } 672 return false; 673 } 674 675 /// Rebuild a new instruction just like 'I' but with the new operands given. 676 /// In the event of type mismatch, the type of the operands is correct. 677 static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) { 678 // We don't want to use the IRBuilder here because we want the replacement 679 // instructions to appear next to 'I', not the builder's insertion point. 680 switch (I->getOpcode()) { 681 case Instruction::Add: 682 case Instruction::FAdd: 683 case Instruction::Sub: 684 case Instruction::FSub: 685 case Instruction::Mul: 686 case Instruction::FMul: 687 case Instruction::UDiv: 688 case Instruction::SDiv: 689 case Instruction::FDiv: 690 case Instruction::URem: 691 case Instruction::SRem: 692 case Instruction::FRem: 693 case Instruction::Shl: 694 case Instruction::LShr: 695 case Instruction::AShr: 696 case Instruction::And: 697 case Instruction::Or: 698 case Instruction::Xor: { 699 BinaryOperator *BO = cast<BinaryOperator>(I); 700 assert(NewOps.size() == 2 && "binary operator with #ops != 2"); 701 BinaryOperator *New = 702 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(), 703 NewOps[0], NewOps[1], "", BO); 704 if (isa<OverflowingBinaryOperator>(BO)) { 705 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); 706 New->setHasNoSignedWrap(BO->hasNoSignedWrap()); 707 } 708 if (isa<PossiblyExactOperator>(BO)) { 709 New->setIsExact(BO->isExact()); 710 } 711 if (isa<FPMathOperator>(BO)) 712 New->copyFastMathFlags(I); 713 return New; 714 } 715 case Instruction::ICmp: 716 assert(NewOps.size() == 2 && "icmp with #ops != 2"); 717 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(), 718 NewOps[0], NewOps[1]); 719 case Instruction::FCmp: 720 assert(NewOps.size() == 2 && "fcmp with #ops != 2"); 721 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(), 722 NewOps[0], NewOps[1]); 723 case Instruction::Trunc: 724 case Instruction::ZExt: 725 case Instruction::SExt: 726 case Instruction::FPToUI: 727 case Instruction::FPToSI: 728 case Instruction::UIToFP: 729 case Instruction::SIToFP: 730 case Instruction::FPTrunc: 731 case Instruction::FPExt: { 732 // It's possible that the mask has a different number of elements from 733 // the original cast. We recompute the destination type to match the mask. 734 Type *DestTy = 735 VectorType::get(I->getType()->getScalarType(), 736 NewOps[0]->getType()->getVectorNumElements()); 737 assert(NewOps.size() == 1 && "cast with #ops != 1"); 738 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy, 739 "", I); 740 } 741 case Instruction::GetElementPtr: { 742 Value *Ptr = NewOps[0]; 743 ArrayRef<Value*> Idx = NewOps.slice(1); 744 GetElementPtrInst *GEP = GetElementPtrInst::Create( 745 cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I); 746 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); 747 return GEP; 748 } 749 } 750 llvm_unreachable("failed to rebuild vector instructions"); 751 } 752 753 Value * 754 InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { 755 // Mask.size() does not need to be equal to the number of vector elements. 756 757 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); 758 if (isa<UndefValue>(V)) { 759 return UndefValue::get(VectorType::get(V->getType()->getScalarType(), 760 Mask.size())); 761 } 762 if (isa<ConstantAggregateZero>(V)) { 763 return ConstantAggregateZero::get( 764 VectorType::get(V->getType()->getScalarType(), 765 Mask.size())); 766 } 767 if (Constant *C = dyn_cast<Constant>(V)) { 768 SmallVector<Constant *, 16> MaskValues; 769 for (int i = 0, e = Mask.size(); i != e; ++i) { 770 if (Mask[i] == -1) 771 MaskValues.push_back(UndefValue::get(Builder->getInt32Ty())); 772 else 773 MaskValues.push_back(Builder->getInt32(Mask[i])); 774 } 775 return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), 776 ConstantVector::get(MaskValues)); 777 } 778 779 Instruction *I = cast<Instruction>(V); 780 switch (I->getOpcode()) { 781 case Instruction::Add: 782 case Instruction::FAdd: 783 case Instruction::Sub: 784 case Instruction::FSub: 785 case Instruction::Mul: 786 case Instruction::FMul: 787 case Instruction::UDiv: 788 case Instruction::SDiv: 789 case Instruction::FDiv: 790 case Instruction::URem: 791 case Instruction::SRem: 792 case Instruction::FRem: 793 case Instruction::Shl: 794 case Instruction::LShr: 795 case Instruction::AShr: 796 case Instruction::And: 797 case Instruction::Or: 798 case Instruction::Xor: 799 case Instruction::ICmp: 800 case Instruction::FCmp: 801 case Instruction::Trunc: 802 case Instruction::ZExt: 803 case Instruction::SExt: 804 case Instruction::FPToUI: 805 case Instruction::FPToSI: 806 case Instruction::UIToFP: 807 case Instruction::SIToFP: 808 case Instruction::FPTrunc: 809 case Instruction::FPExt: 810 case Instruction::Select: 811 case Instruction::GetElementPtr: { 812 SmallVector<Value*, 8> NewOps; 813 bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements()); 814 for (int i = 0, e = I->getNumOperands(); i != e; ++i) { 815 Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask); 816 NewOps.push_back(V); 817 NeedsRebuild |= (V != I->getOperand(i)); 818 } 819 if (NeedsRebuild) { 820 return buildNew(I, NewOps); 821 } 822 return I; 823 } 824 case Instruction::InsertElement: { 825 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue(); 826 827 // The insertelement was inserting at Element. Figure out which element 828 // that becomes after shuffling. The answer is guaranteed to be unique 829 // by CanEvaluateShuffled. 830 bool Found = false; 831 int Index = 0; 832 for (int e = Mask.size(); Index != e; ++Index) { 833 if (Mask[Index] == Element) { 834 Found = true; 835 break; 836 } 837 } 838 839 // If element is not in Mask, no need to handle the operand 1 (element to 840 // be inserted). Just evaluate values in operand 0 according to Mask. 841 if (!Found) 842 return EvaluateInDifferentElementOrder(I->getOperand(0), Mask); 843 844 Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask); 845 return InsertElementInst::Create(V, I->getOperand(1), 846 Builder->getInt32(Index), "", I); 847 } 848 } 849 llvm_unreachable("failed to reorder elements of vector instruction!"); 850 } 851 852 static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask, 853 bool &isLHSID, bool &isRHSID) { 854 isLHSID = isRHSID = true; 855 856 for (unsigned i = 0, e = Mask.size(); i != e; ++i) { 857 if (Mask[i] < 0) continue; // Ignore undef values. 858 // Is this an identity shuffle of the LHS value? 859 isLHSID &= (Mask[i] == (int)i); 860 861 // Is this an identity shuffle of the RHS value? 862 isRHSID &= (Mask[i]-e == i); 863 } 864 } 865 866 // Returns true if the shuffle is extracting a contiguous range of values from 867 // LHS, for example: 868 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 869 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP| 870 // Shuffles to: |EE|FF|GG|HH| 871 // +--+--+--+--+ 872 static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, 873 SmallVector<int, 16> &Mask) { 874 unsigned LHSElems = 875 cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements(); 876 unsigned MaskElems = Mask.size(); 877 unsigned BegIdx = Mask.front(); 878 unsigned EndIdx = Mask.back(); 879 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1) 880 return false; 881 for (unsigned I = 0; I != MaskElems; ++I) 882 if (static_cast<unsigned>(Mask[I]) != BegIdx + I) 883 return false; 884 return true; 885 } 886 887 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 888 Value *LHS = SVI.getOperand(0); 889 Value *RHS = SVI.getOperand(1); 890 SmallVector<int, 16> Mask = SVI.getShuffleMask(); 891 Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); 892 893 bool MadeChange = false; 894 895 // Undefined shuffle mask -> undefined value. 896 if (isa<UndefValue>(SVI.getOperand(2))) 897 return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); 898 899 unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); 900 901 APInt UndefElts(VWidth, 0); 902 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 903 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { 904 if (V != &SVI) 905 return ReplaceInstUsesWith(SVI, V); 906 LHS = SVI.getOperand(0); 907 RHS = SVI.getOperand(1); 908 MadeChange = true; 909 } 910 911 unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements(); 912 913 // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') 914 // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). 915 if (LHS == RHS || isa<UndefValue>(LHS)) { 916 if (isa<UndefValue>(LHS) && LHS == RHS) { 917 // shuffle(undef,undef,mask) -> undef. 918 Value *Result = (VWidth == LHSWidth) 919 ? LHS : UndefValue::get(SVI.getType()); 920 return ReplaceInstUsesWith(SVI, Result); 921 } 922 923 // Remap any references to RHS to use LHS. 924 SmallVector<Constant*, 16> Elts; 925 for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) { 926 if (Mask[i] < 0) { 927 Elts.push_back(UndefValue::get(Int32Ty)); 928 continue; 929 } 930 931 if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || 932 (Mask[i] < (int)e && isa<UndefValue>(LHS))) { 933 Mask[i] = -1; // Turn into undef. 934 Elts.push_back(UndefValue::get(Int32Ty)); 935 } else { 936 Mask[i] = Mask[i] % e; // Force to LHS. 937 Elts.push_back(ConstantInt::get(Int32Ty, Mask[i])); 938 } 939 } 940 SVI.setOperand(0, SVI.getOperand(1)); 941 SVI.setOperand(1, UndefValue::get(RHS->getType())); 942 SVI.setOperand(2, ConstantVector::get(Elts)); 943 LHS = SVI.getOperand(0); 944 RHS = SVI.getOperand(1); 945 MadeChange = true; 946 } 947 948 if (VWidth == LHSWidth) { 949 // Analyze the shuffle, are the LHS or RHS and identity shuffles? 950 bool isLHSID, isRHSID; 951 recognizeIdentityMask(Mask, isLHSID, isRHSID); 952 953 // Eliminate identity shuffles. 954 if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); 955 if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); 956 } 957 958 if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) { 959 Value *V = EvaluateInDifferentElementOrder(LHS, Mask); 960 return ReplaceInstUsesWith(SVI, V); 961 } 962 963 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to 964 // a non-vector type. We can instead bitcast the original vector followed by 965 // an extract of the desired element: 966 // 967 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, 968 // <4 x i32> <i32 0, i32 1, i32 2, i32 3> 969 // %1 = bitcast <4 x i8> %sroa to i32 970 // Becomes: 971 // %bc = bitcast <16 x i8> %in to <4 x i32> 972 // %ext = extractelement <4 x i32> %bc, i32 0 973 // 974 // If the shuffle is extracting a contiguous range of values from the input 975 // vector then each use which is a bitcast of the extracted size can be 976 // replaced. This will work if the vector types are compatible, and the begin 977 // index is aligned to a value in the casted vector type. If the begin index 978 // isn't aligned then we can shuffle the original vector (keeping the same 979 // vector type) before extracting. 980 // 981 // This code will bail out if the target type is fundamentally incompatible 982 // with vectors of the source type. 983 // 984 // Example of <16 x i8>, target type i32: 985 // Index range [4,8): v-----------v Will work. 986 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 987 // <16 x i8>: | | | | | | | | | | | | | | | | | 988 // <4 x i32>: | | | | | 989 // +-----------+-----------+-----------+-----------+ 990 // Index range [6,10): ^-----------^ Needs an extra shuffle. 991 // Target type i40: ^--------------^ Won't work, bail. 992 if (isShuffleExtractingFromLHS(SVI, Mask)) { 993 Value *V = LHS; 994 unsigned MaskElems = Mask.size(); 995 unsigned BegIdx = Mask.front(); 996 VectorType *SrcTy = cast<VectorType>(V->getType()); 997 unsigned VecBitWidth = SrcTy->getBitWidth(); 998 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType()); 999 assert(SrcElemBitWidth && "vector elements must have a bitwidth"); 1000 unsigned SrcNumElems = SrcTy->getNumElements(); 1001 SmallVector<BitCastInst *, 8> BCs; 1002 DenseMap<Type *, Value *> NewBCs; 1003 for (User *U : SVI.users()) 1004 if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) 1005 if (!BC->use_empty()) 1006 // Only visit bitcasts that weren't previously handled. 1007 BCs.push_back(BC); 1008 for (BitCastInst *BC : BCs) { 1009 Type *TgtTy = BC->getDestTy(); 1010 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy); 1011 if (!TgtElemBitWidth) 1012 continue; 1013 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth; 1014 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth; 1015 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth); 1016 if (!VecBitWidthsEqual) 1017 continue; 1018 if (!VectorType::isValidElementType(TgtTy)) 1019 continue; 1020 VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems); 1021 if (!BegIsAligned) { 1022 // Shuffle the input so [0,NumElements) contains the output, and 1023 // [NumElems,SrcNumElems) is undef. 1024 SmallVector<Constant *, 16> ShuffleMask(SrcNumElems, 1025 UndefValue::get(Int32Ty)); 1026 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) 1027 ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx); 1028 V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()), 1029 ConstantVector::get(ShuffleMask), 1030 SVI.getName() + ".extract"); 1031 BegIdx = 0; 1032 } 1033 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth; 1034 assert(SrcElemsPerTgtElem); 1035 BegIdx /= SrcElemsPerTgtElem; 1036 bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end(); 1037 auto *NewBC = 1038 BCAlreadyExists 1039 ? NewBCs[CastSrcTy] 1040 : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); 1041 if (!BCAlreadyExists) 1042 NewBCs[CastSrcTy] = NewBC; 1043 auto *Ext = Builder->CreateExtractElement( 1044 NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract"); 1045 // The shufflevector isn't being replaced: the bitcast that used it 1046 // is. InstCombine will visit the newly-created instructions. 1047 ReplaceInstUsesWith(*BC, Ext); 1048 MadeChange = true; 1049 } 1050 } 1051 1052 // If the LHS is a shufflevector itself, see if we can combine it with this 1053 // one without producing an unusual shuffle. 1054 // Cases that might be simplified: 1055 // 1. 1056 // x1=shuffle(v1,v2,mask1) 1057 // x=shuffle(x1,undef,mask) 1058 // ==> 1059 // x=shuffle(v1,undef,newMask) 1060 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 1061 // 2. 1062 // x1=shuffle(v1,undef,mask1) 1063 // x=shuffle(x1,x2,mask) 1064 // where v1.size() == mask1.size() 1065 // ==> 1066 // x=shuffle(v1,x2,newMask) 1067 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] 1068 // 3. 1069 // x2=shuffle(v2,undef,mask2) 1070 // x=shuffle(x1,x2,mask) 1071 // where v2.size() == mask2.size() 1072 // ==> 1073 // x=shuffle(x1,v2,newMask) 1074 // newMask[i] = (mask[i] < x1.size()) 1075 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() 1076 // 4. 1077 // x1=shuffle(v1,undef,mask1) 1078 // x2=shuffle(v2,undef,mask2) 1079 // x=shuffle(x1,x2,mask) 1080 // where v1.size() == v2.size() 1081 // ==> 1082 // x=shuffle(v1,v2,newMask) 1083 // newMask[i] = (mask[i] < x1.size()) 1084 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() 1085 // 1086 // Here we are really conservative: 1087 // we are absolutely afraid of producing a shuffle mask not in the input 1088 // program, because the code gen may not be smart enough to turn a merged 1089 // shuffle into two specific shuffles: it may produce worse code. As such, 1090 // we only merge two shuffles if the result is either a splat or one of the 1091 // input shuffle masks. In this case, merging the shuffles just removes 1092 // one instruction, which we know is safe. This is good for things like 1093 // turning: (splat(splat)) -> splat, or 1094 // merge(V[0..n], V[n+1..2n]) -> V[0..2n] 1095 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); 1096 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); 1097 if (LHSShuffle) 1098 if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) 1099 LHSShuffle = nullptr; 1100 if (RHSShuffle) 1101 if (!isa<UndefValue>(RHSShuffle->getOperand(1))) 1102 RHSShuffle = nullptr; 1103 if (!LHSShuffle && !RHSShuffle) 1104 return MadeChange ? &SVI : nullptr; 1105 1106 Value* LHSOp0 = nullptr; 1107 Value* LHSOp1 = nullptr; 1108 Value* RHSOp0 = nullptr; 1109 unsigned LHSOp0Width = 0; 1110 unsigned RHSOp0Width = 0; 1111 if (LHSShuffle) { 1112 LHSOp0 = LHSShuffle->getOperand(0); 1113 LHSOp1 = LHSShuffle->getOperand(1); 1114 LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements(); 1115 } 1116 if (RHSShuffle) { 1117 RHSOp0 = RHSShuffle->getOperand(0); 1118 RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements(); 1119 } 1120 Value* newLHS = LHS; 1121 Value* newRHS = RHS; 1122 if (LHSShuffle) { 1123 // case 1 1124 if (isa<UndefValue>(RHS)) { 1125 newLHS = LHSOp0; 1126 newRHS = LHSOp1; 1127 } 1128 // case 2 or 4 1129 else if (LHSOp0Width == LHSWidth) { 1130 newLHS = LHSOp0; 1131 } 1132 } 1133 // case 3 or 4 1134 if (RHSShuffle && RHSOp0Width == LHSWidth) { 1135 newRHS = RHSOp0; 1136 } 1137 // case 4 1138 if (LHSOp0 == RHSOp0) { 1139 newLHS = LHSOp0; 1140 newRHS = nullptr; 1141 } 1142 1143 if (newLHS == LHS && newRHS == RHS) 1144 return MadeChange ? &SVI : nullptr; 1145 1146 SmallVector<int, 16> LHSMask; 1147 SmallVector<int, 16> RHSMask; 1148 if (newLHS != LHS) 1149 LHSMask = LHSShuffle->getShuffleMask(); 1150 if (RHSShuffle && newRHS != RHS) 1151 RHSMask = RHSShuffle->getShuffleMask(); 1152 1153 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; 1154 SmallVector<int, 16> newMask; 1155 bool isSplat = true; 1156 int SplatElt = -1; 1157 // Create a new mask for the new ShuffleVectorInst so that the new 1158 // ShuffleVectorInst is equivalent to the original one. 1159 for (unsigned i = 0; i < VWidth; ++i) { 1160 int eltMask; 1161 if (Mask[i] < 0) { 1162 // This element is an undef value. 1163 eltMask = -1; 1164 } else if (Mask[i] < (int)LHSWidth) { 1165 // This element is from left hand side vector operand. 1166 // 1167 // If LHS is going to be replaced (case 1, 2, or 4), calculate the 1168 // new mask value for the element. 1169 if (newLHS != LHS) { 1170 eltMask = LHSMask[Mask[i]]; 1171 // If the value selected is an undef value, explicitly specify it 1172 // with a -1 mask value. 1173 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) 1174 eltMask = -1; 1175 } else 1176 eltMask = Mask[i]; 1177 } else { 1178 // This element is from right hand side vector operand 1179 // 1180 // If the value selected is an undef value, explicitly specify it 1181 // with a -1 mask value. (case 1) 1182 if (isa<UndefValue>(RHS)) 1183 eltMask = -1; 1184 // If RHS is going to be replaced (case 3 or 4), calculate the 1185 // new mask value for the element. 1186 else if (newRHS != RHS) { 1187 eltMask = RHSMask[Mask[i]-LHSWidth]; 1188 // If the value selected is an undef value, explicitly specify it 1189 // with a -1 mask value. 1190 if (eltMask >= (int)RHSOp0Width) { 1191 assert(isa<UndefValue>(RHSShuffle->getOperand(1)) 1192 && "should have been check above"); 1193 eltMask = -1; 1194 } 1195 } else 1196 eltMask = Mask[i]-LHSWidth; 1197 1198 // If LHS's width is changed, shift the mask value accordingly. 1199 // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any 1200 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. 1201 // If newRHS == newLHS, we want to remap any references from newRHS to 1202 // newLHS so that we can properly identify splats that may occur due to 1203 // obfuscation across the two vectors. 1204 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS) 1205 eltMask += newLHSWidth; 1206 } 1207 1208 // Check if this could still be a splat. 1209 if (eltMask >= 0) { 1210 if (SplatElt >= 0 && SplatElt != eltMask) 1211 isSplat = false; 1212 SplatElt = eltMask; 1213 } 1214 1215 newMask.push_back(eltMask); 1216 } 1217 1218 // If the result mask is equal to one of the original shuffle masks, 1219 // or is a splat, do the replacement. 1220 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { 1221 SmallVector<Constant*, 16> Elts; 1222 for (unsigned i = 0, e = newMask.size(); i != e; ++i) { 1223 if (newMask[i] < 0) { 1224 Elts.push_back(UndefValue::get(Int32Ty)); 1225 } else { 1226 Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); 1227 } 1228 } 1229 if (!newRHS) 1230 newRHS = UndefValue::get(newLHS->getType()); 1231 return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); 1232 } 1233 1234 // If the result mask is an identity, replace uses of this instruction with 1235 // corresponding argument. 1236 bool isLHSID, isRHSID; 1237 recognizeIdentityMask(newMask, isLHSID, isRHSID); 1238 if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS); 1239 if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS); 1240 1241 return MadeChange ? &SVI : nullptr; 1242 } 1243