1 //===- InstCombineVectorOps.cpp -------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements instcombine for ExtractElement, InsertElement and 11 // ShuffleVector. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "InstCombine.h" 16 #include "llvm/IR/PatternMatch.h" 17 using namespace llvm; 18 using namespace PatternMatch; 19 20 #define DEBUG_TYPE "instcombine" 21 22 /// CheapToScalarize - Return true if the value is cheaper to scalarize than it 23 /// is to leave as a vector operation. isConstant indicates whether we're 24 /// extracting one known element. If false we're extracting a variable index. 25 static bool CheapToScalarize(Value *V, bool isConstant) { 26 if (Constant *C = dyn_cast<Constant>(V)) { 27 if (isConstant) return true; 28 29 // If all elts are the same, we can extract it and use any of the values. 30 if (Constant *Op0 = C->getAggregateElement(0U)) { 31 for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; 32 ++i) 33 if (C->getAggregateElement(i) != Op0) 34 return false; 35 return true; 36 } 37 } 38 Instruction *I = dyn_cast<Instruction>(V); 39 if (!I) return false; 40 41 // Insert element gets simplified to the inserted element or is deleted if 42 // this is constant idx extract element and its a constant idx insertelt. 43 if (I->getOpcode() == Instruction::InsertElement && isConstant && 44 isa<ConstantInt>(I->getOperand(2))) 45 return true; 46 if (I->getOpcode() == Instruction::Load && I->hasOneUse()) 47 return true; 48 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) 49 if (BO->hasOneUse() && 50 (CheapToScalarize(BO->getOperand(0), isConstant) || 51 CheapToScalarize(BO->getOperand(1), isConstant))) 52 return true; 53 if (CmpInst *CI = dyn_cast<CmpInst>(I)) 54 if (CI->hasOneUse() && 55 (CheapToScalarize(CI->getOperand(0), isConstant) || 56 CheapToScalarize(CI->getOperand(1), isConstant))) 57 return true; 58 59 return false; 60 } 61 62 /// FindScalarElement - Given a vector and an element number, see if the scalar 63 /// value is already around as a register, for example if it were inserted then 64 /// extracted from the vector. 65 static Value *FindScalarElement(Value *V, unsigned EltNo) { 66 assert(V->getType()->isVectorTy() && "Not looking at a vector?"); 67 VectorType *VTy = cast<VectorType>(V->getType()); 68 unsigned Width = VTy->getNumElements(); 69 if (EltNo >= Width) // Out of range access. 70 return UndefValue::get(VTy->getElementType()); 71 72 if (Constant *C = dyn_cast<Constant>(V)) 73 return C->getAggregateElement(EltNo); 74 75 if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { 76 // If this is an insert to a variable element, we don't know what it is. 77 if (!isa<ConstantInt>(III->getOperand(2))) 78 return nullptr; 79 unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); 80 81 // If this is an insert to the element we are looking for, return the 82 // inserted value. 83 if (EltNo == IIElt) 84 return III->getOperand(1); 85 86 // Otherwise, the insertelement doesn't modify the value, recurse on its 87 // vector input. 88 return FindScalarElement(III->getOperand(0), EltNo); 89 } 90 91 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { 92 unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); 93 int InEl = SVI->getMaskValue(EltNo); 94 if (InEl < 0) 95 return UndefValue::get(VTy->getElementType()); 96 if (InEl < (int)LHSWidth) 97 return FindScalarElement(SVI->getOperand(0), InEl); 98 return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); 99 } 100 101 // Extract a value from a vector add operation with a constant zero. 102 Value *Val = nullptr; Constant *Con = nullptr; 103 if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { 104 if (Con->getAggregateElement(EltNo)->isNullValue()) 105 return FindScalarElement(Val, EltNo); 106 } 107 108 // Otherwise, we don't know. 109 return nullptr; 110 } 111 112 // If we have a PHI node with a vector type that has only 2 uses: feed 113 // itself and be an operand of extractelement at a constant location, 114 // try to replace the PHI of the vector type with a PHI of a scalar type. 115 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { 116 // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. 117 if (!PN->hasNUses(2)) 118 return nullptr; 119 120 // If so, it's known at this point that one operand is PHI and the other is 121 // an extractelement node. Find the PHI user that is not the extractelement 122 // node. 123 auto iu = PN->user_begin(); 124 Instruction *PHIUser = dyn_cast<Instruction>(*iu); 125 if (PHIUser == cast<Instruction>(&EI)) 126 PHIUser = cast<Instruction>(*(++iu)); 127 128 // Verify that this PHI user has one use, which is the PHI itself, 129 // and that it is a binary operation which is cheap to scalarize. 130 // otherwise return NULL. 131 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || 132 !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true)) 133 return nullptr; 134 135 // Create a scalar PHI node that will replace the vector PHI node 136 // just before the current PHI node. 137 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith( 138 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN)); 139 // Scalarize each PHI operand. 140 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { 141 Value *PHIInVal = PN->getIncomingValue(i); 142 BasicBlock *inBB = PN->getIncomingBlock(i); 143 Value *Elt = EI.getIndexOperand(); 144 // If the operand is the PHI induction variable: 145 if (PHIInVal == PHIUser) { 146 // Scalarize the binary operation. Its first operand is the 147 // scalar PHI and the second operand is extracted from the other 148 // vector operand. 149 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); 150 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; 151 Value *Op = InsertNewInstWith( 152 ExtractElementInst::Create(B0->getOperand(opId), Elt, 153 B0->getOperand(opId)->getName() + ".Elt"), 154 *B0); 155 Value *newPHIUser = InsertNewInstWith( 156 BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0); 157 scalarPHI->addIncoming(newPHIUser, inBB); 158 } else { 159 // Scalarize PHI input: 160 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, ""); 161 // Insert the new instruction into the predecessor basic block. 162 Instruction *pos = dyn_cast<Instruction>(PHIInVal); 163 BasicBlock::iterator InsertPos; 164 if (pos && !isa<PHINode>(pos)) { 165 InsertPos = pos; 166 ++InsertPos; 167 } else { 168 InsertPos = inBB->getFirstInsertionPt(); 169 } 170 171 InsertNewInstWith(newEI, *InsertPos); 172 173 scalarPHI->addIncoming(newEI, inBB); 174 } 175 } 176 return ReplaceInstUsesWith(EI, scalarPHI); 177 } 178 179 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { 180 // If vector val is constant with all elements the same, replace EI with 181 // that element. We handle a known element # below. 182 if (Constant *C = dyn_cast<Constant>(EI.getOperand(0))) 183 if (CheapToScalarize(C, false)) 184 return ReplaceInstUsesWith(EI, C->getAggregateElement(0U)); 185 186 // If extracting a specified index from the vector, see if we can recursively 187 // find a previously computed scalar that was inserted into the vector. 188 if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { 189 unsigned IndexVal = IdxC->getZExtValue(); 190 unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); 191 192 // If this is extracting an invalid index, turn this into undef, to avoid 193 // crashing the code below. 194 if (IndexVal >= VectorWidth) 195 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 196 197 // This instruction only demands the single element from the input vector. 198 // If the input vector has a single use, simplify it based on this use 199 // property. 200 if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { 201 APInt UndefElts(VectorWidth, 0); 202 APInt DemandedMask(VectorWidth, 0); 203 DemandedMask.setBit(IndexVal); 204 if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), 205 DemandedMask, UndefElts)) { 206 EI.setOperand(0, V); 207 return &EI; 208 } 209 } 210 211 if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) 212 return ReplaceInstUsesWith(EI, Elt); 213 214 // If the this extractelement is directly using a bitcast from a vector of 215 // the same number of elements, see if we can find the source element from 216 // it. In this case, we will end up needing to bitcast the scalars. 217 if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { 218 if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) 219 if (VT->getNumElements() == VectorWidth) 220 if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) 221 return new BitCastInst(Elt, EI.getType()); 222 } 223 224 // If there's a vector PHI feeding a scalar use through this extractelement 225 // instruction, try to scalarize the PHI. 226 if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) { 227 Instruction *scalarPHI = scalarizePHI(EI, PN); 228 if (scalarPHI) 229 return scalarPHI; 230 } 231 } 232 233 if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { 234 // Push extractelement into predecessor operation if legal and 235 // profitable to do so 236 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { 237 if (I->hasOneUse() && 238 CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { 239 Value *newEI0 = 240 Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), 241 EI.getName()+".lhs"); 242 Value *newEI1 = 243 Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), 244 EI.getName()+".rhs"); 245 return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); 246 } 247 } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { 248 // Extracting the inserted element? 249 if (IE->getOperand(2) == EI.getOperand(1)) 250 return ReplaceInstUsesWith(EI, IE->getOperand(1)); 251 // If the inserted and extracted elements are constants, they must not 252 // be the same value, extract from the pre-inserted value instead. 253 if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { 254 Worklist.AddValue(EI.getOperand(0)); 255 EI.setOperand(0, IE->getOperand(0)); 256 return &EI; 257 } 258 } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) { 259 // If this is extracting an element from a shufflevector, figure out where 260 // it came from and extract from the appropriate input element instead. 261 if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { 262 int SrcIdx = SVI->getMaskValue(Elt->getZExtValue()); 263 Value *Src; 264 unsigned LHSWidth = 265 SVI->getOperand(0)->getType()->getVectorNumElements(); 266 267 if (SrcIdx < 0) 268 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 269 if (SrcIdx < (int)LHSWidth) 270 Src = SVI->getOperand(0); 271 else { 272 SrcIdx -= LHSWidth; 273 Src = SVI->getOperand(1); 274 } 275 Type *Int32Ty = Type::getInt32Ty(EI.getContext()); 276 return ExtractElementInst::Create(Src, 277 ConstantInt::get(Int32Ty, 278 SrcIdx, false)); 279 } 280 } else if (CastInst *CI = dyn_cast<CastInst>(I)) { 281 // Canonicalize extractelement(cast) -> cast(extractelement) 282 // bitcasts can change the number of vector elements and they cost nothing 283 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { 284 Value *EE = Builder->CreateExtractElement(CI->getOperand(0), 285 EI.getIndexOperand()); 286 Worklist.AddValue(EE); 287 return CastInst::Create(CI->getOpcode(), EE, EI.getType()); 288 } 289 } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) { 290 if (SI->hasOneUse()) { 291 // TODO: For a select on vectors, it might be useful to do this if it 292 // has multiple extractelement uses. For vector select, that seems to 293 // fight the vectorizer. 294 295 // If we are extracting an element from a vector select or a select on 296 // vectors, a select on the scalars extracted from the vector arguments. 297 Value *TrueVal = SI->getTrueValue(); 298 Value *FalseVal = SI->getFalseValue(); 299 300 Value *Cond = SI->getCondition(); 301 if (Cond->getType()->isVectorTy()) { 302 Cond = Builder->CreateExtractElement(Cond, 303 EI.getIndexOperand(), 304 Cond->getName() + ".elt"); 305 } 306 307 Value *V1Elem 308 = Builder->CreateExtractElement(TrueVal, 309 EI.getIndexOperand(), 310 TrueVal->getName() + ".elt"); 311 312 Value *V2Elem 313 = Builder->CreateExtractElement(FalseVal, 314 EI.getIndexOperand(), 315 FalseVal->getName() + ".elt"); 316 return SelectInst::Create(Cond, 317 V1Elem, 318 V2Elem, 319 SI->getName() + ".elt"); 320 } 321 } 322 } 323 return nullptr; 324 } 325 326 /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns 327 /// elements from either LHS or RHS, return the shuffle mask and true. 328 /// Otherwise, return false. 329 static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, 330 SmallVectorImpl<Constant*> &Mask) { 331 assert(LHS->getType() == RHS->getType() && 332 "Invalid CollectSingleShuffleElements"); 333 unsigned NumElts = V->getType()->getVectorNumElements(); 334 335 if (isa<UndefValue>(V)) { 336 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 337 return true; 338 } 339 340 if (V == LHS) { 341 for (unsigned i = 0; i != NumElts; ++i) 342 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 343 return true; 344 } 345 346 if (V == RHS) { 347 for (unsigned i = 0; i != NumElts; ++i) 348 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), 349 i+NumElts)); 350 return true; 351 } 352 353 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 354 // If this is an insert of an extract from some other vector, include it. 355 Value *VecOp = IEI->getOperand(0); 356 Value *ScalarOp = IEI->getOperand(1); 357 Value *IdxOp = IEI->getOperand(2); 358 359 if (!isa<ConstantInt>(IdxOp)) 360 return false; 361 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 362 363 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. 364 // Okay, we can handle this if the vector we are insertinting into is 365 // transitively ok. 366 if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 367 // If so, update the mask to reflect the inserted undef. 368 Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); 369 return true; 370 } 371 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ 372 if (isa<ConstantInt>(EI->getOperand(1))) { 373 unsigned ExtractedIdx = 374 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 375 unsigned NumLHSElts = LHS->getType()->getVectorNumElements(); 376 377 // This must be extracting from either LHS or RHS. 378 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { 379 // Okay, we can handle this if the vector we are insertinting into is 380 // transitively ok. 381 if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 382 // If so, update the mask to reflect the inserted value. 383 if (EI->getOperand(0) == LHS) { 384 Mask[InsertedIdx % NumElts] = 385 ConstantInt::get(Type::getInt32Ty(V->getContext()), 386 ExtractedIdx); 387 } else { 388 assert(EI->getOperand(0) == RHS); 389 Mask[InsertedIdx % NumElts] = 390 ConstantInt::get(Type::getInt32Ty(V->getContext()), 391 ExtractedIdx + NumLHSElts); 392 } 393 return true; 394 } 395 } 396 } 397 } 398 } 399 400 return false; 401 } 402 403 404 /// We are building a shuffle to create V, which is a sequence of insertelement, 405 /// extractelement pairs. If PermittedRHS is set, then we must either use it or 406 /// not rely on the second vector source. Return an std::pair containing the 407 /// left and right vectors of the proposed shuffle (or 0), and set the Mask 408 /// parameter as required. 409 /// 410 /// Note: we intentionally don't try to fold earlier shuffles since they have 411 /// often been chosen carefully to be efficiently implementable on the target. 412 typedef std::pair<Value *, Value *> ShuffleOps; 413 414 static ShuffleOps CollectShuffleElements(Value *V, 415 SmallVectorImpl<Constant *> &Mask, 416 Value *PermittedRHS) { 417 assert(V->getType()->isVectorTy() && "Invalid shuffle!"); 418 unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); 419 420 if (isa<UndefValue>(V)) { 421 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 422 return std::make_pair( 423 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); 424 } 425 426 if (isa<ConstantAggregateZero>(V)) { 427 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); 428 return std::make_pair(V, nullptr); 429 } 430 431 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 432 // If this is an insert of an extract from some other vector, include it. 433 Value *VecOp = IEI->getOperand(0); 434 Value *ScalarOp = IEI->getOperand(1); 435 Value *IdxOp = IEI->getOperand(2); 436 437 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 438 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 439 unsigned ExtractedIdx = 440 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 441 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 442 443 // Either the extracted from or inserted into vector must be RHSVec, 444 // otherwise we'd end up with a shuffle of three inputs. 445 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { 446 Value *RHS = EI->getOperand(0); 447 ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS); 448 assert(LR.second == nullptr || LR.second == RHS); 449 450 if (LR.first->getType() != RHS->getType()) { 451 // We tried our best, but we can't find anything compatible with RHS 452 // further up the chain. Return a trivial shuffle. 453 for (unsigned i = 0; i < NumElts; ++i) 454 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i); 455 return std::make_pair(V, nullptr); 456 } 457 458 unsigned NumLHSElts = RHS->getType()->getVectorNumElements(); 459 Mask[InsertedIdx % NumElts] = 460 ConstantInt::get(Type::getInt32Ty(V->getContext()), 461 NumLHSElts+ExtractedIdx); 462 return std::make_pair(LR.first, RHS); 463 } 464 465 if (VecOp == PermittedRHS) { 466 // We've gone as far as we can: anything on the other side of the 467 // extractelement will already have been converted into a shuffle. 468 unsigned NumLHSElts = 469 EI->getOperand(0)->getType()->getVectorNumElements(); 470 for (unsigned i = 0; i != NumElts; ++i) 471 Mask.push_back(ConstantInt::get( 472 Type::getInt32Ty(V->getContext()), 473 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i)); 474 return std::make_pair(EI->getOperand(0), PermittedRHS); 475 } 476 477 // If this insertelement is a chain that comes from exactly these two 478 // vectors, return the vector and the effective shuffle. 479 if (EI->getOperand(0)->getType() == PermittedRHS->getType() && 480 CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, 481 Mask)) 482 return std::make_pair(EI->getOperand(0), PermittedRHS); 483 } 484 } 485 } 486 487 // Otherwise, can't do anything fancy. Return an identity vector. 488 for (unsigned i = 0; i != NumElts; ++i) 489 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 490 return std::make_pair(V, nullptr); 491 } 492 493 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { 494 Value *VecOp = IE.getOperand(0); 495 Value *ScalarOp = IE.getOperand(1); 496 Value *IdxOp = IE.getOperand(2); 497 498 // Inserting an undef or into an undefined place, remove this. 499 if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) 500 ReplaceInstUsesWith(IE, VecOp); 501 502 // If the inserted element was extracted from some other vector, and if the 503 // indexes are constant, try to turn this into a shufflevector operation. 504 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 505 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 506 unsigned NumInsertVectorElts = IE.getType()->getNumElements(); 507 unsigned NumExtractVectorElts = 508 EI->getOperand(0)->getType()->getVectorNumElements(); 509 unsigned ExtractedIdx = 510 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 511 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 512 513 if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract. 514 return ReplaceInstUsesWith(IE, VecOp); 515 516 if (InsertedIdx >= NumInsertVectorElts) // Out of range insert. 517 return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); 518 519 // If we are extracting a value from a vector, then inserting it right 520 // back into the same place, just use the input vector. 521 if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) 522 return ReplaceInstUsesWith(IE, VecOp); 523 524 // If this insertelement isn't used by some other insertelement, turn it 525 // (and any insertelements it points to), into one big shuffle. 526 if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) { 527 SmallVector<Constant*, 16> Mask; 528 ShuffleOps LR = CollectShuffleElements(&IE, Mask, nullptr); 529 530 // The proposed shuffle may be trivial, in which case we shouldn't 531 // perform the combine. 532 if (LR.first != &IE && LR.second != &IE) { 533 // We now have a shuffle of LHS, RHS, Mask. 534 if (LR.second == nullptr) 535 LR.second = UndefValue::get(LR.first->getType()); 536 return new ShuffleVectorInst(LR.first, LR.second, 537 ConstantVector::get(Mask)); 538 } 539 } 540 } 541 } 542 543 unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); 544 APInt UndefElts(VWidth, 0); 545 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 546 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { 547 if (V != &IE) 548 return ReplaceInstUsesWith(IE, V); 549 return &IE; 550 } 551 552 return nullptr; 553 } 554 555 /// Return true if we can evaluate the specified expression tree if the vector 556 /// elements were shuffled in a different order. 557 static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask, 558 unsigned Depth = 5) { 559 // We can always reorder the elements of a constant. 560 if (isa<Constant>(V)) 561 return true; 562 563 // We won't reorder vector arguments. No IPO here. 564 Instruction *I = dyn_cast<Instruction>(V); 565 if (!I) return false; 566 567 // Two users may expect different orders of the elements. Don't try it. 568 if (!I->hasOneUse()) 569 return false; 570 571 if (Depth == 0) return false; 572 573 switch (I->getOpcode()) { 574 case Instruction::Add: 575 case Instruction::FAdd: 576 case Instruction::Sub: 577 case Instruction::FSub: 578 case Instruction::Mul: 579 case Instruction::FMul: 580 case Instruction::UDiv: 581 case Instruction::SDiv: 582 case Instruction::FDiv: 583 case Instruction::URem: 584 case Instruction::SRem: 585 case Instruction::FRem: 586 case Instruction::Shl: 587 case Instruction::LShr: 588 case Instruction::AShr: 589 case Instruction::And: 590 case Instruction::Or: 591 case Instruction::Xor: 592 case Instruction::ICmp: 593 case Instruction::FCmp: 594 case Instruction::Trunc: 595 case Instruction::ZExt: 596 case Instruction::SExt: 597 case Instruction::FPToUI: 598 case Instruction::FPToSI: 599 case Instruction::UIToFP: 600 case Instruction::SIToFP: 601 case Instruction::FPTrunc: 602 case Instruction::FPExt: 603 case Instruction::GetElementPtr: { 604 for (int i = 0, e = I->getNumOperands(); i != e; ++i) { 605 if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1)) 606 return false; 607 } 608 return true; 609 } 610 case Instruction::InsertElement: { 611 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); 612 if (!CI) return false; 613 int ElementNumber = CI->getLimitedValue(); 614 615 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' 616 // can't put an element into multiple indices. 617 bool SeenOnce = false; 618 for (int i = 0, e = Mask.size(); i != e; ++i) { 619 if (Mask[i] == ElementNumber) { 620 if (SeenOnce) 621 return false; 622 SeenOnce = true; 623 } 624 } 625 return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1); 626 } 627 } 628 return false; 629 } 630 631 /// Rebuild a new instruction just like 'I' but with the new operands given. 632 /// In the event of type mismatch, the type of the operands is correct. 633 static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) { 634 // We don't want to use the IRBuilder here because we want the replacement 635 // instructions to appear next to 'I', not the builder's insertion point. 636 switch (I->getOpcode()) { 637 case Instruction::Add: 638 case Instruction::FAdd: 639 case Instruction::Sub: 640 case Instruction::FSub: 641 case Instruction::Mul: 642 case Instruction::FMul: 643 case Instruction::UDiv: 644 case Instruction::SDiv: 645 case Instruction::FDiv: 646 case Instruction::URem: 647 case Instruction::SRem: 648 case Instruction::FRem: 649 case Instruction::Shl: 650 case Instruction::LShr: 651 case Instruction::AShr: 652 case Instruction::And: 653 case Instruction::Or: 654 case Instruction::Xor: { 655 BinaryOperator *BO = cast<BinaryOperator>(I); 656 assert(NewOps.size() == 2 && "binary operator with #ops != 2"); 657 BinaryOperator *New = 658 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(), 659 NewOps[0], NewOps[1], "", BO); 660 if (isa<OverflowingBinaryOperator>(BO)) { 661 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); 662 New->setHasNoSignedWrap(BO->hasNoSignedWrap()); 663 } 664 if (isa<PossiblyExactOperator>(BO)) { 665 New->setIsExact(BO->isExact()); 666 } 667 if (isa<FPMathOperator>(BO)) 668 New->copyFastMathFlags(I); 669 return New; 670 } 671 case Instruction::ICmp: 672 assert(NewOps.size() == 2 && "icmp with #ops != 2"); 673 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(), 674 NewOps[0], NewOps[1]); 675 case Instruction::FCmp: 676 assert(NewOps.size() == 2 && "fcmp with #ops != 2"); 677 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(), 678 NewOps[0], NewOps[1]); 679 case Instruction::Trunc: 680 case Instruction::ZExt: 681 case Instruction::SExt: 682 case Instruction::FPToUI: 683 case Instruction::FPToSI: 684 case Instruction::UIToFP: 685 case Instruction::SIToFP: 686 case Instruction::FPTrunc: 687 case Instruction::FPExt: { 688 // It's possible that the mask has a different number of elements from 689 // the original cast. We recompute the destination type to match the mask. 690 Type *DestTy = 691 VectorType::get(I->getType()->getScalarType(), 692 NewOps[0]->getType()->getVectorNumElements()); 693 assert(NewOps.size() == 1 && "cast with #ops != 1"); 694 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy, 695 "", I); 696 } 697 case Instruction::GetElementPtr: { 698 Value *Ptr = NewOps[0]; 699 ArrayRef<Value*> Idx = NewOps.slice(1); 700 GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I); 701 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); 702 return GEP; 703 } 704 } 705 llvm_unreachable("failed to rebuild vector instructions"); 706 } 707 708 Value * 709 InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { 710 // Mask.size() does not need to be equal to the number of vector elements. 711 712 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); 713 if (isa<UndefValue>(V)) { 714 return UndefValue::get(VectorType::get(V->getType()->getScalarType(), 715 Mask.size())); 716 } 717 if (isa<ConstantAggregateZero>(V)) { 718 return ConstantAggregateZero::get( 719 VectorType::get(V->getType()->getScalarType(), 720 Mask.size())); 721 } 722 if (Constant *C = dyn_cast<Constant>(V)) { 723 SmallVector<Constant *, 16> MaskValues; 724 for (int i = 0, e = Mask.size(); i != e; ++i) { 725 if (Mask[i] == -1) 726 MaskValues.push_back(UndefValue::get(Builder->getInt32Ty())); 727 else 728 MaskValues.push_back(Builder->getInt32(Mask[i])); 729 } 730 return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), 731 ConstantVector::get(MaskValues)); 732 } 733 734 Instruction *I = cast<Instruction>(V); 735 switch (I->getOpcode()) { 736 case Instruction::Add: 737 case Instruction::FAdd: 738 case Instruction::Sub: 739 case Instruction::FSub: 740 case Instruction::Mul: 741 case Instruction::FMul: 742 case Instruction::UDiv: 743 case Instruction::SDiv: 744 case Instruction::FDiv: 745 case Instruction::URem: 746 case Instruction::SRem: 747 case Instruction::FRem: 748 case Instruction::Shl: 749 case Instruction::LShr: 750 case Instruction::AShr: 751 case Instruction::And: 752 case Instruction::Or: 753 case Instruction::Xor: 754 case Instruction::ICmp: 755 case Instruction::FCmp: 756 case Instruction::Trunc: 757 case Instruction::ZExt: 758 case Instruction::SExt: 759 case Instruction::FPToUI: 760 case Instruction::FPToSI: 761 case Instruction::UIToFP: 762 case Instruction::SIToFP: 763 case Instruction::FPTrunc: 764 case Instruction::FPExt: 765 case Instruction::Select: 766 case Instruction::GetElementPtr: { 767 SmallVector<Value*, 8> NewOps; 768 bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements()); 769 for (int i = 0, e = I->getNumOperands(); i != e; ++i) { 770 Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask); 771 NewOps.push_back(V); 772 NeedsRebuild |= (V != I->getOperand(i)); 773 } 774 if (NeedsRebuild) { 775 return BuildNew(I, NewOps); 776 } 777 return I; 778 } 779 case Instruction::InsertElement: { 780 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue(); 781 782 // The insertelement was inserting at Element. Figure out which element 783 // that becomes after shuffling. The answer is guaranteed to be unique 784 // by CanEvaluateShuffled. 785 bool Found = false; 786 int Index = 0; 787 for (int e = Mask.size(); Index != e; ++Index) { 788 if (Mask[Index] == Element) { 789 Found = true; 790 break; 791 } 792 } 793 794 // If element is not in Mask, no need to handle the operand 1 (element to 795 // be inserted). Just evaluate values in operand 0 according to Mask. 796 if (!Found) 797 return EvaluateInDifferentElementOrder(I->getOperand(0), Mask); 798 799 Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask); 800 return InsertElementInst::Create(V, I->getOperand(1), 801 Builder->getInt32(Index), "", I); 802 } 803 } 804 llvm_unreachable("failed to reorder elements of vector instruction!"); 805 } 806 807 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 808 Value *LHS = SVI.getOperand(0); 809 Value *RHS = SVI.getOperand(1); 810 SmallVector<int, 16> Mask = SVI.getShuffleMask(); 811 812 bool MadeChange = false; 813 814 // Undefined shuffle mask -> undefined value. 815 if (isa<UndefValue>(SVI.getOperand(2))) 816 return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); 817 818 unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); 819 820 APInt UndefElts(VWidth, 0); 821 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 822 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { 823 if (V != &SVI) 824 return ReplaceInstUsesWith(SVI, V); 825 LHS = SVI.getOperand(0); 826 RHS = SVI.getOperand(1); 827 MadeChange = true; 828 } 829 830 unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements(); 831 832 // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') 833 // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). 834 if (LHS == RHS || isa<UndefValue>(LHS)) { 835 if (isa<UndefValue>(LHS) && LHS == RHS) { 836 // shuffle(undef,undef,mask) -> undef. 837 Value *Result = (VWidth == LHSWidth) 838 ? LHS : UndefValue::get(SVI.getType()); 839 return ReplaceInstUsesWith(SVI, Result); 840 } 841 842 // Remap any references to RHS to use LHS. 843 SmallVector<Constant*, 16> Elts; 844 for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) { 845 if (Mask[i] < 0) { 846 Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); 847 continue; 848 } 849 850 if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || 851 (Mask[i] < (int)e && isa<UndefValue>(LHS))) { 852 Mask[i] = -1; // Turn into undef. 853 Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); 854 } else { 855 Mask[i] = Mask[i] % e; // Force to LHS. 856 Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), 857 Mask[i])); 858 } 859 } 860 SVI.setOperand(0, SVI.getOperand(1)); 861 SVI.setOperand(1, UndefValue::get(RHS->getType())); 862 SVI.setOperand(2, ConstantVector::get(Elts)); 863 LHS = SVI.getOperand(0); 864 RHS = SVI.getOperand(1); 865 MadeChange = true; 866 } 867 868 if (VWidth == LHSWidth) { 869 // Analyze the shuffle, are the LHS or RHS and identity shuffles? 870 bool isLHSID = true, isRHSID = true; 871 872 for (unsigned i = 0, e = Mask.size(); i != e; ++i) { 873 if (Mask[i] < 0) continue; // Ignore undef values. 874 // Is this an identity shuffle of the LHS value? 875 isLHSID &= (Mask[i] == (int)i); 876 877 // Is this an identity shuffle of the RHS value? 878 isRHSID &= (Mask[i]-e == i); 879 } 880 881 // Eliminate identity shuffles. 882 if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); 883 if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); 884 } 885 886 if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) { 887 Value *V = EvaluateInDifferentElementOrder(LHS, Mask); 888 return ReplaceInstUsesWith(SVI, V); 889 } 890 891 // If the LHS is a shufflevector itself, see if we can combine it with this 892 // one without producing an unusual shuffle. 893 // Cases that might be simplified: 894 // 1. 895 // x1=shuffle(v1,v2,mask1) 896 // x=shuffle(x1,undef,mask) 897 // ==> 898 // x=shuffle(v1,undef,newMask) 899 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 900 // 2. 901 // x1=shuffle(v1,undef,mask1) 902 // x=shuffle(x1,x2,mask) 903 // where v1.size() == mask1.size() 904 // ==> 905 // x=shuffle(v1,x2,newMask) 906 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] 907 // 3. 908 // x2=shuffle(v2,undef,mask2) 909 // x=shuffle(x1,x2,mask) 910 // where v2.size() == mask2.size() 911 // ==> 912 // x=shuffle(x1,v2,newMask) 913 // newMask[i] = (mask[i] < x1.size()) 914 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() 915 // 4. 916 // x1=shuffle(v1,undef,mask1) 917 // x2=shuffle(v2,undef,mask2) 918 // x=shuffle(x1,x2,mask) 919 // where v1.size() == v2.size() 920 // ==> 921 // x=shuffle(v1,v2,newMask) 922 // newMask[i] = (mask[i] < x1.size()) 923 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() 924 // 925 // Here we are really conservative: 926 // we are absolutely afraid of producing a shuffle mask not in the input 927 // program, because the code gen may not be smart enough to turn a merged 928 // shuffle into two specific shuffles: it may produce worse code. As such, 929 // we only merge two shuffles if the result is either a splat or one of the 930 // input shuffle masks. In this case, merging the shuffles just removes 931 // one instruction, which we know is safe. This is good for things like 932 // turning: (splat(splat)) -> splat, or 933 // merge(V[0..n], V[n+1..2n]) -> V[0..2n] 934 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); 935 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); 936 if (LHSShuffle) 937 if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) 938 LHSShuffle = nullptr; 939 if (RHSShuffle) 940 if (!isa<UndefValue>(RHSShuffle->getOperand(1))) 941 RHSShuffle = nullptr; 942 if (!LHSShuffle && !RHSShuffle) 943 return MadeChange ? &SVI : nullptr; 944 945 Value* LHSOp0 = nullptr; 946 Value* LHSOp1 = nullptr; 947 Value* RHSOp0 = nullptr; 948 unsigned LHSOp0Width = 0; 949 unsigned RHSOp0Width = 0; 950 if (LHSShuffle) { 951 LHSOp0 = LHSShuffle->getOperand(0); 952 LHSOp1 = LHSShuffle->getOperand(1); 953 LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements(); 954 } 955 if (RHSShuffle) { 956 RHSOp0 = RHSShuffle->getOperand(0); 957 RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements(); 958 } 959 Value* newLHS = LHS; 960 Value* newRHS = RHS; 961 if (LHSShuffle) { 962 // case 1 963 if (isa<UndefValue>(RHS)) { 964 newLHS = LHSOp0; 965 newRHS = LHSOp1; 966 } 967 // case 2 or 4 968 else if (LHSOp0Width == LHSWidth) { 969 newLHS = LHSOp0; 970 } 971 } 972 // case 3 or 4 973 if (RHSShuffle && RHSOp0Width == LHSWidth) { 974 newRHS = RHSOp0; 975 } 976 // case 4 977 if (LHSOp0 == RHSOp0) { 978 newLHS = LHSOp0; 979 newRHS = nullptr; 980 } 981 982 if (newLHS == LHS && newRHS == RHS) 983 return MadeChange ? &SVI : nullptr; 984 985 SmallVector<int, 16> LHSMask; 986 SmallVector<int, 16> RHSMask; 987 if (newLHS != LHS) 988 LHSMask = LHSShuffle->getShuffleMask(); 989 if (RHSShuffle && newRHS != RHS) 990 RHSMask = RHSShuffle->getShuffleMask(); 991 992 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; 993 SmallVector<int, 16> newMask; 994 bool isSplat = true; 995 int SplatElt = -1; 996 // Create a new mask for the new ShuffleVectorInst so that the new 997 // ShuffleVectorInst is equivalent to the original one. 998 for (unsigned i = 0; i < VWidth; ++i) { 999 int eltMask; 1000 if (Mask[i] < 0) { 1001 // This element is an undef value. 1002 eltMask = -1; 1003 } else if (Mask[i] < (int)LHSWidth) { 1004 // This element is from left hand side vector operand. 1005 // 1006 // If LHS is going to be replaced (case 1, 2, or 4), calculate the 1007 // new mask value for the element. 1008 if (newLHS != LHS) { 1009 eltMask = LHSMask[Mask[i]]; 1010 // If the value selected is an undef value, explicitly specify it 1011 // with a -1 mask value. 1012 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) 1013 eltMask = -1; 1014 } else 1015 eltMask = Mask[i]; 1016 } else { 1017 // This element is from right hand side vector operand 1018 // 1019 // If the value selected is an undef value, explicitly specify it 1020 // with a -1 mask value. (case 1) 1021 if (isa<UndefValue>(RHS)) 1022 eltMask = -1; 1023 // If RHS is going to be replaced (case 3 or 4), calculate the 1024 // new mask value for the element. 1025 else if (newRHS != RHS) { 1026 eltMask = RHSMask[Mask[i]-LHSWidth]; 1027 // If the value selected is an undef value, explicitly specify it 1028 // with a -1 mask value. 1029 if (eltMask >= (int)RHSOp0Width) { 1030 assert(isa<UndefValue>(RHSShuffle->getOperand(1)) 1031 && "should have been check above"); 1032 eltMask = -1; 1033 } 1034 } else 1035 eltMask = Mask[i]-LHSWidth; 1036 1037 // If LHS's width is changed, shift the mask value accordingly. 1038 // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any 1039 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. 1040 // If newRHS == newLHS, we want to remap any references from newRHS to 1041 // newLHS so that we can properly identify splats that may occur due to 1042 // obfuscation across the two vectors. 1043 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS) 1044 eltMask += newLHSWidth; 1045 } 1046 1047 // Check if this could still be a splat. 1048 if (eltMask >= 0) { 1049 if (SplatElt >= 0 && SplatElt != eltMask) 1050 isSplat = false; 1051 SplatElt = eltMask; 1052 } 1053 1054 newMask.push_back(eltMask); 1055 } 1056 1057 // If the result mask is equal to one of the original shuffle masks, 1058 // or is a splat, do the replacement. 1059 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { 1060 SmallVector<Constant*, 16> Elts; 1061 Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); 1062 for (unsigned i = 0, e = newMask.size(); i != e; ++i) { 1063 if (newMask[i] < 0) { 1064 Elts.push_back(UndefValue::get(Int32Ty)); 1065 } else { 1066 Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); 1067 } 1068 } 1069 if (!newRHS) 1070 newRHS = UndefValue::get(newLHS->getType()); 1071 return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); 1072 } 1073 1074 return MadeChange ? &SVI : nullptr; 1075 } 1076