1 //===- InstCombineVectorOps.cpp -------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements instcombine for ExtractElement, InsertElement and 11 // ShuffleVector. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "InstCombine.h" 16 #include "llvm/IR/PatternMatch.h" 17 using namespace llvm; 18 using namespace PatternMatch; 19 20 #define DEBUG_TYPE "instcombine" 21 22 /// CheapToScalarize - Return true if the value is cheaper to scalarize than it 23 /// is to leave as a vector operation. isConstant indicates whether we're 24 /// extracting one known element. If false we're extracting a variable index. 25 static bool CheapToScalarize(Value *V, bool isConstant) { 26 if (Constant *C = dyn_cast<Constant>(V)) { 27 if (isConstant) return true; 28 29 // If all elts are the same, we can extract it and use any of the values. 30 if (Constant *Op0 = C->getAggregateElement(0U)) { 31 for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; 32 ++i) 33 if (C->getAggregateElement(i) != Op0) 34 return false; 35 return true; 36 } 37 } 38 Instruction *I = dyn_cast<Instruction>(V); 39 if (!I) return false; 40 41 // Insert element gets simplified to the inserted element or is deleted if 42 // this is constant idx extract element and its a constant idx insertelt. 43 if (I->getOpcode() == Instruction::InsertElement && isConstant && 44 isa<ConstantInt>(I->getOperand(2))) 45 return true; 46 if (I->getOpcode() == Instruction::Load && I->hasOneUse()) 47 return true; 48 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) 49 if (BO->hasOneUse() && 50 (CheapToScalarize(BO->getOperand(0), isConstant) || 51 CheapToScalarize(BO->getOperand(1), isConstant))) 52 return true; 53 if (CmpInst *CI = dyn_cast<CmpInst>(I)) 54 if (CI->hasOneUse() && 55 (CheapToScalarize(CI->getOperand(0), isConstant) || 56 CheapToScalarize(CI->getOperand(1), isConstant))) 57 return true; 58 59 return false; 60 } 61 62 /// FindScalarElement - Given a vector and an element number, see if the scalar 63 /// value is already around as a register, for example if it were inserted then 64 /// extracted from the vector. 65 static Value *FindScalarElement(Value *V, unsigned EltNo) { 66 assert(V->getType()->isVectorTy() && "Not looking at a vector?"); 67 VectorType *VTy = cast<VectorType>(V->getType()); 68 unsigned Width = VTy->getNumElements(); 69 if (EltNo >= Width) // Out of range access. 70 return UndefValue::get(VTy->getElementType()); 71 72 if (Constant *C = dyn_cast<Constant>(V)) 73 return C->getAggregateElement(EltNo); 74 75 if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { 76 // If this is an insert to a variable element, we don't know what it is. 77 if (!isa<ConstantInt>(III->getOperand(2))) 78 return 0; 79 unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); 80 81 // If this is an insert to the element we are looking for, return the 82 // inserted value. 83 if (EltNo == IIElt) 84 return III->getOperand(1); 85 86 // Otherwise, the insertelement doesn't modify the value, recurse on its 87 // vector input. 88 return FindScalarElement(III->getOperand(0), EltNo); 89 } 90 91 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { 92 unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); 93 int InEl = SVI->getMaskValue(EltNo); 94 if (InEl < 0) 95 return UndefValue::get(VTy->getElementType()); 96 if (InEl < (int)LHSWidth) 97 return FindScalarElement(SVI->getOperand(0), InEl); 98 return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); 99 } 100 101 // Extract a value from a vector add operation with a constant zero. 102 Value *Val = 0; Constant *Con = 0; 103 if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { 104 if (Con->getAggregateElement(EltNo)->isNullValue()) 105 return FindScalarElement(Val, EltNo); 106 } 107 108 // Otherwise, we don't know. 109 return 0; 110 } 111 112 // If we have a PHI node with a vector type that has only 2 uses: feed 113 // itself and be an operand of extractelement at a constant location, 114 // try to replace the PHI of the vector type with a PHI of a scalar type. 115 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { 116 // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. 117 if (!PN->hasNUses(2)) 118 return NULL; 119 120 // If so, it's known at this point that one operand is PHI and the other is 121 // an extractelement node. Find the PHI user that is not the extractelement 122 // node. 123 auto iu = PN->user_begin(); 124 Instruction *PHIUser = dyn_cast<Instruction>(*iu); 125 if (PHIUser == cast<Instruction>(&EI)) 126 PHIUser = cast<Instruction>(*(++iu)); 127 128 // Verify that this PHI user has one use, which is the PHI itself, 129 // and that it is a binary operation which is cheap to scalarize. 130 // otherwise return NULL. 131 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || 132 !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true)) 133 return NULL; 134 135 // Create a scalar PHI node that will replace the vector PHI node 136 // just before the current PHI node. 137 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith( 138 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN)); 139 // Scalarize each PHI operand. 140 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { 141 Value *PHIInVal = PN->getIncomingValue(i); 142 BasicBlock *inBB = PN->getIncomingBlock(i); 143 Value *Elt = EI.getIndexOperand(); 144 // If the operand is the PHI induction variable: 145 if (PHIInVal == PHIUser) { 146 // Scalarize the binary operation. Its first operand is the 147 // scalar PHI and the second operand is extracted from the other 148 // vector operand. 149 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); 150 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; 151 Value *Op = InsertNewInstWith( 152 ExtractElementInst::Create(B0->getOperand(opId), Elt, 153 B0->getOperand(opId)->getName() + ".Elt"), 154 *B0); 155 Value *newPHIUser = InsertNewInstWith( 156 BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0); 157 scalarPHI->addIncoming(newPHIUser, inBB); 158 } else { 159 // Scalarize PHI input: 160 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, ""); 161 // Insert the new instruction into the predecessor basic block. 162 Instruction *pos = dyn_cast<Instruction>(PHIInVal); 163 BasicBlock::iterator InsertPos; 164 if (pos && !isa<PHINode>(pos)) { 165 InsertPos = pos; 166 ++InsertPos; 167 } else { 168 InsertPos = inBB->getFirstInsertionPt(); 169 } 170 171 InsertNewInstWith(newEI, *InsertPos); 172 173 scalarPHI->addIncoming(newEI, inBB); 174 } 175 } 176 return ReplaceInstUsesWith(EI, scalarPHI); 177 } 178 179 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { 180 // If vector val is constant with all elements the same, replace EI with 181 // that element. We handle a known element # below. 182 if (Constant *C = dyn_cast<Constant>(EI.getOperand(0))) 183 if (CheapToScalarize(C, false)) 184 return ReplaceInstUsesWith(EI, C->getAggregateElement(0U)); 185 186 // If extracting a specified index from the vector, see if we can recursively 187 // find a previously computed scalar that was inserted into the vector. 188 if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { 189 unsigned IndexVal = IdxC->getZExtValue(); 190 unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); 191 192 // If this is extracting an invalid index, turn this into undef, to avoid 193 // crashing the code below. 194 if (IndexVal >= VectorWidth) 195 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 196 197 // This instruction only demands the single element from the input vector. 198 // If the input vector has a single use, simplify it based on this use 199 // property. 200 if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { 201 APInt UndefElts(VectorWidth, 0); 202 APInt DemandedMask(VectorWidth, 0); 203 DemandedMask.setBit(IndexVal); 204 if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), 205 DemandedMask, UndefElts)) { 206 EI.setOperand(0, V); 207 return &EI; 208 } 209 } 210 211 if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) 212 return ReplaceInstUsesWith(EI, Elt); 213 214 // If the this extractelement is directly using a bitcast from a vector of 215 // the same number of elements, see if we can find the source element from 216 // it. In this case, we will end up needing to bitcast the scalars. 217 if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { 218 if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) 219 if (VT->getNumElements() == VectorWidth) 220 if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) 221 return new BitCastInst(Elt, EI.getType()); 222 } 223 224 // If there's a vector PHI feeding a scalar use through this extractelement 225 // instruction, try to scalarize the PHI. 226 if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) { 227 Instruction *scalarPHI = scalarizePHI(EI, PN); 228 if (scalarPHI) 229 return scalarPHI; 230 } 231 } 232 233 if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { 234 // Push extractelement into predecessor operation if legal and 235 // profitable to do so 236 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { 237 if (I->hasOneUse() && 238 CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { 239 Value *newEI0 = 240 Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), 241 EI.getName()+".lhs"); 242 Value *newEI1 = 243 Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), 244 EI.getName()+".rhs"); 245 return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); 246 } 247 } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { 248 // Extracting the inserted element? 249 if (IE->getOperand(2) == EI.getOperand(1)) 250 return ReplaceInstUsesWith(EI, IE->getOperand(1)); 251 // If the inserted and extracted elements are constants, they must not 252 // be the same value, extract from the pre-inserted value instead. 253 if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { 254 Worklist.AddValue(EI.getOperand(0)); 255 EI.setOperand(0, IE->getOperand(0)); 256 return &EI; 257 } 258 } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) { 259 // If this is extracting an element from a shufflevector, figure out where 260 // it came from and extract from the appropriate input element instead. 261 if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { 262 int SrcIdx = SVI->getMaskValue(Elt->getZExtValue()); 263 Value *Src; 264 unsigned LHSWidth = 265 SVI->getOperand(0)->getType()->getVectorNumElements(); 266 267 if (SrcIdx < 0) 268 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 269 if (SrcIdx < (int)LHSWidth) 270 Src = SVI->getOperand(0); 271 else { 272 SrcIdx -= LHSWidth; 273 Src = SVI->getOperand(1); 274 } 275 Type *Int32Ty = Type::getInt32Ty(EI.getContext()); 276 return ExtractElementInst::Create(Src, 277 ConstantInt::get(Int32Ty, 278 SrcIdx, false)); 279 } 280 } else if (CastInst *CI = dyn_cast<CastInst>(I)) { 281 // Canonicalize extractelement(cast) -> cast(extractelement) 282 // bitcasts can change the number of vector elements and they cost nothing 283 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { 284 Value *EE = Builder->CreateExtractElement(CI->getOperand(0), 285 EI.getIndexOperand()); 286 Worklist.AddValue(EE); 287 return CastInst::Create(CI->getOpcode(), EE, EI.getType()); 288 } 289 } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) { 290 if (SI->hasOneUse()) { 291 // TODO: For a select on vectors, it might be useful to do this if it 292 // has multiple extractelement uses. For vector select, that seems to 293 // fight the vectorizer. 294 295 // If we are extracting an element from a vector select or a select on 296 // vectors, a select on the scalars extracted from the vector arguments. 297 Value *TrueVal = SI->getTrueValue(); 298 Value *FalseVal = SI->getFalseValue(); 299 300 Value *Cond = SI->getCondition(); 301 if (Cond->getType()->isVectorTy()) { 302 Cond = Builder->CreateExtractElement(Cond, 303 EI.getIndexOperand(), 304 Cond->getName() + ".elt"); 305 } 306 307 Value *V1Elem 308 = Builder->CreateExtractElement(TrueVal, 309 EI.getIndexOperand(), 310 TrueVal->getName() + ".elt"); 311 312 Value *V2Elem 313 = Builder->CreateExtractElement(FalseVal, 314 EI.getIndexOperand(), 315 FalseVal->getName() + ".elt"); 316 return SelectInst::Create(Cond, 317 V1Elem, 318 V2Elem, 319 SI->getName() + ".elt"); 320 } 321 } 322 } 323 return 0; 324 } 325 326 /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns 327 /// elements from either LHS or RHS, return the shuffle mask and true. 328 /// Otherwise, return false. 329 static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, 330 SmallVectorImpl<Constant*> &Mask) { 331 assert(LHS->getType() == RHS->getType() && 332 "Invalid CollectSingleShuffleElements"); 333 unsigned NumElts = V->getType()->getVectorNumElements(); 334 335 if (isa<UndefValue>(V)) { 336 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 337 return true; 338 } 339 340 if (V == LHS) { 341 for (unsigned i = 0; i != NumElts; ++i) 342 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 343 return true; 344 } 345 346 if (V == RHS) { 347 for (unsigned i = 0; i != NumElts; ++i) 348 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), 349 i+NumElts)); 350 return true; 351 } 352 353 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 354 // If this is an insert of an extract from some other vector, include it. 355 Value *VecOp = IEI->getOperand(0); 356 Value *ScalarOp = IEI->getOperand(1); 357 Value *IdxOp = IEI->getOperand(2); 358 359 if (!isa<ConstantInt>(IdxOp)) 360 return false; 361 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 362 363 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. 364 // Okay, we can handle this if the vector we are insertinting into is 365 // transitively ok. 366 if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 367 // If so, update the mask to reflect the inserted undef. 368 Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); 369 return true; 370 } 371 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ 372 if (isa<ConstantInt>(EI->getOperand(1))) { 373 unsigned ExtractedIdx = 374 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 375 unsigned NumLHSElts = LHS->getType()->getVectorNumElements(); 376 377 // This must be extracting from either LHS or RHS. 378 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { 379 // Okay, we can handle this if the vector we are insertinting into is 380 // transitively ok. 381 if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 382 // If so, update the mask to reflect the inserted value. 383 if (EI->getOperand(0) == LHS) { 384 Mask[InsertedIdx % NumElts] = 385 ConstantInt::get(Type::getInt32Ty(V->getContext()), 386 ExtractedIdx); 387 } else { 388 assert(EI->getOperand(0) == RHS); 389 Mask[InsertedIdx % NumElts] = 390 ConstantInt::get(Type::getInt32Ty(V->getContext()), 391 ExtractedIdx + NumLHSElts); 392 } 393 return true; 394 } 395 } 396 } 397 } 398 } 399 400 return false; 401 } 402 403 404 /// We are building a shuffle to create V, which is a sequence of insertelement, 405 /// extractelement pairs. If PermittedRHS is set, then we must either use it or 406 /// not rely on the second vector source. Return an std::pair containing the 407 /// left and right vectors of the proposed shuffle (or 0), and set the Mask 408 /// parameter as required. 409 /// 410 /// Note: we intentionally don't try to fold earlier shuffles since they have 411 /// often been chosen carefully to be efficiently implementable on the target. 412 typedef std::pair<Value *, Value *> ShuffleOps; 413 414 static ShuffleOps CollectShuffleElements(Value *V, 415 SmallVectorImpl<Constant *> &Mask, 416 Value *PermittedRHS) { 417 assert(V->getType()->isVectorTy() && "Invalid shuffle!"); 418 unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); 419 420 if (isa<UndefValue>(V)) { 421 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 422 return std::make_pair( 423 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); 424 } 425 426 if (isa<ConstantAggregateZero>(V)) { 427 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); 428 return std::make_pair(V, nullptr); 429 } 430 431 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 432 // If this is an insert of an extract from some other vector, include it. 433 Value *VecOp = IEI->getOperand(0); 434 Value *ScalarOp = IEI->getOperand(1); 435 Value *IdxOp = IEI->getOperand(2); 436 437 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 438 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 439 unsigned ExtractedIdx = 440 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 441 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 442 443 // Either the extracted from or inserted into vector must be RHSVec, 444 // otherwise we'd end up with a shuffle of three inputs. 445 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) { 446 Value *RHS = EI->getOperand(0); 447 ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS); 448 assert(LR.second == 0 || LR.second == RHS); 449 450 if (LR.first->getType() != RHS->getType()) { 451 // We tried our best, but we can't find anything compatible with RHS 452 // further up the chain. Return a trivial shuffle. 453 for (unsigned i = 0; i < NumElts; ++i) 454 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i); 455 return std::make_pair(V, nullptr); 456 } 457 458 unsigned NumLHSElts = RHS->getType()->getVectorNumElements(); 459 Mask[InsertedIdx % NumElts] = 460 ConstantInt::get(Type::getInt32Ty(V->getContext()), 461 NumLHSElts+ExtractedIdx); 462 return std::make_pair(LR.first, RHS); 463 } 464 465 if (VecOp == PermittedRHS) { 466 // We've gone as far as we can: anything on the other side of the 467 // extractelement will already have been converted into a shuffle. 468 unsigned NumLHSElts = 469 EI->getOperand(0)->getType()->getVectorNumElements(); 470 for (unsigned i = 0; i != NumElts; ++i) 471 Mask.push_back(ConstantInt::get( 472 Type::getInt32Ty(V->getContext()), 473 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i)); 474 return std::make_pair(EI->getOperand(0), PermittedRHS); 475 } 476 477 // If this insertelement is a chain that comes from exactly these two 478 // vectors, return the vector and the effective shuffle. 479 if (EI->getOperand(0)->getType() == PermittedRHS->getType() && 480 CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, 481 Mask)) 482 return std::make_pair(EI->getOperand(0), PermittedRHS); 483 } 484 } 485 } 486 487 // Otherwise, can't do anything fancy. Return an identity vector. 488 for (unsigned i = 0; i != NumElts; ++i) 489 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 490 return std::make_pair(V, nullptr); 491 } 492 493 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { 494 Value *VecOp = IE.getOperand(0); 495 Value *ScalarOp = IE.getOperand(1); 496 Value *IdxOp = IE.getOperand(2); 497 498 // Inserting an undef or into an undefined place, remove this. 499 if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) 500 ReplaceInstUsesWith(IE, VecOp); 501 502 // If the inserted element was extracted from some other vector, and if the 503 // indexes are constant, try to turn this into a shufflevector operation. 504 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 505 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 506 unsigned NumInsertVectorElts = IE.getType()->getNumElements(); 507 unsigned NumExtractVectorElts = 508 EI->getOperand(0)->getType()->getVectorNumElements(); 509 unsigned ExtractedIdx = 510 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 511 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 512 513 if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract. 514 return ReplaceInstUsesWith(IE, VecOp); 515 516 if (InsertedIdx >= NumInsertVectorElts) // Out of range insert. 517 return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); 518 519 // If we are extracting a value from a vector, then inserting it right 520 // back into the same place, just use the input vector. 521 if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) 522 return ReplaceInstUsesWith(IE, VecOp); 523 524 // If this insertelement isn't used by some other insertelement, turn it 525 // (and any insertelements it points to), into one big shuffle. 526 if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) { 527 SmallVector<Constant*, 16> Mask; 528 ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0); 529 530 // The proposed shuffle may be trivial, in which case we shouldn't 531 // perform the combine. 532 if (LR.first != &IE && LR.second != &IE) { 533 // We now have a shuffle of LHS, RHS, Mask. 534 if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType()); 535 return new ShuffleVectorInst(LR.first, LR.second, 536 ConstantVector::get(Mask)); 537 } 538 } 539 } 540 } 541 542 unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); 543 APInt UndefElts(VWidth, 0); 544 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 545 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { 546 if (V != &IE) 547 return ReplaceInstUsesWith(IE, V); 548 return &IE; 549 } 550 551 return 0; 552 } 553 554 /// Return true if we can evaluate the specified expression tree if the vector 555 /// elements were shuffled in a different order. 556 static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask, 557 unsigned Depth = 5) { 558 // We can always reorder the elements of a constant. 559 if (isa<Constant>(V)) 560 return true; 561 562 // We won't reorder vector arguments. No IPO here. 563 Instruction *I = dyn_cast<Instruction>(V); 564 if (!I) return false; 565 566 // Two users may expect different orders of the elements. Don't try it. 567 if (!I->hasOneUse()) 568 return false; 569 570 if (Depth == 0) return false; 571 572 switch (I->getOpcode()) { 573 case Instruction::Add: 574 case Instruction::FAdd: 575 case Instruction::Sub: 576 case Instruction::FSub: 577 case Instruction::Mul: 578 case Instruction::FMul: 579 case Instruction::UDiv: 580 case Instruction::SDiv: 581 case Instruction::FDiv: 582 case Instruction::URem: 583 case Instruction::SRem: 584 case Instruction::FRem: 585 case Instruction::Shl: 586 case Instruction::LShr: 587 case Instruction::AShr: 588 case Instruction::And: 589 case Instruction::Or: 590 case Instruction::Xor: 591 case Instruction::ICmp: 592 case Instruction::FCmp: 593 case Instruction::Trunc: 594 case Instruction::ZExt: 595 case Instruction::SExt: 596 case Instruction::FPToUI: 597 case Instruction::FPToSI: 598 case Instruction::UIToFP: 599 case Instruction::SIToFP: 600 case Instruction::FPTrunc: 601 case Instruction::FPExt: 602 case Instruction::GetElementPtr: { 603 for (int i = 0, e = I->getNumOperands(); i != e; ++i) { 604 if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1)) 605 return false; 606 } 607 return true; 608 } 609 case Instruction::InsertElement: { 610 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); 611 if (!CI) return false; 612 int ElementNumber = CI->getLimitedValue(); 613 614 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' 615 // can't put an element into multiple indices. 616 bool SeenOnce = false; 617 for (int i = 0, e = Mask.size(); i != e; ++i) { 618 if (Mask[i] == ElementNumber) { 619 if (SeenOnce) 620 return false; 621 SeenOnce = true; 622 } 623 } 624 return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1); 625 } 626 } 627 return false; 628 } 629 630 /// Rebuild a new instruction just like 'I' but with the new operands given. 631 /// In the event of type mismatch, the type of the operands is correct. 632 static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) { 633 // We don't want to use the IRBuilder here because we want the replacement 634 // instructions to appear next to 'I', not the builder's insertion point. 635 switch (I->getOpcode()) { 636 case Instruction::Add: 637 case Instruction::FAdd: 638 case Instruction::Sub: 639 case Instruction::FSub: 640 case Instruction::Mul: 641 case Instruction::FMul: 642 case Instruction::UDiv: 643 case Instruction::SDiv: 644 case Instruction::FDiv: 645 case Instruction::URem: 646 case Instruction::SRem: 647 case Instruction::FRem: 648 case Instruction::Shl: 649 case Instruction::LShr: 650 case Instruction::AShr: 651 case Instruction::And: 652 case Instruction::Or: 653 case Instruction::Xor: { 654 BinaryOperator *BO = cast<BinaryOperator>(I); 655 assert(NewOps.size() == 2 && "binary operator with #ops != 2"); 656 BinaryOperator *New = 657 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(), 658 NewOps[0], NewOps[1], "", BO); 659 if (isa<OverflowingBinaryOperator>(BO)) { 660 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); 661 New->setHasNoSignedWrap(BO->hasNoSignedWrap()); 662 } 663 if (isa<PossiblyExactOperator>(BO)) { 664 New->setIsExact(BO->isExact()); 665 } 666 if (isa<FPMathOperator>(BO)) 667 New->copyFastMathFlags(I); 668 return New; 669 } 670 case Instruction::ICmp: 671 assert(NewOps.size() == 2 && "icmp with #ops != 2"); 672 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(), 673 NewOps[0], NewOps[1]); 674 case Instruction::FCmp: 675 assert(NewOps.size() == 2 && "fcmp with #ops != 2"); 676 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(), 677 NewOps[0], NewOps[1]); 678 case Instruction::Trunc: 679 case Instruction::ZExt: 680 case Instruction::SExt: 681 case Instruction::FPToUI: 682 case Instruction::FPToSI: 683 case Instruction::UIToFP: 684 case Instruction::SIToFP: 685 case Instruction::FPTrunc: 686 case Instruction::FPExt: { 687 // It's possible that the mask has a different number of elements from 688 // the original cast. We recompute the destination type to match the mask. 689 Type *DestTy = 690 VectorType::get(I->getType()->getScalarType(), 691 NewOps[0]->getType()->getVectorNumElements()); 692 assert(NewOps.size() == 1 && "cast with #ops != 1"); 693 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy, 694 "", I); 695 } 696 case Instruction::GetElementPtr: { 697 Value *Ptr = NewOps[0]; 698 ArrayRef<Value*> Idx = NewOps.slice(1); 699 GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I); 700 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); 701 return GEP; 702 } 703 } 704 llvm_unreachable("failed to rebuild vector instructions"); 705 } 706 707 Value * 708 InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { 709 // Mask.size() does not need to be equal to the number of vector elements. 710 711 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); 712 if (isa<UndefValue>(V)) { 713 return UndefValue::get(VectorType::get(V->getType()->getScalarType(), 714 Mask.size())); 715 } 716 if (isa<ConstantAggregateZero>(V)) { 717 return ConstantAggregateZero::get( 718 VectorType::get(V->getType()->getScalarType(), 719 Mask.size())); 720 } 721 if (Constant *C = dyn_cast<Constant>(V)) { 722 SmallVector<Constant *, 16> MaskValues; 723 for (int i = 0, e = Mask.size(); i != e; ++i) { 724 if (Mask[i] == -1) 725 MaskValues.push_back(UndefValue::get(Builder->getInt32Ty())); 726 else 727 MaskValues.push_back(Builder->getInt32(Mask[i])); 728 } 729 return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), 730 ConstantVector::get(MaskValues)); 731 } 732 733 Instruction *I = cast<Instruction>(V); 734 switch (I->getOpcode()) { 735 case Instruction::Add: 736 case Instruction::FAdd: 737 case Instruction::Sub: 738 case Instruction::FSub: 739 case Instruction::Mul: 740 case Instruction::FMul: 741 case Instruction::UDiv: 742 case Instruction::SDiv: 743 case Instruction::FDiv: 744 case Instruction::URem: 745 case Instruction::SRem: 746 case Instruction::FRem: 747 case Instruction::Shl: 748 case Instruction::LShr: 749 case Instruction::AShr: 750 case Instruction::And: 751 case Instruction::Or: 752 case Instruction::Xor: 753 case Instruction::ICmp: 754 case Instruction::FCmp: 755 case Instruction::Trunc: 756 case Instruction::ZExt: 757 case Instruction::SExt: 758 case Instruction::FPToUI: 759 case Instruction::FPToSI: 760 case Instruction::UIToFP: 761 case Instruction::SIToFP: 762 case Instruction::FPTrunc: 763 case Instruction::FPExt: 764 case Instruction::Select: 765 case Instruction::GetElementPtr: { 766 SmallVector<Value*, 8> NewOps; 767 bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements()); 768 for (int i = 0, e = I->getNumOperands(); i != e; ++i) { 769 Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask); 770 NewOps.push_back(V); 771 NeedsRebuild |= (V != I->getOperand(i)); 772 } 773 if (NeedsRebuild) { 774 return BuildNew(I, NewOps); 775 } 776 return I; 777 } 778 case Instruction::InsertElement: { 779 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue(); 780 781 // The insertelement was inserting at Element. Figure out which element 782 // that becomes after shuffling. The answer is guaranteed to be unique 783 // by CanEvaluateShuffled. 784 bool Found = false; 785 int Index = 0; 786 for (int e = Mask.size(); Index != e; ++Index) { 787 if (Mask[Index] == Element) { 788 Found = true; 789 break; 790 } 791 } 792 793 // If element is not in Mask, no need to handle the operand 1 (element to 794 // be inserted). Just evaluate values in operand 0 according to Mask. 795 if (!Found) 796 return EvaluateInDifferentElementOrder(I->getOperand(0), Mask); 797 798 Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask); 799 return InsertElementInst::Create(V, I->getOperand(1), 800 Builder->getInt32(Index), "", I); 801 } 802 } 803 llvm_unreachable("failed to reorder elements of vector instruction!"); 804 } 805 806 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 807 Value *LHS = SVI.getOperand(0); 808 Value *RHS = SVI.getOperand(1); 809 SmallVector<int, 16> Mask = SVI.getShuffleMask(); 810 811 bool MadeChange = false; 812 813 // Undefined shuffle mask -> undefined value. 814 if (isa<UndefValue>(SVI.getOperand(2))) 815 return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); 816 817 unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); 818 819 APInt UndefElts(VWidth, 0); 820 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 821 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { 822 if (V != &SVI) 823 return ReplaceInstUsesWith(SVI, V); 824 LHS = SVI.getOperand(0); 825 RHS = SVI.getOperand(1); 826 MadeChange = true; 827 } 828 829 unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements(); 830 831 // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') 832 // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). 833 if (LHS == RHS || isa<UndefValue>(LHS)) { 834 if (isa<UndefValue>(LHS) && LHS == RHS) { 835 // shuffle(undef,undef,mask) -> undef. 836 Value *Result = (VWidth == LHSWidth) 837 ? LHS : UndefValue::get(SVI.getType()); 838 return ReplaceInstUsesWith(SVI, Result); 839 } 840 841 // Remap any references to RHS to use LHS. 842 SmallVector<Constant*, 16> Elts; 843 for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) { 844 if (Mask[i] < 0) { 845 Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); 846 continue; 847 } 848 849 if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || 850 (Mask[i] < (int)e && isa<UndefValue>(LHS))) { 851 Mask[i] = -1; // Turn into undef. 852 Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); 853 } else { 854 Mask[i] = Mask[i] % e; // Force to LHS. 855 Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), 856 Mask[i])); 857 } 858 } 859 SVI.setOperand(0, SVI.getOperand(1)); 860 SVI.setOperand(1, UndefValue::get(RHS->getType())); 861 SVI.setOperand(2, ConstantVector::get(Elts)); 862 LHS = SVI.getOperand(0); 863 RHS = SVI.getOperand(1); 864 MadeChange = true; 865 } 866 867 if (VWidth == LHSWidth) { 868 // Analyze the shuffle, are the LHS or RHS and identity shuffles? 869 bool isLHSID = true, isRHSID = true; 870 871 for (unsigned i = 0, e = Mask.size(); i != e; ++i) { 872 if (Mask[i] < 0) continue; // Ignore undef values. 873 // Is this an identity shuffle of the LHS value? 874 isLHSID &= (Mask[i] == (int)i); 875 876 // Is this an identity shuffle of the RHS value? 877 isRHSID &= (Mask[i]-e == i); 878 } 879 880 // Eliminate identity shuffles. 881 if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); 882 if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); 883 } 884 885 if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) { 886 Value *V = EvaluateInDifferentElementOrder(LHS, Mask); 887 return ReplaceInstUsesWith(SVI, V); 888 } 889 890 // If the LHS is a shufflevector itself, see if we can combine it with this 891 // one without producing an unusual shuffle. 892 // Cases that might be simplified: 893 // 1. 894 // x1=shuffle(v1,v2,mask1) 895 // x=shuffle(x1,undef,mask) 896 // ==> 897 // x=shuffle(v1,undef,newMask) 898 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 899 // 2. 900 // x1=shuffle(v1,undef,mask1) 901 // x=shuffle(x1,x2,mask) 902 // where v1.size() == mask1.size() 903 // ==> 904 // x=shuffle(v1,x2,newMask) 905 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] 906 // 3. 907 // x2=shuffle(v2,undef,mask2) 908 // x=shuffle(x1,x2,mask) 909 // where v2.size() == mask2.size() 910 // ==> 911 // x=shuffle(x1,v2,newMask) 912 // newMask[i] = (mask[i] < x1.size()) 913 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() 914 // 4. 915 // x1=shuffle(v1,undef,mask1) 916 // x2=shuffle(v2,undef,mask2) 917 // x=shuffle(x1,x2,mask) 918 // where v1.size() == v2.size() 919 // ==> 920 // x=shuffle(v1,v2,newMask) 921 // newMask[i] = (mask[i] < x1.size()) 922 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() 923 // 924 // Here we are really conservative: 925 // we are absolutely afraid of producing a shuffle mask not in the input 926 // program, because the code gen may not be smart enough to turn a merged 927 // shuffle into two specific shuffles: it may produce worse code. As such, 928 // we only merge two shuffles if the result is either a splat or one of the 929 // input shuffle masks. In this case, merging the shuffles just removes 930 // one instruction, which we know is safe. This is good for things like 931 // turning: (splat(splat)) -> splat, or 932 // merge(V[0..n], V[n+1..2n]) -> V[0..2n] 933 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); 934 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); 935 if (LHSShuffle) 936 if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) 937 LHSShuffle = NULL; 938 if (RHSShuffle) 939 if (!isa<UndefValue>(RHSShuffle->getOperand(1))) 940 RHSShuffle = NULL; 941 if (!LHSShuffle && !RHSShuffle) 942 return MadeChange ? &SVI : 0; 943 944 Value* LHSOp0 = NULL; 945 Value* LHSOp1 = NULL; 946 Value* RHSOp0 = NULL; 947 unsigned LHSOp0Width = 0; 948 unsigned RHSOp0Width = 0; 949 if (LHSShuffle) { 950 LHSOp0 = LHSShuffle->getOperand(0); 951 LHSOp1 = LHSShuffle->getOperand(1); 952 LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements(); 953 } 954 if (RHSShuffle) { 955 RHSOp0 = RHSShuffle->getOperand(0); 956 RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements(); 957 } 958 Value* newLHS = LHS; 959 Value* newRHS = RHS; 960 if (LHSShuffle) { 961 // case 1 962 if (isa<UndefValue>(RHS)) { 963 newLHS = LHSOp0; 964 newRHS = LHSOp1; 965 } 966 // case 2 or 4 967 else if (LHSOp0Width == LHSWidth) { 968 newLHS = LHSOp0; 969 } 970 } 971 // case 3 or 4 972 if (RHSShuffle && RHSOp0Width == LHSWidth) { 973 newRHS = RHSOp0; 974 } 975 // case 4 976 if (LHSOp0 == RHSOp0) { 977 newLHS = LHSOp0; 978 newRHS = NULL; 979 } 980 981 if (newLHS == LHS && newRHS == RHS) 982 return MadeChange ? &SVI : 0; 983 984 SmallVector<int, 16> LHSMask; 985 SmallVector<int, 16> RHSMask; 986 if (newLHS != LHS) 987 LHSMask = LHSShuffle->getShuffleMask(); 988 if (RHSShuffle && newRHS != RHS) 989 RHSMask = RHSShuffle->getShuffleMask(); 990 991 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; 992 SmallVector<int, 16> newMask; 993 bool isSplat = true; 994 int SplatElt = -1; 995 // Create a new mask for the new ShuffleVectorInst so that the new 996 // ShuffleVectorInst is equivalent to the original one. 997 for (unsigned i = 0; i < VWidth; ++i) { 998 int eltMask; 999 if (Mask[i] < 0) { 1000 // This element is an undef value. 1001 eltMask = -1; 1002 } else if (Mask[i] < (int)LHSWidth) { 1003 // This element is from left hand side vector operand. 1004 // 1005 // If LHS is going to be replaced (case 1, 2, or 4), calculate the 1006 // new mask value for the element. 1007 if (newLHS != LHS) { 1008 eltMask = LHSMask[Mask[i]]; 1009 // If the value selected is an undef value, explicitly specify it 1010 // with a -1 mask value. 1011 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) 1012 eltMask = -1; 1013 } else 1014 eltMask = Mask[i]; 1015 } else { 1016 // This element is from right hand side vector operand 1017 // 1018 // If the value selected is an undef value, explicitly specify it 1019 // with a -1 mask value. (case 1) 1020 if (isa<UndefValue>(RHS)) 1021 eltMask = -1; 1022 // If RHS is going to be replaced (case 3 or 4), calculate the 1023 // new mask value for the element. 1024 else if (newRHS != RHS) { 1025 eltMask = RHSMask[Mask[i]-LHSWidth]; 1026 // If the value selected is an undef value, explicitly specify it 1027 // with a -1 mask value. 1028 if (eltMask >= (int)RHSOp0Width) { 1029 assert(isa<UndefValue>(RHSShuffle->getOperand(1)) 1030 && "should have been check above"); 1031 eltMask = -1; 1032 } 1033 } else 1034 eltMask = Mask[i]-LHSWidth; 1035 1036 // If LHS's width is changed, shift the mask value accordingly. 1037 // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any 1038 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. 1039 // If newRHS == newLHS, we want to remap any references from newRHS to 1040 // newLHS so that we can properly identify splats that may occur due to 1041 // obfuscation across the two vectors. 1042 if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS) 1043 eltMask += newLHSWidth; 1044 } 1045 1046 // Check if this could still be a splat. 1047 if (eltMask >= 0) { 1048 if (SplatElt >= 0 && SplatElt != eltMask) 1049 isSplat = false; 1050 SplatElt = eltMask; 1051 } 1052 1053 newMask.push_back(eltMask); 1054 } 1055 1056 // If the result mask is equal to one of the original shuffle masks, 1057 // or is a splat, do the replacement. 1058 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { 1059 SmallVector<Constant*, 16> Elts; 1060 Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); 1061 for (unsigned i = 0, e = newMask.size(); i != e; ++i) { 1062 if (newMask[i] < 0) { 1063 Elts.push_back(UndefValue::get(Int32Ty)); 1064 } else { 1065 Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); 1066 } 1067 } 1068 if (newRHS == NULL) 1069 newRHS = UndefValue::get(newLHS->getType()); 1070 return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); 1071 } 1072 1073 return MadeChange ? &SVI : 0; 1074 } 1075