//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains logic for simplifying instructions based on information
// about how they are used.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "instcombine"

namespace {

struct AMDGPUImageDMaskIntrinsic {
  unsigned Intr;
};

#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
#include "InstCombineTables.inc"

} // end anonymous namespace

/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
                                   const APInt &Demanded) {
  assert(I && "No instruction?");
  assert(OpNo < I->getNumOperands() && "Operand index too large");

  // The operand must be a constant integer or splat integer.
  Value *Op = I->getOperand(OpNo);
  const APInt *C;
  if (!match(Op, m_APInt(C)))
    return false;

  // If there are no bits set that aren't demanded, nothing to do.
  if (C->isSubsetOf(Demanded))
    return false;

  // This instruction is producing bits that are not demanded. Shrink the RHS.
  I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));

  return true;
}

/// Inst is an integer instruction that SimplifyDemandedBits knows about. See
/// if the instruction has any properties that allow us to simplify its
/// operands.
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
  unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
  KnownBits Known(BitWidth);
  APInt DemandedMask(APInt::getAllOnesValue(BitWidth));

  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
                                     0, &Inst);
  if (!V) return false;
  if (V == &Inst) return true;
  replaceInstUsesWith(Inst, V);
  return true;
}

/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
                                        const APInt &DemandedMask,
                                        KnownBits &Known,
                                        unsigned Depth) {
  Use &U = I->getOperandUse(OpNo);
  Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known,
                                          Depth, I);
  if (!NewVal) return false;
  U = NewVal;
  return true;
}

/// This function attempts to replace V with a simpler value based on the
/// demanded bits. When this function is called, it is known that only the bits
/// set in DemandedMask of the result of V are ever used downstream.
/// Consequently, depending on the mask and V, it may be possible to replace V
/// with a constant or one of its operands. In such cases, this function does
/// the replacement and returns true.
/// In all other cases, it returns false after analyzing the expression and
/// setting Known.One to contain all the bits that are known to be one in the
/// expression, and Known.Zero to contain all the bits that are known to be
/// zero in the expression. These are provided to potentially allow the caller
/// (which might recursively be SimplifyDemandedBits itself) to simplify the
/// expression.
/// Known.One and Known.Zero always follow the invariant that:
///   Known.One & Known.Zero == 0.
/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and
/// Known.Zero may only be accurate for those bits set in DemandedMask. Note
/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must
/// all be the same.
///
/// This returns null if it did not change anything and it permits no
/// simplification. This returns V itself if it did some simplification of V's
/// operands based on the information about what bits are demanded. This
/// returns some other non-null value if it found out that V is equal to
/// another value in the context where the specified bits are demanded, but not
/// for all users.
Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
                                             KnownBits &Known, unsigned Depth,
                                             Instruction *CxtI) {
  assert(V != nullptr && "Null pointer of Value???");
  assert(Depth <= 6 && "Limit Search Depth");
  uint32_t BitWidth = DemandedMask.getBitWidth();
  Type *VTy = V->getType();
  assert(
      (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
      Known.getBitWidth() == BitWidth &&
      "Value *V, DemandedMask and Known must have same BitWidth");

  if (isa<Constant>(V)) {
    computeKnownBits(V, Known, Depth, CxtI);
    return nullptr;
  }

  Known.resetAll();
  if (DemandedMask.isNullValue()) // Not demanding any bits from V.
    return UndefValue::get(VTy);

  if (Depth == 6) // Limit search depth.
    return nullptr;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) {
    computeKnownBits(V, Known, Depth, CxtI);
    return nullptr; // Only analyze instructions.
  }

  // If there are multiple uses of this value and we aren't at the root, then
  // we can't do any simplifications of the operands, because DemandedMask
  // only reflects the bits demanded by *one* of the users.
  if (Depth != 0 && !I->hasOneUse())
    return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI);

  KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);

  // If this is the root being simplified, allow it to have multiple uses,
  // just set the DemandedMask to all bits so that we can try to simplify the
  // operands. This allows visitTruncInst (for example) to simplify the
  // operand of a trunc without duplicating all the logic below.
  if (Depth == 0 && !V->hasOneUse())
    DemandedMask.setAllBits();

  switch (I->getOpcode()) {
  default:
    computeKnownBits(I, Known, Depth, CxtI);
    break;
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown,
                             Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    // Output known-0 bits are known to be clear if zero in either the LHS
    // or the RHS.
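    // e.g. for i8 operands with LHSKnown.Zero = 0xF0 and RHSKnown.Zero = 0x0C,
    // a result bit is zero whenever either input bit is known zero, so the
    // 'and' has IKnownZero = 0xFC.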
    APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
    // Output known-1 bits are only known if set in both the LHS & RHS.
    APInt IKnownOne = RHSKnown.One & LHSKnown.One;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
      return Constant::getIntegerValue(VTy, IKnownOne);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
      return I;

    Known.Zero = std::move(IKnownZero);
    Known.One = std::move(IKnownOne);
    break;
  }
  case Instruction::Or: {
    // If either the LHS or the RHS are One, the result is One.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
                             Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
    // Output known-1 bits are known to be set if set in either the LHS | RHS.
    APInt IKnownOne = RHSKnown.One | LHSKnown.One;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
      return Constant::getIntegerValue(VTy, IKnownOne);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or'.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask))
      return I;

    Known.Zero = std::move(IKnownZero);
    Known.One = std::move(IKnownOne);
    break;
  }
  case Instruction::Xor: {
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
                       (RHSKnown.One & LHSKnown.One);
    // Output known-1 bits are known to be set if set in only one of the LHS,
    // RHS.
    APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) |
                      (RHSKnown.One & LHSKnown.Zero);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
      return Constant::getIntegerValue(VTy, IKnownOne);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'xor'.
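    // e.g. if only the low byte of (X ^ Y) is demanded and the low byte of Y
    // is known zero, the xor computes X in all of the demanded bits.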
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // If all of the demanded bits are known to be zero on one side or the
    // other, turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
      Instruction *Or =
          BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
                                   I->getName());
      return InsertNewInstWith(Or, *I);
    }

    // If all of the demanded bits on one side are known, and all of the set
    // bits on that side are also known to be set on the other side, turn this
    // into an AND, as we know the bits will be cleared.
    // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    if (DemandedMask.isSubsetOf(RHSKnown.Zero|RHSKnown.One) &&
        RHSKnown.One.isSubsetOf(LHSKnown.One)) {
      Constant *AndC = Constant::getIntegerValue(VTy,
                                                 ~RHSKnown.One & DemandedMask);
      Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
      return InsertNewInstWith(And, *I);
    }

    // If the RHS is a constant, see if we can simplify it.
    // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
    if (ShrinkDemandedConstant(I, 1, DemandedMask))
      return I;

    // If our LHS is an 'and' and if it has one use, and if any of the bits we
    // are flipping are known to be set, then the xor is just resetting those
    // bits to zero. We can just knock out bits from the 'and' and the 'xor',
    // simplifying both of them.
    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
          isa<ConstantInt>(I->getOperand(1)) &&
          isa<ConstantInt>(LHSInst->getOperand(1)) &&
          (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
        ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
        ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
        APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);

        Constant *AndC =
            ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
        Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
        InsertNewInstWith(NewAnd, *I);

        Constant *XorC =
            ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
        Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
        return InsertNewInstWith(NewXor, *I);
      }

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    Known.Zero = std::move(IKnownZero);
    // Output known-1 bits are known to be set if set in only one of the LHS,
    // RHS.
    Known.One = std::move(IKnownOne);
    break;
  }
  case Instruction::Select: {
    Value *LHS, *RHS;
    SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
    if (SPF == SPF_UMAX) {
      // UMax(A, C) == A if ...
      // The lowest non-zero bit of DemandedMask is higher than the highest
      // non-zero bit of C.
      const APInt *C;
      unsigned CTZ = DemandedMask.countTrailingZeros();
      if (match(RHS, m_APInt(C)) && CTZ >= C->getActiveBits())
        return LHS;
    } else if (SPF == SPF_UMIN) {
      // UMin(A, C) == A if ...
      // The lowest non-zero bit of DemandedMask is higher than the highest
      // non-one bit of C.
      // This comes from using De Morgan's on the above umax example.
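      // e.g. for i8, umin(A, 0xF0) == A in bits 4-7: whenever A > 0xF0, A is
      // in [0xF1, 0xFF], so its high nibble already matches 0xF0's.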
      const APInt *C;
      unsigned CTZ = DemandedMask.countTrailingZeros();
      if (match(RHS, m_APInt(C)) &&
          CTZ >= C->getBitWidth() - C->countLeadingOnes())
        return LHS;
    }

    // If this is a select as part of any other min/max pattern, don't simplify
    // any further in case we break the structure.
    if (SPF != SPF_UNKNOWN)
      return nullptr;

    if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
        ShrinkDemandedConstant(I, 2, DemandedMask))
      return I;

    // Only known if known in both the LHS and RHS.
    Known.One = RHSKnown.One & LHSKnown.One;
    Known.Zero = RHSKnown.Zero & LHSKnown.Zero;
    break;
  }
  case Instruction::ZExt:
  case Instruction::Trunc: {
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
      return I;
    Known = InputKnown.zextOrTrunc(BitWidth);
    // Any top bits are known to be zero.
    if (BitWidth > SrcBitWidth)
      Known.Zero.setBitsFrom(SrcBitWidth);
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
  case Instruction::BitCast:
    if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
      return nullptr; // vector->int or fp->int?

    if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
      if (VectorType *SrcVTy =
            dyn_cast<VectorType>(I->getOperand(0)->getType())) {
        if (DstVTy->getNumElements() != SrcVTy->getNumElements())
          // Don't touch a bitcast between vectors of different element counts.
          return nullptr;
      } else
        // Don't touch a scalar-to-vector bitcast.
        return nullptr;
    } else if (I->getOperand(0)->getType()->isVectorTy())
      // Don't touch a vector-to-scalar bitcast.
      return nullptr;

    if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
      return I;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth);

    // If any of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    if (DemandedMask.getActiveBits() > SrcBitWidth)
      InputDemandedBits.setBit(SrcBitWidth-1);

    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1))
      return I;

    // If the input sign bit is known zero, or if the NewBits are not demanded,
    // convert this into a zero extension.
    if (InputKnown.isNonNegative() ||
        DemandedMask.getActiveBits() <= SrcBitWidth) {
      // Convert to ZExt cast.
      CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
      return InsertNewInstWith(NewCast, *I);
    }

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
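    // e.g. for sext i8 %x to i32, a known-one input sign bit makes bits 8-31
    // of the result known one; a known-zero sign bit makes them known zero.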
    Known = InputKnown.sext(BitWidth);
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
  case Instruction::Add:
  case Instruction::Sub: {
    /// If the high-bits of an ADD/SUB are not demanded, then we do not care
    /// about the high bits of the operands.
    unsigned NLZ = DemandedMask.countLeadingZeros();
    // Right fill the mask of bits for this ADD/SUB to demand the most
    // significant bit and all those below it.
    APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
    if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
        SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
        ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
      if (NLZ > 0) {
        // Disable the nsw and nuw flags here: We can no longer guarantee that
        // we won't wrap after simplification. Removing the nsw/nuw flags is
        // legal here because the top bit is not demanded.
        BinaryOperator &BinOP = *cast<BinaryOperator>(I);
        BinOP.setHasNoSignedWrap(false);
        BinOP.setHasNoUnsignedWrap(false);
      }
      return I;
    }

    // If we are known to be adding/subtracting zeros to every bit below
    // the highest demanded bit, we just return the other side.
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    // We can't do this with the LHS for subtraction, unless we are only
    // demanding the LSB.
    if ((I->getOpcode() == Instruction::Add ||
         DemandedFromOps.isOneValue()) &&
        DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // Otherwise just compute the known bits of the result.
    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    Known = KnownBits::computeForAddSub(I->getOpcode() == Instruction::Add,
                                        NSW, LHSKnown, RHSKnown);
    break;
  }
  case Instruction::Shl: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      const APInt *ShrAmt;
      if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt))))
        if (Instruction *Shr = dyn_cast<Instruction>(I->getOperand(0)))
          if (Value *R = simplifyShrShlDemandedBits(Shr, *ShrAmt, I, *SA,
                                                    DemandedMask, Known))
            return R;

      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
      APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));

      // If the shift is NUW/NSW, then it does demand the high bits.
      ShlOperator *IOp = cast<ShlOperator>(I);
      if (IOp->hasNoSignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt+1);
      else if (IOp->hasNoUnsignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
        return I;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero <<= ShiftAmt;
      Known.One <<= ShiftAmt;
      // low bits known zero.
      if (ShiftAmt)
        Known.Zero.setLowBits(ShiftAmt);
    }
    break;
  }
  case Instruction::LShr: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);

      // Unsigned shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));

      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
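      // e.g. 'lshr exact' guarantees that no set bits are shifted out, i.e.
      // (X & ((1 << ShiftAmt) - 1)) == 0, so the low ShiftAmt bits of the
      // operand stay demanded to preserve that guarantee.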
      if (cast<LShrOperator>(I)->isExact())
        DemandedMaskIn.setLowBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
        return I;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShiftAmt);
      Known.One.lshrInPlace(ShiftAmt);
      if (ShiftAmt)
        Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
    }
    break;
  }
  case Instruction::AShr: {
    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedMask.isOneValue()) {
      // Perform the logical shift right.
      Instruction *NewVal = BinaryOperator::CreateLShr(
          I->getOperand(0), I->getOperand(1), I->getName());
      return InsertNewInstWith(NewVal, *I);
    }

    // If the sign bit is the only bit demanded by this ashr, then there is no
    // need to do it, the shift doesn't change the high bit.
    if (DemandedMask.isSignMask())
      return I->getOperand(0);

    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);

      // Signed shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
      // If any of the high bits are demanded, we should set the sign bit as
      // demanded.
      if (DemandedMask.countLeadingZeros() <= ShiftAmt)
        DemandedMaskIn.setSignBit();

      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (cast<AShrOperator>(I)->isExact())
        DemandedMaskIn.setLowBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
        return I;

      unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);

      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      // Compute the new bits that are at the top now plus sign bits.
      APInt HighBits(APInt::getHighBitsSet(
          BitWidth, std::min(SignBits + ShiftAmt - 1, BitWidth)));
      Known.Zero.lshrInPlace(ShiftAmt);
      Known.One.lshrInPlace(ShiftAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      assert(BitWidth > ShiftAmt && "Shift amount not saturated?");
      if (Known.Zero[BitWidth-ShiftAmt-1] ||
          !DemandedMask.intersects(HighBits)) {
        BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
                                                          I->getOperand(1));
        LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
        return InsertNewInstWith(LShr, *I);
      } else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one.
        Known.One |= HighBits;
      }
    }
    break;
  }
  case Instruction::UDiv: {
    // UDiv doesn't demand low bits that are zero in the divisor.
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      // If the division is exact, then it does demand the low bits.
      if (cast<UDivOperator>(I)->isExact())
        break;

      // FIXME: Take the demanded mask of the result into account.
      unsigned RHSTrailingZeros = SA->countTrailingZeros();
      APInt DemandedMaskIn =
          APInt::getHighBitsSet(BitWidth, BitWidth - RHSTrailingZeros);
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, LHSKnown, Depth + 1))
        return I;

      // Propagate zero bits from the input.
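      // e.g. if the dividend has 3 known leading zeros and the divisor is a
      // multiple of 4 (2 trailing zeros), the quotient is less than
      // 2^(BitWidth-5) and therefore has at least 5 leading zeros.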
      Known.Zero.setHighBits(std::min(
          BitWidth, LHSKnown.Zero.countLeadingOnes() + RHSTrailingZeros));
    }
    break;
  }
  case Instruction::SRem:
    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
      // X % -1 demands all the bits because we don't want to introduce
      // INT_MIN % -1 (== undef) by accident.
      if (Rem->isMinusOne())
        break;
      APInt RA = Rem->getValue().abs();
      if (RA.isPowerOf2()) {
        if (DemandedMask.ult(RA)) // srem won't affect demanded bits
          return I->getOperand(0);

        APInt LowBits = RA - 1;
        APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
        if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1))
          return I;

        // The low bits of LHS are unchanged by the srem.
        Known.Zero = LHSKnown.Zero & LowBits;
        Known.One = LHSKnown.One & LowBits;

        // If LHS is non-negative or has all low bits zero, then the upper bits
        // are all zero.
        if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero))
          Known.Zero |= ~LowBits;

        // If LHS is negative and not all low bits are zero, then the upper
        // bits are all one.
        if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One))
          Known.One |= ~LowBits;

        assert(!Known.hasConflict() && "Bits known to be one AND zero?");
        break;
      }
    }

    // The sign bit is the LHS's sign bit, except when the result of the
    // remainder is zero.
    if (DemandedMask.isSignBitSet()) {
      computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
      // If it's known zero, our sign bit is also zero.
      if (LHSKnown.isNonNegative())
        Known.makeNonNegative();
    }
    break;
  case Instruction::URem: {
    KnownBits Known2(BitWidth);
    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
    if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) ||
        SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1))
      return I;

    unsigned Leaders = Known2.countMinLeadingZeros();
    Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
    break;
  }
  case Instruction::Call:
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::bswap: {
        // If the only bits demanded come from one byte of the bswap result,
        // just shift the input byte into position to eliminate the bswap.
        unsigned NLZ = DemandedMask.countLeadingZeros();
        unsigned NTZ = DemandedMask.countTrailingZeros();

        // Round NTZ down to the next byte. If we have 11 trailing zeros, then
        // we need all the bits down to bit 8. Likewise, round NLZ. If we
        // have 14 leading zeros, round to 8.
        NLZ &= ~7;
        NTZ &= ~7;
        // If we need exactly one byte, we can do this transformation.
        if (BitWidth-NLZ-NTZ == 8) {
          unsigned ResultBit = NTZ;
          unsigned InputBit = BitWidth-NTZ-8;

          // Replace this with either a left or right shift to get the byte
          // into the right place.
          Instruction *NewVal;
          if (InputBit > ResultBit)
            NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
                    ConstantInt::get(I->getType(), InputBit-ResultBit));
          else
            NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
                    ConstantInt::get(I->getType(), ResultBit-InputBit));
          NewVal->takeName(I);
          return InsertNewInstWith(NewVal, *I);
        }

        // TODO: Could compute known zero/one bits based on the input.
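        // e.g. if only bits 0-7 of bswap(i32 %x) are demanded, NLZ and NTZ
        // round to 24 and 0, and the bswap is replaced by (%x lshr 24).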
691 break; 692 } 693 case Intrinsic::x86_mmx_pmovmskb: 694 case Intrinsic::x86_sse_movmsk_ps: 695 case Intrinsic::x86_sse2_movmsk_pd: 696 case Intrinsic::x86_sse2_pmovmskb_128: 697 case Intrinsic::x86_avx_movmsk_ps_256: 698 case Intrinsic::x86_avx_movmsk_pd_256: 699 case Intrinsic::x86_avx2_pmovmskb: { 700 // MOVMSK copies the vector elements' sign bits to the low bits 701 // and zeros the high bits. 702 unsigned ArgWidth; 703 if (II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) { 704 ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>. 705 } else { 706 auto Arg = II->getArgOperand(0); 707 auto ArgType = cast<VectorType>(Arg->getType()); 708 ArgWidth = ArgType->getNumElements(); 709 } 710 711 // If we don't need any of low bits then return zero, 712 // we know that DemandedMask is non-zero already. 713 APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth); 714 if (DemandedElts.isNullValue()) 715 return ConstantInt::getNullValue(VTy); 716 717 // We know that the upper bits are set to zero. 718 Known.Zero.setBitsFrom(ArgWidth); 719 return nullptr; 720 } 721 case Intrinsic::x86_sse42_crc32_64_64: 722 Known.Zero.setBitsFrom(32); 723 return nullptr; 724 } 725 } 726 computeKnownBits(V, Known, Depth, CxtI); 727 break; 728 } 729 730 // If the client is only demanding bits that we know, return the known 731 // constant. 732 if (DemandedMask.isSubsetOf(Known.Zero|Known.One)) 733 return Constant::getIntegerValue(VTy, Known.One); 734 return nullptr; 735 } 736 737 /// Helper routine of SimplifyDemandedUseBits. It computes Known 738 /// bits. It also tries to handle simplifications that can be done based on 739 /// DemandedMask, but without modifying the Instruction. 740 Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, 741 const APInt &DemandedMask, 742 KnownBits &Known, 743 unsigned Depth, 744 Instruction *CxtI) { 745 unsigned BitWidth = DemandedMask.getBitWidth(); 746 Type *ITy = I->getType(); 747 748 KnownBits LHSKnown(BitWidth); 749 KnownBits RHSKnown(BitWidth); 750 751 // Despite the fact that we can't simplify this instruction in all User's 752 // context, we can at least compute the known bits, and we can 753 // do simplifications that apply to *just* the one user if we know that 754 // this instruction has a simpler value in that context. 755 switch (I->getOpcode()) { 756 case Instruction::And: { 757 // If either the LHS or the RHS are Zero, the result is zero. 758 computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); 759 computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, 760 CxtI); 761 762 // Output known-0 are known to be clear if zero in either the LHS | RHS. 763 APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero; 764 // Output known-1 bits are only known if set in both the LHS & RHS. 765 APInt IKnownOne = RHSKnown.One & LHSKnown.One; 766 767 // If the client is only demanding bits that we know, return the known 768 // constant. 769 if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) 770 return Constant::getIntegerValue(ITy, IKnownOne); 771 772 // If all of the demanded bits are known 1 on one side, return the other. 773 // These bits cannot contribute to the result of the 'and' in this 774 // context. 
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    Known.Zero = std::move(IKnownZero);
    Known.One = std::move(IKnownOne);
    break;
  }
  case Instruction::Or: {
    // We can simplify (X|Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.

    // If either the LHS or the RHS are One, the result is One.
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
                     CxtI);

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
    // Output known-1 bits are known to be set if set in either the LHS | RHS.
    APInt IKnownOne = RHSKnown.One | LHSKnown.One;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
      return Constant::getIntegerValue(ITy, IKnownOne);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    Known.Zero = std::move(IKnownZero);
    Known.One = std::move(IKnownOne);
    break;
  }
  case Instruction::Xor: {
    // We can simplify (X^Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.

    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
                     CxtI);

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
                       (RHSKnown.One & LHSKnown.One);
    // Output known-1 bits are known to be set if set in only one of the LHS,
    // RHS.
    APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) |
                      (RHSKnown.One & LHSKnown.Zero);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
      return Constant::getIntegerValue(ITy, IKnownOne);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    Known.Zero = std::move(IKnownZero);
    // Output known-1 bits are known to be set if set in only one of the LHS,
    // RHS.
    Known.One = std::move(IKnownOne);
    break;
  }
  default:
    // Compute the Known bits to simplify things downstream.
    computeKnownBits(I, Known, Depth, CxtI);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    break;
  }

  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
/// of "C2-C1".
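///
/// For example, (X lshr 3) shl 3 and X differ only in the low three bits, so
/// the pair can simply be replaced by X when those bits are not demanded.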
///
/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
/// ..., bn}, without considering the specific value X is holding.
/// This transformation is legal iff one of following conditions holds:
///  1) All the bits in S are 0, in this case E1 == E2.
///  2) We don't care about those bits in S, per the input DemandedMask.
///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care
///     about the rest of them.
///
/// Currently we only test condition 2).
///
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
Value *
InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
                                         Instruction *Shl, const APInt &ShlOp1,
                                         const APInt &DemandedMask,
                                         KnownBits &Known) {
  if (!ShlOp1 || !ShrOp1)
    return nullptr; // No-op.

  Value *VarX = Shr->getOperand(0);
  Type *Ty = VarX->getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
    return nullptr; // Undef.

  unsigned ShlAmt = ShlOp1.getZExtValue();
  unsigned ShrAmt = ShrOp1.getZExtValue();

  Known.One.clearAllBits();
  Known.Zero.setLowBits(ShlAmt - 1);
  Known.Zero &= DemandedMask;

  APInt BitMask1(APInt::getAllOnesValue(BitWidth));
  APInt BitMask2(APInt::getAllOnesValue(BitWidth));

  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
                      (BitMask1.ashr(ShrAmt) << ShlAmt);

  if (ShrAmt <= ShlAmt) {
    BitMask2 <<= (ShlAmt - ShrAmt);
  } else {
    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
                        BitMask2.ashr(ShrAmt - ShlAmt);
  }

  // Check if condition-2 (see the comment to this function) is satisfied.
  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    if (ShrAmt == ShlAmt)
      return VarX;

    if (!Shr->hasOneUse())
      return nullptr;

    BinaryOperator *New;
    if (ShrAmt < ShlAmt) {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
      New = BinaryOperator::CreateShl(VarX, Amt);
      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
    } else {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
                     BinaryOperator::CreateAShr(VarX, Amt);
      if (cast<BinaryOperator>(Shr)->isExact())
        New->setIsExact(true);
    }

    return InsertNewInstWith(New, *Shl);
  }

  return nullptr;
}

/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
                                                           APInt DemandedElts,
                                                           int DMaskIdx) {
  unsigned VWidth = II->getType()->getVectorNumElements();
  if (VWidth == 1)
    return nullptr;

  ConstantInt *NewDMask = nullptr;

  if (DMaskIdx < 0) {
    // Pretend that a prefix of elements is demanded to simplify the code
    // below.
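    // e.g. DemandedElts = 0b0101 becomes 0b0111: getActiveBits() == 3, so the
    // first three elements are treated as the demanded prefix.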
    DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
  } else {
    ConstantInt *DMask = dyn_cast<ConstantInt>(II->getArgOperand(DMaskIdx));
    if (!DMask)
      return nullptr; // non-constant dmask is not supported by codegen

    unsigned DMaskVal = DMask->getZExtValue() & 0xf;

    // Mask off values that are undefined because the dmask doesn't cover them
    DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;

    unsigned NewDMaskVal = 0;
    unsigned OrigLoadIdx = 0;
    for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
      const unsigned Bit = 1 << SrcIdx;
      if (!!(DMaskVal & Bit)) {
        if (!!DemandedElts[OrigLoadIdx])
          NewDMaskVal |= Bit;
        OrigLoadIdx++;
      }
    }

    if (DMaskVal != NewDMaskVal)
      NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal);
  }

  // TODO: Handle 3 vectors when supported in code gen.
  unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countPopulation());
  if (!NewNumElts)
    return UndefValue::get(II->getType());

  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
    if (NewDMask)
      II->setArgOperand(DMaskIdx, NewDMask);
    return nullptr;
  }

  // Determine the overload types of the original intrinsic.
  auto IID = II->getIntrinsicID();
  SmallVector<Intrinsic::IITDescriptor, 16> Table;
  getIntrinsicInfoTableEntries(IID, Table);
  ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;

  FunctionType *FTy = II->getCalledFunction()->getFunctionType();
  SmallVector<Type *, 6> OverloadTys;
  Intrinsic::matchIntrinsicType(FTy->getReturnType(), TableRef, OverloadTys);
  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
    Intrinsic::matchIntrinsicType(FTy->getParamType(i), TableRef, OverloadTys);

  // Get the new return type overload of the intrinsic.
  Module *M = II->getParent()->getParent()->getParent();
  Type *EltTy = II->getType()->getVectorElementType();
  Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts);

  OverloadTys[0] = NewTy;
  Function *NewIntrin = Intrinsic::getDeclaration(M, IID, OverloadTys);

  SmallVector<Value *, 16> Args;
  for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
    Args.push_back(II->getArgOperand(I));

  if (NewDMask)
    Args[DMaskIdx] = NewDMask;

  IRBuilderBase::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(II);

  CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
  NewCall->takeName(II);
  NewCall->copyMetadata(*II);

  if (NewNumElts == 1) {
    return Builder.CreateInsertElement(UndefValue::get(II->getType()), NewCall,
                                       DemandedElts.countTrailingZeros());
  }

  SmallVector<uint32_t, 8> EltMask;
  unsigned NewLoadIdx = 0;
  for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
    if (!!DemandedElts[OrigLoadIdx])
      EltMask.push_back(NewLoadIdx++);
    else
      EltMask.push_back(NewNumElts);
  }

  Value *Shuffle =
      Builder.CreateShuffleVector(NewCall, UndefValue::get(NewTy), EltMask);

  return Shuffle;
}

/// The specified value produces a vector with any number of elements.
/// DemandedElts contains the set of elements that are actually used by the
/// caller. This method analyzes which elements of the operand are undef and
/// returns that information in UndefElts.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified and the resultant value is
/// returned. This returns null if no change was made.
Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                                APInt &UndefElts,
                                                unsigned Depth) {
  unsigned VWidth = V->getType()->getVectorNumElements();
  APInt EltMask(APInt::getAllOnesValue(VWidth));
  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");

  if (isa<UndefValue>(V)) {
    // If the entire vector is undefined, just return this info.
    UndefElts = EltMask;
    return nullptr;
  }

  if (DemandedElts.isNullValue()) { // If nothing is demanded, provide undef.
    UndefElts = EltMask;
    return UndefValue::get(V->getType());
  }

  UndefElts = 0;

  if (auto *C = dyn_cast<Constant>(V)) {
    // Check if this is identity. If so, return 0 since we are not simplifying
    // anything.
    if (DemandedElts.isAllOnesValue())
      return nullptr;

    Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    Constant *Undef = UndefValue::get(EltTy);
    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0; i != VWidth; ++i) {
      if (!DemandedElts[i]) { // If not demanded, set to undef.
        Elts.push_back(Undef);
        UndefElts.setBit(i);
        continue;
      }

      Constant *Elt = C->getAggregateElement(i);
      if (!Elt) return nullptr;

      if (isa<UndefValue>(Elt)) { // Already undef.
        Elts.push_back(Undef);
        UndefElts.setBit(i);
      } else { // Otherwise, defined.
        Elts.push_back(Elt);
      }
    }

    // If we changed the constant, return it.
    Constant *NewCV = ConstantVector::get(Elts);
    return NewCV != C ? NewCV : nullptr;
  }

  // Limit search depth.
  if (Depth == 10)
    return nullptr;

  // If multiple users are using the root value, proceed with
  // simplification conservatively assuming that all elements
  // are needed.
  if (!V->hasOneUse()) {
    // Quit if we find multiple users of a non-root value though.
    // They'll be handled when it's their turn to be visited by
    // the main instcombine process.
    if (Depth != 0)
      // TODO: Just compute the UndefElts information recursively.
      return nullptr;

    // Conservatively assume that all elements are needed.
    DemandedElts = EltMask;
  }

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return nullptr; // Only analyze instructions.

  bool MadeChange = false;
  auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
                              APInt Demanded, APInt &Undef) {
    auto *II = dyn_cast<IntrinsicInst>(Inst);
    Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
    if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
      if (II)
        II->setArgOperand(OpNum, V);
      else
        Inst->setOperand(OpNum, V);
      MadeChange = true;
    }
  };

  APInt UndefElts2(VWidth, 0);
  APInt UndefElts3(VWidth, 0);
  switch (I->getOpcode()) {
  default: break;

  case Instruction::InsertElement: {
    // If this is a variable index, we don't know which element it overwrites,
    // so demand exactly the same input as we produce.
    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
    if (!Idx) {
      // Note that we can't propagate undef elt info, because we don't know
      // which elt is getting updated.
      simplifyAndSetOp(I, 0, DemandedElts, UndefElts2);
      break;
    }

    // The element inserted overwrites whatever was there, so the input
    // demanded set is simpler than the output set.
    unsigned IdxNo = Idx->getZExtValue();
    APInt PreInsertDemandedElts = DemandedElts;
    if (IdxNo < VWidth)
      PreInsertDemandedElts.clearBit(IdxNo);

    simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts);

    // If this is inserting an element that isn't demanded, remove this
    // insertelement.
    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
      Worklist.Add(I);
      return I->getOperand(0);
    }

    // The inserted element is defined.
    UndefElts.clearBit(IdxNo);
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    unsigned LHSVWidth =
        Shuffle->getOperand(0)->getType()->getVectorNumElements();
    APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
    for (unsigned i = 0; i < VWidth; i++) {
      if (DemandedElts[i]) {
        unsigned MaskVal = Shuffle->getMaskValue(i);
        if (MaskVal != -1u) {
          assert(MaskVal < LHSVWidth * 2 &&
                 "shufflevector mask index out of range!");
          if (MaskVal < LHSVWidth)
            LeftDemanded.setBit(MaskVal);
          else
            RightDemanded.setBit(MaskVal - LHSVWidth);
        }
      }
    }

    APInt LHSUndefElts(LHSVWidth, 0);
    simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);

    APInt RHSUndefElts(LHSVWidth, 0);
    simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts);

    bool NewUndefElts = false;
    unsigned LHSIdx = -1u, LHSValIdx = -1u;
    unsigned RHSIdx = -1u, RHSValIdx = -1u;
    bool LHSUniform = true;
    bool RHSUniform = true;
    for (unsigned i = 0; i < VWidth; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u) {
        UndefElts.setBit(i);
      } else if (!DemandedElts[i]) {
        NewUndefElts = true;
        UndefElts.setBit(i);
      } else if (MaskVal < LHSVWidth) {
        if (LHSUndefElts[MaskVal]) {
          NewUndefElts = true;
          UndefElts.setBit(i);
        } else {
          LHSIdx = LHSIdx == -1u ? i : LHSVWidth;
          LHSValIdx = LHSValIdx == -1u ? MaskVal : LHSVWidth;
          LHSUniform = LHSUniform && (MaskVal == i);
        }
      } else {
        if (RHSUndefElts[MaskVal - LHSVWidth]) {
          NewUndefElts = true;
          UndefElts.setBit(i);
        } else {
          RHSIdx = RHSIdx == -1u ? i : LHSVWidth;
          RHSValIdx = RHSValIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth;
          RHSUniform = RHSUniform && (MaskVal - LHSVWidth == i);
        }
      }
    }

    // Try to transform shuffle with constant vector and single element from
    // this constant vector to single insertelement instruction.
    // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
    // insertelement V, C[ci], ci-n
    if (LHSVWidth == Shuffle->getType()->getNumElements()) {
      Value *Op = nullptr;
      Constant *Value = nullptr;
      unsigned Idx = -1u;

      // Find constant vector with the single element in shuffle (LHS or RHS).
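      // e.g. shufflevector V, <0, 9, 2, 3>, <0, 5, 2, 3>
      //      -> insertelement V, 9, 1   (mask elt 1 reads C[1] == 9)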
      if (LHSIdx < LHSVWidth && RHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
          Op = Shuffle->getOperand(1);
          Value = CV->getOperand(LHSValIdx);
          Idx = LHSIdx;
        }
      }
      if (RHSIdx < LHSVWidth && LHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
          Op = Shuffle->getOperand(0);
          Value = CV->getOperand(RHSValIdx);
          Idx = RHSIdx;
        }
      }
      // Found constant vector with single element - convert to insertelement.
      if (Op && Value) {
        Instruction *New = InsertElementInst::Create(
            Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
            Shuffle->getName());
        InsertNewInstWith(New, *Shuffle);
        return New;
      }
    }
    if (NewUndefElts) {
      // Add additional discovered undefs.
      SmallVector<Constant*, 16> Elts;
      for (unsigned i = 0; i < VWidth; ++i) {
        if (UndefElts[i])
          Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
        else
          Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
                                          Shuffle->getMaskValue(i)));
      }
      I->setOperand(2, ConstantVector::get(Elts));
      MadeChange = true;
    }
    break;
  }
  case Instruction::Select: {
    // If this is a vector select, try to transform the select condition based
    // on the current demanded elements.
    SelectInst *Sel = cast<SelectInst>(I);
    if (Sel->getCondition()->getType()->isVectorTy()) {
      // TODO: We are not doing anything with UndefElts based on this call.
      // It is overwritten below based on the other select operands. If an
      // element of the select condition is known undef, then we are free to
      // choose the output value from either arm of the select. If we know that
      // one of those values is undef, then the output can be undef.
      simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
    }

    // Next, see if we can transform the arms of the select.
    APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
    if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) {
      for (unsigned i = 0; i < VWidth; i++) {
        // isNullValue() always returns false when called on a ConstantExpr.
        // Skip constant expressions to avoid propagating incorrect information.
        Constant *CElt = CV->getAggregateElement(i);
        if (isa<ConstantExpr>(CElt))
          continue;
        // TODO: If a select condition element is undef, we can demand from
        // either side. If one side is known undef, choosing that side would
        // propagate undef.
        if (CElt->isNullValue())
          DemandedLHS.clearBit(i);
        else
          DemandedRHS.clearBit(i);
      }
    }

    simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2);
    simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3);

    // Output elements are undefined if the element from each arm is undefined.
    // TODO: This can be improved. See comment in select condition handling.
    UndefElts = UndefElts2 & UndefElts3;
    break;
  }
  case Instruction::BitCast: {
    // Vector->vector casts only.
    VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
    if (!VTy) break;
    unsigned InVWidth = VTy->getNumElements();
    APInt InputDemandedElts(InVWidth, 0);
    UndefElts2 = APInt(InVWidth, 0);
    unsigned Ratio;

    if (VWidth == InVWidth) {
      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
      // elements as are demanded of us.
      Ratio = 1;
      InputDemandedElts = DemandedElts;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an input element is live if any of the
      // corresponding output elements are live.
      Ratio = VWidth / InVWidth;
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (DemandedElts[OutIdx])
          InputDemandedElts.setBit(OutIdx / Ratio);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an input element is live if the
      // corresponding output element is live.
      Ratio = InVWidth / VWidth;
      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
        if (DemandedElts[InIdx / Ratio])
          InputDemandedElts.setBit(InIdx);
    } else {
      // Unsupported so far.
      break;
    }

    simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2);

    if (VWidth == InVWidth) {
      UndefElts = UndefElts2;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an output element is undef if the
      // corresponding input element is undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (UndefElts2[OutIdx / Ratio])
          UndefElts.setBit(OutIdx);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an output element is undef if all of the
      // corresponding input elements are undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
        APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
        if (SubUndef.countPopulation() == Ratio)
          UndefElts.setBit(OutIdx);
      }
    } else {
      llvm_unreachable("Unimp");
    }
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
    simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
    break;

  case Instruction::Call: {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
    if (!II) break;
    switch (II->getIntrinsicID()) {
    case Intrinsic::x86_xop_vfrcz_ss:
    case Intrinsic::x86_xop_vfrcz_sd:
      // The instructions for these intrinsics are spec'd to zero the upper
      // bits, not pass them through like other scalar intrinsics. So we
      // shouldn't just use Arg0 if DemandedElts[0] is clear like we do for
      // other intrinsics. Instead we should return a zero vector.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return ConstantAggregateZero::get(II->getType());
      }

      // Only the lower element is used.
      DemandedElts = 1;
      simplifyAndSetOp(II, 0, DemandedElts, UndefElts);

      // Only the lower element is undefined. The high elements are zero.
      UndefElts = UndefElts[0];
      break;

    // Unary scalar-as-vector operations that work column-wise.
    case Intrinsic::x86_sse_rcp_ss:
    case Intrinsic::x86_sse_rsqrt_ss:
      simplifyAndSetOp(II, 0, DemandedElts, UndefElts);

      // If lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }
      // TODO: If only the low elt is demanded, lower SQRT to FSQRT (with
      // rounding/exception checks).
      break;

    // Binary scalar-as-vector operations that work column-wise. The high
    // elements come from operand 0.
    // The low element is a function of both operands.
    case Intrinsic::x86_sse_min_ss:
    case Intrinsic::x86_sse_max_ss:
    case Intrinsic::x86_sse_cmp_ss:
    case Intrinsic::x86_sse2_min_sd:
    case Intrinsic::x86_sse2_max_sd:
    case Intrinsic::x86_sse2_cmp_sd: {
      simplifyAndSetOp(II, 0, DemandedElts, UndefElts);

      // If lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }

      // Only lower element is used for operand 1.
      DemandedElts = 1;
      simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);

      // Lower element is undefined if both lower elements are undefined.
      // Consider things like undef&0. The result is known zero, not undef.
      if (!UndefElts2[0])
        UndefElts.clearBit(0);

      break;
    }

    // Binary scalar-as-vector operations that work column-wise. The high
    // elements come from operand 0 and the low element comes from operand 1.
    case Intrinsic::x86_sse41_round_ss:
    case Intrinsic::x86_sse41_round_sd: {
      // Don't use the low element of operand 0.
      APInt DemandedElts2 = DemandedElts;
      DemandedElts2.clearBit(0);
      simplifyAndSetOp(II, 0, DemandedElts2, UndefElts);

      // If lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }

      // Only lower element is used for operand 1.
      DemandedElts = 1;
      simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);

      // Take the high undef elements from operand 0 and take the lower element
      // from operand 1.
      UndefElts.clearBit(0);
      UndefElts |= UndefElts2[0];
      break;
    }

    // Three input scalar-as-vector operations that work column-wise. The high
    // elements come from operand 0 and the low element is a function of all
    // three inputs.
    case Intrinsic::x86_avx512_mask_add_ss_round:
    case Intrinsic::x86_avx512_mask_div_ss_round:
    case Intrinsic::x86_avx512_mask_mul_ss_round:
    case Intrinsic::x86_avx512_mask_sub_ss_round:
    case Intrinsic::x86_avx512_mask_max_ss_round:
    case Intrinsic::x86_avx512_mask_min_ss_round:
    case Intrinsic::x86_avx512_mask_add_sd_round:
    case Intrinsic::x86_avx512_mask_div_sd_round:
    case Intrinsic::x86_avx512_mask_mul_sd_round:
    case Intrinsic::x86_avx512_mask_sub_sd_round:
    case Intrinsic::x86_avx512_mask_max_sd_round:
    case Intrinsic::x86_avx512_mask_min_sd_round:
      simplifyAndSetOp(II, 0, DemandedElts, UndefElts);

      // If lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }

      // Only lower element is used for operand 1 and 2.
      DemandedElts = 1;
      simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
      simplifyAndSetOp(II, 2, DemandedElts, UndefElts3);

      // Lower element is undefined if all three lower elements are undefined.
      // Consider things like undef&0. The result is known zero, not undef.
      if (!UndefElts2[0] || !UndefElts3[0])
        UndefElts.clearBit(0);

      break;

    case Intrinsic::x86_sse2_packssdw_128:
    case Intrinsic::x86_sse2_packsswb_128:
    case Intrinsic::x86_sse2_packuswb_128:
    case Intrinsic::x86_sse41_packusdw:
    case Intrinsic::x86_avx2_packssdw:
    case Intrinsic::x86_avx2_packsswb:
    case Intrinsic::x86_avx2_packusdw:
    case Intrinsic::x86_avx2_packuswb:
    case Intrinsic::x86_avx512_packssdw_512:
    case Intrinsic::x86_avx512_packsswb_512:
    case Intrinsic::x86_avx512_packusdw_512:
    case Intrinsic::x86_avx512_packuswb_512: {
      auto *Ty0 = II->getArgOperand(0)->getType();
      unsigned InnerVWidth = Ty0->getVectorNumElements();
      assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

      unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
      unsigned VWidthPerLane = VWidth / NumLanes;
      unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;

      // Per lane, pack the elements of the first input and then the second.
      // e.g.
      // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
      // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
      for (int OpNum = 0; OpNum != 2; ++OpNum) {
        APInt OpDemandedElts(InnerVWidth, 0);
        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
          unsigned LaneIdx = Lane * VWidthPerLane;
          for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
            unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
            if (DemandedElts[Idx])
              OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
          }
        }

        // Demand elements from the operand.
        APInt OpUndefElts(InnerVWidth, 0);
        simplifyAndSetOp(II, OpNum, OpDemandedElts, OpUndefElts);

        // Pack the operand's UNDEF elements, one lane at a time.
        OpUndefElts = OpUndefElts.zext(VWidth);
        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
          APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
          LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
          LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
          UndefElts |= LaneElts;
        }
      }
      break;
    }

    // PSHUFB
    case Intrinsic::x86_ssse3_pshuf_b_128:
    case Intrinsic::x86_avx2_pshuf_b:
    case Intrinsic::x86_avx512_pshuf_b_512:
    // PERMILVAR
    case Intrinsic::x86_avx_vpermilvar_ps:
    case Intrinsic::x86_avx_vpermilvar_ps_256:
    case Intrinsic::x86_avx512_vpermilvar_ps_512:
    case Intrinsic::x86_avx_vpermilvar_pd:
    case Intrinsic::x86_avx_vpermilvar_pd_256:
    case Intrinsic::x86_avx512_vpermilvar_pd_512:
    // PERMV
    case Intrinsic::x86_avx2_permd:
    case Intrinsic::x86_avx2_permps: {
      simplifyAndSetOp(II, 1, DemandedElts, UndefElts);
      break;
    }

    // SSE4A instructions leave the upper 64-bits of the 128-bit result
    // in an undefined state.
    case Intrinsic::x86_sse4a_extrq:
    case Intrinsic::x86_sse4a_extrqi:
    case Intrinsic::x86_sse4a_insertq:
    case Intrinsic::x86_sse4a_insertqi:
      UndefElts.setHighBits(VWidth / 2);
      break;
    case Intrinsic::amdgcn_buffer_load:
    case Intrinsic::amdgcn_buffer_load_format:
      return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
    default: {
      if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
        return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);

      break;
    }
    } // switch on IntrinsicID
    break;
  } // case Call
  } // switch on Opcode

  // TODO: We bail completely on integer div/rem and shifts because they have
  // UB/poison potential, but that should be refined.
  BinaryOperator *BO;
  if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
    simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);

    // Any change to an instruction with potential poison must clear those
    // flags because we cannot guarantee those constraints now. Other analysis
    // may determine that it is safe to re-apply the flags.
    if (MadeChange)
      BO->dropPoisonGeneratingFlags();

    // Output elements are undefined if both are undefined. Consider things
    // like undef & 0. The result is known zero, not undef.
    UndefElts &= UndefElts2;
  }

  return MadeChange ? I : nullptr;
}