1 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains logic for simplifying instructions based on information 11 // about how they are used. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "InstCombineInternal.h" 16 #include "llvm/Analysis/ValueTracking.h" 17 #include "llvm/IR/IntrinsicInst.h" 18 #include "llvm/IR/PatternMatch.h" 19 20 using namespace llvm; 21 using namespace llvm::PatternMatch; 22 23 #define DEBUG_TYPE "instcombine" 24 25 /// Check to see if the specified operand of the specified instruction is a 26 /// constant integer. If so, check to see if there are any bits set in the 27 /// constant that are not demanded. If so, shrink the constant and return true. 28 static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, 29 APInt Demanded) { 30 assert(I && "No instruction?"); 31 assert(OpNo < I->getNumOperands() && "Operand index too large"); 32 33 // The operand must be a constant integer or splat integer. 34 Value *Op = I->getOperand(OpNo); 35 const APInt *C; 36 if (!match(Op, m_APInt(C))) 37 return false; 38 39 // If there are no bits set that aren't demanded, nothing to do. 40 Demanded = Demanded.zextOrTrunc(C->getBitWidth()); 41 if ((~Demanded & *C) == 0) 42 return false; 43 44 // This instruction is producing bits that are not demanded. Shrink the RHS. 45 Demanded &= *C; 46 I->setOperand(OpNo, ConstantInt::get(Op->getType(), Demanded)); 47 48 return true; 49 } 50 51 52 53 /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if 54 /// the instruction has any properties that allow us to simplify its operands. 
55 bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { 56 unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); 57 APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); 58 APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); 59 60 Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne, 61 0, &Inst); 62 if (!V) return false; 63 if (V == &Inst) return true; 64 replaceInstUsesWith(Inst, V); 65 return true; 66 } 67 68 /// This form of SimplifyDemandedBits simplifies the specified instruction 69 /// operand if possible, updating it in place. It returns true if it made any 70 /// change and false otherwise. 71 bool InstCombiner::SimplifyDemandedBits(Use &U, const APInt &DemandedMask, 72 APInt &KnownZero, APInt &KnownOne, 73 unsigned Depth) { 74 auto *UserI = dyn_cast<Instruction>(U.getUser()); 75 Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, 76 KnownOne, Depth, UserI); 77 if (!NewVal) return false; 78 U = NewVal; 79 return true; 80 } 81 82 83 /// This function attempts to replace V with a simpler value based on the 84 /// demanded bits. When this function is called, it is known that only the bits 85 /// set in DemandedMask of the result of V are ever used downstream. 86 /// Consequently, depending on the mask and V, it may be possible to replace V 87 /// with a constant or one of its operands. In such cases, this function does 88 /// the replacement and returns true. In all other cases, it returns false after 89 /// analyzing the expression and setting KnownOne and known to be one in the 90 /// expression. KnownZero contains all the bits that are known to be zero in the 91 /// expression. These are provided to potentially allow the caller (which might 92 /// recursively be SimplifyDemandedBits itself) to simplify the expression. 93 /// KnownOne and KnownZero always follow the invariant that: 94 /// KnownOne & KnownZero == 0. 95 /// That is, a bit can't be both 1 and 0. 
Note that the bits in KnownOne and 96 /// KnownZero may only be accurate for those bits set in DemandedMask. Note also 97 /// that the bitwidth of V, DemandedMask, KnownZero and KnownOne must all be the 98 /// same. 99 /// 100 /// This returns null if it did not change anything and it permits no 101 /// simplification. This returns V itself if it did some simplification of V's 102 /// operands based on the information about what bits are demanded. This returns 103 /// some other non-null value if it found out that V is equal to another value 104 /// in the context where the specified bits are demanded, but not for all users. 105 Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, 106 APInt &KnownZero, APInt &KnownOne, 107 unsigned Depth, 108 Instruction *CxtI) { 109 assert(V != nullptr && "Null pointer of Value???"); 110 assert(Depth <= 6 && "Limit Search Depth"); 111 uint32_t BitWidth = DemandedMask.getBitWidth(); 112 Type *VTy = V->getType(); 113 assert( 114 (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && 115 KnownZero.getBitWidth() == BitWidth && 116 KnownOne.getBitWidth() == BitWidth && 117 "Value *V, DemandedMask, KnownZero and KnownOne " 118 "must have same BitWidth"); 119 const APInt *C; 120 if (match(V, m_APInt(C))) { 121 // We know all of the bits for a scalar constant or a splat vector constant! 122 KnownOne = *C & DemandedMask; 123 KnownZero = ~KnownOne & DemandedMask; 124 return nullptr; 125 } 126 if (isa<ConstantPointerNull>(V)) { 127 // We know all of the bits for a constant! 128 KnownOne.clearAllBits(); 129 KnownZero = DemandedMask; 130 return nullptr; 131 } 132 133 KnownZero.clearAllBits(); 134 KnownOne.clearAllBits(); 135 if (DemandedMask == 0) { // Not demanding any bits from V. 136 if (isa<UndefValue>(V)) 137 return nullptr; 138 return UndefValue::get(VTy); 139 } 140 141 if (Depth == 6) // Limit search depth. 
142 return nullptr; 143 144 APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); 145 APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); 146 147 Instruction *I = dyn_cast<Instruction>(V); 148 if (!I) { 149 computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); 150 return nullptr; // Only analyze instructions. 151 } 152 153 // If there are multiple uses of this value and we aren't at the root, then 154 // we can't do any simplifications of the operands, because DemandedMask 155 // only reflects the bits demanded by *one* of the users. 156 if (Depth != 0 && !I->hasOneUse()) { 157 // Despite the fact that we can't simplify this instruction in all User's 158 // context, we can at least compute the knownzero/knownone bits, and we can 159 // do simplifications that apply to *just* the one user if we know that 160 // this instruction has a simpler value in that context. 161 if (I->getOpcode() == Instruction::And) { 162 // If either the LHS or the RHS are Zero, the result is zero. 163 computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, 164 CxtI); 165 computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, 166 CxtI); 167 168 // If all of the demanded bits are known 1 on one side, return the other. 169 // These bits cannot contribute to the result of the 'and' in this 170 // context. 171 if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == 172 (DemandedMask & ~LHSKnownZero)) 173 return I->getOperand(0); 174 if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == 175 (DemandedMask & ~RHSKnownZero)) 176 return I->getOperand(1); 177 178 // If all of the demanded bits in the inputs are known zeros, return zero. 179 if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) 180 return Constant::getNullValue(VTy); 181 182 } else if (I->getOpcode() == Instruction::Or) { 183 // We can simplify (X|Y) -> X or Y in the user's context if we know that 184 // only bits from X or Y are demanded. 
185 186 // If either the LHS or the RHS are One, the result is One. 187 computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, 188 CxtI); 189 computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, 190 CxtI); 191 192 // If all of the demanded bits are known zero on one side, return the 193 // other. These bits cannot contribute to the result of the 'or' in this 194 // context. 195 if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == 196 (DemandedMask & ~LHSKnownOne)) 197 return I->getOperand(0); 198 if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == 199 (DemandedMask & ~RHSKnownOne)) 200 return I->getOperand(1); 201 202 // If all of the potentially set bits on one side are known to be set on 203 // the other side, just use the 'other' side. 204 if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == 205 (DemandedMask & (~RHSKnownZero))) 206 return I->getOperand(0); 207 if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == 208 (DemandedMask & (~LHSKnownZero))) 209 return I->getOperand(1); 210 } else if (I->getOpcode() == Instruction::Xor) { 211 // We can simplify (X^Y) -> X or Y in the user's context if we know that 212 // only bits from X or Y are demanded. 213 214 computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, 215 CxtI); 216 computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, 217 CxtI); 218 219 // If all of the demanded bits are known zero on one side, return the 220 // other. 221 if ((DemandedMask & RHSKnownZero) == DemandedMask) 222 return I->getOperand(0); 223 if ((DemandedMask & LHSKnownZero) == DemandedMask) 224 return I->getOperand(1); 225 } 226 227 // Compute the KnownZero/KnownOne bits to simplify things downstream. 228 computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI); 229 return nullptr; 230 } 231 232 // If this is the root being simplified, allow it to have multiple uses, 233 // just set the DemandedMask to all bits so that we can try to simplify the 234 // operands. 
This allows visitTruncInst (for example) to simplify the 235 // operand of a trunc without duplicating all the logic below. 236 if (Depth == 0 && !V->hasOneUse()) 237 DemandedMask = APInt::getAllOnesValue(BitWidth); 238 239 switch (I->getOpcode()) { 240 default: 241 computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI); 242 break; 243 case Instruction::And: 244 // If either the LHS or the RHS are Zero, the result is zero. 245 if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, 246 RHSKnownOne, Depth + 1) || 247 SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, 248 LHSKnownZero, LHSKnownOne, Depth + 1)) 249 return I; 250 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 251 assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 252 253 // If the client is only demanding bits that we know, return the known 254 // constant. 255 if ((DemandedMask & ((RHSKnownZero | LHSKnownZero)| 256 (RHSKnownOne & LHSKnownOne))) == DemandedMask) 257 return Constant::getIntegerValue(VTy, RHSKnownOne & LHSKnownOne); 258 259 // If all of the demanded bits are known 1 on one side, return the other. 260 // These bits cannot contribute to the result of the 'and'. 261 if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == 262 (DemandedMask & ~LHSKnownZero)) 263 return I->getOperand(0); 264 if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == 265 (DemandedMask & ~RHSKnownZero)) 266 return I->getOperand(1); 267 268 // If all of the demanded bits in the inputs are known zeros, return zero. 269 if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) 270 return Constant::getNullValue(VTy); 271 272 // If the RHS is a constant, see if we can simplify it. 273 if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) 274 return I; 275 276 // Output known-1 bits are only known if set in both the LHS & RHS. 
277 KnownOne = RHSKnownOne & LHSKnownOne; 278 // Output known-0 are known to be clear if zero in either the LHS | RHS. 279 KnownZero = RHSKnownZero | LHSKnownZero; 280 break; 281 case Instruction::Or: 282 // If either the LHS or the RHS are One, the result is One. 283 if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, 284 RHSKnownOne, Depth + 1) || 285 SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, 286 LHSKnownZero, LHSKnownOne, Depth + 1)) 287 return I; 288 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 289 assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 290 291 // If the client is only demanding bits that we know, return the known 292 // constant. 293 if ((DemandedMask & ((RHSKnownZero & LHSKnownZero)| 294 (RHSKnownOne | LHSKnownOne))) == DemandedMask) 295 return Constant::getIntegerValue(VTy, RHSKnownOne | LHSKnownOne); 296 297 // If all of the demanded bits are known zero on one side, return the other. 298 // These bits cannot contribute to the result of the 'or'. 299 if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == 300 (DemandedMask & ~LHSKnownOne)) 301 return I->getOperand(0); 302 if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == 303 (DemandedMask & ~RHSKnownOne)) 304 return I->getOperand(1); 305 306 // If all of the potentially set bits on one side are known to be set on 307 // the other side, just use the 'other' side. 308 if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == 309 (DemandedMask & (~RHSKnownZero))) 310 return I->getOperand(0); 311 if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == 312 (DemandedMask & (~LHSKnownZero))) 313 return I->getOperand(1); 314 315 // If the RHS is a constant, see if we can simplify it. 316 if (ShrinkDemandedConstant(I, 1, DemandedMask)) 317 return I; 318 319 // Output known-0 bits are only known if clear in both the LHS & RHS. 
320 KnownZero = RHSKnownZero & LHSKnownZero; 321 // Output known-1 are known to be set if set in either the LHS | RHS. 322 KnownOne = RHSKnownOne | LHSKnownOne; 323 break; 324 case Instruction::Xor: { 325 if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, 326 RHSKnownOne, Depth + 1) || 327 SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero, 328 LHSKnownOne, Depth + 1)) 329 return I; 330 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 331 assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 332 333 // Output known-0 bits are known if clear or set in both the LHS & RHS. 334 APInt IKnownZero = (RHSKnownZero & LHSKnownZero) | 335 (RHSKnownOne & LHSKnownOne); 336 // Output known-1 are known to be set if set in only one of the LHS, RHS. 337 APInt IKnownOne = (RHSKnownZero & LHSKnownOne) | 338 (RHSKnownOne & LHSKnownZero); 339 340 // If the client is only demanding bits that we know, return the known 341 // constant. 342 if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) 343 return Constant::getIntegerValue(VTy, IKnownOne); 344 345 // If all of the demanded bits are known zero on one side, return the other. 346 // These bits cannot contribute to the result of the 'xor'. 347 if ((DemandedMask & RHSKnownZero) == DemandedMask) 348 return I->getOperand(0); 349 if ((DemandedMask & LHSKnownZero) == DemandedMask) 350 return I->getOperand(1); 351 352 // If all of the demanded bits are known to be zero on one side or the 353 // other, turn this into an *inclusive* or. 354 // e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 355 if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { 356 Instruction *Or = 357 BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), 358 I->getName()); 359 return InsertNewInstWith(Or, *I); 360 } 361 362 // If all of the demanded bits on one side are known, and all of the set 363 // bits on that side are also known to be set on the other side, turn this 364 // into an AND, as we know the bits will be cleared. 365 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 366 if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { 367 // all known 368 if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { 369 Constant *AndC = Constant::getIntegerValue(VTy, 370 ~RHSKnownOne & DemandedMask); 371 Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); 372 return InsertNewInstWith(And, *I); 373 } 374 } 375 376 // If the RHS is a constant, see if we can simplify it. 377 // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. 378 if (ShrinkDemandedConstant(I, 1, DemandedMask)) 379 return I; 380 381 // If our LHS is an 'and' and if it has one use, and if any of the bits we 382 // are flipping are known to be set, then the xor is just resetting those 383 // bits to zero. We can just knock out bits from the 'and' and the 'xor', 384 // simplifying both of them. 
385 if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) 386 if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && 387 isa<ConstantInt>(I->getOperand(1)) && 388 isa<ConstantInt>(LHSInst->getOperand(1)) && 389 (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { 390 ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1)); 391 ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1)); 392 APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); 393 394 Constant *AndC = 395 ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); 396 Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC); 397 InsertNewInstWith(NewAnd, *I); 398 399 Constant *XorC = 400 ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); 401 Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC); 402 return InsertNewInstWith(NewXor, *I); 403 } 404 405 // Output known-0 bits are known if clear or set in both the LHS & RHS. 406 KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne); 407 // Output known-1 are known to be set if set in only one of the LHS, RHS. 408 KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero); 409 break; 410 } 411 case Instruction::Select: 412 // If this is a select as part of a min/max pattern, don't simplify any 413 // further in case we break the structure. 414 Value *LHS, *RHS; 415 if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN) 416 return nullptr; 417 418 if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero, 419 RHSKnownOne, Depth + 1) || 420 SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero, 421 LHSKnownOne, Depth + 1)) 422 return I; 423 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 424 assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 425 426 // If the operands are constants, see if we can simplify them. 
427 if (ShrinkDemandedConstant(I, 1, DemandedMask) || 428 ShrinkDemandedConstant(I, 2, DemandedMask)) 429 return I; 430 431 // Only known if known in both the LHS and RHS. 432 KnownOne = RHSKnownOne & LHSKnownOne; 433 KnownZero = RHSKnownZero & LHSKnownZero; 434 break; 435 case Instruction::Trunc: { 436 unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); 437 DemandedMask = DemandedMask.zext(truncBf); 438 KnownZero = KnownZero.zext(truncBf); 439 KnownOne = KnownOne.zext(truncBf); 440 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, 441 KnownOne, Depth + 1)) 442 return I; 443 DemandedMask = DemandedMask.trunc(BitWidth); 444 KnownZero = KnownZero.trunc(BitWidth); 445 KnownOne = KnownOne.trunc(BitWidth); 446 assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 447 break; 448 } 449 case Instruction::BitCast: 450 if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) 451 return nullptr; // vector->int or fp->int? 452 453 if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { 454 if (VectorType *SrcVTy = 455 dyn_cast<VectorType>(I->getOperand(0)->getType())) { 456 if (DstVTy->getNumElements() != SrcVTy->getNumElements()) 457 // Don't touch a bitcast between vectors of different element counts. 458 return nullptr; 459 } else 460 // Don't touch a scalar-to-vector bitcast. 461 return nullptr; 462 } else if (I->getOperand(0)->getType()->isVectorTy()) 463 // Don't touch a vector-to-scalar bitcast. 464 return nullptr; 465 466 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, 467 KnownOne, Depth + 1)) 468 return I; 469 assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 470 break; 471 case Instruction::ZExt: { 472 // Compute the bits in the result that are not present in the input. 
473 unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); 474 475 DemandedMask = DemandedMask.trunc(SrcBitWidth); 476 KnownZero = KnownZero.trunc(SrcBitWidth); 477 KnownOne = KnownOne.trunc(SrcBitWidth); 478 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, 479 KnownOne, Depth + 1)) 480 return I; 481 DemandedMask = DemandedMask.zext(BitWidth); 482 KnownZero = KnownZero.zext(BitWidth); 483 KnownOne = KnownOne.zext(BitWidth); 484 assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 485 // The top bits are known to be zero. 486 KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); 487 break; 488 } 489 case Instruction::SExt: { 490 // Compute the bits in the result that are not present in the input. 491 unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); 492 493 APInt InputDemandedBits = DemandedMask & 494 APInt::getLowBitsSet(BitWidth, SrcBitWidth); 495 496 APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth)); 497 // If any of the sign extended bits are demanded, we know that the sign 498 // bit is demanded. 499 if ((NewBits & DemandedMask) != 0) 500 InputDemandedBits.setBit(SrcBitWidth-1); 501 502 InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); 503 KnownZero = KnownZero.trunc(SrcBitWidth); 504 KnownOne = KnownOne.trunc(SrcBitWidth); 505 if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero, 506 KnownOne, Depth + 1)) 507 return I; 508 InputDemandedBits = InputDemandedBits.zext(BitWidth); 509 KnownZero = KnownZero.zext(BitWidth); 510 KnownOne = KnownOne.zext(BitWidth); 511 assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 512 513 // If the sign bit of the input is known set or clear, then we know the 514 // top bits of the result. 515 516 // If the input sign bit is known zero, or if the NewBits are not demanded 517 // convert this into a zero extension. 
518 if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { 519 // Convert to ZExt cast 520 CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); 521 return InsertNewInstWith(NewCast, *I); 522 } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set 523 KnownOne |= NewBits; 524 } 525 break; 526 } 527 case Instruction::Add: 528 case Instruction::Sub: { 529 /// If the high-bits of an ADD/SUB are not demanded, then we do not care 530 /// about the high bits of the operands. 531 unsigned NLZ = DemandedMask.countLeadingZeros(); 532 if (NLZ > 0) { 533 // Right fill the mask of bits for this ADD/SUB to demand the most 534 // significant bit and all those below it. 535 APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); 536 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, 537 LHSKnownZero, LHSKnownOne, Depth + 1) || 538 ShrinkDemandedConstant(I, 1, DemandedFromOps) || 539 SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, 540 LHSKnownZero, LHSKnownOne, Depth + 1)) { 541 // Disable the nsw and nuw flags here: We can no longer guarantee that 542 // we won't wrap after simplification. Removing the nsw/nuw flags is 543 // legal here because the top bit is not demanded. 544 BinaryOperator &BinOP = *cast<BinaryOperator>(I); 545 BinOP.setHasNoSignedWrap(false); 546 BinOP.setHasNoUnsignedWrap(false); 547 return I; 548 } 549 } 550 551 // Otherwise just hand the add/sub off to computeKnownBits to fill in 552 // the known zeros and ones. 
553 computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); 554 break; 555 } 556 case Instruction::Shl: 557 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { 558 { 559 Value *VarX; ConstantInt *C1; 560 if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) { 561 Instruction *Shr = cast<Instruction>(I->getOperand(0)); 562 Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask, 563 KnownZero, KnownOne); 564 if (R) 565 return R; 566 } 567 } 568 569 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); 570 APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); 571 572 // If the shift is NUW/NSW, then it does demand the high bits. 573 ShlOperator *IOp = cast<ShlOperator>(I); 574 if (IOp->hasNoSignedWrap()) 575 DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1); 576 else if (IOp->hasNoUnsignedWrap()) 577 DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt); 578 579 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, 580 KnownOne, Depth + 1)) 581 return I; 582 assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 583 KnownZero <<= ShiftAmt; 584 KnownOne <<= ShiftAmt; 585 // low bits known zero. 586 if (ShiftAmt) 587 KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); 588 } 589 break; 590 case Instruction::LShr: 591 // For a logical shift right 592 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { 593 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); 594 595 // Unsigned shift right. 596 APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); 597 598 // If the shift is exact, then it does demand the low bits (and knows that 599 // they are zero). 
600 if (cast<LShrOperator>(I)->isExact()) 601 DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); 602 603 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, 604 KnownOne, Depth + 1)) 605 return I; 606 assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 607 KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); 608 KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); 609 if (ShiftAmt) { 610 // Compute the new bits that are at the top now. 611 APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); 612 KnownZero |= HighBits; // high bits known zero. 613 } 614 } 615 break; 616 case Instruction::AShr: 617 // If this is an arithmetic shift right and only the low-bit is set, we can 618 // always convert this into a logical shr, even if the shift amount is 619 // variable. The low bit of the shift cannot be an input sign bit unless 620 // the shift amount is >= the size of the datatype, which is undefined. 621 if (DemandedMask == 1) { 622 // Perform the logical shift right. 623 Instruction *NewVal = BinaryOperator::CreateLShr( 624 I->getOperand(0), I->getOperand(1), I->getName()); 625 return InsertNewInstWith(NewVal, *I); 626 } 627 628 // If the sign bit is the only bit demanded by this ashr, then there is no 629 // need to do it, the shift doesn't change the high bit. 630 if (DemandedMask.isSignBit()) 631 return I->getOperand(0); 632 633 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { 634 uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); 635 636 // Signed shift right. 637 APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); 638 // If any of the "high bits" are demanded, we should set the sign bit as 639 // demanded. 640 if (DemandedMask.countLeadingZeros() <= ShiftAmt) 641 DemandedMaskIn.setBit(BitWidth-1); 642 643 // If the shift is exact, then it does demand the low bits (and knows that 644 // they are zero). 
645 if (cast<AShrOperator>(I)->isExact()) 646 DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); 647 648 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, 649 KnownOne, Depth + 1)) 650 return I; 651 assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 652 // Compute the new bits that are at the top now. 653 APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); 654 KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); 655 KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); 656 657 // Handle the sign bits. 658 APInt SignBit(APInt::getSignBit(BitWidth)); 659 // Adjust to where it is now in the mask. 660 SignBit = APIntOps::lshr(SignBit, ShiftAmt); 661 662 // If the input sign bit is known to be zero, or if none of the top bits 663 // are demanded, turn this into an unsigned shift right. 664 if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || 665 (HighBits & ~DemandedMask) == HighBits) { 666 // Perform the logical shift right. 667 BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0), 668 SA, I->getName()); 669 NewVal->setIsExact(cast<BinaryOperator>(I)->isExact()); 670 return InsertNewInstWith(NewVal, *I); 671 } else if ((KnownOne & SignBit) != 0) { // New bits are known one. 672 KnownOne |= HighBits; 673 } 674 } 675 break; 676 case Instruction::SRem: 677 if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { 678 // X % -1 demands all the bits because we don't want to introduce 679 // INT_MIN % -1 (== undef) by accident. 680 if (Rem->isAllOnesValue()) 681 break; 682 APInt RA = Rem->getValue().abs(); 683 if (RA.isPowerOf2()) { 684 if (DemandedMask.ult(RA)) // srem won't affect demanded bits 685 return I->getOperand(0); 686 687 APInt LowBits = RA - 1; 688 APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); 689 if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero, 690 LHSKnownOne, Depth + 1)) 691 return I; 692 693 // The low bits of LHS are unchanged by the srem. 
694 KnownZero = LHSKnownZero & LowBits; 695 KnownOne = LHSKnownOne & LowBits; 696 697 // If LHS is non-negative or has all low bits zero, then the upper bits 698 // are all zero. 699 if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits)) 700 KnownZero |= ~LowBits; 701 702 // If LHS is negative and not all low bits are zero, then the upper bits 703 // are all one. 704 if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0)) 705 KnownOne |= ~LowBits; 706 707 assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 708 } 709 } 710 711 // The sign bit is the LHS's sign bit, except when the result of the 712 // remainder is zero. 713 if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { 714 APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); 715 computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, 716 CxtI); 717 // If it's known zero, our sign bit is also zero. 718 if (LHSKnownZero.isNegative()) 719 KnownZero.setBit(KnownZero.getBitWidth() - 1); 720 } 721 break; 722 case Instruction::URem: { 723 APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); 724 APInt AllOnes = APInt::getAllOnesValue(BitWidth); 725 if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2, 726 KnownOne2, Depth + 1) || 727 SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2, 728 KnownOne2, Depth + 1)) 729 return I; 730 731 unsigned Leaders = KnownZero2.countLeadingOnes(); 732 Leaders = std::max(Leaders, 733 KnownZero2.countLeadingOnes()); 734 KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; 735 break; 736 } 737 case Instruction::Call: 738 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 739 switch (II->getIntrinsicID()) { 740 default: break; 741 case Intrinsic::bswap: { 742 // If the only bits demanded come from one byte of the bswap result, 743 // just shift the input byte into position to eliminate the bswap. 
744 unsigned NLZ = DemandedMask.countLeadingZeros(); 745 unsigned NTZ = DemandedMask.countTrailingZeros(); 746 747 // Round NTZ down to the next byte. If we have 11 trailing zeros, then 748 // we need all the bits down to bit 8. Likewise, round NLZ. If we 749 // have 14 leading zeros, round to 8. 750 NLZ &= ~7; 751 NTZ &= ~7; 752 // If we need exactly one byte, we can do this transformation. 753 if (BitWidth-NLZ-NTZ == 8) { 754 unsigned ResultBit = NTZ; 755 unsigned InputBit = BitWidth-NTZ-8; 756 757 // Replace this with either a left or right shift to get the byte into 758 // the right place. 759 Instruction *NewVal; 760 if (InputBit > ResultBit) 761 NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0), 762 ConstantInt::get(I->getType(), InputBit-ResultBit)); 763 else 764 NewVal = BinaryOperator::CreateShl(II->getArgOperand(0), 765 ConstantInt::get(I->getType(), ResultBit-InputBit)); 766 NewVal->takeName(I); 767 return InsertNewInstWith(NewVal, *I); 768 } 769 770 // TODO: Could compute known zero/one bits based on the input. 771 break; 772 } 773 case Intrinsic::x86_mmx_pmovmskb: 774 case Intrinsic::x86_sse_movmsk_ps: 775 case Intrinsic::x86_sse2_movmsk_pd: 776 case Intrinsic::x86_sse2_pmovmskb_128: 777 case Intrinsic::x86_avx_movmsk_ps_256: 778 case Intrinsic::x86_avx_movmsk_pd_256: 779 case Intrinsic::x86_avx2_pmovmskb: { 780 // MOVMSK copies the vector elements' sign bits to the low bits 781 // and zeros the high bits. 782 unsigned ArgWidth; 783 if (II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) { 784 ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>. 785 } else { 786 auto Arg = II->getArgOperand(0); 787 auto ArgType = cast<VectorType>(Arg->getType()); 788 ArgWidth = ArgType->getNumElements(); 789 } 790 791 // If we don't need any of low bits then return zero, 792 // we know that DemandedMask is non-zero already. 
        // Treat the demanded mask as a per-element mask of the (up to
        // ArgWidth) low bits that MOVMSK produces.
        APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
        if (DemandedElts == 0)
          return ConstantInt::getNullValue(VTy);

        // We know that the upper bits are set to zero.
        KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - ArgWidth);
        return nullptr;
      }
      case Intrinsic::x86_sse42_crc32_64_64:
        // CRC32 only produces a 32-bit result; the upper half is zero.
        KnownZero = APInt::getHighBitsSet(64, 32);
        return nullptr;
      }
    }
    computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
    break;
  }

  // If the client is only demanding bits that we know, return the known
  // constant.
  if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
    return Constant::getIntegerValue(VTy, KnownOne);
  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
/// of "C2-C1".
///
/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
/// ..., bn}, without considering the specific value X is holding.
/// This transformation is legal iff one of the following conditions holds:
///  1) All the bits in S are 0, in this case E1 == E2.
///  2) We don't care about those bits in S, per the input DemandedMask.
///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care
///     about the rest.
///
/// Currently we only test condition 2).
///
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
                                                Instruction *Shl,
                                                const APInt &DemandedMask,
                                                APInt &KnownZero,
                                                APInt &KnownOne) {

  const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
  const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
  if (!ShlOp1 || !ShrOp1)
    return nullptr; // No-op shifts: nothing to combine.

  Value *VarX = Shr->getOperand(0);
  Type *Ty = VarX->getType();
  unsigned BitWidth = Ty->getIntegerBitWidth();
  if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
    return nullptr; // Shift amount out of range produces undef.

  unsigned ShlAmt = ShlOp1.getZExtValue();
  unsigned ShrAmt = ShrOp1.getZExtValue();

  // The final shl guarantees the low (ShlAmt - 1) bits are zero; report
  // only those of them the caller actually demands.
  KnownOne.clearAllBits();
  KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
  KnownZero &= DemandedMask;

  // BitMask1 marks the bits of X that survive the original (shr;shl) pair;
  // BitMask2 marks the bits that survive the proposed single shift.
  APInt BitMask1(APInt::getAllOnesValue(BitWidth));
  APInt BitMask2(APInt::getAllOnesValue(BitWidth));

  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
                      (BitMask1.ashr(ShrAmt) << ShlAmt);

  if (ShrAmt <= ShlAmt) {
    BitMask2 <<= (ShlAmt - ShrAmt);
  } else {
    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
                        BitMask2.ashr(ShrAmt - ShlAmt);
  }

  // Check if condition-2 (see the comment to this function) is satisfied:
  // the two forms may only differ in bits the caller does not demand.
  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    if (ShrAmt == ShlAmt)
      return VarX;

    if (!Shr->hasOneUse())
      return nullptr;

    BinaryOperator *New;
    if (ShrAmt < ShlAmt) {
      // Net left shift; preserve the original shl's wrap flags.
      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
      New = BinaryOperator::CreateShl(VarX, Amt);
      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
    } else {
      // Net right shift; keep the shr's kind (logical vs arithmetic) and
      // its exact flag.
      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
                     BinaryOperator::CreateAShr(VarX, Amt);
      if (cast<BinaryOperator>(Shr)->isExact())
        New->setIsExact(true);
    }

    return InsertNewInstWith(New, *Shl);
  }

  return nullptr;
}

/// The specified value produces a vector with any number of elements.
/// DemandedElts contains the set of elements that are actually used by the
/// caller.
/// This method analyzes which elements of the operand are undef and
/// returns that information in UndefElts.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified, then the resultant value is
/// returned. This returns null if no change was made.
Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                                APInt &UndefElts,
                                                unsigned Depth) {
  unsigned VWidth = V->getType()->getVectorNumElements();
  APInt EltMask(APInt::getAllOnesValue(VWidth));
  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");

  if (isa<UndefValue>(V)) {
    // If the entire vector is undefined, just return this info.
    UndefElts = EltMask;
    return nullptr;
  }

  if (DemandedElts == 0) { // If nothing is demanded, provide undef.
    UndefElts = EltMask;
    return UndefValue::get(V->getType());
  }

  UndefElts = 0;

  // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
  if (Constant *C = dyn_cast<Constant>(V)) {
    // Check if this is identity. If so, return 0 since we are not simplifying
    // anything.
    if (DemandedElts.isAllOnesValue())
      return nullptr;

    Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    Constant *Undef = UndefValue::get(EltTy);

    // Rebuild the constant with non-demanded elements replaced by undef.
    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0; i != VWidth; ++i) {
      if (!DemandedElts[i]) {   // If not demanded, set to undef.
        Elts.push_back(Undef);
        UndefElts.setBit(i);
        continue;
      }

      Constant *Elt = C->getAggregateElement(i);
      if (!Elt) return nullptr;

      if (isa<UndefValue>(Elt)) {   // Already undef.
        Elts.push_back(Undef);
        UndefElts.setBit(i);
      } else {                      // Otherwise, defined.
        Elts.push_back(Elt);
      }
    }

    // If we changed the constant, return it.
    Constant *NewCV = ConstantVector::get(Elts);
    return NewCV != C ? NewCV : nullptr;
  }

  // Limit search depth.
  if (Depth == 10)
    return nullptr;

  // If multiple users are using the root value, proceed with
  // simplification conservatively assuming that all elements
  // are needed.
  if (!V->hasOneUse()) {
    // Quit if we find multiple users of a non-root value though.
    // They'll be handled when it's their turn to be visited by
    // the main instcombine process.
    if (Depth != 0)
      // TODO: Just compute the UndefElts information recursively.
      return nullptr;

    // Conservatively assume that all elements are needed.
    DemandedElts = EltMask;
  }

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return nullptr;   // Only analyze instructions.

  bool MadeChange = false;
  APInt UndefElts2(VWidth, 0);
  APInt UndefElts3(VWidth, 0);
  Value *TmpV;
  switch (I->getOpcode()) {
  default: break;

  case Instruction::InsertElement: {
    // If this is a variable index, we don't know which element it overwrites.
    // demand exactly the same input as we produce.
    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
    if (!Idx) {
      // Note that we can't propagate undef elt info, because we don't know
      // which elt is getting updated.
      TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
                                        UndefElts2, Depth + 1);
      if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
      break;
    }

    // If this is inserting an element that isn't demanded, remove this
    // insertelement.
    unsigned IdxNo = Idx->getZExtValue();
    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
      Worklist.Add(I);
      return I->getOperand(0);
    }

    // Otherwise, the element inserted overwrites whatever was there, so the
    // input demanded set is simpler than the output set.
    APInt DemandedElts2 = DemandedElts;
    DemandedElts2.clearBit(IdxNo);
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
                                      UndefElts, Depth + 1);
    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }

    // The inserted element is defined.
    UndefElts.clearBit(IdxNo);
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    unsigned LHSVWidth =
      Shuffle->getOperand(0)->getType()->getVectorNumElements();
    // Translate the demanded result lanes back to demanded lanes of the two
    // shuffle inputs via the shuffle mask (-1 mask entries demand nothing).
    APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
    for (unsigned i = 0; i < VWidth; i++) {
      if (DemandedElts[i]) {
        unsigned MaskVal = Shuffle->getMaskValue(i);
        if (MaskVal != -1u) {
          assert(MaskVal < LHSVWidth * 2 &&
                 "shufflevector mask index out of range!");
          if (MaskVal < LHSVWidth)
            LeftDemanded.setBit(MaskVal);
          else
            RightDemanded.setBit(MaskVal - LHSVWidth);
        }
      }
    }

    APInt LHSUndefElts(LHSVWidth, 0);
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
                                      LHSUndefElts, Depth + 1);
    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }

    APInt RHSUndefElts(LHSVWidth, 0);
    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
                                      RHSUndefElts, Depth + 1);
    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }

    // Walk the mask once more: collect result-lane undefs and, as a side
    // effect, track whether each side contributes exactly one lane
    // (LHSIdx/RHSIdx degrade to LHSVWidth when a second lane is seen).
    bool NewUndefElts = false;
    unsigned LHSIdx = -1u, LHSValIdx = -1u;
    unsigned RHSIdx = -1u, RHSValIdx = -1u;
    bool LHSUniform = true;
    bool RHSUniform = true;
    for (unsigned i = 0; i < VWidth; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u) {
        UndefElts.setBit(i);
      } else if (!DemandedElts[i]) {
        NewUndefElts = true;
        UndefElts.setBit(i);
      } else if (MaskVal < LHSVWidth) {
        if (LHSUndefElts[MaskVal]) {
          NewUndefElts = true;
          UndefElts.setBit(i);
        } else {
          LHSIdx = LHSIdx == -1u ? i : LHSVWidth;
          LHSValIdx = LHSValIdx == -1u ? MaskVal : LHSVWidth;
          LHSUniform = LHSUniform && (MaskVal == i);
        }
      } else {
        if (RHSUndefElts[MaskVal - LHSVWidth]) {
          NewUndefElts = true;
          UndefElts.setBit(i);
        } else {
          RHSIdx = RHSIdx == -1u ? i : LHSVWidth;
          RHSValIdx = RHSValIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth;
          RHSUniform = RHSUniform && (MaskVal - LHSVWidth == i);
        }
      }
    }

    // Try to transform shuffle with constant vector and single element from
    // this constant vector to single insertelement instruction.
    // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
    // insertelement V, C[ci], ci-n
    if (LHSVWidth == Shuffle->getType()->getNumElements()) {
      Value *Op = nullptr;
      Constant *Value = nullptr;
      unsigned Idx = -1u;

      // Find constant vector with the single element in shuffle (LHS or RHS).
      if (LHSIdx < LHSVWidth && RHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
          Op = Shuffle->getOperand(1);
          Value = CV->getOperand(LHSValIdx);
          Idx = LHSIdx;
        }
      }
      if (RHSIdx < LHSVWidth && LHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
          Op = Shuffle->getOperand(0);
          Value = CV->getOperand(RHSValIdx);
          Idx = RHSIdx;
        }
      }
      // Found constant vector with single element - convert to insertelement.
      if (Op && Value) {
        Instruction *New = InsertElementInst::Create(
            Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
            Shuffle->getName());
        InsertNewInstWith(New, *Shuffle);
        return New;
      }
    }
    if (NewUndefElts) {
      // Add additional discovered undefs by rewriting the shuffle mask.
      SmallVector<Constant*, 16> Elts;
      for (unsigned i = 0; i < VWidth; ++i) {
        if (UndefElts[i])
          Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
        else
          Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
                                          Shuffle->getMaskValue(i)));
      }
      I->setOperand(2, ConstantVector::get(Elts));
      MadeChange = true;
    }
    break;
  }
  case Instruction::Select: {
    // With a constant condition vector, each lane selects exactly one arm,
    // so the other arm's lane is not demanded.
    APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
    if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
      for (unsigned i = 0; i < VWidth; i++) {
        Constant *CElt = CV->getAggregateElement(i);
        // Method isNullValue always returns false when called on a
        // ConstantExpr. If CElt is a ConstantExpr then skip it in order to
        // to avoid propagating incorrect information.
        if (isa<ConstantExpr>(CElt))
          continue;
        if (CElt->isNullValue())
          LeftDemanded.clearBit(i);
        else
          RightDemanded.clearBit(i);
      }
    }

    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
                                      Depth + 1);
    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }

    TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
                                      UndefElts2, Depth + 1);
    if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }

    // Output elements are undefined if both are undefined.
    UndefElts &= UndefElts2;
    break;
  }
  case Instruction::BitCast: {
    // Vector->vector casts only.
    VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
    if (!VTy) break;
    unsigned InVWidth = VTy->getNumElements();
    APInt InputDemandedElts(InVWidth, 0);
    UndefElts2 = APInt(InVWidth, 0);
    unsigned Ratio;

    if (VWidth == InVWidth) {
      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
      // elements as are demanded of us.
      Ratio = 1;
      InputDemandedElts = DemandedElts;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an input element is live if any of the
      // corresponding output elements are live.
      Ratio = VWidth / InVWidth;
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (DemandedElts[OutIdx])
          InputDemandedElts.setBit(OutIdx / Ratio);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an input element is live if the
      // corresponding output element is live.
      Ratio = InVWidth / VWidth;
      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
        if (DemandedElts[InIdx / Ratio])
          InputDemandedElts.setBit(InIdx);
    } else {
      // Unsupported so far.
      break;
    }

    // Simplify the bitcast's input in terms of the demanded input elements.
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
                                      UndefElts2, Depth + 1);
    if (TmpV) {
      I->setOperand(0, TmpV);
      MadeChange = true;
    }

    if (VWidth == InVWidth) {
      UndefElts = UndefElts2;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an output element is undef if the
      // corresponding input element is undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (UndefElts2[OutIdx / Ratio])
          UndefElts.setBit(OutIdx);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an output element is undef if all of the
      // corresponding input elements are undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
        APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
        if (SubUndef.countPopulation() == Ratio)
          UndefElts.setBit(OutIdx);
      }
    } else {
      llvm_unreachable("Unimp");
    }
    break;
  }
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    // These binary ops act lane-wise, so demand the same elements of both
    // operands as are demanded of the result.
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
                                      Depth + 1);
    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
                                      UndefElts2, Depth + 1);
    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }

    // Output elements are undefined if both are undefined.  Consider things
    // like undef&0.  The result is known zero, not undef.
    UndefElts &= UndefElts2;
    break;
  case Instruction::FPTrunc:
  case Instruction::FPExt:
    // Lane-wise unary casts: demanded/undef lanes pass straight through.
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
                                      Depth + 1);
    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
    break;

  case Instruction::Call: {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
    if (!II) break;
    switch (II->getIntrinsicID()) {
    default: break;

    case Intrinsic::x86_xop_vfrcz_ss:
    case Intrinsic::x86_xop_vfrcz_sd:
      // The instructions for these intrinsics are specified to zero upper
      // bits, not pass them through like other scalar intrinsics. So we
      // shouldn't just use Arg0 if DemandedElts[0] is clear like we do for
      // other intrinsics. Instead we should return a zero vector.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return ConstantAggregateZero::get(II->getType());
      }

      // Only the lower element is used.
      DemandedElts = 1;
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                        UndefElts, Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

      // Only the lower element is undefined. The high elements are zero.
      UndefElts = UndefElts[0];
      break;

    // Unary scalar-as-vector operations that work column-wise.
    case Intrinsic::x86_sse_rcp_ss:
    case Intrinsic::x86_sse_rsqrt_ss:
    case Intrinsic::x86_sse_sqrt_ss:
    case Intrinsic::x86_sse2_sqrt_sd:
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                        UndefElts, Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

      // If lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }
      // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
      // checks).
      break;

    // Binary scalar-as-vector operations that work column-wise.  The high
    // elements come from operand 0.  The low element is a function of both
    // operands.
    case Intrinsic::x86_sse_min_ss:
    case Intrinsic::x86_sse_max_ss:
    case Intrinsic::x86_sse_cmp_ss:
    case Intrinsic::x86_sse2_min_sd:
    case Intrinsic::x86_sse2_max_sd:
    case Intrinsic::x86_sse2_cmp_sd: {
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                        UndefElts, Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

      // If lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }

      // Only lower element is used for operand 1.
      DemandedElts = 1;
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
                                        UndefElts2, Depth + 1);
      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }

      // Lower element is undefined if both lower elements are undefined.
      // Consider things like undef&0.  The result is known zero, not undef.
      if (!UndefElts2[0])
        UndefElts.clearBit(0);

      break;
    }

    // Binary scalar-as-vector operations that work column-wise.  The high
    // elements come from operand 0 and the low element comes from operand 1.
    case Intrinsic::x86_sse41_round_ss:
    case Intrinsic::x86_sse41_round_sd: {
      // Don't use the low element of operand 0.
      APInt DemandedElts2 = DemandedElts;
      DemandedElts2.clearBit(0);
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts2,
                                        UndefElts, Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

      // If lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }

      // Only lower element is used for operand 1.
      DemandedElts = 1;
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
                                        UndefElts2, Depth + 1);
      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }

      // Take the high undef elements from operand 0 and take the lower element
      // from operand 1.
      UndefElts.clearBit(0);
      UndefElts |= UndefElts2[0];
      break;
    }

    // Three input scalar-as-vector operations that work column-wise.  The high
    // elements come from operand 0 and the low element is a function of all
    // three inputs.
    case Intrinsic::x86_avx512_mask_add_ss_round:
    case Intrinsic::x86_avx512_mask_div_ss_round:
    case Intrinsic::x86_avx512_mask_mul_ss_round:
    case Intrinsic::x86_avx512_mask_sub_ss_round:
    case Intrinsic::x86_avx512_mask_max_ss_round:
    case Intrinsic::x86_avx512_mask_min_ss_round:
    case Intrinsic::x86_avx512_mask_add_sd_round:
    case Intrinsic::x86_avx512_mask_div_sd_round:
    case Intrinsic::x86_avx512_mask_mul_sd_round:
    case Intrinsic::x86_avx512_mask_sub_sd_round:
    case Intrinsic::x86_avx512_mask_max_sd_round:
    case Intrinsic::x86_avx512_mask_min_sd_round:
    case Intrinsic::x86_fma_vfmadd_ss:
    case Intrinsic::x86_fma_vfmsub_ss:
    case Intrinsic::x86_fma_vfnmadd_ss:
    case Intrinsic::x86_fma_vfnmsub_ss:
    case Intrinsic::x86_fma_vfmadd_sd:
    case Intrinsic::x86_fma_vfmsub_sd:
    case Intrinsic::x86_fma_vfnmadd_sd:
    case Intrinsic::x86_fma_vfnmsub_sd:
    case Intrinsic::x86_avx512_mask_vfmadd_ss:
    case Intrinsic::x86_avx512_mask_vfmadd_sd:
    case Intrinsic::x86_avx512_maskz_vfmadd_ss:
    case Intrinsic::x86_avx512_maskz_vfmadd_sd:
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                        UndefElts, Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

      // If lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }

      // Only lower element is used for operand 1 and 2.
      DemandedElts = 1;
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
                                        UndefElts2, Depth + 1);
      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
                                        UndefElts3, Depth + 1);
      if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }

      // Lower element is undefined if all three lower elements are undefined.
      // Consider things like undef&0.  The result is known zero, not undef.
      if (!UndefElts2[0] || !UndefElts3[0])
        UndefElts.clearBit(0);

      break;

    case Intrinsic::x86_avx512_mask3_vfmadd_ss:
    case Intrinsic::x86_avx512_mask3_vfmadd_sd:
    case Intrinsic::x86_avx512_mask3_vfmsub_ss:
    case Intrinsic::x86_avx512_mask3_vfmsub_sd:
    case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
    case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
      // These intrinsics get the passthru bits from operand 2.
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
                                        UndefElts, Depth + 1);
      if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }

      // If lowest element of a scalar op isn't used then use Arg2.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(2);
      }

      // Only lower element is used for operand 0 and 1.
      DemandedElts = 1;
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                        UndefElts2, Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
                                        UndefElts3, Depth + 1);
      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }

      // Lower element is undefined if all three lower elements are undefined.
      // Consider things like undef&0.  The result is known zero, not undef.
      if (!UndefElts2[0] || !UndefElts3[0])
        UndefElts.clearBit(0);

      break;

    case Intrinsic::x86_sse2_pmulu_dq:
    case Intrinsic::x86_sse41_pmuldq:
    case Intrinsic::x86_avx2_pmul_dq:
    case Intrinsic::x86_avx2_pmulu_dq:
    case Intrinsic::x86_avx512_pmul_dq_512:
    case Intrinsic::x86_avx512_pmulu_dq_512: {
      Value *Op0 = II->getArgOperand(0);
      Value *Op1 = II->getArgOperand(1);
      unsigned InnerVWidth = Op0->getType()->getVectorNumElements();
      assert((VWidth * 2) == InnerVWidth && "Unexpected input size");

      // PMULDQ/PMULUDQ only read the even input elements, so a demanded
      // output lane i maps to input lane 2*i.
      APInt InnerDemandedElts(InnerVWidth, 0);
      for (unsigned i = 0; i != VWidth; ++i)
        if (DemandedElts[i])
          InnerDemandedElts.setBit(i * 2);

      UndefElts2 = APInt(InnerVWidth, 0);
      TmpV = SimplifyDemandedVectorElts(Op0, InnerDemandedElts, UndefElts2,
                                        Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

      UndefElts3 = APInt(InnerVWidth, 0);
      TmpV = SimplifyDemandedVectorElts(Op1, InnerDemandedElts, UndefElts3,
                                        Depth + 1);
      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }

      break;
    }

    case Intrinsic::x86_sse2_packssdw_128:
    case Intrinsic::x86_sse2_packsswb_128:
    case Intrinsic::x86_sse2_packuswb_128:
    case Intrinsic::x86_sse41_packusdw:
    case Intrinsic::x86_avx2_packssdw:
    case Intrinsic::x86_avx2_packsswb:
    case Intrinsic::x86_avx2_packusdw:
    case Intrinsic::x86_avx2_packuswb:
    case Intrinsic::x86_avx512_packssdw_512:
    case Intrinsic::x86_avx512_packsswb_512:
    case Intrinsic::x86_avx512_packusdw_512:
    case Intrinsic::x86_avx512_packuswb_512: {
      auto *Ty0 = II->getArgOperand(0)->getType();
      unsigned InnerVWidth = Ty0->getVectorNumElements();
      assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

      unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
      unsigned VWidthPerLane = VWidth / NumLanes;
      unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;

      // Per lane, pack the elements of the first input and then the second.
      // e.g.
      // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
      // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
      for (int OpNum = 0; OpNum != 2; ++OpNum) {
        APInt OpDemandedElts(InnerVWidth, 0);
        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
          unsigned LaneIdx = Lane * VWidthPerLane;
          for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
            unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
            if (DemandedElts[Idx])
              OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
          }
        }

        // Demand elements from the operand.
        auto *Op = II->getArgOperand(OpNum);
        APInt OpUndefElts(InnerVWidth, 0);
        TmpV = SimplifyDemandedVectorElts(Op, OpDemandedElts, OpUndefElts,
                                          Depth + 1);
        if (TmpV) {
          II->setArgOperand(OpNum, TmpV);
          MadeChange = true;
        }

        // Pack the operand's UNDEF elements, one lane at a time.
        OpUndefElts = OpUndefElts.zext(VWidth);
        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
          APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
          LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
          LaneElts = LaneElts.shl(InnerVWidthPerLane * (2 * Lane + OpNum));
          UndefElts |= LaneElts;
        }
      }
      break;
    }

    // PSHUFB
    case Intrinsic::x86_ssse3_pshuf_b_128:
    case Intrinsic::x86_avx2_pshuf_b:
    case Intrinsic::x86_avx512_pshuf_b_512:
    // PERMILVAR
    case Intrinsic::x86_avx_vpermilvar_ps:
    case Intrinsic::x86_avx_vpermilvar_ps_256:
    case Intrinsic::x86_avx512_vpermilvar_ps_512:
    case Intrinsic::x86_avx_vpermilvar_pd:
    case Intrinsic::x86_avx_vpermilvar_pd_256:
    case Intrinsic::x86_avx512_vpermilvar_pd_512:
    // PERMV
    case Intrinsic::x86_avx2_permd:
    case Intrinsic::x86_avx2_permps: {
      // Simplify the variable shuffle-control operand (operand 1).
      Value *Op1 = II->getArgOperand(1);
      TmpV = SimplifyDemandedVectorElts(Op1, DemandedElts, UndefElts,
                                        Depth + 1);
      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
      break;
    }

    // SSE4A instructions leave the upper 64-bits of the 128-bit result
    // in an undefined state.
    case Intrinsic::x86_sse4a_extrq:
    case Intrinsic::x86_sse4a_extrqi:
    case Intrinsic::x86_sse4a_insertq:
    case Intrinsic::x86_sse4a_insertqi:
      UndefElts |= APInt::getHighBitsSet(VWidth, VWidth / 2);
      break;
    }
    break;
  }
  }
  return MadeChange ? I : nullptr;
}