//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains logic for simplifying instructions based on information
// about how they are used.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
                                   APInt Demanded) {
  assert(I && "No instruction?");
  assert(OpNo < I->getNumOperands() && "Operand index too large");

  // The operand must be a constant integer or splat integer.
  Value *Op = I->getOperand(OpNo);
  const APInt *C;
  if (!match(Op, m_APInt(C)))
    return false;

  // If there are no bits set that aren't demanded, nothing to do.
  Demanded = Demanded.zextOrTrunc(C->getBitWidth());
  if ((~Demanded & *C) == 0)
    return false;

  // This instruction is producing bits that are not demanded. Shrink the RHS.
  Demanded &= *C;
  I->setOperand(OpNo, ConstantInt::get(Op->getType(), Demanded));

  return true;
}
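
// Worked example (illustrative, not from the original source): shrinking the
// mask constant of an 'and' when only the low byte of the result is read.
//
//   %r = and i32 %x, 0xFF00FF    ; DemandedMask == 0x0000FF
//
// Here ~Demanded & C == 0xFF0000, which is non-zero, so the constant is
// shrunk to Demanded & C == 0x0000FF and the instruction becomes:
//
//   %r = and i32 %x, 0xFF
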
/// Inst is an integer instruction that SimplifyDemandedBits knows about. See
/// if the instruction has any properties that allow us to simplify its
/// operands.
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
  unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  APInt DemandedMask(APInt::getAllOnesValue(BitWidth));

  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
                                     0, &Inst);
  if (!V) return false;
  if (V == &Inst) return true;
  replaceInstUsesWith(Inst, V);
  return true;
}

/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
bool InstCombiner::SimplifyDemandedBits(Use &U, const APInt &DemandedMask,
                                        APInt &KnownZero, APInt &KnownOne,
                                        unsigned Depth) {
  auto *UserI = dyn_cast<Instruction>(U.getUser());
  Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero,
                                          KnownOne, Depth, UserI);
  if (!NewVal) return false;
  U = NewVal;
  return true;
}


/// This function attempts to replace V with a simpler value based on the
/// demanded bits. When this function is called, it is known that only the bits
/// set in DemandedMask of the result of V are ever used downstream.
/// Consequently, depending on the mask and V, it may be possible to replace V
/// with a constant or one of its operands. In such cases, this function does
/// the replacement and returns true. In all other cases, it returns false
/// after analyzing the expression and setting KnownOne to the bits that are
/// known to be one in the expression. KnownZero contains all the bits that are
/// known to be zero in the expression. These are provided to potentially allow
/// the caller (which might recursively be SimplifyDemandedBits itself) to
/// simplify the expression.
/// KnownOne and KnownZero always follow the invariant that:
///   KnownOne & KnownZero == 0.
/// That is, a bit can't be both 1 and 0. Note that the bits in KnownOne and
/// KnownZero may only be accurate for those bits set in DemandedMask. Note
/// also that the bitwidth of V, DemandedMask, KnownZero and KnownOne must all
/// be the same.
///
/// This returns null if it did not change anything and it permits no
/// simplification. This returns V itself if it did some simplification of V's
/// operands based on the information about what bits are demanded. This
/// returns some other non-null value if it found out that V is equal to
/// another value in the context where the specified bits are demanded, but not
/// for all users.
Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
                                             APInt &KnownZero, APInt &KnownOne,
                                             unsigned Depth,
                                             Instruction *CxtI) {
  assert(V != nullptr && "Null pointer of Value???");
  assert(Depth <= 6 && "Limit Search Depth");
  uint32_t BitWidth = DemandedMask.getBitWidth();
  Type *VTy = V->getType();
  assert(
      (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
      KnownZero.getBitWidth() == BitWidth &&
      KnownOne.getBitWidth() == BitWidth &&
      "Value *V, DemandedMask, KnownZero and KnownOne "
      "must have same BitWidth");
  const APInt *C;
  if (match(V, m_APInt(C))) {
    // We know all of the bits for a scalar constant or a splat vector
    // constant!
    KnownOne = *C & DemandedMask;
    KnownZero = ~KnownOne & DemandedMask;
    return nullptr;
  }
  if (isa<ConstantPointerNull>(V)) {
    // We know all of the bits for a constant!
    KnownOne.clearAllBits();
    KnownZero = DemandedMask;
    return nullptr;
  }

  KnownZero.clearAllBits();
  KnownOne.clearAllBits();
  if (DemandedMask == 0) {   // Not demanding any bits from V.
    if (isa<UndefValue>(V))
      return nullptr;
    return UndefValue::get(VTy);
  }

  if (Depth == 6)        // Limit search depth.
    return nullptr;

  APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
  APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) {
    computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
    return nullptr;        // Only analyze instructions.
  }

  // If there are multiple uses of this value and we aren't at the root, then
  // we can't do any simplifications of the operands, because DemandedMask
  // only reflects the bits demanded by *one* of the users.
  if (Depth != 0 && !I->hasOneUse()) {
    // Despite the fact that we can't simplify this instruction in every
    // user's context, we can at least compute the knownzero/knownone bits,
    // and we can do simplifications that apply to *just* the one user if we
    // know that this instruction has a simpler value in that context.
    if (I->getOpcode() == Instruction::And) {
      // If either the LHS or the RHS are Zero, the result is zero.
      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
                       CxtI);
      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
                       CxtI);

      // If all of the demanded bits are known 1 on one side, return the
      // other. These bits cannot contribute to the result of the 'and' in
      // this context.
      if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
          (DemandedMask & ~LHSKnownZero))
        return I->getOperand(0);
      if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
          (DemandedMask & ~RHSKnownZero))
        return I->getOperand(1);

      // If all of the demanded bits in the inputs are known zeros, return
      // zero.
      if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
        return Constant::getNullValue(VTy);

    } else if (I->getOpcode() == Instruction::Or) {
      // We can simplify (X|Y) -> X or Y in the user's context if we know that
      // only bits from X or Y are demanded.

      // If either the LHS or the RHS are One, the result is One.
      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
                       CxtI);
      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
                       CxtI);

      // If all of the demanded bits are known zero on one side, return the
      // other. These bits cannot contribute to the result of the 'or' in this
      // context.
      if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
          (DemandedMask & ~LHSKnownOne))
        return I->getOperand(0);
      if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
          (DemandedMask & ~RHSKnownOne))
        return I->getOperand(1);

      // If all of the potentially set bits on one side are known to be set on
      // the other side, just use the 'other' side.
      if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
          (DemandedMask & (~RHSKnownZero)))
        return I->getOperand(0);
      if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
          (DemandedMask & (~LHSKnownZero)))
        return I->getOperand(1);
    } else if (I->getOpcode() == Instruction::Xor) {
      // We can simplify (X^Y) -> X or Y in the user's context if we know that
      // only bits from X or Y are demanded.

      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
                       CxtI);
      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
                       CxtI);

      // If all of the demanded bits are known zero on one side, return the
      // other.
      if ((DemandedMask & RHSKnownZero) == DemandedMask)
        return I->getOperand(0);
      if ((DemandedMask & LHSKnownZero) == DemandedMask)
        return I->getOperand(1);
    }

    // Compute the KnownZero/KnownOne bits to simplify things downstream.
    computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
    return nullptr;
  }
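
  // Context-limited simplification, a worked example (illustrative, not from
  // the original source):
  //
  //   %or = or i8 %x, %y      ; %or has several users
  //   %r  = and i8 %or, 15    ; this user demands only the low 4 bits
  //
  // If the low 4 bits of %y are known zero, %or computes the same low 4 bits
  // as %x alone, so %or can be replaced by %x in %r's operand without
  // touching the other users of %or.
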

  // If this is the root being simplified, allow it to have multiple uses,
  // just set the DemandedMask to all bits so that we can try to simplify the
  // operands. This allows visitTruncInst (for example) to simplify the
  // operand of a trunc without duplicating all the logic below.
  if (Depth == 0 && !V->hasOneUse())
    DemandedMask = APInt::getAllOnesValue(BitWidth);

  switch (I->getOpcode()) {
  default:
    computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
    break;
  case Instruction::And:
    // If either the LHS or the RHS are Zero, the result is zero.
    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
                             RHSKnownOne, Depth + 1) ||
        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
                             LHSKnownZero, LHSKnownOne, Depth + 1))
      return I;
    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");

    // If the client is only demanding bits that we know, return the known
    // constant.
    if ((DemandedMask & ((RHSKnownZero | LHSKnownZero) |
                         (RHSKnownOne & LHSKnownOne))) == DemandedMask)
      return Constant::getIntegerValue(VTy, RHSKnownOne & LHSKnownOne);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
        (DemandedMask & ~LHSKnownZero))
      return I->getOperand(0);
    if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
        (DemandedMask & ~RHSKnownZero))
      return I->getOperand(1);

    // If all of the demanded bits in the inputs are known zeros, return zero.
    if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
      return Constant::getNullValue(VTy);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
      return I;

    // Output known-1 bits are only known if set in both the LHS & RHS.
    KnownOne = RHSKnownOne & LHSKnownOne;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    KnownZero = RHSKnownZero | LHSKnownZero;
    break;
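
  // Known-bit algebra for the 'and' case above, on a concrete nibble
  // (illustrative, not from the original source):
  //   LHS: KnownZero = 0b0100, KnownOne = 0b0001   (value is ?0?1)
  //   RHS: KnownZero = 0b0010, KnownOne = 0b1000   (value is 1?0?)
  //   KnownOne  = 0b1000 & 0b0001 = 0b0000
  //   KnownZero = 0b0100 | 0b0010 = 0b0110
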
  case Instruction::Or:
    // If either the LHS or the RHS are One, the result is One.
    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
                             RHSKnownOne, Depth + 1) ||
        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
                             LHSKnownZero, LHSKnownOne, Depth + 1))
      return I;
    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");

    // If the client is only demanding bits that we know, return the known
    // constant.
    if ((DemandedMask & ((RHSKnownZero & LHSKnownZero) |
                         (RHSKnownOne | LHSKnownOne))) == DemandedMask)
      return Constant::getIntegerValue(VTy, RHSKnownOne | LHSKnownOne);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or'.
    if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
        (DemandedMask & ~LHSKnownOne))
      return I->getOperand(0);
    if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
        (DemandedMask & ~RHSKnownOne))
      return I->getOperand(1);

    // If all of the potentially set bits on one side are known to be set on
    // the other side, just use the 'other' side.
    if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
        (DemandedMask & (~RHSKnownZero)))
      return I->getOperand(0);
    if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
        (DemandedMask & (~LHSKnownZero)))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask))
      return I;

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    KnownZero = RHSKnownZero & LHSKnownZero;
    // Output known-1 are known to be set if set in either the LHS | RHS.
    KnownOne = RHSKnownOne | LHSKnownOne;
    break;
  case Instruction::Xor: {
    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
                             RHSKnownOne, Depth + 1) ||
        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
                             LHSKnownOne, Depth + 1))
      return I;
    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
                       (RHSKnownOne & LHSKnownOne);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    APInt IKnownOne = (RHSKnownZero & LHSKnownOne) |
                      (RHSKnownOne & LHSKnownZero);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
      return Constant::getIntegerValue(VTy, IKnownOne);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'xor'.
    if ((DemandedMask & RHSKnownZero) == DemandedMask)
      return I->getOperand(0);
    if ((DemandedMask & LHSKnownZero) == DemandedMask)
      return I->getOperand(1);

    // If all of the demanded bits are known to be zero on one side or the
    // other, turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
      Instruction *Or =
        BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
                                 I->getName());
      return InsertNewInstWith(Or, *I);
    }

    // If all of the demanded bits on one side are known, and all of the set
    // bits on that side are also known to be set on the other side, turn this
    // into an AND, as we know the bits will be cleared.
    // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
      // all known
      if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
        Constant *AndC = Constant::getIntegerValue(VTy,
                                                   ~RHSKnownOne & DemandedMask);
        Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
        return InsertNewInstWith(And, *I);
      }
    }

    // If the RHS is a constant, see if we can simplify it.
    // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
    if (ShrinkDemandedConstant(I, 1, DemandedMask))
      return I;

    // If our LHS is an 'and' and if it has one use, and if any of the bits we
    // are flipping are known to be set, then the xor is just resetting those
    // bits to zero. We can just knock out bits from the 'and' and the 'xor',
    // simplifying both of them.
    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
          isa<ConstantInt>(I->getOperand(1)) &&
          isa<ConstantInt>(LHSInst->getOperand(1)) &&
          (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
        ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
        ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
        APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);

        Constant *AndC =
          ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
        Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
        InsertNewInstWith(NewAnd, *I);

        Constant *XorC =
          ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
        Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
        return InsertNewInstWith(NewXor, *I);
      }

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    KnownZero = (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
    break;
  }
  case Instruction::Select:
    // If this is a select as part of a min/max pattern, don't simplify any
    // further in case we break the structure.
    Value *LHS, *RHS;
    if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

    if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
                             RHSKnownOne, Depth + 1) ||
        SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
                             LHSKnownOne, Depth + 1))
      return I;
    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
        ShrinkDemandedConstant(I, 2, DemandedMask))
      return I;

    // Only known if known in both the LHS and RHS.
    KnownOne = RHSKnownOne & LHSKnownOne;
    KnownZero = RHSKnownZero & LHSKnownZero;
    break;
  case Instruction::Trunc: {
    unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
    DemandedMask = DemandedMask.zext(truncBf);
    KnownZero = KnownZero.zext(truncBf);
    KnownOne = KnownOne.zext(truncBf);
    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
                             KnownOne, Depth + 1))
      return I;
    DemandedMask = DemandedMask.trunc(BitWidth);
    KnownZero = KnownZero.trunc(BitWidth);
    KnownOne = KnownOne.trunc(BitWidth);
    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    break;
  }
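
  // Example for the 'trunc' case above (illustrative, not from the original
  // source): for
  //   %t = trunc i32 %x to i8      ; DemandedMask == 0x0F
  // the operand is visited with the zero-extended mask 0x0000000F, so the
  // producer of %x is free to change everything above bit 3.
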
  case Instruction::BitCast:
    if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
      return nullptr;  // vector->int or fp->int?

    if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
      if (VectorType *SrcVTy =
            dyn_cast<VectorType>(I->getOperand(0)->getType())) {
        if (DstVTy->getNumElements() != SrcVTy->getNumElements())
          // Don't touch a bitcast between vectors of different element counts.
          return nullptr;
      } else
        // Don't touch a scalar-to-vector bitcast.
        return nullptr;
    } else if (I->getOperand(0)->getType()->isVectorTy())
      // Don't touch a vector-to-scalar bitcast.
      return nullptr;

    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
                             KnownOne, Depth + 1))
      return I;
    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    break;
  case Instruction::ZExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    DemandedMask = DemandedMask.trunc(SrcBitWidth);
    KnownZero = KnownZero.trunc(SrcBitWidth);
    KnownOne = KnownOne.trunc(SrcBitWidth);
    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
                             KnownOne, Depth + 1))
      return I;
    DemandedMask = DemandedMask.zext(BitWidth);
    KnownZero = KnownZero.zext(BitWidth);
    KnownOne = KnownOne.zext(BitWidth);
    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    // The top bits are known to be zero.
    KnownZero.setBitsFrom(SrcBitWidth);
    break;
  }
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedBits = DemandedMask &
                              APInt::getLowBitsSet(BitWidth, SrcBitWidth);

    APInt NewBits(APInt::getBitsSetFrom(BitWidth, SrcBitWidth));
    // If any of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    if ((NewBits & DemandedMask) != 0)
      InputDemandedBits.setBit(SrcBitWidth-1);

    InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
    KnownZero = KnownZero.trunc(SrcBitWidth);
    KnownOne = KnownOne.trunc(SrcBitWidth);
    if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
                             KnownOne, Depth + 1))
      return I;
    InputDemandedBits = InputDemandedBits.zext(BitWidth);
    KnownZero = KnownZero.zext(BitWidth);
    KnownOne = KnownOne.zext(BitWidth);
    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, or if the NewBits are not
    // demanded, convert this into a zero extension.
    if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
      // Convert to ZExt cast.
      CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
      return InsertNewInstWith(NewCast, *I);
    } else if (KnownOne[SrcBitWidth-1]) {    // Input sign bit known set
      KnownOne |= NewBits;
    }
    break;
  }
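
  // Example for the 'sext' case above (illustrative, not from the original
  // source): in
  //   %s = sext i8 %x to i32       ; only bits 0..7 demanded
  // none of the sign-bit copies (bits 8..31) are demanded, so the sext is
  // rewritten as 'zext i8 %x to i32'. The same rewrite happens whenever the
  // sign bit of %x is known zero.
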
  case Instruction::Add:
  case Instruction::Sub: {
    /// If the high-bits of an ADD/SUB are not demanded, then we do not care
    /// about the high bits of the operands.
    unsigned NLZ = DemandedMask.countLeadingZeros();
    if (NLZ > 0) {
      // Right fill the mask of bits for this ADD/SUB to demand the most
      // significant bit and all those below it.
      APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
      if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
          SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
                               LHSKnownZero, LHSKnownOne, Depth + 1) ||
          ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
          SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
                               LHSKnownZero, LHSKnownOne, Depth + 1)) {
        // Disable the nsw and nuw flags here: We can no longer guarantee that
        // we won't wrap after simplification. Removing the nsw/nuw flags is
        // legal here because the top bit is not demanded.
        BinaryOperator &BinOP = *cast<BinaryOperator>(I);
        BinOP.setHasNoSignedWrap(false);
        BinOP.setHasNoUnsignedWrap(false);
        return I;
      }
    }

    // Otherwise just hand the add/sub off to computeKnownBits to fill in
    // the known zeros and ones.
    computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
    break;
  }
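
  // Example for the add/sub case above (illustrative, not from the original
  // source): if only the low 16 bits of a 32-bit add are demanded, NLZ == 16
  // and each operand is asked for just its low 16 bits. Carries propagate
  // only upward, so the high halves cannot affect the demanded result;
  // nsw/nuw are cleared because a shrunken operand constant may now wrap.
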
  case Instruction::Shl:
    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
      {
        Value *VarX; ConstantInt *C1;
        if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
          Instruction *Shr = cast<Instruction>(I->getOperand(0));
          Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
                                                KnownZero, KnownOne);
          if (R)
            return R;
        }
      }

      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
      APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));

      // If the shift is NUW/NSW, then it does demand the high bits.
      ShlOperator *IOp = cast<ShlOperator>(I);
      if (IOp->hasNoSignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt+1);
      else if (IOp->hasNoUnsignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt);

      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
                               KnownOne, Depth + 1))
        return I;
      assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
      KnownZero <<= ShiftAmt;
      KnownOne  <<= ShiftAmt;
      // Low bits are known zero.
      if (ShiftAmt)
        KnownZero.setLowBits(ShiftAmt);
    }
    break;
  case Instruction::LShr:
    // For a logical shift right.
    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);

      // Unsigned shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));

      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (cast<LShrOperator>(I)->isExact())
        DemandedMaskIn.setLowBits(ShiftAmt);

      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
                               KnownOne, Depth + 1))
        return I;
      assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
      KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
      if (ShiftAmt)
        KnownZero.setHighBits(ShiftAmt);  // high bits known zero.
    }
    break;
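
  // Example for the 'lshr' case above (illustrative, not from the original
  // source): for
  //   %r = lshr i32 %x, 8          ; DemandedMask == 0xFF
  // the operand is visited with DemandedMask.shl(8) == 0xFF00, and the top
  // 8 bits of the result are recorded as known zero.
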
  case Instruction::AShr:
    // If this is an arithmetic shift right and only the low-bit is set, we
    // can always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedMask == 1) {
      // Perform the logical shift right.
      Instruction *NewVal = BinaryOperator::CreateLShr(
                        I->getOperand(0), I->getOperand(1), I->getName());
      return InsertNewInstWith(NewVal, *I);
    }

    // If the sign bit is the only bit demanded by this ashr, then there is no
    // need to do it, the shift doesn't change the high bit.
    if (DemandedMask.isSignBit())
      return I->getOperand(0);

    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);

      // Signed shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
      // If any of the "high bits" are demanded, we should set the sign bit as
      // demanded.
      if (DemandedMask.countLeadingZeros() <= ShiftAmt)
        DemandedMaskIn.setBit(BitWidth-1);

      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (cast<AShrOperator>(I)->isExact())
        DemandedMaskIn.setLowBits(ShiftAmt);

      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
                               KnownOne, Depth + 1))
        return I;
      assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
      // Compute the new bits that are at the top now.
      APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
      KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);

      // Handle the sign bits.
      APInt SignBit(APInt::getSignBit(BitWidth));
      // Adjust to where it is now in the mask.
      SignBit = APIntOps::lshr(SignBit, ShiftAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
          (HighBits & ~DemandedMask) == HighBits) {
        // Perform the logical shift right.
        BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
                                                            SA, I->getName());
        NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
        return InsertNewInstWith(NewVal, *I);
      } else if ((KnownOne & SignBit) != 0) {  // New bits are known one.
        KnownOne |= HighBits;
      }
    }
    break;
  case Instruction::SRem:
    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
      // X % -1 demands all the bits because we don't want to introduce
      // INT_MIN % -1 (== undef) by accident.
      if (Rem->isAllOnesValue())
        break;
      APInt RA = Rem->getValue().abs();
      if (RA.isPowerOf2()) {
        if (DemandedMask.ult(RA))    // srem won't affect demanded bits
          return I->getOperand(0);

        APInt LowBits = RA - 1;
        APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
        if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
                                 LHSKnownOne, Depth + 1))
          return I;

        // The low bits of LHS are unchanged by the srem.
        KnownZero = LHSKnownZero & LowBits;
        KnownOne  = LHSKnownOne  & LowBits;

        // If LHS is non-negative or has all low bits zero, then the upper
        // bits are all zero.
        if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
          KnownZero |= ~LowBits;

        // If LHS is negative and not all low bits are zero, then the upper
        // bits are all one.
        if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
          KnownOne |= ~LowBits;

        assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
      }
    }

    // The sign bit is the LHS's sign bit, except when the result of the
    // remainder is zero.
    if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
      APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
                       CxtI);
      // If it's known zero, our sign bit is also zero.
      if (LHSKnownZero.isNegative())
        KnownZero.setSignBit();
    }
    break;
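
  // Example for the 'srem' case above (illustrative, not from the original
  // source): %r = srem i32 %x, 8 leaves the low 3 bits of %x unchanged, so
  // when DemandedMask.ult(8) the srem is replaced by %x outright; otherwise
  // only bits 0..2 plus the sign bit of %x are demanded.
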
  case Instruction::URem: {
    APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
    if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
                             KnownOne2, Depth + 1) ||
        SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
                             KnownOne2, Depth + 1))
      return I;

    unsigned Leaders = KnownZero2.countLeadingOnes();
    KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
    break;
  }
  case Instruction::Call:
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::bswap: {
        // If the only bits demanded come from one byte of the bswap result,
        // just shift the input byte into position to eliminate the bswap.
        unsigned NLZ = DemandedMask.countLeadingZeros();
        unsigned NTZ = DemandedMask.countTrailingZeros();

        // Round NTZ down to the next byte. If we have 11 trailing zeros, then
        // we need all the bits down to bit 8. Likewise, round NLZ. If we
        // have 14 leading zeros, round to 8.
        NLZ &= ~7;
        NTZ &= ~7;
        // If we need exactly one byte, we can do this transformation.
        if (BitWidth-NLZ-NTZ == 8) {
          unsigned ResultBit = NTZ;
          unsigned InputBit = BitWidth-NTZ-8;

          // Replace this with either a left or right shift to get the byte
          // into the right place.
          Instruction *NewVal;
          if (InputBit > ResultBit)
            NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
                    ConstantInt::get(I->getType(), InputBit-ResultBit));
          else
            NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
                    ConstantInt::get(I->getType(), ResultBit-InputBit));
          NewVal->takeName(I);
          return InsertNewInstWith(NewVal, *I);
        }

        // TODO: Could compute known zero/one bits based on the input.
        break;
      }
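
      // Example for the bswap case above (illustrative, not from the
      // original source): with i32 and DemandedMask == 0x0000FF00, NLZ == 16
      // and NTZ == 8, so exactly one byte is live. ResultBit == 8 and
      // InputBit == 16, and the call is replaced by 'lshr i32 %x, 8', which
      // moves input byte 2 (the byte bswap would deliver to bits 8..15) into
      // place.
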
      case Intrinsic::x86_mmx_pmovmskb:
      case Intrinsic::x86_sse_movmsk_ps:
      case Intrinsic::x86_sse2_movmsk_pd:
      case Intrinsic::x86_sse2_pmovmskb_128:
      case Intrinsic::x86_avx_movmsk_ps_256:
      case Intrinsic::x86_avx_movmsk_pd_256:
      case Intrinsic::x86_avx2_pmovmskb: {
        // MOVMSK copies the vector elements' sign bits to the low bits
        // and zeros the high bits.
        unsigned ArgWidth;
        if (II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
          ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
        } else {
          auto Arg = II->getArgOperand(0);
          auto ArgType = cast<VectorType>(Arg->getType());
          ArgWidth = ArgType->getNumElements();
        }

        // If we don't demand any of the low bits, return zero. We know that
        // DemandedMask is non-zero already.
        APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
        if (DemandedElts == 0)
          return ConstantInt::getNullValue(VTy);

        // We know that the upper bits are set to zero.
        KnownZero.setBitsFrom(ArgWidth);
        return nullptr;
      }
      case Intrinsic::x86_sse42_crc32_64_64:
        KnownZero.setBitsFrom(32);
        return nullptr;
      }
    }
    computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
    break;
  }

  // If the client is only demanding bits that we know, return the known
  // constant.
  if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
    return Constant::getIntegerValue(VTy, KnownOne);
  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
/// of "C2-C1".
///
/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
/// ..., bn}, without considering the specific value X is holding.
/// This transformation is legal iff one of following conditions holds:
///  1) All the bits in S are 0, in this case E1 == E2.
///  2) We don't care about those bits in S, per the input DemandedMask.
///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care
///     about the rest of the bits.
///
/// Currently we only test condition 2).
///
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
                                                Instruction *Shl,
                                                const APInt &DemandedMask,
                                                APInt &KnownZero,
                                                APInt &KnownOne) {

  const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
  const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
  if (!ShlOp1 || !ShrOp1)
    return nullptr; // No-op.

  Value *VarX = Shr->getOperand(0);
  Type *Ty = VarX->getType();
  unsigned BitWidth = Ty->getIntegerBitWidth();
  if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
    return nullptr; // Undef.

  unsigned ShlAmt = ShlOp1.getZExtValue();
  unsigned ShrAmt = ShrOp1.getZExtValue();

  KnownOne.clearAllBits();
  KnownZero.setLowBits(ShlAmt - 1);
  KnownZero &= DemandedMask;

  APInt BitMask1(APInt::getAllOnesValue(BitWidth));
  APInt BitMask2(APInt::getAllOnesValue(BitWidth));

  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
                      (BitMask1.ashr(ShrAmt) << ShlAmt);

  if (ShrAmt <= ShlAmt) {
    BitMask2 <<= (ShlAmt - ShrAmt);
  } else {
    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt) :
                        BitMask2.ashr(ShrAmt - ShlAmt);
  }

  // Check if condition 2) (see the comment to this function) is satisfied.
  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    if (ShrAmt == ShlAmt)
      return VarX;

    if (!Shr->hasOneUse())
      return nullptr;

    BinaryOperator *New;
    if (ShrAmt < ShlAmt) {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
      New = BinaryOperator::CreateShl(VarX, Amt);
      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
    } else {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
                     BinaryOperator::CreateAShr(VarX, Amt);
      if (cast<BinaryOperator>(Shr)->isExact())
        New->setIsExact(true);
    }

    return InsertNewInstWith(New, *Shl);
  }

  return nullptr;
}
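
// Worked example for SimplifyShrShlDemandedBits (illustrative, not from the
// original source): for E1 = (X lshr 3) shl 5 the candidate replacement is
// E2 = X shl 2. The two expressions can differ only in bits 2..4 (always
// zero in E1, taken from X[0..2] in E2), so the rewrite fires exactly when
// DemandedMask has bits 2..4 clear.
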
/// The specified value produces a vector with any number of elements.
/// DemandedElts contains the set of elements that are actually used by the
/// caller. This method analyzes which elements of the operand are undef and
/// returns that information in UndefElts.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified and the resultant value is
/// returned. This returns null if no change was made.
Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                                APInt &UndefElts,
                                                unsigned Depth) {
  unsigned VWidth = V->getType()->getVectorNumElements();
  APInt EltMask(APInt::getAllOnesValue(VWidth));
  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");

  if (isa<UndefValue>(V)) {
    // If the entire vector is undefined, just return this info.
    UndefElts = EltMask;
    return nullptr;
  }

  if (DemandedElts == 0) { // If nothing is demanded, provide undef.
    UndefElts = EltMask;
    return UndefValue::get(V->getType());
  }

  UndefElts = 0;

  // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
  if (Constant *C = dyn_cast<Constant>(V)) {
    // Check if this is identity. If so, return 0 since we are not simplifying
    // anything.
    if (DemandedElts.isAllOnesValue())
      return nullptr;

    Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    Constant *Undef = UndefValue::get(EltTy);

    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0; i != VWidth; ++i) {
      if (!DemandedElts[i]) {   // If not demanded, set to undef.
        Elts.push_back(Undef);
        UndefElts.setBit(i);
        continue;
      }

      Constant *Elt = C->getAggregateElement(i);
      if (!Elt) return nullptr;

      if (isa<UndefValue>(Elt)) {   // Already undef.
        Elts.push_back(Undef);
        UndefElts.setBit(i);
      } else {                      // Otherwise, defined.
        Elts.push_back(Elt);
      }
    }

    // If we changed the constant, return it.
    Constant *NewCV = ConstantVector::get(Elts);
    return NewCV != C ? NewCV : nullptr;
  }

  // Limit search depth.
  if (Depth == 10)
    return nullptr;

  // If multiple users are using the root value, proceed with
  // simplification conservatively assuming that all elements
  // are needed.
  if (!V->hasOneUse()) {
    // Quit if we find multiple users of a non-root value though.
    // They'll be handled when it's their turn to be visited by
    // the main instcombine process.
    if (Depth != 0)
      // TODO: Just compute the UndefElts information recursively.
      return nullptr;

    // Conservatively assume that all elements are needed.
    DemandedElts = EltMask;
  }

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return nullptr;        // Only analyze instructions.

  bool MadeChange = false;
  APInt UndefElts2(VWidth, 0);
  APInt UndefElts3(VWidth, 0);
  Value *TmpV;
  switch (I->getOpcode()) {
  default: break;

  case Instruction::InsertElement: {
    // If this is a variable index, we don't know which element it overwrites.
    // Demand exactly the same input as we produce.
    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
    if (!Idx) {
      // Note that we can't propagate undef elt info, because we don't know
      // which elt is getting updated.
      TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
                                        UndefElts2, Depth + 1);
      if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
      break;
    }

    // If this is inserting an element that isn't demanded, remove this
    // insertelement.
    unsigned IdxNo = Idx->getZExtValue();
    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
      Worklist.Add(I);
      return I->getOperand(0);
    }

    // Otherwise, the element inserted overwrites whatever was there, so the
    // input demanded set is simpler than the output set.
    APInt DemandedElts2 = DemandedElts;
    DemandedElts2.clearBit(IdxNo);
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
                                      UndefElts, Depth + 1);
    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }

    // The inserted element is defined.
    UndefElts.clearBit(IdxNo);
    break;
  }
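
  // Example for the insertelement case above (illustrative, not from the
  // original source):
  //   %v = insertelement <4 x float> %a, float %f, i32 2
  // with DemandedElts == {0,1} (element 2 is never read) is replaced by %a,
  // and the dead insertelement is queued on the worklist for removal.
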
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    unsigned LHSVWidth =
      Shuffle->getOperand(0)->getType()->getVectorNumElements();
    APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
    for (unsigned i = 0; i < VWidth; i++) {
      if (DemandedElts[i]) {
        unsigned MaskVal = Shuffle->getMaskValue(i);
        if (MaskVal != -1u) {
          assert(MaskVal < LHSVWidth * 2 &&
                 "shufflevector mask index out of range!");
          if (MaskVal < LHSVWidth)
            LeftDemanded.setBit(MaskVal);
          else
            RightDemanded.setBit(MaskVal - LHSVWidth);
        }
      }
    }

    APInt LHSUndefElts(LHSVWidth, 0);
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
                                      LHSUndefElts, Depth + 1);
    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }

    APInt RHSUndefElts(LHSVWidth, 0);
    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
                                      RHSUndefElts, Depth + 1);
    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }

    bool NewUndefElts = false;
    unsigned LHSIdx = -1u, LHSValIdx = -1u;
    unsigned RHSIdx = -1u, RHSValIdx = -1u;
    bool LHSUniform = true;
    bool RHSUniform = true;
    for (unsigned i = 0; i < VWidth; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u) {
        UndefElts.setBit(i);
      } else if (!DemandedElts[i]) {
        NewUndefElts = true;
        UndefElts.setBit(i);
      } else if (MaskVal < LHSVWidth) {
        if (LHSUndefElts[MaskVal]) {
          NewUndefElts = true;
          UndefElts.setBit(i);
        } else {
          LHSIdx = LHSIdx == -1u ? i : LHSVWidth;
          LHSValIdx = LHSValIdx == -1u ? MaskVal : LHSVWidth;
          LHSUniform = LHSUniform && (MaskVal == i);
        }
      } else {
        if (RHSUndefElts[MaskVal - LHSVWidth]) {
          NewUndefElts = true;
          UndefElts.setBit(i);
        } else {
          RHSIdx = RHSIdx == -1u ? i : LHSVWidth;
          RHSValIdx = RHSValIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth;
          RHSUniform = RHSUniform && (MaskVal - LHSVWidth == i);
        }
      }
    }

    // Try to transform shuffle with constant vector and single element from
    // this constant vector to single insertelement instruction.
    // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
    // insertelement V, C[ci], ci-n
    if (LHSVWidth == Shuffle->getType()->getNumElements()) {
      Value *Op = nullptr;
      Constant *Value = nullptr;
      unsigned Idx = -1u;

      // Find constant vector with the single element in shuffle (LHS or RHS).
      if (LHSIdx < LHSVWidth && RHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
          Op = Shuffle->getOperand(1);
          Value = CV->getOperand(LHSValIdx);
          Idx = LHSIdx;
        }
      }
      if (RHSIdx < LHSVWidth && LHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
          Op = Shuffle->getOperand(0);
          Value = CV->getOperand(RHSValIdx);
          Idx = RHSIdx;
        }
      }
      // Found constant vector with single element - convert to insertelement.
      if (Op && Value) {
        Instruction *New = InsertElementInst::Create(
            Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
            Shuffle->getName());
        InsertNewInstWith(New, *Shuffle);
        return New;
      }
    }
    if (NewUndefElts) {
      // Add additional discovered undefs.
      SmallVector<Constant*, 16> Elts;
      for (unsigned i = 0; i < VWidth; ++i) {
        if (UndefElts[i])
          Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
        else
          Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
                                          Shuffle->getMaskValue(i)));
      }
      I->setOperand(2, ConstantVector::get(Elts));
      MadeChange = true;
    }
    break;
  }
  case Instruction::Select: {
    APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
    if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
      for (unsigned i = 0; i < VWidth; i++) {
        Constant *CElt = CV->getAggregateElement(i);
        // Method isNullValue always returns false when called on a
        // ConstantExpr. If CElt is a ConstantExpr then skip it in order to
        // avoid propagating incorrect information.
        if (isa<ConstantExpr>(CElt))
          continue;
        if (CElt->isNullValue())
          LeftDemanded.clearBit(i);
        else
          RightDemanded.clearBit(i);
      }
    }

    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
                                      Depth + 1);
    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }

    TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
                                      UndefElts2, Depth + 1);
    if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }

    // Output elements are undefined if both are undefined.
    UndefElts &= UndefElts2;
    break;
  }
  case Instruction::BitCast: {
    // Vector->vector casts only.
    VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
    if (!VTy) break;
    unsigned InVWidth = VTy->getNumElements();
    APInt InputDemandedElts(InVWidth, 0);
    UndefElts2 = APInt(InVWidth, 0);
    unsigned Ratio;

    if (VWidth == InVWidth) {
      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
      // elements as are demanded of us.
      Ratio = 1;
      InputDemandedElts = DemandedElts;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number
      // of elements in the input then an input element is live if any of the
      // corresponding output elements are live.
      Ratio = VWidth / InVWidth;
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (DemandedElts[OutIdx])
          InputDemandedElts.setBit(OutIdx / Ratio);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number
      // of elements in the output then an input element is live if the
      // corresponding output element is live.
      Ratio = InVWidth / VWidth;
      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
        if (DemandedElts[InIdx / Ratio])
          InputDemandedElts.setBit(InIdx);
    } else {
      // Unsupported so far.
      break;
    }

    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
                                      UndefElts2, Depth + 1);
    if (TmpV) {
      I->setOperand(0, TmpV);
      MadeChange = true;
    }

    if (VWidth == InVWidth) {
      UndefElts = UndefElts2;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number
      // of elements in the input then an output element is undef if the
      // corresponding input element is undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (UndefElts2[OutIdx / Ratio])
          UndefElts.setBit(OutIdx);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number
      // of elements in the output then an output element is undef if all of
      // the corresponding input elements are undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
        APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
        if (SubUndef.countPopulation() == Ratio)
          UndefElts.setBit(OutIdx);
      }
    } else {
      llvm_unreachable("Unimp");
    }
    break;
  }
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    // These ops are handled element-wise, so they demand exactly the
    // elements demanded of them from both operands. Note that div/rem are
    // deliberately absent: they demand all of their input elements, because
    // simplifying an element could introduce a divide by zero.
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
                                      Depth + 1);
    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
                                      UndefElts2, Depth + 1);
    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }

    // Output elements are undefined if both are undefined. Consider things
    // like undef&0. The result is known zero, not undef.
    UndefElts &= UndefElts2;
    break;
  case Instruction::FPTrunc:
  case Instruction::FPExt:
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
                                      Depth + 1);
    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
    break;

  case Instruction::Call: {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
    if (!II) break;
    switch (II->getIntrinsicID()) {
    default: break;

    case Intrinsic::x86_xop_vfrcz_ss:
    case Intrinsic::x86_xop_vfrcz_sd:
      // The instructions for these intrinsics are specified to zero the upper
      // bits, not pass them through like other scalar intrinsics. So we
      // shouldn't just use Arg0 if DemandedElts[0] is clear like we do for
      // other intrinsics. Instead we should return a zero vector.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return ConstantAggregateZero::get(II->getType());
      }

      // Only the lower element is used.
      DemandedElts = 1;
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                        UndefElts, Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

      // Only the lower element is undefined. The high elements are zero.
      UndefElts = UndefElts[0];
      break;

    // Unary scalar-as-vector operations that work column-wise.
    case Intrinsic::x86_sse_rcp_ss:
    case Intrinsic::x86_sse_rsqrt_ss:
    case Intrinsic::x86_sse_sqrt_ss:
    case Intrinsic::x86_sse2_sqrt_sd:
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                        UndefElts, Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

      // If the lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }
      // TODO: If only the low element is demanded, lower SQRT to FSQRT (with
      // rounding/exception checks).
      break;
    // Binary scalar-as-vector operations that work column-wise. The high
    // elements come from operand 0. The low element is a function of both
    // operands.
    case Intrinsic::x86_sse_min_ss:
    case Intrinsic::x86_sse_max_ss:
    case Intrinsic::x86_sse_cmp_ss:
    case Intrinsic::x86_sse2_min_sd:
    case Intrinsic::x86_sse2_max_sd:
    case Intrinsic::x86_sse2_cmp_sd: {
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                        UndefElts, Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

      // If the lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }

      // Only the lower element is used for operand 1.
      DemandedElts = 1;
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
                                        UndefElts2, Depth + 1);
      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }

      // Lower element is undefined if both lower elements are undefined.
      // Consider things like undef&0. The result is known zero, not undef.
      if (!UndefElts2[0])
        UndefElts.clearBit(0);

      break;
    }

    // Binary scalar-as-vector operations that work column-wise. The high
    // elements come from operand 0 and the low element comes from operand 1.
    case Intrinsic::x86_sse41_round_ss:
    case Intrinsic::x86_sse41_round_sd: {
      // Don't use the low element of operand 0.
      APInt DemandedElts2 = DemandedElts;
      DemandedElts2.clearBit(0);
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts2,
                                        UndefElts, Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

      // If the lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }

      // Only the lower element is used for operand 1.
      DemandedElts = 1;
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
                                        UndefElts2, Depth + 1);
      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }

      // Take the high undef elements from operand 0 and take the lower
      // element from operand 1.
      UndefElts.clearBit(0);
      UndefElts |= UndefElts2[0];
      break;
    }

    // Three input scalar-as-vector operations that work column-wise. The high
    // elements come from operand 0 and the low element is a function of all
    // three inputs.
    case Intrinsic::x86_avx512_mask_add_ss_round:
    case Intrinsic::x86_avx512_mask_div_ss_round:
    case Intrinsic::x86_avx512_mask_mul_ss_round:
    case Intrinsic::x86_avx512_mask_sub_ss_round:
    case Intrinsic::x86_avx512_mask_max_ss_round:
    case Intrinsic::x86_avx512_mask_min_ss_round:
    case Intrinsic::x86_avx512_mask_add_sd_round:
    case Intrinsic::x86_avx512_mask_div_sd_round:
    case Intrinsic::x86_avx512_mask_mul_sd_round:
    case Intrinsic::x86_avx512_mask_sub_sd_round:
    case Intrinsic::x86_avx512_mask_max_sd_round:
    case Intrinsic::x86_avx512_mask_min_sd_round:
    case Intrinsic::x86_fma_vfmadd_ss:
    case Intrinsic::x86_fma_vfmsub_ss:
    case Intrinsic::x86_fma_vfnmadd_ss:
    case Intrinsic::x86_fma_vfnmsub_ss:
    case Intrinsic::x86_fma_vfmadd_sd:
    case Intrinsic::x86_fma_vfmsub_sd:
    case Intrinsic::x86_fma_vfnmadd_sd:
    case Intrinsic::x86_fma_vfnmsub_sd:
    case Intrinsic::x86_avx512_mask_vfmadd_ss:
    case Intrinsic::x86_avx512_mask_vfmadd_sd:
    case Intrinsic::x86_avx512_maskz_vfmadd_ss:
    case Intrinsic::x86_avx512_maskz_vfmadd_sd:
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                        UndefElts, Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

      // If the lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }

      // Only the lower element is used for operands 1 and 2.
      DemandedElts = 1;
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
                                        UndefElts2, Depth + 1);
      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
                                        UndefElts3, Depth + 1);
      if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }

      // Lower element is undefined if all three lower elements are undefined.
      // Consider things like undef&0. The result is known zero, not undef.
      if (!UndefElts2[0] || !UndefElts3[0])
        UndefElts.clearBit(0);

      break;

    case Intrinsic::x86_avx512_mask3_vfmadd_ss:
    case Intrinsic::x86_avx512_mask3_vfmadd_sd:
    case Intrinsic::x86_avx512_mask3_vfmsub_ss:
    case Intrinsic::x86_avx512_mask3_vfmsub_sd:
    case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
    case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
      // These intrinsics get the passthru bits from operand 2.
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
                                        UndefElts, Depth + 1);
      if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }

      // If the lowest element of a scalar op isn't used then use Arg2.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(2);
      }

      // Only the lower element is used for operands 0 and 1.
      DemandedElts = 1;
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                        UndefElts2, Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
                                        UndefElts3, Depth + 1);
      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }

      // Lower element is undefined if all three lower elements are undefined.
      // Consider things like undef&0. The result is known zero, not undef.
      if (!UndefElts2[0] || !UndefElts3[0])
        UndefElts.clearBit(0);

      break;

    case Intrinsic::x86_sse2_pmulu_dq:
    case Intrinsic::x86_sse41_pmuldq:
    case Intrinsic::x86_avx2_pmul_dq:
    case Intrinsic::x86_avx2_pmulu_dq:
    case Intrinsic::x86_avx512_pmul_dq_512:
    case Intrinsic::x86_avx512_pmulu_dq_512: {
      Value *Op0 = II->getArgOperand(0);
      Value *Op1 = II->getArgOperand(1);
      unsigned InnerVWidth = Op0->getType()->getVectorNumElements();
      assert((VWidth * 2) == InnerVWidth && "Unexpected input size");

      APInt InnerDemandedElts(InnerVWidth, 0);
      for (unsigned i = 0; i != VWidth; ++i)
        if (DemandedElts[i])
          InnerDemandedElts.setBit(i * 2);

      UndefElts2 = APInt(InnerVWidth, 0);
      TmpV = SimplifyDemandedVectorElts(Op0, InnerDemandedElts, UndefElts2,
                                        Depth + 1);
      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

      UndefElts3 = APInt(InnerVWidth, 0);
      TmpV = SimplifyDemandedVectorElts(Op1, InnerDemandedElts, UndefElts3,
                                        Depth + 1);
      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }

      break;
    }
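
    // Example for the PMULDQ/PMULUDQ cases above (illustrative, not from the
    // original source): for
    //   <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a, <4 x i32> %b)
    // result element i multiplies input elements 2*i (the odd-numbered
    // inputs are ignored by the instruction), so DemandedElts == {1} maps to
    // inner demanded elements {2} for both operands.
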
    case Intrinsic::x86_sse2_packssdw_128:
    case Intrinsic::x86_sse2_packsswb_128:
    case Intrinsic::x86_sse2_packuswb_128:
    case Intrinsic::x86_sse41_packusdw:
    case Intrinsic::x86_avx2_packssdw:
    case Intrinsic::x86_avx2_packsswb:
    case Intrinsic::x86_avx2_packusdw:
    case Intrinsic::x86_avx2_packuswb:
    case Intrinsic::x86_avx512_packssdw_512:
    case Intrinsic::x86_avx512_packsswb_512:
    case Intrinsic::x86_avx512_packusdw_512:
    case Intrinsic::x86_avx512_packuswb_512: {
      auto *Ty0 = II->getArgOperand(0)->getType();
      unsigned InnerVWidth = Ty0->getVectorNumElements();
      assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

      unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
      unsigned VWidthPerLane = VWidth / NumLanes;
      unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;

      // Per lane, pack the elements of the first input and then the second.
      // e.g.
      // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
      // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
      for (int OpNum = 0; OpNum != 2; ++OpNum) {
        APInt OpDemandedElts(InnerVWidth, 0);
        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
          unsigned LaneIdx = Lane * VWidthPerLane;
          for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
            unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
            if (DemandedElts[Idx])
              OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
          }
        }

        // Demand elements from the operand.
        auto *Op = II->getArgOperand(OpNum);
        APInt OpUndefElts(InnerVWidth, 0);
        TmpV = SimplifyDemandedVectorElts(Op, OpDemandedElts, OpUndefElts,
                                          Depth + 1);
        if (TmpV) {
          II->setArgOperand(OpNum, TmpV);
          MadeChange = true;
        }

        // Pack the operand's UNDEF elements, one lane at a time.
        OpUndefElts = OpUndefElts.zext(VWidth);
        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
          APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
          LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
          LaneElts = LaneElts.shl(InnerVWidthPerLane * (2 * Lane + OpNum));
          UndefElts |= LaneElts;
        }
      }
      break;
    }
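
    // Example for the PACK cases above (illustrative, not from the original
    // source): in
    //   <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %x, <4 x i32> %y)
    // output elements 0..3 come from %x and 4..7 from %y, so with
    // DemandedElts == {0,1} only %x[0] and %x[1] are demanded and %y can be
    // simplified all the way to undef.
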
    // PSHUFB
    case Intrinsic::x86_ssse3_pshuf_b_128:
    case Intrinsic::x86_avx2_pshuf_b:
    case Intrinsic::x86_avx512_pshuf_b_512:
    // PERMILVAR
    case Intrinsic::x86_avx_vpermilvar_ps:
    case Intrinsic::x86_avx_vpermilvar_ps_256:
    case Intrinsic::x86_avx512_vpermilvar_ps_512:
    case Intrinsic::x86_avx_vpermilvar_pd:
    case Intrinsic::x86_avx_vpermilvar_pd_256:
    case Intrinsic::x86_avx512_vpermilvar_pd_512:
    // PERMV
    case Intrinsic::x86_avx2_permd:
    case Intrinsic::x86_avx2_permps: {
      Value *Op1 = II->getArgOperand(1);
      TmpV = SimplifyDemandedVectorElts(Op1, DemandedElts, UndefElts,
                                        Depth + 1);
      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
      break;
    }

    // SSE4A instructions leave the upper 64-bits of the 128-bit result
    // in an undefined state.
    case Intrinsic::x86_sse4a_extrq:
    case Intrinsic::x86_sse4a_extrqi:
    case Intrinsic::x86_sse4a_insertq:
    case Intrinsic::x86_sse4a_insertqi:
      UndefElts.setHighBits(VWidth / 2);
      break;
    case Intrinsic::amdgcn_buffer_load:
    case Intrinsic::amdgcn_buffer_load_format: {
      if (VWidth == 1 || !APIntOps::isMask(DemandedElts))
        return nullptr;

      // TODO: Handle 3 vectors when supported in code gen.
      unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countTrailingOnes());
      if (NewNumElts == VWidth)
        return nullptr;

      Module *M = II->getParent()->getParent()->getParent();
      Type *EltTy = V->getType()->getVectorElementType();

      Type *NewTy = (NewNumElts == 1) ? EltTy :
        VectorType::get(EltTy, NewNumElts);

      Function *NewIntrin = Intrinsic::getDeclaration(M, II->getIntrinsicID(),
                                                      NewTy);

      SmallVector<Value *, 5> Args;
      for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
        Args.push_back(II->getArgOperand(I));

      IRBuilderBase::InsertPointGuard Guard(*Builder);
      Builder->SetInsertPoint(II);

      CallInst *NewCall = Builder->CreateCall(NewIntrin, Args);
      NewCall->takeName(II);
      NewCall->copyMetadata(*II);
      if (NewNumElts == 1) {
        return Builder->CreateInsertElement(UndefValue::get(V->getType()),
                                            NewCall, static_cast<uint64_t>(0));
      }

      SmallVector<uint32_t, 8> EltMask;
      for (unsigned I = 0; I < VWidth; ++I)
        EltMask.push_back(I);

      Value *Shuffle = Builder->CreateShuffleVector(
          NewCall, UndefValue::get(NewTy), EltMask);

      MadeChange = true;
      return Shuffle;
    }
    }
    break;
  }
  }
  return MadeChange ? I : nullptr;
}