1 //===- InstCombineCompares.cpp --------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the visitICmp and visitFCmp functions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "InstCombineInternal.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/Analysis/ConstantFolding.h" 18 #include "llvm/Analysis/InstructionSimplify.h" 19 #include "llvm/Analysis/MemoryBuiltins.h" 20 #include "llvm/IR/ConstantRange.h" 21 #include "llvm/IR/DataLayout.h" 22 #include "llvm/IR/GetElementPtrTypeIterator.h" 23 #include "llvm/IR/IntrinsicInst.h" 24 #include "llvm/IR/PatternMatch.h" 25 #include "llvm/Support/CommandLine.h" 26 #include "llvm/Support/Debug.h" 27 #include "llvm/Analysis/TargetLibraryInfo.h" 28 29 using namespace llvm; 30 using namespace PatternMatch; 31 32 #define DEBUG_TYPE "instcombine" 33 34 // How many times is a select replaced by one of its operands? 35 STATISTIC(NumSel, "Number of select opts"); 36 37 // Initialization Routines 38 39 static ConstantInt *getOne(Constant *C) { 40 return ConstantInt::get(cast<IntegerType>(C->getType()), 1); 41 } 42 43 static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { 44 return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx)); 45 } 46 47 static bool HasAddOverflow(ConstantInt *Result, 48 ConstantInt *In1, ConstantInt *In2, 49 bool IsSigned) { 50 if (!IsSigned) 51 return Result->getValue().ult(In1->getValue()); 52 53 if (In2->isNegative()) 54 return Result->getValue().sgt(In1->getValue()); 55 return Result->getValue().slt(In1->getValue()); 56 } 57 58 /// AddWithOverflow - Compute Result = In1+In2, returning true if the result 59 /// overflowed for this type. 60 static bool AddWithOverflow(Constant *&Result, Constant *In1, 61 Constant *In2, bool IsSigned = false) { 62 Result = ConstantExpr::getAdd(In1, In2); 63 64 if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { 65 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { 66 Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); 67 if (HasAddOverflow(ExtractElement(Result, Idx), 68 ExtractElement(In1, Idx), 69 ExtractElement(In2, Idx), 70 IsSigned)) 71 return true; 72 } 73 return false; 74 } 75 76 return HasAddOverflow(cast<ConstantInt>(Result), 77 cast<ConstantInt>(In1), cast<ConstantInt>(In2), 78 IsSigned); 79 } 80 81 static bool HasSubOverflow(ConstantInt *Result, 82 ConstantInt *In1, ConstantInt *In2, 83 bool IsSigned) { 84 if (!IsSigned) 85 return Result->getValue().ugt(In1->getValue()); 86 87 if (In2->isNegative()) 88 return Result->getValue().slt(In1->getValue()); 89 90 return Result->getValue().sgt(In1->getValue()); 91 } 92 93 /// SubWithOverflow - Compute Result = In1-In2, returning true if the result 94 /// overflowed for this type. 95 static bool SubWithOverflow(Constant *&Result, Constant *In1, 96 Constant *In2, bool IsSigned = false) { 97 Result = ConstantExpr::getSub(In1, In2); 98 99 if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { 100 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { 101 Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); 102 if (HasSubOverflow(ExtractElement(Result, Idx), 103 ExtractElement(In1, Idx), 104 ExtractElement(In2, Idx), 105 IsSigned)) 106 return true; 107 } 108 return false; 109 } 110 111 return HasSubOverflow(cast<ConstantInt>(Result), 112 cast<ConstantInt>(In1), cast<ConstantInt>(In2), 113 IsSigned); 114 } 115 116 /// isSignBitCheck - Given an exploded icmp instruction, return true if the 117 /// comparison only checks the sign bit. If it only checks the sign bit, set 118 /// TrueIfSigned if the result of the comparison is true when the input value is 119 /// signed. 120 static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, 121 bool &TrueIfSigned) { 122 switch (pred) { 123 case ICmpInst::ICMP_SLT: // True if LHS s< 0 124 TrueIfSigned = true; 125 return RHS->isZero(); 126 case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 127 TrueIfSigned = true; 128 return RHS->isAllOnesValue(); 129 case ICmpInst::ICMP_SGT: // True if LHS s> -1 130 TrueIfSigned = false; 131 return RHS->isAllOnesValue(); 132 case ICmpInst::ICMP_UGT: 133 // True if LHS u> RHS and RHS == high-bit-mask - 1 134 TrueIfSigned = true; 135 return RHS->isMaxValue(true); 136 case ICmpInst::ICMP_UGE: 137 // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) 138 TrueIfSigned = true; 139 return RHS->getValue().isSignBit(); 140 default: 141 return false; 142 } 143 } 144 145 /// Returns true if the exploded icmp can be expressed as a signed comparison 146 /// to zero and updates the predicate accordingly. 147 /// The signedness of the comparison is preserved. 148 static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) { 149 if (!ICmpInst::isSigned(pred)) 150 return false; 151 152 if (RHS->isZero()) 153 return ICmpInst::isRelational(pred); 154 155 if (RHS->isOne()) { 156 if (pred == ICmpInst::ICMP_SLT) { 157 pred = ICmpInst::ICMP_SLE; 158 return true; 159 } 160 } else if (RHS->isAllOnesValue()) { 161 if (pred == ICmpInst::ICMP_SGT) { 162 pred = ICmpInst::ICMP_SGE; 163 return true; 164 } 165 } 166 167 return false; 168 } 169 170 // isHighOnes - Return true if the constant is of the form 1+0+. 171 // This is the same as lowones(~X). 172 static bool isHighOnes(const ConstantInt *CI) { 173 return (~CI->getValue() + 1).isPowerOf2(); 174 } 175 176 /// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a 177 /// set of known zero and one bits, compute the maximum and minimum values that 178 /// could have the specified known zero and known one bits, returning them in 179 /// min/max. 180 static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, 181 const APInt& KnownOne, 182 APInt& Min, APInt& Max) { 183 assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && 184 KnownZero.getBitWidth() == Min.getBitWidth() && 185 KnownZero.getBitWidth() == Max.getBitWidth() && 186 "KnownZero, KnownOne and Min, Max must have equal bitwidth."); 187 APInt UnknownBits = ~(KnownZero|KnownOne); 188 189 // The minimum value is when all unknown bits are zeros, EXCEPT for the sign 190 // bit if it is unknown. 191 Min = KnownOne; 192 Max = KnownOne|UnknownBits; 193 194 if (UnknownBits.isNegative()) { // Sign bit is unknown 195 Min.setBit(Min.getBitWidth()-1); 196 Max.clearBit(Max.getBitWidth()-1); 197 } 198 } 199 200 // ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and 201 // a set of known zero and one bits, compute the maximum and minimum values that 202 // could have the specified known zero and known one bits, returning them in 203 // min/max. 204 static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, 205 const APInt &KnownOne, 206 APInt &Min, APInt &Max) { 207 assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && 208 KnownZero.getBitWidth() == Min.getBitWidth() && 209 KnownZero.getBitWidth() == Max.getBitWidth() && 210 "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); 211 APInt UnknownBits = ~(KnownZero|KnownOne); 212 213 // The minimum value is when the unknown bits are all zeros. 214 Min = KnownOne; 215 // The maximum value is when the unknown bits are all ones. 216 Max = KnownOne|UnknownBits; 217 } 218 219 220 221 /// FoldCmpLoadFromIndexedGlobal - Called we see this pattern: 222 /// cmp pred (load (gep GV, ...)), cmpcst 223 /// where GV is a global variable with a constant initializer. Try to simplify 224 /// this into some simple computation that does not need the load. For example 225 /// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3". 226 /// 227 /// If AndCst is non-null, then the loaded value is masked with that constant 228 /// before doing the comparison. This handles cases like "A[i]&4 == 0". 229 Instruction *InstCombiner:: 230 FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, 231 CmpInst &ICI, ConstantInt *AndCst) { 232 // We need TD information to know the pointer size unless this is inbounds. 233 if (!GEP->isInBounds() && !DL) 234 return nullptr; 235 236 Constant *Init = GV->getInitializer(); 237 if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) 238 return nullptr; 239 240 uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); 241 if (ArrayElementCount > 1024) return nullptr; // Don't blow up on huge arrays. 242 243 // There are many forms of this optimization we can handle, for now, just do 244 // the simple index into a single-dimensional array. 245 // 246 // Require: GEP GV, 0, i {{, constant indices}} 247 if (GEP->getNumOperands() < 3 || 248 !isa<ConstantInt>(GEP->getOperand(1)) || 249 !cast<ConstantInt>(GEP->getOperand(1))->isZero() || 250 isa<Constant>(GEP->getOperand(2))) 251 return nullptr; 252 253 // Check that indices after the variable are constants and in-range for the 254 // type they index. Collect the indices. This is typically for arrays of 255 // structs. 256 SmallVector<unsigned, 4> LaterIndices; 257 258 Type *EltTy = Init->getType()->getArrayElementType(); 259 for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { 260 ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); 261 if (!Idx) return nullptr; // Variable index. 262 263 uint64_t IdxVal = Idx->getZExtValue(); 264 if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index. 265 266 if (StructType *STy = dyn_cast<StructType>(EltTy)) 267 EltTy = STy->getElementType(IdxVal); 268 else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { 269 if (IdxVal >= ATy->getNumElements()) return nullptr; 270 EltTy = ATy->getElementType(); 271 } else { 272 return nullptr; // Unknown type. 273 } 274 275 LaterIndices.push_back(IdxVal); 276 } 277 278 enum { Overdefined = -3, Undefined = -2 }; 279 280 // Variables for our state machines. 281 282 // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form 283 // "i == 47 | i == 87", where 47 is the first index the condition is true for, 284 // and 87 is the second (and last) index. FirstTrueElement is -2 when 285 // undefined, otherwise set to the first true element. SecondTrueElement is 286 // -2 when undefined, -3 when overdefined and >= 0 when that index is true. 287 int FirstTrueElement = Undefined, SecondTrueElement = Undefined; 288 289 // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the 290 // form "i != 47 & i != 87". Same state transitions as for true elements. 291 int FirstFalseElement = Undefined, SecondFalseElement = Undefined; 292 293 /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these 294 /// define a state machine that triggers for ranges of values that the index 295 /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'. 296 /// This is -2 when undefined, -3 when overdefined, and otherwise the last 297 /// index in the range (inclusive). We use -2 for undefined here because we 298 /// use relative comparisons and don't want 0-1 to match -1. 299 int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined; 300 301 // MagicBitvector - This is a magic bitvector where we set a bit if the 302 // comparison is true for element 'i'. If there are 64 elements or less in 303 // the array, this will fully represent all the comparison results. 304 uint64_t MagicBitvector = 0; 305 306 307 // Scan the array and see if one of our patterns matches. 308 Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); 309 for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { 310 Constant *Elt = Init->getAggregateElement(i); 311 if (!Elt) return nullptr; 312 313 // If this is indexing an array of structures, get the structure element. 314 if (!LaterIndices.empty()) 315 Elt = ConstantExpr::getExtractValue(Elt, LaterIndices); 316 317 // If the element is masked, handle it. 318 if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); 319 320 // Find out if the comparison would be true or false for the i'th element. 321 Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, 322 CompareRHS, DL, TLI); 323 // If the result is undef for this element, ignore it. 324 if (isa<UndefValue>(C)) { 325 // Extend range state machines to cover this element in case there is an 326 // undef in the middle of the range. 327 if (TrueRangeEnd == (int)i-1) 328 TrueRangeEnd = i; 329 if (FalseRangeEnd == (int)i-1) 330 FalseRangeEnd = i; 331 continue; 332 } 333 334 // If we can't compute the result for any of the elements, we have to give 335 // up evaluating the entire conditional. 336 if (!isa<ConstantInt>(C)) return nullptr; 337 338 // Otherwise, we know if the comparison is true or false for this element, 339 // update our state machines. 340 bool IsTrueForElt = !cast<ConstantInt>(C)->isZero(); 341 342 // State machine for single/double/range index comparison. 343 if (IsTrueForElt) { 344 // Update the TrueElement state machine. 345 if (FirstTrueElement == Undefined) 346 FirstTrueElement = TrueRangeEnd = i; // First true element. 347 else { 348 // Update double-compare state machine. 349 if (SecondTrueElement == Undefined) 350 SecondTrueElement = i; 351 else 352 SecondTrueElement = Overdefined; 353 354 // Update range state machine. 355 if (TrueRangeEnd == (int)i-1) 356 TrueRangeEnd = i; 357 else 358 TrueRangeEnd = Overdefined; 359 } 360 } else { 361 // Update the FalseElement state machine. 362 if (FirstFalseElement == Undefined) 363 FirstFalseElement = FalseRangeEnd = i; // First false element. 364 else { 365 // Update double-compare state machine. 366 if (SecondFalseElement == Undefined) 367 SecondFalseElement = i; 368 else 369 SecondFalseElement = Overdefined; 370 371 // Update range state machine. 372 if (FalseRangeEnd == (int)i-1) 373 FalseRangeEnd = i; 374 else 375 FalseRangeEnd = Overdefined; 376 } 377 } 378 379 380 // If this element is in range, update our magic bitvector. 381 if (i < 64 && IsTrueForElt) 382 MagicBitvector |= 1ULL << i; 383 384 // If all of our states become overdefined, bail out early. Since the 385 // predicate is expensive, only check it every 8 elements. This is only 386 // really useful for really huge arrays. 387 if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined && 388 SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined && 389 FalseRangeEnd == Overdefined) 390 return nullptr; 391 } 392 393 // Now that we've scanned the entire array, emit our new comparison(s). We 394 // order the state machines in complexity of the generated code. 395 Value *Idx = GEP->getOperand(2); 396 397 // If the index is larger than the pointer size of the target, truncate the 398 // index down like the GEP would do implicitly. We don't have to do this for 399 // an inbounds GEP because the index can't be out of range. 400 if (!GEP->isInBounds()) { 401 Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); 402 unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); 403 if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize) 404 Idx = Builder->CreateTrunc(Idx, IntPtrTy); 405 } 406 407 // If the comparison is only true for one or two elements, emit direct 408 // comparisons. 409 if (SecondTrueElement != Overdefined) { 410 // None true -> false. 411 if (FirstTrueElement == Undefined) 412 return ReplaceInstUsesWith(ICI, Builder->getFalse()); 413 414 Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); 415 416 // True for one element -> 'i == 47'. 417 if (SecondTrueElement == Undefined) 418 return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); 419 420 // True for two elements -> 'i == 47 | i == 72'. 421 Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); 422 Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); 423 Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx); 424 return BinaryOperator::CreateOr(C1, C2); 425 } 426 427 // If the comparison is only false for one or two elements, emit direct 428 // comparisons. 429 if (SecondFalseElement != Overdefined) { 430 // None false -> true. 431 if (FirstFalseElement == Undefined) 432 return ReplaceInstUsesWith(ICI, Builder->getTrue()); 433 434 Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); 435 436 // False for one element -> 'i != 47'. 437 if (SecondFalseElement == Undefined) 438 return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); 439 440 // False for two elements -> 'i != 47 & i != 72'. 441 Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); 442 Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); 443 Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); 444 return BinaryOperator::CreateAnd(C1, C2); 445 } 446 447 // If the comparison can be replaced with a range comparison for the elements 448 // where it is true, emit the range check. 449 if (TrueRangeEnd != Overdefined) { 450 assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); 451 452 // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1). 453 if (FirstTrueElement) { 454 Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); 455 Idx = Builder->CreateAdd(Idx, Offs); 456 } 457 458 Value *End = ConstantInt::get(Idx->getType(), 459 TrueRangeEnd-FirstTrueElement+1); 460 return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); 461 } 462 463 // False range check. 464 if (FalseRangeEnd != Overdefined) { 465 assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); 466 // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). 467 if (FirstFalseElement) { 468 Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); 469 Idx = Builder->CreateAdd(Idx, Offs); 470 } 471 472 Value *End = ConstantInt::get(Idx->getType(), 473 FalseRangeEnd-FirstFalseElement); 474 return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); 475 } 476 477 478 // If a magic bitvector captures the entire comparison state 479 // of this load, replace it with computation that does: 480 // ((magic_cst >> i) & 1) != 0 481 { 482 Type *Ty = nullptr; 483 484 // Look for an appropriate type: 485 // - The type of Idx if the magic fits 486 // - The smallest fitting legal type if we have a DataLayout 487 // - Default to i32 488 if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) 489 Ty = Idx->getType(); 490 else if (DL) 491 Ty = DL->getSmallestLegalIntType(Init->getContext(), ArrayElementCount); 492 else if (ArrayElementCount <= 32) 493 Ty = Type::getInt32Ty(Init->getContext()); 494 495 if (Ty) { 496 Value *V = Builder->CreateIntCast(Idx, Ty, false); 497 V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); 498 V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); 499 return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); 500 } 501 } 502 503 return nullptr; 504 } 505 506 507 /// EvaluateGEPOffsetExpression - Return a value that can be used to compare 508 /// the *offset* implied by a GEP to zero. For example, if we have &A[i], we 509 /// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can 510 /// be complex, and scales are involved. The above expression would also be 511 /// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). 512 /// This later form is less amenable to optimization though, and we are allowed 513 /// to generate the first by knowing that pointer arithmetic doesn't overflow. 514 /// 515 /// If we can't emit an optimized form for this expression, this returns null. 516 /// 517 static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { 518 const DataLayout &DL = *IC.getDataLayout(); 519 gep_type_iterator GTI = gep_type_begin(GEP); 520 521 // Check to see if this gep only has a single variable index. If so, and if 522 // any constant indices are a multiple of its scale, then we can compute this 523 // in terms of the scale of the variable index. For example, if the GEP 524 // implies an offset of "12 + i*4", then we can codegen this as "3 + i", 525 // because the expression will cross zero at the same point. 526 unsigned i, e = GEP->getNumOperands(); 527 int64_t Offset = 0; 528 for (i = 1; i != e; ++i, ++GTI) { 529 if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { 530 // Compute the aggregate offset of constant indices. 531 if (CI->isZero()) continue; 532 533 // Handle a struct index, which adds its field offset to the pointer. 534 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 535 Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); 536 } else { 537 uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); 538 Offset += Size*CI->getSExtValue(); 539 } 540 } else { 541 // Found our variable index. 542 break; 543 } 544 } 545 546 // If there are no variable indices, we must have a constant offset, just 547 // evaluate it the general way. 548 if (i == e) return nullptr; 549 550 Value *VariableIdx = GEP->getOperand(i); 551 // Determine the scale factor of the variable element. For example, this is 552 // 4 if the variable index is into an array of i32. 553 uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType()); 554 555 // Verify that there are no other variable indices. If so, emit the hard way. 556 for (++i, ++GTI; i != e; ++i, ++GTI) { 557 ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); 558 if (!CI) return nullptr; 559 560 // Compute the aggregate offset of constant indices. 561 if (CI->isZero()) continue; 562 563 // Handle a struct index, which adds its field offset to the pointer. 564 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 565 Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); 566 } else { 567 uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); 568 Offset += Size*CI->getSExtValue(); 569 } 570 } 571 572 573 574 // Okay, we know we have a single variable index, which must be a 575 // pointer/array/vector index. If there is no offset, life is simple, return 576 // the index. 577 Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType()); 578 unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth(); 579 if (Offset == 0) { 580 // Cast to intptrty in case a truncation occurs. If an extension is needed, 581 // we don't need to bother extending: the extension won't affect where the 582 // computation crosses zero. 583 if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { 584 VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); 585 } 586 return VariableIdx; 587 } 588 589 // Otherwise, there is an index. The computation we will do will be modulo 590 // the pointer size, so get it. 591 uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); 592 593 Offset &= PtrSizeMask; 594 VariableScale &= PtrSizeMask; 595 596 // To do this transformation, any constant index must be a multiple of the 597 // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", 598 // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a 599 // multiple of the variable scale. 600 int64_t NewOffs = Offset / (int64_t)VariableScale; 601 if (Offset != NewOffs*(int64_t)VariableScale) 602 return nullptr; 603 604 // Okay, we can do this evaluation. Start by converting the index to intptr. 605 if (VariableIdx->getType() != IntPtrTy) 606 VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, 607 true /*Signed*/); 608 Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); 609 return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); 610 } 611 612 /// FoldGEPICmp - Fold comparisons between a GEP instruction and something 613 /// else. At this point we know that the GEP is on the LHS of the comparison. 614 Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, 615 ICmpInst::Predicate Cond, 616 Instruction &I) { 617 // Don't transform signed compares of GEPs into index compares. Even if the 618 // GEP is inbounds, the final add of the base pointer can have signed overflow 619 // and would change the result of the icmp. 620 // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be 621 // the maximum signed value for the pointer type. 622 if (ICmpInst::isSigned(Cond)) 623 return nullptr; 624 625 // Look through bitcasts and addrspacecasts. We do not however want to remove 626 // 0 GEPs. 627 if (!isa<GetElementPtrInst>(RHS)) 628 RHS = RHS->stripPointerCasts(); 629 630 Value *PtrBase = GEPLHS->getOperand(0); 631 if (DL && PtrBase == RHS && GEPLHS->isInBounds()) { 632 // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). 633 // This transformation (ignoring the base and scales) is valid because we 634 // know pointers can't overflow since the gep is inbounds. See if we can 635 // output an optimized form. 636 Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); 637 638 // If not, synthesize the offset the hard way. 639 if (!Offset) 640 Offset = EmitGEPOffset(GEPLHS); 641 return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, 642 Constant::getNullValue(Offset->getType())); 643 } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) { 644 // If the base pointers are different, but the indices are the same, just 645 // compare the base pointer. 646 if (PtrBase != GEPRHS->getOperand(0)) { 647 bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands(); 648 IndicesTheSame &= GEPLHS->getOperand(0)->getType() == 649 GEPRHS->getOperand(0)->getType(); 650 if (IndicesTheSame) 651 for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) 652 if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { 653 IndicesTheSame = false; 654 break; 655 } 656 657 // If all indices are the same, just compare the base pointers. 658 if (IndicesTheSame) 659 return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0)); 660 661 // If we're comparing GEPs with two base pointers that only differ in type 662 // and both GEPs have only constant indices or just one use, then fold 663 // the compare with the adjusted indices. 664 if (DL && GEPLHS->isInBounds() && GEPRHS->isInBounds() && 665 (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && 666 (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && 667 PtrBase->stripPointerCasts() == 668 GEPRHS->getOperand(0)->stripPointerCasts()) { 669 Value *LOffset = EmitGEPOffset(GEPLHS); 670 Value *ROffset = EmitGEPOffset(GEPRHS); 671 672 // If we looked through an addrspacecast between different sized address 673 // spaces, the LHS and RHS pointers are different sized 674 // integers. Truncate to the smaller one. 675 Type *LHSIndexTy = LOffset->getType(); 676 Type *RHSIndexTy = ROffset->getType(); 677 if (LHSIndexTy != RHSIndexTy) { 678 if (LHSIndexTy->getPrimitiveSizeInBits() < 679 RHSIndexTy->getPrimitiveSizeInBits()) { 680 ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy); 681 } else 682 LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy); 683 } 684 685 Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond), 686 LOffset, ROffset); 687 return ReplaceInstUsesWith(I, Cmp); 688 } 689 690 // Otherwise, the base pointers are different and the indices are 691 // different, bail out. 692 return nullptr; 693 } 694 695 // If one of the GEPs has all zero indices, recurse. 696 if (GEPLHS->hasAllZeroIndices()) 697 return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0), 698 ICmpInst::getSwappedPredicate(Cond), I); 699 700 // If the other GEP has all zero indices, recurse. 701 if (GEPRHS->hasAllZeroIndices()) 702 return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); 703 704 bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); 705 if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) { 706 // If the GEPs only differ by one index, compare it. 707 unsigned NumDifferences = 0; // Keep track of # differences. 708 unsigned DiffOperand = 0; // The operand that differs. 709 for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) 710 if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { 711 if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() != 712 GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) { 713 // Irreconcilable differences. 714 NumDifferences = 2; 715 break; 716 } else { 717 if (NumDifferences++) break; 718 DiffOperand = i; 719 } 720 } 721 722 if (NumDifferences == 0) // SAME GEP? 723 return ReplaceInstUsesWith(I, // No comparison is needed here. 724 Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond))); 725 726 else if (NumDifferences == 1 && GEPsInBounds) { 727 Value *LHSV = GEPLHS->getOperand(DiffOperand); 728 Value *RHSV = GEPRHS->getOperand(DiffOperand); 729 // Make sure we do a signed comparison here. 730 return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV); 731 } 732 } 733 734 // Only lower this if the icmp is the only user of the GEP or if we expect 735 // the result to fold to a constant! 736 if (DL && 737 GEPsInBounds && 738 (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && 739 (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { 740 // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) 741 Value *L = EmitGEPOffset(GEPLHS); 742 Value *R = EmitGEPOffset(GEPRHS); 743 return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); 744 } 745 } 746 return nullptr; 747 } 748 749 /// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". 750 Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI, 751 Value *X, ConstantInt *CI, 752 ICmpInst::Predicate Pred) { 753 // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, 754 // so the values can never be equal. Similarly for all other "or equals" 755 // operators. 756 757 // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255 758 // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 759 // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 760 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { 761 Value *R = 762 ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI); 763 return new ICmpInst(ICmpInst::ICMP_UGT, X, R); 764 } 765 766 // (X+1) >u X --> X <u (0-1) --> X != 255 767 // (X+2) >u X --> X <u (0-2) --> X <u 254 768 // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 769 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) 770 return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); 771 772 unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); 773 ConstantInt *SMax = ConstantInt::get(X->getContext(), 774 APInt::getSignedMaxValue(BitWidth)); 775 776 // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127 777 // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125 778 // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0 779 // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1 780 // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126 781 // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 782 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) 783 return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); 784 785 // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 786 // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 787 // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 788 // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 789 // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 790 // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 791 792 assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); 793 Constant *C = Builder->getInt(CI->getValue()-1); 794 return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); 795 } 796 797 /// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS 798 /// and CmpRHS are both known to be integer constants. 799 Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, 800 ConstantInt *DivRHS) { 801 ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1)); 802 const APInt &CmpRHSV = CmpRHS->getValue(); 803 804 // FIXME: If the operand types don't match the type of the divide 805 // then don't attempt this transform. The code below doesn't have the 806 // logic to deal with a signed divide and an unsigned compare (and 807 // vice versa). This is because (x /s C1) <s C2 produces different 808 // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even 809 // (x /u C1) <u C2. Simply casting the operands and result won't 810 // work. :( The if statement below tests that condition and bails 811 // if it finds it. 812 bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; 813 if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) 814 return nullptr; 815 if (DivRHS->isZero()) 816 return nullptr; // The ProdOV computation fails on divide by zero. 817 if (DivIsSigned && DivRHS->isAllOnesValue()) 818 return nullptr; // The overflow computation also screws up here 819 if (DivRHS->isOne()) { 820 // This eliminates some funny cases with INT_MIN. 821 ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X. 822 return &ICI; 823 } 824 825 // Compute Prod = CI * DivRHS. We are essentially solving an equation 826 // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and 827 // C2 (CI). By solving for X we can turn this into a range check 828 // instead of computing a divide. 829 Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); 830 831 // Determine if the product overflows by seeing if the product is 832 // not equal to the divide. Make sure we do the same kind of divide 833 // as in the LHS instruction that we're folding. 834 bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : 835 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; 836 837 // Get the ICmp opcode 838 ICmpInst::Predicate Pred = ICI.getPredicate(); 839 840 /// If the division is known to be exact, then there is no remainder from the 841 /// divide, so the covered range size is unit, otherwise it is the divisor. 842 ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS; 843 844 // Figure out the interval that is being checked. For example, a comparison 845 // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). 846 // Compute this interval based on the constants involved and the signedness of 847 // the compare/divide. This computes a half-open interval, keeping track of 848 // whether either value in the interval overflows. After analysis each 849 // overflow variable is set to 0 if it's corresponding bound variable is valid 850 // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. 851 int LoOverflow = 0, HiOverflow = 0; 852 Constant *LoBound = nullptr, *HiBound = nullptr; 853 854 if (!DivIsSigned) { // udiv 855 // e.g. X/5 op 3 --> [15, 20) 856 LoBound = Prod; 857 HiOverflow = LoOverflow = ProdOV; 858 if (!HiOverflow) { 859 // If this is not an exact divide, then many values in the range collapse 860 // to the same result value. 861 HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); 862 } 863 864 } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. 865 if (CmpRHSV == 0) { // (X / pos) op 0 866 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) 867 LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); 868 HiBound = RangeSize; 869 } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos 870 LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) 871 HiOverflow = LoOverflow = ProdOV; 872 if (!HiOverflow) 873 HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true); 874 } else { // (X / pos) op neg 875 // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) 876 HiBound = AddOne(Prod); 877 LoOverflow = HiOverflow = ProdOV ? -1 : 0; 878 if (!LoOverflow) { 879 ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); 880 LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; 881 } 882 } 883 } else if (DivRHS->isNegative()) { // Divisor is < 0. 884 if (DivI->isExact()) 885 RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); 886 if (CmpRHSV == 0) { // (X / neg) op 0 887 // e.g. X/-5 op 0 --> [-4, 5) 888 LoBound = AddOne(RangeSize); 889 HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); 890 if (HiBound == DivRHS) { // -INTMIN = INTMIN 891 HiOverflow = 1; // [INTMIN+1, overflow) 892 HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN 893 } 894 } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos 895 // e.g. X/-5 op 3 --> [-19, -14) 896 HiBound = AddOne(Prod); 897 HiOverflow = LoOverflow = ProdOV ? -1 : 0; 898 if (!LoOverflow) 899 LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0; 900 } else { // (X / neg) op neg 901 LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) 902 LoOverflow = HiOverflow = ProdOV; 903 if (!HiOverflow) 904 HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true); 905 } 906 907 // Dividing by a negative swaps the condition. LT <-> GT 908 Pred = ICmpInst::getSwappedPredicate(Pred); 909 } 910 911 Value *X = DivI->getOperand(0); 912 switch (Pred) { 913 default: llvm_unreachable("Unhandled icmp opcode!"); 914 case ICmpInst::ICMP_EQ: 915 if (LoOverflow && HiOverflow) 916 return ReplaceInstUsesWith(ICI, Builder->getFalse()); 917 if (HiOverflow) 918 return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : 919 ICmpInst::ICMP_UGE, X, LoBound); 920 if (LoOverflow) 921 return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : 922 ICmpInst::ICMP_ULT, X, HiBound); 923 return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, 924 DivIsSigned, true)); 925 case ICmpInst::ICMP_NE: 926 if (LoOverflow && HiOverflow) 927 return ReplaceInstUsesWith(ICI, Builder->getTrue()); 928 if (HiOverflow) 929 return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : 930 ICmpInst::ICMP_ULT, X, LoBound); 931 if (LoOverflow) 932 return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : 933 ICmpInst::ICMP_UGE, X, HiBound); 934 return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, 935 DivIsSigned, false)); 936 case ICmpInst::ICMP_ULT: 937 case ICmpInst::ICMP_SLT: 938 if (LoOverflow == +1) // Low bound is greater than input range. 939 return ReplaceInstUsesWith(ICI, Builder->getTrue()); 940 if (LoOverflow == -1) // Low bound is less than input range. 941 return ReplaceInstUsesWith(ICI, Builder->getFalse()); 942 return new ICmpInst(Pred, X, LoBound); 943 case ICmpInst::ICMP_UGT: 944 case ICmpInst::ICMP_SGT: 945 if (HiOverflow == +1) // High bound greater than input range. 946 return ReplaceInstUsesWith(ICI, Builder->getFalse()); 947 if (HiOverflow == -1) // High bound less than input range. 948 return ReplaceInstUsesWith(ICI, Builder->getTrue()); 949 if (Pred == ICmpInst::ICMP_UGT) 950 return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); 951 return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); 952 } 953 } 954 955 /// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)". 956 Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, 957 ConstantInt *ShAmt) { 958 const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue(); 959 960 // Check that the shift amount is in range. If not, don't perform 961 // undefined shifts. When the shift is visited it will be 962 // simplified. 963 uint32_t TypeBits = CmpRHSV.getBitWidth(); 964 uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); 965 if (ShAmtVal >= TypeBits || ShAmtVal == 0) 966 return nullptr; 967 968 if (!ICI.isEquality()) { 969 // If we have an unsigned comparison and an ashr, we can't simplify this. 970 // Similarly for signed comparisons with lshr. 971 if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) 972 return nullptr; 973 974 // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv 975 // by a power of 2. Since we already have logic to simplify these, 976 // transform to div and then simplify the resultant comparison. 977 if (Shr->getOpcode() == Instruction::AShr && 978 (!Shr->isExact() || ShAmtVal == TypeBits - 1)) 979 return nullptr; 980 981 // Revisit the shift (to delete it). 982 Worklist.Add(Shr); 983 984 Constant *DivCst = 985 ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); 986 987 Value *Tmp = 988 Shr->getOpcode() == Instruction::AShr ? 989 Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) : 990 Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()); 991 992 ICI.setOperand(0, Tmp); 993 994 // If the builder folded the binop, just return it. 995 BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp); 996 if (!TheDiv) 997 return &ICI; 998 999 // Otherwise, fold this div/compare. 1000 assert(TheDiv->getOpcode() == Instruction::SDiv || 1001 TheDiv->getOpcode() == Instruction::UDiv); 1002 1003 Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst)); 1004 assert(Res && "This div/cst should have folded!"); 1005 return Res; 1006 } 1007 1008 1009 // If we are comparing against bits always shifted out, the 1010 // comparison cannot succeed. 1011 APInt Comp = CmpRHSV << ShAmtVal; 1012 ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp); 1013 if (Shr->getOpcode() == Instruction::LShr) 1014 Comp = Comp.lshr(ShAmtVal); 1015 else 1016 Comp = Comp.ashr(ShAmtVal); 1017 1018 if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero. 1019 bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; 1020 Constant *Cst = Builder->getInt1(IsICMP_NE); 1021 return ReplaceInstUsesWith(ICI, Cst); 1022 } 1023 1024 // Otherwise, check to see if the bits shifted out are known to be zero. 1025 // If so, we can compare against the unshifted value: 1026 // (X & 4) >> 1 == 2 --> (X & 4) == 4. 1027 if (Shr->hasOneUse() && Shr->isExact()) 1028 return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS); 1029 1030 if (Shr->hasOneUse()) { 1031 // Otherwise strength reduce the shift into an and. 1032 APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); 1033 Constant *Mask = Builder->getInt(Val); 1034 1035 Value *And = Builder->CreateAnd(Shr->getOperand(0), 1036 Mask, Shr->getName()+".mask"); 1037 return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); 1038 } 1039 return nullptr; 1040 } 1041 1042 /// FoldICmpCstShrCst - Handle "(icmp eq/ne (ashr/lshr const2, A), const1)" -> 1043 /// (icmp eq/ne A, Log2(const2/const1)) -> 1044 /// (icmp eq/ne A, Log2(const2) - Log2(const1)). 1045 Instruction *InstCombiner::FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A, 1046 ConstantInt *CI1, 1047 ConstantInt *CI2) { 1048 assert(I.isEquality() && "Cannot fold icmp gt/lt"); 1049 1050 auto getConstant = [&I, this](bool IsTrue) { 1051 if (I.getPredicate() == I.ICMP_NE) 1052 IsTrue = !IsTrue; 1053 return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue)); 1054 }; 1055 1056 auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { 1057 if (I.getPredicate() == I.ICMP_NE) 1058 Pred = CmpInst::getInversePredicate(Pred); 1059 return new ICmpInst(Pred, LHS, RHS); 1060 }; 1061 1062 APInt AP1 = CI1->getValue(); 1063 APInt AP2 = CI2->getValue(); 1064 1065 // Don't bother doing any work for cases which InstSimplify handles. 1066 if (AP2 == 0) 1067 return nullptr; 1068 bool IsAShr = isa<AShrOperator>(Op); 1069 if (IsAShr) { 1070 if (AP2.isAllOnesValue()) 1071 return nullptr; 1072 if (AP2.isNegative() != AP1.isNegative()) 1073 return nullptr; 1074 if (AP2.sgt(AP1)) 1075 return nullptr; 1076 } 1077 1078 if (!AP1) 1079 // 'A' must be large enough to shift out the highest set bit. 1080 return getICmp(I.ICMP_UGT, A, 1081 ConstantInt::get(A->getType(), AP2.logBase2())); 1082 1083 if (AP1 == AP2) 1084 return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); 1085 1086 // Get the distance between the highest bit that's set. 1087 int Shift; 1088 // Both the constants are negative, take their positive to calculate log. 1089 if (IsAShr && AP1.isNegative()) 1090 // Get the ones' complement of AP2 and AP1 when computing the distance. 1091 Shift = (~AP2).logBase2() - (~AP1).logBase2(); 1092 else 1093 Shift = AP2.logBase2() - AP1.logBase2(); 1094 1095 if (Shift > 0) { 1096 if (IsAShr ? AP1 == AP2.ashr(Shift) : AP1 == AP2.lshr(Shift)) 1097 return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); 1098 } 1099 // Shifting const2 will never be equal to const1. 1100 return getConstant(false); 1101 } 1102 1103 /// FoldICmpCstShlCst - Handle "(icmp eq/ne (shl const2, A), const1)" -> 1104 /// (icmp eq/ne A, TrailingZeros(const1) - TrailingZeros(const2)). 1105 Instruction *InstCombiner::FoldICmpCstShlCst(ICmpInst &I, Value *Op, Value *A, 1106 ConstantInt *CI1, 1107 ConstantInt *CI2) { 1108 assert(I.isEquality() && "Cannot fold icmp gt/lt"); 1109 1110 auto getConstant = [&I, this](bool IsTrue) { 1111 if (I.getPredicate() == I.ICMP_NE) 1112 IsTrue = !IsTrue; 1113 return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue)); 1114 }; 1115 1116 auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { 1117 if (I.getPredicate() == I.ICMP_NE) 1118 Pred = CmpInst::getInversePredicate(Pred); 1119 return new ICmpInst(Pred, LHS, RHS); 1120 }; 1121 1122 APInt AP1 = CI1->getValue(); 1123 APInt AP2 = CI2->getValue(); 1124 1125 // Don't bother doing any work for cases which InstSimplify handles. 1126 if (AP2 == 0) 1127 return nullptr; 1128 1129 unsigned AP2TrailingZeros = AP2.countTrailingZeros(); 1130 1131 if (!AP1 && AP2TrailingZeros != 0) 1132 return getICmp(I.ICMP_UGE, A, 1133 ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros)); 1134 1135 if (AP1 == AP2) 1136 return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); 1137 1138 // Get the distance between the lowest bits that are set. 1139 int Shift = AP1.countTrailingZeros() - AP2TrailingZeros; 1140 1141 if (Shift > 0 && AP2.shl(Shift) == AP1) 1142 return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); 1143 1144 // Shifting const2 will never be equal to const1. 1145 return getConstant(false); 1146 } 1147 1148 /// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". 1149 /// 1150 Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, 1151 Instruction *LHSI, 1152 ConstantInt *RHS) { 1153 const APInt &RHSV = RHS->getValue(); 1154 1155 switch (LHSI->getOpcode()) { 1156 case Instruction::Trunc: 1157 if (ICI.isEquality() && LHSI->hasOneUse()) { 1158 // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all 1159 // of the high bits truncated out of x are known. 1160 unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), 1161 SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); 1162 APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); 1163 computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne, 0, &ICI); 1164 1165 // If all the high bits are known, we can do this xform. 1166 if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { 1167 // Pull in the high bits from known-ones set. 1168 APInt NewRHS = RHS->getValue().zext(SrcBits); 1169 NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits); 1170 return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), 1171 Builder->getInt(NewRHS)); 1172 } 1173 } 1174 break; 1175 1176 case Instruction::Xor: // (icmp pred (xor X, XorCst), CI) 1177 if (ConstantInt *XorCst = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { 1178 // If this is a comparison that tests the signbit (X < 0) or (x > -1), 1179 // fold the xor. 1180 if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || 1181 (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { 1182 Value *CompareVal = LHSI->getOperand(0); 1183 1184 // If the sign bit of the XorCst is not set, there is no change to 1185 // the operation, just stop using the Xor. 1186 if (!XorCst->isNegative()) { 1187 ICI.setOperand(0, CompareVal); 1188 Worklist.Add(LHSI); 1189 return &ICI; 1190 } 1191 1192 // Was the old condition true if the operand is positive? 1193 bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; 1194 1195 // If so, the new one isn't. 1196 isTrueIfPositive ^= true; 1197 1198 if (isTrueIfPositive) 1199 return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, 1200 SubOne(RHS)); 1201 else 1202 return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, 1203 AddOne(RHS)); 1204 } 1205 1206 if (LHSI->hasOneUse()) { 1207 // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit)) 1208 if (!ICI.isEquality() && XorCst->getValue().isSignBit()) { 1209 const APInt &SignBit = XorCst->getValue(); 1210 ICmpInst::Predicate Pred = ICI.isSigned() 1211 ? ICI.getUnsignedPredicate() 1212 : ICI.getSignedPredicate(); 1213 return new ICmpInst(Pred, LHSI->getOperand(0), 1214 Builder->getInt(RHSV ^ SignBit)); 1215 } 1216 1217 // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) 1218 if (!ICI.isEquality() && XorCst->isMaxValue(true)) { 1219 const APInt &NotSignBit = XorCst->getValue(); 1220 ICmpInst::Predicate Pred = ICI.isSigned() 1221 ? ICI.getUnsignedPredicate() 1222 : ICI.getSignedPredicate(); 1223 Pred = ICI.getSwappedPredicate(Pred); 1224 return new ICmpInst(Pred, LHSI->getOperand(0), 1225 Builder->getInt(RHSV ^ NotSignBit)); 1226 } 1227 } 1228 1229 // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C) 1230 // iff -C is a power of 2 1231 if (ICI.getPredicate() == ICmpInst::ICMP_UGT && 1232 XorCst->getValue() == ~RHSV && (RHSV + 1).isPowerOf2()) 1233 return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), XorCst); 1234 1235 // (icmp ult (xor X, C), -C) -> (icmp uge X, C) 1236 // iff -C is a power of 2 1237 if (ICI.getPredicate() == ICmpInst::ICMP_ULT && 1238 XorCst->getValue() == -RHSV && RHSV.isPowerOf2()) 1239 return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), XorCst); 1240 } 1241 break; 1242 case Instruction::And: // (icmp pred (and X, AndCst), RHS) 1243 if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) && 1244 LHSI->getOperand(0)->hasOneUse()) { 1245 ConstantInt *AndCst = cast<ConstantInt>(LHSI->getOperand(1)); 1246 1247 // If the LHS is an AND of a truncating cast, we can widen the 1248 // and/compare to be the input width without changing the value 1249 // produced, eliminating a cast. 1250 if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) { 1251 // We can do this transformation if either the AND constant does not 1252 // have its sign bit set or if it is an equality comparison. 1253 // Extending a relational comparison when we're checking the sign 1254 // bit would not work. 1255 if (ICI.isEquality() || 1256 (!AndCst->isNegative() && RHSV.isNonNegative())) { 1257 Value *NewAnd = 1258 Builder->CreateAnd(Cast->getOperand(0), 1259 ConstantExpr::getZExt(AndCst, Cast->getSrcTy())); 1260 NewAnd->takeName(LHSI); 1261 return new ICmpInst(ICI.getPredicate(), NewAnd, 1262 ConstantExpr::getZExt(RHS, Cast->getSrcTy())); 1263 } 1264 } 1265 1266 // If the LHS is an AND of a zext, and we have an equality compare, we can 1267 // shrink the and/compare to the smaller type, eliminating the cast. 1268 if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) { 1269 IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy()); 1270 // Make sure we don't compare the upper bits, SimplifyDemandedBits 1271 // should fold the icmp to true/false in that case. 1272 if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) { 1273 Value *NewAnd = 1274 Builder->CreateAnd(Cast->getOperand(0), 1275 ConstantExpr::getTrunc(AndCst, Ty)); 1276 NewAnd->takeName(LHSI); 1277 return new ICmpInst(ICI.getPredicate(), NewAnd, 1278 ConstantExpr::getTrunc(RHS, Ty)); 1279 } 1280 } 1281 1282 // If this is: (X >> C1) & C2 != C3 (where any shift and any compare 1283 // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This 1284 // happens a LOT in code produced by the C front-end, for bitfield 1285 // access. 1286 BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); 1287 if (Shift && !Shift->isShift()) 1288 Shift = nullptr; 1289 1290 ConstantInt *ShAmt; 1291 ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : nullptr; 1292 1293 // This seemingly simple opportunity to fold away a shift turns out to 1294 // be rather complicated. See PR17827 1295 // ( http://llvm.org/bugs/show_bug.cgi?id=17827 ) for details. 1296 if (ShAmt) { 1297 bool CanFold = false; 1298 unsigned ShiftOpcode = Shift->getOpcode(); 1299 if (ShiftOpcode == Instruction::AShr) { 1300 // There may be some constraints that make this possible, 1301 // but nothing simple has been discovered yet. 1302 CanFold = false; 1303 } else if (ShiftOpcode == Instruction::Shl) { 1304 // For a left shift, we can fold if the comparison is not signed. 1305 // We can also fold a signed comparison if the mask value and 1306 // comparison value are not negative. These constraints may not be 1307 // obvious, but we can prove that they are correct using an SMT 1308 // solver. 1309 if (!ICI.isSigned() || (!AndCst->isNegative() && !RHS->isNegative())) 1310 CanFold = true; 1311 } else if (ShiftOpcode == Instruction::LShr) { 1312 // For a logical right shift, we can fold if the comparison is not 1313 // signed. We can also fold a signed comparison if the shifted mask 1314 // value and the shifted comparison value are not negative. 1315 // These constraints may not be obvious, but we can prove that they 1316 // are correct using an SMT solver. 1317 if (!ICI.isSigned()) 1318 CanFold = true; 1319 else { 1320 ConstantInt *ShiftedAndCst = 1321 cast<ConstantInt>(ConstantExpr::getShl(AndCst, ShAmt)); 1322 ConstantInt *ShiftedRHSCst = 1323 cast<ConstantInt>(ConstantExpr::getShl(RHS, ShAmt)); 1324 1325 if (!ShiftedAndCst->isNegative() && !ShiftedRHSCst->isNegative()) 1326 CanFold = true; 1327 } 1328 } 1329 1330 if (CanFold) { 1331 Constant *NewCst; 1332 if (ShiftOpcode == Instruction::Shl) 1333 NewCst = ConstantExpr::getLShr(RHS, ShAmt); 1334 else 1335 NewCst = ConstantExpr::getShl(RHS, ShAmt); 1336 1337 // Check to see if we are shifting out any of the bits being 1338 // compared. 1339 if (ConstantExpr::get(ShiftOpcode, NewCst, ShAmt) != RHS) { 1340 // If we shifted bits out, the fold is not going to work out. 1341 // As a special case, check to see if this means that the 1342 // result is always true or false now. 1343 if (ICI.getPredicate() == ICmpInst::ICMP_EQ) 1344 return ReplaceInstUsesWith(ICI, Builder->getFalse()); 1345 if (ICI.getPredicate() == ICmpInst::ICMP_NE) 1346 return ReplaceInstUsesWith(ICI, Builder->getTrue()); 1347 } else { 1348 ICI.setOperand(1, NewCst); 1349 Constant *NewAndCst; 1350 if (ShiftOpcode == Instruction::Shl) 1351 NewAndCst = ConstantExpr::getLShr(AndCst, ShAmt); 1352 else 1353 NewAndCst = ConstantExpr::getShl(AndCst, ShAmt); 1354 LHSI->setOperand(1, NewAndCst); 1355 LHSI->setOperand(0, Shift->getOperand(0)); 1356 Worklist.Add(Shift); // Shift is dead. 1357 return &ICI; 1358 } 1359 } 1360 } 1361 1362 // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is 1363 // preferable because it allows the C<<Y expression to be hoisted out 1364 // of a loop if Y is invariant and X is not. 1365 if (Shift && Shift->hasOneUse() && RHSV == 0 && 1366 ICI.isEquality() && !Shift->isArithmeticShift() && 1367 !isa<Constant>(Shift->getOperand(0))) { 1368 // Compute C << Y. 1369 Value *NS; 1370 if (Shift->getOpcode() == Instruction::LShr) { 1371 NS = Builder->CreateShl(AndCst, Shift->getOperand(1)); 1372 } else { 1373 // Insert a logical shift. 1374 NS = Builder->CreateLShr(AndCst, Shift->getOperand(1)); 1375 } 1376 1377 // Compute X & (C << Y). 1378 Value *NewAnd = 1379 Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); 1380 1381 ICI.setOperand(0, NewAnd); 1382 return &ICI; 1383 } 1384 1385 // (icmp pred (and (or (lshr X, Y), X), 1), 0) --> 1386 // (icmp pred (and X, (or (shl 1, Y), 1), 0)) 1387 // 1388 // iff pred isn't signed 1389 { 1390 Value *X, *Y, *LShr; 1391 if (!ICI.isSigned() && RHSV == 0) { 1392 if (match(LHSI->getOperand(1), m_One())) { 1393 Constant *One = cast<Constant>(LHSI->getOperand(1)); 1394 Value *Or = LHSI->getOperand(0); 1395 if (match(Or, m_Or(m_Value(LShr), m_Value(X))) && 1396 match(LShr, m_LShr(m_Specific(X), m_Value(Y)))) { 1397 unsigned UsesRemoved = 0; 1398 if (LHSI->hasOneUse()) 1399 ++UsesRemoved; 1400 if (Or->hasOneUse()) 1401 ++UsesRemoved; 1402 if (LShr->hasOneUse()) 1403 ++UsesRemoved; 1404 Value *NewOr = nullptr; 1405 // Compute X & ((1 << Y) | 1) 1406 if (auto *C = dyn_cast<Constant>(Y)) { 1407 if (UsesRemoved >= 1) 1408 NewOr = 1409 ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One); 1410 } else { 1411 if (UsesRemoved >= 3) 1412 NewOr = Builder->CreateOr(Builder->CreateShl(One, Y, 1413 LShr->getName(), 1414 /*HasNUW=*/true), 1415 One, Or->getName()); 1416 } 1417 if (NewOr) { 1418 Value *NewAnd = Builder->CreateAnd(X, NewOr, LHSI->getName()); 1419 ICI.setOperand(0, NewAnd); 1420 return &ICI; 1421 } 1422 } 1423 } 1424 } 1425 } 1426 1427 // Replace ((X & AndCst) > RHSV) with ((X & AndCst) != 0), if any 1428 // bit set in (X & AndCst) will produce a result greater than RHSV. 1429 if (ICI.getPredicate() == ICmpInst::ICMP_UGT) { 1430 unsigned NTZ = AndCst->getValue().countTrailingZeros(); 1431 if ((NTZ < AndCst->getBitWidth()) && 1432 APInt::getOneBitSet(AndCst->getBitWidth(), NTZ).ugt(RHSV)) 1433 return new ICmpInst(ICmpInst::ICMP_NE, LHSI, 1434 Constant::getNullValue(RHS->getType())); 1435 } 1436 } 1437 1438 // Try to optimize things like "A[i]&42 == 0" to index computations. 1439 if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) { 1440 if (GetElementPtrInst *GEP = 1441 dyn_cast<GetElementPtrInst>(LI->getOperand(0))) 1442 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) 1443 if (GV->isConstant() && GV->hasDefinitiveInitializer() && 1444 !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) { 1445 ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1)); 1446 if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C)) 1447 return Res; 1448 } 1449 } 1450 1451 // X & -C == -C -> X > u ~C 1452 // X & -C != -C -> X <= u ~C 1453 // iff C is a power of 2 1454 if (ICI.isEquality() && RHS == LHSI->getOperand(1) && (-RHSV).isPowerOf2()) 1455 return new ICmpInst( 1456 ICI.getPredicate() == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_UGT 1457 : ICmpInst::ICMP_ULE, 1458 LHSI->getOperand(0), SubOne(RHS)); 1459 break; 1460 1461 case Instruction::Or: { 1462 if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse()) 1463 break; 1464 Value *P, *Q; 1465 if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { 1466 // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 1467 // -> and (icmp eq P, null), (icmp eq Q, null). 1468 Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P, 1469 Constant::getNullValue(P->getType())); 1470 Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q, 1471 Constant::getNullValue(Q->getType())); 1472 Instruction *Op; 1473 if (ICI.getPredicate() == ICmpInst::ICMP_EQ) 1474 Op = BinaryOperator::CreateAnd(ICIP, ICIQ); 1475 else 1476 Op = BinaryOperator::CreateOr(ICIP, ICIQ); 1477 return Op; 1478 } 1479 break; 1480 } 1481 1482 case Instruction::Mul: { // (icmp pred (mul X, Val), CI) 1483 ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1)); 1484 if (!Val) break; 1485 1486 // If this is a signed comparison to 0 and the mul is sign preserving, 1487 // use the mul LHS operand instead. 1488 ICmpInst::Predicate pred = ICI.getPredicate(); 1489 if (isSignTest(pred, RHS) && !Val->isZero() && 1490 cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) 1491 return new ICmpInst(Val->isNegative() ? 1492 ICmpInst::getSwappedPredicate(pred) : pred, 1493 LHSI->getOperand(0), 1494 Constant::getNullValue(RHS->getType())); 1495 1496 break; 1497 } 1498 1499 case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) 1500 uint32_t TypeBits = RHSV.getBitWidth(); 1501 ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); 1502 if (!ShAmt) { 1503 Value *X; 1504 // (1 << X) pred P2 -> X pred Log2(P2) 1505 if (match(LHSI, m_Shl(m_One(), m_Value(X)))) { 1506 bool RHSVIsPowerOf2 = RHSV.isPowerOf2(); 1507 ICmpInst::Predicate Pred = ICI.getPredicate(); 1508 if (ICI.isUnsigned()) { 1509 if (!RHSVIsPowerOf2) { 1510 // (1 << X) < 30 -> X <= 4 1511 // (1 << X) <= 30 -> X <= 4 1512 // (1 << X) >= 30 -> X > 4 1513 // (1 << X) > 30 -> X > 4 1514 if (Pred == ICmpInst::ICMP_ULT) 1515 Pred = ICmpInst::ICMP_ULE; 1516 else if (Pred == ICmpInst::ICMP_UGE) 1517 Pred = ICmpInst::ICMP_UGT; 1518 } 1519 unsigned RHSLog2 = RHSV.logBase2(); 1520 1521 // (1 << X) >= 2147483648 -> X >= 31 -> X == 31 1522 // (1 << X) < 2147483648 -> X < 31 -> X != 31 1523 if (RHSLog2 == TypeBits-1) { 1524 if (Pred == ICmpInst::ICMP_UGE) 1525 Pred = ICmpInst::ICMP_EQ; 1526 else if (Pred == ICmpInst::ICMP_ULT) 1527 Pred = ICmpInst::ICMP_NE; 1528 } 1529 1530 return new ICmpInst(Pred, X, 1531 ConstantInt::get(RHS->getType(), RHSLog2)); 1532 } else if (ICI.isSigned()) { 1533 if (RHSV.isAllOnesValue()) { 1534 // (1 << X) <= -1 -> X == 31 1535 if (Pred == ICmpInst::ICMP_SLE) 1536 return new ICmpInst(ICmpInst::ICMP_EQ, X, 1537 ConstantInt::get(RHS->getType(), TypeBits-1)); 1538 1539 // (1 << X) > -1 -> X != 31 1540 if (Pred == ICmpInst::ICMP_SGT) 1541 return new ICmpInst(ICmpInst::ICMP_NE, X, 1542 ConstantInt::get(RHS->getType(), TypeBits-1)); 1543 } else if (!RHSV) { 1544 // (1 << X) < 0 -> X == 31 1545 // (1 << X) <= 0 -> X == 31 1546 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) 1547 return new ICmpInst(ICmpInst::ICMP_EQ, X, 1548 ConstantInt::get(RHS->getType(), TypeBits-1)); 1549 1550 // (1 << X) >= 0 -> X != 31 1551 // (1 << X) > 0 -> X != 31 1552 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) 1553 return new ICmpInst(ICmpInst::ICMP_NE, X, 1554 ConstantInt::get(RHS->getType(), TypeBits-1)); 1555 } 1556 } else if (ICI.isEquality()) { 1557 if (RHSVIsPowerOf2) 1558 return new ICmpInst( 1559 Pred, X, ConstantInt::get(RHS->getType(), RHSV.logBase2())); 1560 } 1561 } 1562 break; 1563 } 1564 1565 // Check that the shift amount is in range. If not, don't perform 1566 // undefined shifts. When the shift is visited it will be 1567 // simplified. 1568 if (ShAmt->uge(TypeBits)) 1569 break; 1570 1571 if (ICI.isEquality()) { 1572 // If we are comparing against bits always shifted out, the 1573 // comparison cannot succeed. 1574 Constant *Comp = 1575 ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), 1576 ShAmt); 1577 if (Comp != RHS) {// Comparing against a bit that we know is zero. 1578 bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; 1579 Constant *Cst = Builder->getInt1(IsICMP_NE); 1580 return ReplaceInstUsesWith(ICI, Cst); 1581 } 1582 1583 // If the shift is NUW, then it is just shifting out zeros, no need for an 1584 // AND. 1585 if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap()) 1586 return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), 1587 ConstantExpr::getLShr(RHS, ShAmt)); 1588 1589 // If the shift is NSW and we compare to 0, then it is just shifting out 1590 // sign bits, no need for an AND either. 1591 if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0) 1592 return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), 1593 ConstantExpr::getLShr(RHS, ShAmt)); 1594 1595 if (LHSI->hasOneUse()) { 1596 // Otherwise strength reduce the shift into an and. 1597 uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); 1598 Constant *Mask = Builder->getInt(APInt::getLowBitsSet(TypeBits, 1599 TypeBits - ShAmtVal)); 1600 1601 Value *And = 1602 Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); 1603 return new ICmpInst(ICI.getPredicate(), And, 1604 ConstantExpr::getLShr(RHS, ShAmt)); 1605 } 1606 } 1607 1608 // If this is a signed comparison to 0 and the shift is sign preserving, 1609 // use the shift LHS operand instead. 1610 ICmpInst::Predicate pred = ICI.getPredicate(); 1611 if (isSignTest(pred, RHS) && 1612 cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) 1613 return new ICmpInst(pred, 1614 LHSI->getOperand(0), 1615 Constant::getNullValue(RHS->getType())); 1616 1617 // Otherwise, if this is a comparison of the sign bit, simplify to and/test. 1618 bool TrueIfSigned = false; 1619 if (LHSI->hasOneUse() && 1620 isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { 1621 // (X << 31) <s 0 --> (X&1) != 0 1622 Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(), 1623 APInt::getOneBitSet(TypeBits, 1624 TypeBits-ShAmt->getZExtValue()-1)); 1625 Value *And = 1626 Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); 1627 return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, 1628 And, Constant::getNullValue(And->getType())); 1629 } 1630 1631 // Transform (icmp pred iM (shl iM %v, N), CI) 1632 // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)) 1633 // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N. 1634 // This enables to get rid of the shift in favor of a trunc which can be 1635 // free on the target. It has the additional benefit of comparing to a 1636 // smaller constant, which will be target friendly. 1637 unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); 1638 if (LHSI->hasOneUse() && 1639 Amt != 0 && RHSV.countTrailingZeros() >= Amt) { 1640 Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); 1641 Constant *NCI = ConstantExpr::getTrunc( 1642 ConstantExpr::getAShr(RHS, 1643 ConstantInt::get(RHS->getType(), Amt)), 1644 NTy); 1645 return new ICmpInst(ICI.getPredicate(), 1646 Builder->CreateTrunc(LHSI->getOperand(0), NTy), 1647 NCI); 1648 } 1649 1650 break; 1651 } 1652 1653 case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) 1654 case Instruction::AShr: { 1655 // Handle equality comparisons of shift-by-constant. 1656 BinaryOperator *BO = cast<BinaryOperator>(LHSI); 1657 if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { 1658 if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt)) 1659 return Res; 1660 } 1661 1662 // Handle exact shr's. 1663 if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) { 1664 if (RHSV.isMinValue()) 1665 return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS); 1666 } 1667 break; 1668 } 1669 1670 case Instruction::SDiv: 1671 case Instruction::UDiv: 1672 // Fold: icmp pred ([us]div X, C1), C2 -> range test 1673 // Fold this div into the comparison, producing a range check. 1674 // Determine, based on the divide type, what the range is being 1675 // checked. If there is an overflow on the low or high side, remember 1676 // it, otherwise compute the range [low, hi) bounding the new value. 1677 // See: InsertRangeTest above for the kinds of replacements possible. 1678 if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1))) 1679 if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI), 1680 DivRHS)) 1681 return R; 1682 break; 1683 1684 case Instruction::Sub: { 1685 ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(0)); 1686 if (!LHSC) break; 1687 const APInt &LHSV = LHSC->getValue(); 1688 1689 // C1-X <u C2 -> (X|(C2-1)) == C1 1690 // iff C1 & (C2-1) == C2-1 1691 // C2 is a power of 2 1692 if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() && 1693 RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == (RHSV - 1)) 1694 return new ICmpInst(ICmpInst::ICMP_EQ, 1695 Builder->CreateOr(LHSI->getOperand(1), RHSV - 1), 1696 LHSC); 1697 1698 // C1-X >u C2 -> (X|C2) != C1 1699 // iff C1 & C2 == C2 1700 // C2+1 is a power of 2 1701 if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() && 1702 (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == RHSV) 1703 return new ICmpInst(ICmpInst::ICMP_NE, 1704 Builder->CreateOr(LHSI->getOperand(1), RHSV), LHSC); 1705 break; 1706 } 1707 1708 case Instruction::Add: 1709 // Fold: icmp pred (add X, C1), C2 1710 if (!ICI.isEquality()) { 1711 ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1)); 1712 if (!LHSC) break; 1713 const APInt &LHSV = LHSC->getValue(); 1714 1715 ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV) 1716 .subtract(LHSV); 1717 1718 if (ICI.isSigned()) { 1719 if (CR.getLower().isSignBit()) { 1720 return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0), 1721 Builder->getInt(CR.getUpper())); 1722 } else if (CR.getUpper().isSignBit()) { 1723 return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0), 1724 Builder->getInt(CR.getLower())); 1725 } 1726 } else { 1727 if (CR.getLower().isMinValue()) { 1728 return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), 1729 Builder->getInt(CR.getUpper())); 1730 } else if (CR.getUpper().isMinValue()) { 1731 return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), 1732 Builder->getInt(CR.getLower())); 1733 } 1734 } 1735 1736 // X-C1 <u C2 -> (X & -C2) == C1 1737 // iff C1 & (C2-1) == 0 1738 // C2 is a power of 2 1739 if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() && 1740 RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == 0) 1741 return new ICmpInst(ICmpInst::ICMP_EQ, 1742 Builder->CreateAnd(LHSI->getOperand(0), -RHSV), 1743 ConstantExpr::getNeg(LHSC)); 1744 1745 // X-C1 >u C2 -> (X & ~C2) != C1 1746 // iff C1 & C2 == 0 1747 // C2+1 is a power of 2 1748 if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() && 1749 (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == 0) 1750 return new ICmpInst(ICmpInst::ICMP_NE, 1751 Builder->CreateAnd(LHSI->getOperand(0), ~RHSV), 1752 ConstantExpr::getNeg(LHSC)); 1753 } 1754 break; 1755 } 1756 1757 // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. 1758 if (ICI.isEquality()) { 1759 bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; 1760 1761 // If the first operand is (add|sub|and|or|xor|rem) with a constant, and 1762 // the second operand is a constant, simplify a bit. 1763 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) { 1764 switch (BO->getOpcode()) { 1765 case Instruction::SRem: 1766 // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. 1767 if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){ 1768 const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue(); 1769 if (V.sgt(1) && V.isPowerOf2()) { 1770 Value *NewRem = 1771 Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), 1772 BO->getName()); 1773 return new ICmpInst(ICI.getPredicate(), NewRem, 1774 Constant::getNullValue(BO->getType())); 1775 } 1776 } 1777 break; 1778 case Instruction::Add: 1779 // Replace ((add A, B) != C) with (A != C-B) if B & C are constants. 1780 if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) { 1781 if (BO->hasOneUse()) 1782 return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), 1783 ConstantExpr::getSub(RHS, BOp1C)); 1784 } else if (RHSV == 0) { 1785 // Replace ((add A, B) != 0) with (A != -B) if A or B is 1786 // efficiently invertible, or if the add has just this one use. 1787 Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); 1788 1789 if (Value *NegVal = dyn_castNegVal(BOp1)) 1790 return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); 1791 if (Value *NegVal = dyn_castNegVal(BOp0)) 1792 return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); 1793 if (BO->hasOneUse()) { 1794 Value *Neg = Builder->CreateNeg(BOp1); 1795 Neg->takeName(BO); 1796 return new ICmpInst(ICI.getPredicate(), BOp0, Neg); 1797 } 1798 } 1799 break; 1800 case Instruction::Xor: 1801 // For the xor case, we can xor two constants together, eliminating 1802 // the explicit xor. 1803 if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { 1804 return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), 1805 ConstantExpr::getXor(RHS, BOC)); 1806 } else if (RHSV == 0) { 1807 // Replace ((xor A, B) != 0) with (A != B) 1808 return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), 1809 BO->getOperand(1)); 1810 } 1811 break; 1812 case Instruction::Sub: 1813 // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants. 1814 if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) { 1815 if (BO->hasOneUse()) 1816 return new ICmpInst(ICI.getPredicate(), BO->getOperand(1), 1817 ConstantExpr::getSub(BOp0C, RHS)); 1818 } else if (RHSV == 0) { 1819 // Replace ((sub A, B) != 0) with (A != B) 1820 return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), 1821 BO->getOperand(1)); 1822 } 1823 break; 1824 case Instruction::Or: 1825 // If bits are being or'd in that are not present in the constant we 1826 // are comparing against, then the comparison could never succeed! 1827 if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { 1828 Constant *NotCI = ConstantExpr::getNot(RHS); 1829 if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) 1830 return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE)); 1831 } 1832 break; 1833 1834 case Instruction::And: 1835 if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { 1836 // If bits are being compared against that are and'd out, then the 1837 // comparison can never succeed! 1838 if ((RHSV & ~BOC->getValue()) != 0) 1839 return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE)); 1840 1841 // If we have ((X & C) == C), turn it into ((X & C) != 0). 1842 if (RHS == BOC && RHSV.isPowerOf2()) 1843 return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : 1844 ICmpInst::ICMP_NE, LHSI, 1845 Constant::getNullValue(RHS->getType())); 1846 1847 // Don't perform the following transforms if the AND has multiple uses 1848 if (!BO->hasOneUse()) 1849 break; 1850 1851 // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 1852 if (BOC->getValue().isSignBit()) { 1853 Value *X = BO->getOperand(0); 1854 Constant *Zero = Constant::getNullValue(X->getType()); 1855 ICmpInst::Predicate pred = isICMP_NE ? 1856 ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; 1857 return new ICmpInst(pred, X, Zero); 1858 } 1859 1860 // ((X & ~7) == 0) --> X < 8 1861 if (RHSV == 0 && isHighOnes(BOC)) { 1862 Value *X = BO->getOperand(0); 1863 Constant *NegX = ConstantExpr::getNeg(BOC); 1864 ICmpInst::Predicate pred = isICMP_NE ? 1865 ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; 1866 return new ICmpInst(pred, X, NegX); 1867 } 1868 } 1869 break; 1870 case Instruction::Mul: 1871 if (RHSV == 0 && BO->hasNoSignedWrap()) { 1872 if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { 1873 // The trivial case (mul X, 0) is handled by InstSimplify 1874 // General case : (mul X, C) != 0 iff X != 0 1875 // (mul X, C) == 0 iff X == 0 1876 if (!BOC->isZero()) 1877 return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), 1878 Constant::getNullValue(RHS->getType())); 1879 } 1880 } 1881 break; 1882 default: break; 1883 } 1884 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) { 1885 // Handle icmp {eq|ne} <intrinsic>, intcst. 1886 switch (II->getIntrinsicID()) { 1887 case Intrinsic::bswap: 1888 Worklist.Add(II); 1889 ICI.setOperand(0, II->getArgOperand(0)); 1890 ICI.setOperand(1, Builder->getInt(RHSV.byteSwap())); 1891 return &ICI; 1892 case Intrinsic::ctlz: 1893 case Intrinsic::cttz: 1894 // ctz(A) == bitwidth(a) -> A == 0 and likewise for != 1895 if (RHSV == RHS->getType()->getBitWidth()) { 1896 Worklist.Add(II); 1897 ICI.setOperand(0, II->getArgOperand(0)); 1898 ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0)); 1899 return &ICI; 1900 } 1901 break; 1902 case Intrinsic::ctpop: 1903 // popcount(A) == 0 -> A == 0 and likewise for != 1904 if (RHS->isZero()) { 1905 Worklist.Add(II); 1906 ICI.setOperand(0, II->getArgOperand(0)); 1907 ICI.setOperand(1, RHS); 1908 return &ICI; 1909 } 1910 break; 1911 default: 1912 break; 1913 } 1914 } 1915 } 1916 return nullptr; 1917 } 1918 1919 /// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst). 1920 /// We only handle extending casts so far. 1921 /// 1922 Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { 1923 const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0)); 1924 Value *LHSCIOp = LHSCI->getOperand(0); 1925 Type *SrcTy = LHSCIOp->getType(); 1926 Type *DestTy = LHSCI->getType(); 1927 Value *RHSCIOp; 1928 1929 // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the 1930 // integer type is the same size as the pointer type. 1931 if (DL && LHSCI->getOpcode() == Instruction::PtrToInt && 1932 DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { 1933 Value *RHSOp = nullptr; 1934 if (PtrToIntOperator *RHSC = dyn_cast<PtrToIntOperator>(ICI.getOperand(1))) { 1935 Value *RHSCIOp = RHSC->getOperand(0); 1936 if (RHSCIOp->getType()->getPointerAddressSpace() == 1937 LHSCIOp->getType()->getPointerAddressSpace()) { 1938 RHSOp = RHSC->getOperand(0); 1939 // If the pointer types don't match, insert a bitcast. 1940 if (LHSCIOp->getType() != RHSOp->getType()) 1941 RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); 1942 } 1943 } else if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) 1944 RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); 1945 1946 if (RHSOp) 1947 return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp); 1948 } 1949 1950 // The code below only handles extension cast instructions, so far. 1951 // Enforce this. 1952 if (LHSCI->getOpcode() != Instruction::ZExt && 1953 LHSCI->getOpcode() != Instruction::SExt) 1954 return nullptr; 1955 1956 bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt; 1957 bool isSignedCmp = ICI.isSigned(); 1958 1959 if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) { 1960 // Not an extension from the same type? 1961 RHSCIOp = CI->getOperand(0); 1962 if (RHSCIOp->getType() != LHSCIOp->getType()) 1963 return nullptr; 1964 1965 // If the signedness of the two casts doesn't agree (i.e. one is a sext 1966 // and the other is a zext), then we can't handle this. 1967 if (CI->getOpcode() != LHSCI->getOpcode()) 1968 return nullptr; 1969 1970 // Deal with equality cases early. 1971 if (ICI.isEquality()) 1972 return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); 1973 1974 // A signed comparison of sign extended values simplifies into a 1975 // signed comparison. 1976 if (isSignedCmp && isSignedExt) 1977 return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); 1978 1979 // The other three cases all fold into an unsigned comparison. 1980 return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp); 1981 } 1982 1983 // If we aren't dealing with a constant on the RHS, exit early 1984 ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1)); 1985 if (!CI) 1986 return nullptr; 1987 1988 // Compute the constant that would happen if we truncated to SrcTy then 1989 // reextended to DestTy. 1990 Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy); 1991 Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), 1992 Res1, DestTy); 1993 1994 // If the re-extended constant didn't change... 1995 if (Res2 == CI) { 1996 // Deal with equality cases early. 1997 if (ICI.isEquality()) 1998 return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); 1999 2000 // A signed comparison of sign extended values simplifies into a 2001 // signed comparison. 2002 if (isSignedExt && isSignedCmp) 2003 return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); 2004 2005 // The other three cases all fold into an unsigned comparison. 2006 return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); 2007 } 2008 2009 // The re-extended constant changed so the constant cannot be represented 2010 // in the shorter type. Consequently, we cannot emit a simple comparison. 2011 // All the cases that fold to true or false will have already been handled 2012 // by SimplifyICmpInst, so only deal with the tricky case. 2013 2014 if (isSignedCmp || !isSignedExt) 2015 return nullptr; 2016 2017 // Evaluate the comparison for LT (we invert for GT below). LE and GE cases 2018 // should have been folded away previously and not enter in here. 2019 2020 // We're performing an unsigned comp with a sign extended value. 2021 // This is true if the input is >= 0. [aka >s -1] 2022 Constant *NegOne = Constant::getAllOnesValue(SrcTy); 2023 Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); 2024 2025 // Finally, return the value computed. 2026 if (ICI.getPredicate() == ICmpInst::ICMP_ULT) 2027 return ReplaceInstUsesWith(ICI, Result); 2028 2029 assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!"); 2030 return BinaryOperator::CreateNot(Result); 2031 } 2032 2033 /// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form: 2034 /// I = icmp ugt (add (add A, B), CI2), CI1 2035 /// If this is of the form: 2036 /// sum = a + b 2037 /// if (sum+128 >u 255) 2038 /// Then replace it with llvm.sadd.with.overflow.i8. 2039 /// 2040 static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, 2041 ConstantInt *CI2, ConstantInt *CI1, 2042 InstCombiner &IC) { 2043 // The transformation we're trying to do here is to transform this into an 2044 // llvm.sadd.with.overflow. To do this, we have to replace the original add 2045 // with a narrower add, and discard the add-with-constant that is part of the 2046 // range check (if we can't eliminate it, this isn't profitable). 2047 2048 // In order to eliminate the add-with-constant, the compare can be its only 2049 // use. 2050 Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); 2051 if (!AddWithCst->hasOneUse()) return nullptr; 2052 2053 // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. 2054 if (!CI2->getValue().isPowerOf2()) return nullptr; 2055 unsigned NewWidth = CI2->getValue().countTrailingZeros(); 2056 if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return nullptr; 2057 2058 // The width of the new add formed is 1 more than the bias. 2059 ++NewWidth; 2060 2061 // Check to see that CI1 is an all-ones value with NewWidth bits. 2062 if (CI1->getBitWidth() == NewWidth || 2063 CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) 2064 return nullptr; 2065 2066 // This is only really a signed overflow check if the inputs have been 2067 // sign-extended; check for that condition. For example, if CI2 is 2^31 and 2068 // the operands of the add are 64 bits wide, we need at least 33 sign bits. 2069 unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; 2070 if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits || 2071 IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits) 2072 return nullptr; 2073 2074 // In order to replace the original add with a narrower 2075 // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant 2076 // and truncates that discard the high bits of the add. Verify that this is 2077 // the case. 2078 Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0)); 2079 for (User *U : OrigAdd->users()) { 2080 if (U == AddWithCst) continue; 2081 2082 // Only accept truncates for now. We would really like a nice recursive 2083 // predicate like SimplifyDemandedBits, but which goes downwards the use-def 2084 // chain to see which bits of a value are actually demanded. If the 2085 // original add had another add which was then immediately truncated, we 2086 // could still do the transformation. 2087 TruncInst *TI = dyn_cast<TruncInst>(U); 2088 if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth) 2089 return nullptr; 2090 } 2091 2092 // If the pattern matches, truncate the inputs to the narrower type and 2093 // use the sadd_with_overflow intrinsic to efficiently compute both the 2094 // result and the overflow bit. 2095 Module *M = I.getParent()->getParent()->getParent(); 2096 2097 Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); 2098 Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, 2099 NewType); 2100 2101 InstCombiner::BuilderTy *Builder = IC.Builder; 2102 2103 // Put the new code above the original add, in case there are any uses of the 2104 // add between the add and the compare. 2105 Builder->SetInsertPoint(OrigAdd); 2106 2107 Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc"); 2108 Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc"); 2109 CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd"); 2110 Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); 2111 Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); 2112 2113 // The inner add was the result of the narrow add, zero extended to the 2114 // wider type. Replace it with the result computed by the intrinsic. 2115 IC.ReplaceInstUsesWith(*OrigAdd, ZExt); 2116 2117 // The original icmp gets replaced with the overflow value. 2118 return ExtractValueInst::Create(Call, 1, "sadd.overflow"); 2119 } 2120 2121 static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, 2122 InstCombiner &IC) { 2123 // Don't bother doing this transformation for pointers, don't do it for 2124 // vectors. 2125 if (!isa<IntegerType>(OrigAddV->getType())) return nullptr; 2126 2127 // If the add is a constant expr, then we don't bother transforming it. 2128 Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); 2129 if (!OrigAdd) return nullptr; 2130 2131 Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); 2132 2133 // Put the new code above the original add, in case there are any uses of the 2134 // add between the add and the compare. 2135 InstCombiner::BuilderTy *Builder = IC.Builder; 2136 Builder->SetInsertPoint(OrigAdd); 2137 2138 Module *M = I.getParent()->getParent()->getParent(); 2139 Type *Ty = LHS->getType(); 2140 Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); 2141 CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); 2142 Value *Add = Builder->CreateExtractValue(Call, 0); 2143 2144 IC.ReplaceInstUsesWith(*OrigAdd, Add); 2145 2146 // The original icmp gets replaced with the overflow value. 2147 return ExtractValueInst::Create(Call, 1, "uadd.overflow"); 2148 } 2149 2150 /// \brief Recognize and process idiom involving test for multiplication 2151 /// overflow. 2152 /// 2153 /// The caller has matched a pattern of the form: 2154 /// I = cmp u (mul(zext A, zext B), V 2155 /// The function checks if this is a test for overflow and if so replaces 2156 /// multiplication with call to 'mul.with.overflow' intrinsic. 2157 /// 2158 /// \param I Compare instruction. 2159 /// \param MulVal Result of 'mult' instruction. It is one of the arguments of 2160 /// the compare instruction. Must be of integer type. 2161 /// \param OtherVal The other argument of compare instruction. 2162 /// \returns Instruction which must replace the compare instruction, NULL if no 2163 /// replacement required. 2164 static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal, 2165 Value *OtherVal, InstCombiner &IC) { 2166 // Don't bother doing this transformation for pointers, don't do it for 2167 // vectors. 2168 if (!isa<IntegerType>(MulVal->getType())) 2169 return nullptr; 2170 2171 assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal); 2172 assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal); 2173 Instruction *MulInstr = cast<Instruction>(MulVal); 2174 assert(MulInstr->getOpcode() == Instruction::Mul); 2175 2176 auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)), 2177 *RHS = cast<ZExtOperator>(MulInstr->getOperand(1)); 2178 assert(LHS->getOpcode() == Instruction::ZExt); 2179 assert(RHS->getOpcode() == Instruction::ZExt); 2180 Value *A = LHS->getOperand(0), *B = RHS->getOperand(0); 2181 2182 // Calculate type and width of the result produced by mul.with.overflow. 2183 Type *TyA = A->getType(), *TyB = B->getType(); 2184 unsigned WidthA = TyA->getPrimitiveSizeInBits(), 2185 WidthB = TyB->getPrimitiveSizeInBits(); 2186 unsigned MulWidth; 2187 Type *MulType; 2188 if (WidthB > WidthA) { 2189 MulWidth = WidthB; 2190 MulType = TyB; 2191 } else { 2192 MulWidth = WidthA; 2193 MulType = TyA; 2194 } 2195 2196 // In order to replace the original mul with a narrower mul.with.overflow, 2197 // all uses must ignore upper bits of the product. The number of used low 2198 // bits must be not greater than the width of mul.with.overflow. 2199 if (MulVal->hasNUsesOrMore(2)) 2200 for (User *U : MulVal->users()) { 2201 if (U == &I) 2202 continue; 2203 if (TruncInst *TI = dyn_cast<TruncInst>(U)) { 2204 // Check if truncation ignores bits above MulWidth. 2205 unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits(); 2206 if (TruncWidth > MulWidth) 2207 return nullptr; 2208 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { 2209 // Check if AND ignores bits above MulWidth. 2210 if (BO->getOpcode() != Instruction::And) 2211 return nullptr; 2212 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { 2213 const APInt &CVal = CI->getValue(); 2214 if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth) 2215 return nullptr; 2216 } 2217 } else { 2218 // Other uses prohibit this transformation. 2219 return nullptr; 2220 } 2221 } 2222 2223 // Recognize patterns 2224 switch (I.getPredicate()) { 2225 case ICmpInst::ICMP_EQ: 2226 case ICmpInst::ICMP_NE: 2227 // Recognize pattern: 2228 // mulval = mul(zext A, zext B) 2229 // cmp eq/neq mulval, zext trunc mulval 2230 if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal)) 2231 if (Zext->hasOneUse()) { 2232 Value *ZextArg = Zext->getOperand(0); 2233 if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg)) 2234 if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth) 2235 break; //Recognized 2236 } 2237 2238 // Recognize pattern: 2239 // mulval = mul(zext A, zext B) 2240 // cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits. 2241 ConstantInt *CI; 2242 Value *ValToMask; 2243 if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) { 2244 if (ValToMask != MulVal) 2245 return nullptr; 2246 const APInt &CVal = CI->getValue() + 1; 2247 if (CVal.isPowerOf2()) { 2248 unsigned MaskWidth = CVal.logBase2(); 2249 if (MaskWidth == MulWidth) 2250 break; // Recognized 2251 } 2252 } 2253 return nullptr; 2254 2255 case ICmpInst::ICMP_UGT: 2256 // Recognize pattern: 2257 // mulval = mul(zext A, zext B) 2258 // cmp ugt mulval, max 2259 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { 2260 APInt MaxVal = APInt::getMaxValue(MulWidth); 2261 MaxVal = MaxVal.zext(CI->getBitWidth()); 2262 if (MaxVal.eq(CI->getValue())) 2263 break; // Recognized 2264 } 2265 return nullptr; 2266 2267 case ICmpInst::ICMP_UGE: 2268 // Recognize pattern: 2269 // mulval = mul(zext A, zext B) 2270 // cmp uge mulval, max+1 2271 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { 2272 APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); 2273 if (MaxVal.eq(CI->getValue())) 2274 break; // Recognized 2275 } 2276 return nullptr; 2277 2278 case ICmpInst::ICMP_ULE: 2279 // Recognize pattern: 2280 // mulval = mul(zext A, zext B) 2281 // cmp ule mulval, max 2282 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { 2283 APInt MaxVal = APInt::getMaxValue(MulWidth); 2284 MaxVal = MaxVal.zext(CI->getBitWidth()); 2285 if (MaxVal.eq(CI->getValue())) 2286 break; // Recognized 2287 } 2288 return nullptr; 2289 2290 case ICmpInst::ICMP_ULT: 2291 // Recognize pattern: 2292 // mulval = mul(zext A, zext B) 2293 // cmp ule mulval, max + 1 2294 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { 2295 APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); 2296 if (MaxVal.eq(CI->getValue())) 2297 break; // Recognized 2298 } 2299 return nullptr; 2300 2301 default: 2302 return nullptr; 2303 } 2304 2305 InstCombiner::BuilderTy *Builder = IC.Builder; 2306 Builder->SetInsertPoint(MulInstr); 2307 Module *M = I.getParent()->getParent()->getParent(); 2308 2309 // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B) 2310 Value *MulA = A, *MulB = B; 2311 if (WidthA < MulWidth) 2312 MulA = Builder->CreateZExt(A, MulType); 2313 if (WidthB < MulWidth) 2314 MulB = Builder->CreateZExt(B, MulType); 2315 Value *F = 2316 Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType); 2317 CallInst *Call = Builder->CreateCall2(F, MulA, MulB, "umul"); 2318 IC.Worklist.Add(MulInstr); 2319 2320 // If there are uses of mul result other than the comparison, we know that 2321 // they are truncation or binary AND. Change them to use result of 2322 // mul.with.overflow and adjust properly mask/size. 2323 if (MulVal->hasNUsesOrMore(2)) { 2324 Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value"); 2325 for (User *U : MulVal->users()) { 2326 if (U == &I || U == OtherVal) 2327 continue; 2328 if (TruncInst *TI = dyn_cast<TruncInst>(U)) { 2329 if (TI->getType()->getPrimitiveSizeInBits() == MulWidth) 2330 IC.ReplaceInstUsesWith(*TI, Mul); 2331 else 2332 TI->setOperand(0, Mul); 2333 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { 2334 assert(BO->getOpcode() == Instruction::And); 2335 // Replace (mul & mask) --> zext (mul.with.overflow & short_mask) 2336 ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); 2337 APInt ShortMask = CI->getValue().trunc(MulWidth); 2338 Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask); 2339 Instruction *Zext = 2340 cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType())); 2341 IC.Worklist.Add(Zext); 2342 IC.ReplaceInstUsesWith(*BO, Zext); 2343 } else { 2344 llvm_unreachable("Unexpected Binary operation"); 2345 } 2346 IC.Worklist.Add(cast<Instruction>(U)); 2347 } 2348 } 2349 if (isa<Instruction>(OtherVal)) 2350 IC.Worklist.Add(cast<Instruction>(OtherVal)); 2351 2352 // The original icmp gets replaced with the overflow value, maybe inverted 2353 // depending on predicate. 2354 bool Inverse = false; 2355 switch (I.getPredicate()) { 2356 case ICmpInst::ICMP_NE: 2357 break; 2358 case ICmpInst::ICMP_EQ: 2359 Inverse = true; 2360 break; 2361 case ICmpInst::ICMP_UGT: 2362 case ICmpInst::ICMP_UGE: 2363 if (I.getOperand(0) == MulVal) 2364 break; 2365 Inverse = true; 2366 break; 2367 case ICmpInst::ICMP_ULT: 2368 case ICmpInst::ICMP_ULE: 2369 if (I.getOperand(1) == MulVal) 2370 break; 2371 Inverse = true; 2372 break; 2373 default: 2374 llvm_unreachable("Unexpected predicate"); 2375 } 2376 if (Inverse) { 2377 Value *Res = Builder->CreateExtractValue(Call, 1); 2378 return BinaryOperator::CreateNot(Res); 2379 } 2380 2381 return ExtractValueInst::Create(Call, 1); 2382 } 2383 2384 // DemandedBitsLHSMask - When performing a comparison against a constant, 2385 // it is possible that not all the bits in the LHS are demanded. This helper 2386 // method computes the mask that IS demanded. 2387 static APInt DemandedBitsLHSMask(ICmpInst &I, 2388 unsigned BitWidth, bool isSignCheck) { 2389 if (isSignCheck) 2390 return APInt::getSignBit(BitWidth); 2391 2392 ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); 2393 if (!CI) return APInt::getAllOnesValue(BitWidth); 2394 const APInt &RHS = CI->getValue(); 2395 2396 switch (I.getPredicate()) { 2397 // For a UGT comparison, we don't care about any bits that 2398 // correspond to the trailing ones of the comparand. The value of these 2399 // bits doesn't impact the outcome of the comparison, because any value 2400 // greater than the RHS must differ in a bit higher than these due to carry. 2401 case ICmpInst::ICMP_UGT: { 2402 unsigned trailingOnes = RHS.countTrailingOnes(); 2403 APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes); 2404 return ~lowBitsSet; 2405 } 2406 2407 // Similarly, for a ULT comparison, we don't care about the trailing zeros. 2408 // Any value less than the RHS must differ in a higher bit because of carries. 2409 case ICmpInst::ICMP_ULT: { 2410 unsigned trailingZeros = RHS.countTrailingZeros(); 2411 APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros); 2412 return ~lowBitsSet; 2413 } 2414 2415 default: 2416 return APInt::getAllOnesValue(BitWidth); 2417 } 2418 2419 } 2420 2421 /// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst 2422 /// should be swapped. 2423 /// The decision is based on how many times these two operands are reused 2424 /// as subtract operands and their positions in those instructions. 2425 /// The rational is that several architectures use the same instruction for 2426 /// both subtract and cmp, thus it is better if the order of those operands 2427 /// match. 2428 /// \return true if Op0 and Op1 should be swapped. 2429 static bool swapMayExposeCSEOpportunities(const Value * Op0, 2430 const Value * Op1) { 2431 // Filter out pointer value as those cannot appears directly in subtract. 2432 // FIXME: we may want to go through inttoptrs or bitcasts. 2433 if (Op0->getType()->isPointerTy()) 2434 return false; 2435 // Count every uses of both Op0 and Op1 in a subtract. 2436 // Each time Op0 is the first operand, count -1: swapping is bad, the 2437 // subtract has already the same layout as the compare. 2438 // Each time Op0 is the second operand, count +1: swapping is good, the 2439 // subtract has a different layout as the compare. 2440 // At the end, if the benefit is greater than 0, Op0 should come second to 2441 // expose more CSE opportunities. 2442 int GlobalSwapBenefits = 0; 2443 for (const User *U : Op0->users()) { 2444 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(U); 2445 if (!BinOp || BinOp->getOpcode() != Instruction::Sub) 2446 continue; 2447 // If Op0 is the first argument, this is not beneficial to swap the 2448 // arguments. 2449 int LocalSwapBenefits = -1; 2450 unsigned Op1Idx = 1; 2451 if (BinOp->getOperand(Op1Idx) == Op0) { 2452 Op1Idx = 0; 2453 LocalSwapBenefits = 1; 2454 } 2455 if (BinOp->getOperand(Op1Idx) != Op1) 2456 continue; 2457 GlobalSwapBenefits += LocalSwapBenefits; 2458 } 2459 return GlobalSwapBenefits > 0; 2460 } 2461 2462 /// \brief Check that one use is in the same block as the definition and all 2463 /// other uses are in blocks dominated by a given block 2464 /// 2465 /// \param DI Definition 2466 /// \param UI Use 2467 /// \param DB Block that must dominate all uses of \p DI outside 2468 /// the parent block 2469 /// \return true when \p UI is the only use of \p DI in the parent block 2470 /// and all other uses of \p DI are in blocks dominated by \p DB. 2471 /// 2472 bool InstCombiner::dominatesAllUses(const Instruction *DI, 2473 const Instruction *UI, 2474 const BasicBlock *DB) const { 2475 assert(DI && UI && "Instruction not defined\n"); 2476 // ignore incomplete definitions 2477 if (!DI->getParent()) 2478 return false; 2479 // DI and UI must be in the same block 2480 if (DI->getParent() != UI->getParent()) 2481 return false; 2482 // Protect from self-referencing blocks 2483 if (DI->getParent() == DB) 2484 return false; 2485 // DominatorTree available? 2486 if (!DT) 2487 return false; 2488 for (const User *U : DI->users()) { 2489 auto *Usr = cast<Instruction>(U); 2490 if (Usr != UI && !DT->dominates(DB, Usr->getParent())) 2491 return false; 2492 } 2493 return true; 2494 } 2495 2496 /// 2497 /// true when the instruction sequence within a block is select-cmp-br. 2498 /// 2499 static bool isChainSelectCmpBranch(const SelectInst *SI) { 2500 const BasicBlock *BB = SI->getParent(); 2501 if (!BB) 2502 return false; 2503 auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator()); 2504 if (!BI || BI->getNumSuccessors() != 2) 2505 return false; 2506 auto *IC = dyn_cast<ICmpInst>(BI->getCondition()); 2507 if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI)) 2508 return false; 2509 return true; 2510 } 2511 2512 /// 2513 /// \brief True when a select result is replaced by one of its operands 2514 /// in select-icmp sequence. This will eventually result in the elimination 2515 /// of the select. 2516 /// 2517 /// \param SI Select instruction 2518 /// \param Icmp Compare instruction 2519 /// \param SIOpd Operand that replaces the select 2520 /// 2521 /// Notes: 2522 /// - The replacement is global and requires dominator information 2523 /// - The caller is responsible for the actual replacement 2524 /// 2525 /// Example: 2526 /// 2527 /// entry: 2528 /// %4 = select i1 %3, %C* %0, %C* null 2529 /// %5 = icmp eq %C* %4, null 2530 /// br i1 %5, label %9, label %7 2531 /// ... 2532 /// ; <label>:7 ; preds = %entry 2533 /// %8 = getelementptr inbounds %C* %4, i64 0, i32 0 2534 /// ... 2535 /// 2536 /// can be transformed to 2537 /// 2538 /// %5 = icmp eq %C* %0, null 2539 /// %6 = select i1 %3, i1 %5, i1 true 2540 /// br i1 %6, label %9, label %7 2541 /// ... 2542 /// ; <label>:7 ; preds = %entry 2543 /// %8 = getelementptr inbounds %C* %0, i64 0, i32 0 // replace by %0! 2544 /// 2545 /// Similar when the first operand of the select is a constant or/and 2546 /// the compare is for not equal rather than equal. 2547 /// 2548 /// NOTE: The function is only called when the select and compare constants 2549 /// are equal, the optimization can work only for EQ predicates. This is not a 2550 /// major restriction since a NE compare should be 'normalized' to an equal 2551 /// compare, which usually happens in the combiner and test case 2552 /// select-cmp-br.ll 2553 /// checks for it. 2554 bool InstCombiner::replacedSelectWithOperand(SelectInst *SI, 2555 const ICmpInst *Icmp, 2556 const unsigned SIOpd) { 2557 assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!"); 2558 if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) { 2559 BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1); 2560 // The check for the unique predecessor is not the best that can be 2561 // done. But it protects efficiently against cases like when SI's 2562 // home block has two successors, Succ and Succ1, and Succ1 predecessor 2563 // of Succ. Then SI can't be replaced by SIOpd because the use that gets 2564 // replaced can be reached on either path. So the uniqueness check 2565 // guarantees that the path all uses of SI (outside SI's parent) are on 2566 // is disjoint from all other paths out of SI. But that information 2567 // is more expensive to compute, and the trade-off here is in favor 2568 // of compile-time. 2569 if (Succ->getUniquePredecessor() && dominatesAllUses(SI, Icmp, Succ)) { 2570 NumSel++; 2571 SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent()); 2572 return true; 2573 } 2574 } 2575 return false; 2576 } 2577 2578 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { 2579 bool Changed = false; 2580 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 2581 unsigned Op0Cplxity = getComplexity(Op0); 2582 unsigned Op1Cplxity = getComplexity(Op1); 2583 2584 /// Orders the operands of the compare so that they are listed from most 2585 /// complex to least complex. This puts constants before unary operators, 2586 /// before binary operators. 2587 if (Op0Cplxity < Op1Cplxity || 2588 (Op0Cplxity == Op1Cplxity && 2589 swapMayExposeCSEOpportunities(Op0, Op1))) { 2590 I.swapOperands(); 2591 std::swap(Op0, Op1); 2592 Changed = true; 2593 } 2594 2595 if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC)) 2596 return ReplaceInstUsesWith(I, V); 2597 2598 // comparing -val or val with non-zero is the same as just comparing val 2599 // ie, abs(val) != 0 -> val != 0 2600 if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) 2601 { 2602 Value *Cond, *SelectTrue, *SelectFalse; 2603 if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue), 2604 m_Value(SelectFalse)))) { 2605 if (Value *V = dyn_castNegVal(SelectTrue)) { 2606 if (V == SelectFalse) 2607 return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); 2608 } 2609 else if (Value *V = dyn_castNegVal(SelectFalse)) { 2610 if (V == SelectTrue) 2611 return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); 2612 } 2613 } 2614 } 2615 2616 Type *Ty = Op0->getType(); 2617 2618 // icmp's with boolean values can always be turned into bitwise operations 2619 if (Ty->isIntegerTy(1)) { 2620 switch (I.getPredicate()) { 2621 default: llvm_unreachable("Invalid icmp instruction!"); 2622 case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) 2623 Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp"); 2624 return BinaryOperator::CreateNot(Xor); 2625 } 2626 case ICmpInst::ICMP_NE: // icmp eq i1 A, B -> A^B 2627 return BinaryOperator::CreateXor(Op0, Op1); 2628 2629 case ICmpInst::ICMP_UGT: 2630 std::swap(Op0, Op1); // Change icmp ugt -> icmp ult 2631 // FALL THROUGH 2632 case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B 2633 Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); 2634 return BinaryOperator::CreateAnd(Not, Op1); 2635 } 2636 case ICmpInst::ICMP_SGT: 2637 std::swap(Op0, Op1); // Change icmp sgt -> icmp slt 2638 // FALL THROUGH 2639 case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B 2640 Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); 2641 return BinaryOperator::CreateAnd(Not, Op0); 2642 } 2643 case ICmpInst::ICMP_UGE: 2644 std::swap(Op0, Op1); // Change icmp uge -> icmp ule 2645 // FALL THROUGH 2646 case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B 2647 Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); 2648 return BinaryOperator::CreateOr(Not, Op1); 2649 } 2650 case ICmpInst::ICMP_SGE: 2651 std::swap(Op0, Op1); // Change icmp sge -> icmp sle 2652 // FALL THROUGH 2653 case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B 2654 Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); 2655 return BinaryOperator::CreateOr(Not, Op0); 2656 } 2657 } 2658 } 2659 2660 unsigned BitWidth = 0; 2661 if (Ty->isIntOrIntVectorTy()) 2662 BitWidth = Ty->getScalarSizeInBits(); 2663 else if (DL) // Pointers require DL info to get their size. 2664 BitWidth = DL->getTypeSizeInBits(Ty->getScalarType()); 2665 2666 bool isSignBit = false; 2667 2668 // See if we are doing a comparison with a constant. 2669 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { 2670 Value *A = nullptr, *B = nullptr; 2671 2672 // Match the following pattern, which is a common idiom when writing 2673 // overflow-safe integer arithmetic function. The source performs an 2674 // addition in wider type, and explicitly checks for overflow using 2675 // comparisons against INT_MIN and INT_MAX. Simplify this by using the 2676 // sadd_with_overflow intrinsic. 2677 // 2678 // TODO: This could probably be generalized to handle other overflow-safe 2679 // operations if we worked out the formulas to compute the appropriate 2680 // magic constants. 2681 // 2682 // sum = a + b 2683 // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 2684 { 2685 ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI 2686 if (I.getPredicate() == ICmpInst::ICMP_UGT && 2687 match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) 2688 if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) 2689 return Res; 2690 } 2691 2692 // The following transforms are only 'worth it' if the only user of the 2693 // subtraction is the icmp. 2694 if (Op0->hasOneUse()) { 2695 // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) 2696 if (I.isEquality() && CI->isZero() && 2697 match(Op0, m_Sub(m_Value(A), m_Value(B)))) 2698 return new ICmpInst(I.getPredicate(), A, B); 2699 2700 // (icmp sgt (sub nsw A B), -1) -> (icmp sge A, B) 2701 if (I.getPredicate() == ICmpInst::ICMP_SGT && CI->isAllOnesValue() && 2702 match(Op0, m_NSWSub(m_Value(A), m_Value(B)))) 2703 return new ICmpInst(ICmpInst::ICMP_SGE, A, B); 2704 2705 // (icmp sgt (sub nsw A B), 0) -> (icmp sgt A, B) 2706 if (I.getPredicate() == ICmpInst::ICMP_SGT && CI->isZero() && 2707 match(Op0, m_NSWSub(m_Value(A), m_Value(B)))) 2708 return new ICmpInst(ICmpInst::ICMP_SGT, A, B); 2709 2710 // (icmp slt (sub nsw A B), 0) -> (icmp slt A, B) 2711 if (I.getPredicate() == ICmpInst::ICMP_SLT && CI->isZero() && 2712 match(Op0, m_NSWSub(m_Value(A), m_Value(B)))) 2713 return new ICmpInst(ICmpInst::ICMP_SLT, A, B); 2714 2715 // (icmp slt (sub nsw A B), 1) -> (icmp sle A, B) 2716 if (I.getPredicate() == ICmpInst::ICMP_SLT && CI->isOne() && 2717 match(Op0, m_NSWSub(m_Value(A), m_Value(B)))) 2718 return new ICmpInst(ICmpInst::ICMP_SLE, A, B); 2719 } 2720 2721 // If we have an icmp le or icmp ge instruction, turn it into the 2722 // appropriate icmp lt or icmp gt instruction. This allows us to rely on 2723 // them being folded in the code below. The SimplifyICmpInst code has 2724 // already handled the edge cases for us, so we just assert on them. 2725 switch (I.getPredicate()) { 2726 default: break; 2727 case ICmpInst::ICMP_ULE: 2728 assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE 2729 return new ICmpInst(ICmpInst::ICMP_ULT, Op0, 2730 Builder->getInt(CI->getValue()+1)); 2731 case ICmpInst::ICMP_SLE: 2732 assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE 2733 return new ICmpInst(ICmpInst::ICMP_SLT, Op0, 2734 Builder->getInt(CI->getValue()+1)); 2735 case ICmpInst::ICMP_UGE: 2736 assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE 2737 return new ICmpInst(ICmpInst::ICMP_UGT, Op0, 2738 Builder->getInt(CI->getValue()-1)); 2739 case ICmpInst::ICMP_SGE: 2740 assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE 2741 return new ICmpInst(ICmpInst::ICMP_SGT, Op0, 2742 Builder->getInt(CI->getValue()-1)); 2743 } 2744 2745 if (I.isEquality()) { 2746 ConstantInt *CI2; 2747 if (match(Op0, m_AShr(m_ConstantInt(CI2), m_Value(A))) || 2748 match(Op0, m_LShr(m_ConstantInt(CI2), m_Value(A)))) { 2749 // (icmp eq/ne (ashr/lshr const2, A), const1) 2750 if (Instruction *Inst = FoldICmpCstShrCst(I, Op0, A, CI, CI2)) 2751 return Inst; 2752 } 2753 if (match(Op0, m_Shl(m_ConstantInt(CI2), m_Value(A)))) { 2754 // (icmp eq/ne (shl const2, A), const1) 2755 if (Instruction *Inst = FoldICmpCstShlCst(I, Op0, A, CI, CI2)) 2756 return Inst; 2757 } 2758 } 2759 2760 // If this comparison is a normal comparison, it demands all 2761 // bits, if it is a sign bit comparison, it only demands the sign bit. 2762 bool UnusedBit; 2763 isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit); 2764 } 2765 2766 // See if we can fold the comparison based on range information we can get 2767 // by checking whether bits are known to be zero or one in the input. 2768 if (BitWidth != 0) { 2769 APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0); 2770 APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); 2771 2772 if (SimplifyDemandedBits(I.getOperandUse(0), 2773 DemandedBitsLHSMask(I, BitWidth, isSignBit), 2774 Op0KnownZero, Op0KnownOne, 0)) 2775 return &I; 2776 if (SimplifyDemandedBits(I.getOperandUse(1), 2777 APInt::getAllOnesValue(BitWidth), 2778 Op1KnownZero, Op1KnownOne, 0)) 2779 return &I; 2780 2781 // Given the known and unknown bits, compute a range that the LHS could be 2782 // in. Compute the Min, Max and RHS values based on the known bits. For the 2783 // EQ and NE we use unsigned values. 2784 APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); 2785 APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); 2786 if (I.isSigned()) { 2787 ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, 2788 Op0Min, Op0Max); 2789 ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, 2790 Op1Min, Op1Max); 2791 } else { 2792 ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, 2793 Op0Min, Op0Max); 2794 ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, 2795 Op1Min, Op1Max); 2796 } 2797 2798 // If Min and Max are known to be the same, then SimplifyDemandedBits 2799 // figured out that the LHS is a constant. Just constant fold this now so 2800 // that code below can assume that Min != Max. 2801 if (!isa<Constant>(Op0) && Op0Min == Op0Max) 2802 return new ICmpInst(I.getPredicate(), 2803 ConstantInt::get(Op0->getType(), Op0Min), Op1); 2804 if (!isa<Constant>(Op1) && Op1Min == Op1Max) 2805 return new ICmpInst(I.getPredicate(), Op0, 2806 ConstantInt::get(Op1->getType(), Op1Min)); 2807 2808 // Based on the range information we know about the LHS, see if we can 2809 // simplify this comparison. For example, (x&4) < 8 is always true. 2810 switch (I.getPredicate()) { 2811 default: llvm_unreachable("Unknown icmp opcode!"); 2812 case ICmpInst::ICMP_EQ: { 2813 if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) 2814 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); 2815 2816 // If all bits are known zero except for one, then we know at most one 2817 // bit is set. If the comparison is against zero, then this is a check 2818 // to see if *that* bit is set. 2819 APInt Op0KnownZeroInverted = ~Op0KnownZero; 2820 if (~Op1KnownZero == 0) { 2821 // If the LHS is an AND with the same constant, look through it. 2822 Value *LHS = nullptr; 2823 ConstantInt *LHSC = nullptr; 2824 if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || 2825 LHSC->getValue() != Op0KnownZeroInverted) 2826 LHS = Op0; 2827 2828 // If the LHS is 1 << x, and we know the result is a power of 2 like 8, 2829 // then turn "((1 << x)&8) == 0" into "x != 3". 2830 // or turn "((1 << x)&7) == 0" into "x > 2". 2831 Value *X = nullptr; 2832 if (match(LHS, m_Shl(m_One(), m_Value(X)))) { 2833 APInt ValToCheck = Op0KnownZeroInverted; 2834 if (ValToCheck.isPowerOf2()) { 2835 unsigned CmpVal = ValToCheck.countTrailingZeros(); 2836 return new ICmpInst(ICmpInst::ICMP_NE, X, 2837 ConstantInt::get(X->getType(), CmpVal)); 2838 } else if ((++ValToCheck).isPowerOf2()) { 2839 unsigned CmpVal = ValToCheck.countTrailingZeros() - 1; 2840 return new ICmpInst(ICmpInst::ICMP_UGT, X, 2841 ConstantInt::get(X->getType(), CmpVal)); 2842 } 2843 } 2844 2845 // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, 2846 // then turn "((8 >>u x)&1) == 0" into "x != 3". 2847 const APInt *CI; 2848 if (Op0KnownZeroInverted == 1 && 2849 match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) 2850 return new ICmpInst(ICmpInst::ICMP_NE, X, 2851 ConstantInt::get(X->getType(), 2852 CI->countTrailingZeros())); 2853 } 2854 2855 break; 2856 } 2857 case ICmpInst::ICMP_NE: { 2858 if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) 2859 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); 2860 2861 // If all bits are known zero except for one, then we know at most one 2862 // bit is set. If the comparison is against zero, then this is a check 2863 // to see if *that* bit is set. 2864 APInt Op0KnownZeroInverted = ~Op0KnownZero; 2865 if (~Op1KnownZero == 0) { 2866 // If the LHS is an AND with the same constant, look through it. 2867 Value *LHS = nullptr; 2868 ConstantInt *LHSC = nullptr; 2869 if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || 2870 LHSC->getValue() != Op0KnownZeroInverted) 2871 LHS = Op0; 2872 2873 // If the LHS is 1 << x, and we know the result is a power of 2 like 8, 2874 // then turn "((1 << x)&8) != 0" into "x == 3". 2875 // or turn "((1 << x)&7) != 0" into "x < 3". 2876 Value *X = nullptr; 2877 if (match(LHS, m_Shl(m_One(), m_Value(X)))) { 2878 APInt ValToCheck = Op0KnownZeroInverted; 2879 if (ValToCheck.isPowerOf2()) { 2880 unsigned CmpVal = ValToCheck.countTrailingZeros(); 2881 return new ICmpInst(ICmpInst::ICMP_EQ, X, 2882 ConstantInt::get(X->getType(), CmpVal)); 2883 } else if ((++ValToCheck).isPowerOf2()) { 2884 unsigned CmpVal = ValToCheck.countTrailingZeros(); 2885 return new ICmpInst(ICmpInst::ICMP_ULT, X, 2886 ConstantInt::get(X->getType(), CmpVal)); 2887 } 2888 } 2889 2890 // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, 2891 // then turn "((8 >>u x)&1) != 0" into "x == 3". 2892 const APInt *CI; 2893 if (Op0KnownZeroInverted == 1 && 2894 match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) 2895 return new ICmpInst(ICmpInst::ICMP_EQ, X, 2896 ConstantInt::get(X->getType(), 2897 CI->countTrailingZeros())); 2898 } 2899 2900 break; 2901 } 2902 case ICmpInst::ICMP_ULT: 2903 if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) 2904 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); 2905 if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) 2906 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); 2907 if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) 2908 return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); 2909 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { 2910 if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C 2911 return new ICmpInst(ICmpInst::ICMP_EQ, Op0, 2912 Builder->getInt(CI->getValue()-1)); 2913 2914 // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear 2915 if (CI->isMinValue(true)) 2916 return new ICmpInst(ICmpInst::ICMP_SGT, Op0, 2917 Constant::getAllOnesValue(Op0->getType())); 2918 } 2919 break; 2920 case ICmpInst::ICMP_UGT: 2921 if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) 2922 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); 2923 if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) 2924 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); 2925 2926 if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) 2927 return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); 2928 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { 2929 if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C 2930 return new ICmpInst(ICmpInst::ICMP_EQ, Op0, 2931 Builder->getInt(CI->getValue()+1)); 2932 2933 // (x >u 2147483647) -> (x <s 0) -> true if sign bit set 2934 if (CI->isMaxValue(true)) 2935 return new ICmpInst(ICmpInst::ICMP_SLT, Op0, 2936 Constant::getNullValue(Op0->getType())); 2937 } 2938 break; 2939 case ICmpInst::ICMP_SLT: 2940 if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C) 2941 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); 2942 if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) 2943 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); 2944 if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) 2945 return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); 2946 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { 2947 if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C 2948 return new ICmpInst(ICmpInst::ICMP_EQ, Op0, 2949 Builder->getInt(CI->getValue()-1)); 2950 } 2951 break; 2952 case ICmpInst::ICMP_SGT: 2953 if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) 2954 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); 2955 if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) 2956 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); 2957 2958 if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) 2959 return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); 2960 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { 2961 if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C 2962 return new ICmpInst(ICmpInst::ICMP_EQ, Op0, 2963 Builder->getInt(CI->getValue()+1)); 2964 } 2965 break; 2966 case ICmpInst::ICMP_SGE: 2967 assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!"); 2968 if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) 2969 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); 2970 if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) 2971 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); 2972 break; 2973 case ICmpInst::ICMP_SLE: 2974 assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); 2975 if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) 2976 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); 2977 if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) 2978 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); 2979 break; 2980 case ICmpInst::ICMP_UGE: 2981 assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!"); 2982 if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) 2983 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); 2984 if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) 2985 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); 2986 break; 2987 case ICmpInst::ICMP_ULE: 2988 assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!"); 2989 if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) 2990 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); 2991 if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) 2992 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); 2993 break; 2994 } 2995 2996 // Turn a signed comparison into an unsigned one if both operands 2997 // are known to have the same sign. 2998 if (I.isSigned() && 2999 ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) || 3000 (Op0KnownOne.isNegative() && Op1KnownOne.isNegative()))) 3001 return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); 3002 } 3003 3004 // Test if the ICmpInst instruction is used exclusively by a select as 3005 // part of a minimum or maximum operation. If so, refrain from doing 3006 // any other folding. This helps out other analyses which understand 3007 // non-obfuscated minimum and maximum idioms, such as ScalarEvolution 3008 // and CodeGen. And in this case, at least one of the comparison 3009 // operands has at least one user besides the compare (the select), 3010 // which would often largely negate the benefit of folding anyway. 3011 if (I.hasOneUse()) 3012 if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin())) 3013 if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || 3014 (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) 3015 return nullptr; 3016 3017 // See if we are doing a comparison between a constant and an instruction that 3018 // can be folded into the comparison. 3019 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { 3020 // Since the RHS is a ConstantInt (CI), if the left hand side is an 3021 // instruction, see if that instruction also has constants so that the 3022 // instruction can be folded into the icmp 3023 if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) 3024 if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) 3025 return Res; 3026 } 3027 3028 // Handle icmp with constant (but not simple integer constant) RHS 3029 if (Constant *RHSC = dyn_cast<Constant>(Op1)) { 3030 if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) 3031 switch (LHSI->getOpcode()) { 3032 case Instruction::GetElementPtr: 3033 // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null 3034 if (RHSC->isNullValue() && 3035 cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices()) 3036 return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), 3037 Constant::getNullValue(LHSI->getOperand(0)->getType())); 3038 break; 3039 case Instruction::PHI: 3040 // Only fold icmp into the PHI if the phi and icmp are in the same 3041 // block. If in the same block, we're encouraging jump threading. If 3042 // not, we are just pessimizing the code by making an i1 phi. 3043 if (LHSI->getParent() == I.getParent()) 3044 if (Instruction *NV = FoldOpIntoPhi(I)) 3045 return NV; 3046 break; 3047 case Instruction::Select: { 3048 // If either operand of the select is a constant, we can fold the 3049 // comparison into the select arms, which will cause one to be 3050 // constant folded and the select turned into a bitwise or. 3051 Value *Op1 = nullptr, *Op2 = nullptr; 3052 ConstantInt *CI = 0; 3053 if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) { 3054 Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); 3055 CI = dyn_cast<ConstantInt>(Op1); 3056 } 3057 if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) { 3058 Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); 3059 CI = dyn_cast<ConstantInt>(Op2); 3060 } 3061 3062 // We only want to perform this transformation if it will not lead to 3063 // additional code. This is true if either both sides of the select 3064 // fold to a constant (in which case the icmp is replaced with a select 3065 // which will usually simplify) or this is the only user of the 3066 // select (in which case we are trading a select+icmp for a simpler 3067 // select+icmp) or all uses of the select can be replaced based on 3068 // dominance information ("Global cases"). 3069 bool Transform = false; 3070 if (Op1 && Op2) 3071 Transform = true; 3072 else if (Op1 || Op2) { 3073 // Local case 3074 if (LHSI->hasOneUse()) 3075 Transform = true; 3076 // Global cases 3077 else if (CI && !CI->isZero()) 3078 // When Op1 is constant try replacing select with second operand. 3079 // Otherwise Op2 is constant and try replacing select with first 3080 // operand. 3081 Transform = replacedSelectWithOperand(cast<SelectInst>(LHSI), &I, 3082 Op1 ? 2 : 1); 3083 } 3084 if (Transform) { 3085 if (!Op1) 3086 Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), 3087 RHSC, I.getName()); 3088 if (!Op2) 3089 Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), 3090 RHSC, I.getName()); 3091 return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); 3092 } 3093 break; 3094 } 3095 case Instruction::IntToPtr: 3096 // icmp pred inttoptr(X), null -> icmp pred X, 0 3097 if (RHSC->isNullValue() && DL && 3098 DL->getIntPtrType(RHSC->getType()) == 3099 LHSI->getOperand(0)->getType()) 3100 return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), 3101 Constant::getNullValue(LHSI->getOperand(0)->getType())); 3102 break; 3103 3104 case Instruction::Load: 3105 // Try to optimize things like "A[i] > 4" to index computations. 3106 if (GetElementPtrInst *GEP = 3107 dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { 3108 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) 3109 if (GV->isConstant() && GV->hasDefinitiveInitializer() && 3110 !cast<LoadInst>(LHSI)->isVolatile()) 3111 if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) 3112 return Res; 3113 } 3114 break; 3115 } 3116 } 3117 3118 // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. 3119 if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) 3120 if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I)) 3121 return NI; 3122 if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) 3123 if (Instruction *NI = FoldGEPICmp(GEP, Op0, 3124 ICmpInst::getSwappedPredicate(I.getPredicate()), I)) 3125 return NI; 3126 3127 // Test to see if the operands of the icmp are casted versions of other 3128 // values. If the ptr->ptr cast can be stripped off both arguments, we do so 3129 // now. 3130 if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) { 3131 if (Op0->getType()->isPointerTy() && 3132 (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { 3133 // We keep moving the cast from the left operand over to the right 3134 // operand, where it can often be eliminated completely. 3135 Op0 = CI->getOperand(0); 3136 3137 // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast 3138 // so eliminate it as well. 3139 if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1)) 3140 Op1 = CI2->getOperand(0); 3141 3142 // If Op1 is a constant, we can fold the cast into the constant. 3143 if (Op0->getType() != Op1->getType()) { 3144 if (Constant *Op1C = dyn_cast<Constant>(Op1)) { 3145 Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); 3146 } else { 3147 // Otherwise, cast the RHS right before the icmp 3148 Op1 = Builder->CreateBitCast(Op1, Op0->getType()); 3149 } 3150 } 3151 return new ICmpInst(I.getPredicate(), Op0, Op1); 3152 } 3153 } 3154 3155 if (isa<CastInst>(Op0)) { 3156 // Handle the special case of: icmp (cast bool to X), <cst> 3157 // This comes up when you have code like 3158 // int X = A < B; 3159 // if (X) ... 3160 // For generality, we handle any zero-extension of any operand comparison 3161 // with a constant or another cast from the same type. 3162 if (isa<Constant>(Op1) || isa<CastInst>(Op1)) 3163 if (Instruction *R = visitICmpInstWithCastAndCast(I)) 3164 return R; 3165 } 3166 3167 // Special logic for binary operators. 3168 BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0); 3169 BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1); 3170 if (BO0 || BO1) { 3171 CmpInst::Predicate Pred = I.getPredicate(); 3172 bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; 3173 if (BO0 && isa<OverflowingBinaryOperator>(BO0)) 3174 NoOp0WrapProblem = ICmpInst::isEquality(Pred) || 3175 (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) || 3176 (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap()); 3177 if (BO1 && isa<OverflowingBinaryOperator>(BO1)) 3178 NoOp1WrapProblem = ICmpInst::isEquality(Pred) || 3179 (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) || 3180 (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); 3181 3182 // Analyze the case when either Op0 or Op1 is an add instruction. 3183 // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). 3184 Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; 3185 if (BO0 && BO0->getOpcode() == Instruction::Add) 3186 A = BO0->getOperand(0), B = BO0->getOperand(1); 3187 if (BO1 && BO1->getOpcode() == Instruction::Add) 3188 C = BO1->getOperand(0), D = BO1->getOperand(1); 3189 3190 // icmp (X+cst) < 0 --> X < -cst 3191 if (NoOp0WrapProblem && ICmpInst::isSigned(Pred) && match(Op1, m_Zero())) 3192 if (ConstantInt *RHSC = dyn_cast_or_null<ConstantInt>(B)) 3193 if (!RHSC->isMinValue(/*isSigned=*/true)) 3194 return new ICmpInst(Pred, A, ConstantExpr::getNeg(RHSC)); 3195 3196 // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. 3197 if ((A == Op1 || B == Op1) && NoOp0WrapProblem) 3198 return new ICmpInst(Pred, A == Op1 ? B : A, 3199 Constant::getNullValue(Op1->getType())); 3200 3201 // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. 3202 if ((C == Op0 || D == Op0) && NoOp1WrapProblem) 3203 return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()), 3204 C == Op0 ? D : C); 3205 3206 // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow. 3207 if (A && C && (A == C || A == D || B == C || B == D) && 3208 NoOp0WrapProblem && NoOp1WrapProblem && 3209 // Try not to increase register pressure. 3210 BO0->hasOneUse() && BO1->hasOneUse()) { 3211 // Determine Y and Z in the form icmp (X+Y), (X+Z). 3212 Value *Y, *Z; 3213 if (A == C) { 3214 // C + B == C + D -> B == D 3215 Y = B; 3216 Z = D; 3217 } else if (A == D) { 3218 // D + B == C + D -> B == C 3219 Y = B; 3220 Z = C; 3221 } else if (B == C) { 3222 // A + C == C + D -> A == D 3223 Y = A; 3224 Z = D; 3225 } else { 3226 assert(B == D); 3227 // A + D == C + D -> A == C 3228 Y = A; 3229 Z = C; 3230 } 3231 return new ICmpInst(Pred, Y, Z); 3232 } 3233 3234 // icmp slt (X + -1), Y -> icmp sle X, Y 3235 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT && 3236 match(B, m_AllOnes())) 3237 return new ICmpInst(CmpInst::ICMP_SLE, A, Op1); 3238 3239 // icmp sge (X + -1), Y -> icmp sgt X, Y 3240 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE && 3241 match(B, m_AllOnes())) 3242 return new ICmpInst(CmpInst::ICMP_SGT, A, Op1); 3243 3244 // icmp sle (X + 1), Y -> icmp slt X, Y 3245 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && 3246 match(B, m_One())) 3247 return new ICmpInst(CmpInst::ICMP_SLT, A, Op1); 3248 3249 // icmp sgt (X + 1), Y -> icmp sge X, Y 3250 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && 3251 match(B, m_One())) 3252 return new ICmpInst(CmpInst::ICMP_SGE, A, Op1); 3253 3254 // if C1 has greater magnitude than C2: 3255 // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y 3256 // s.t. C3 = C1 - C2 3257 // 3258 // if C2 has greater magnitude than C1: 3259 // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3) 3260 // s.t. C3 = C2 - C1 3261 if (A && C && NoOp0WrapProblem && NoOp1WrapProblem && 3262 (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) 3263 if (ConstantInt *C1 = dyn_cast<ConstantInt>(B)) 3264 if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) { 3265 const APInt &AP1 = C1->getValue(); 3266 const APInt &AP2 = C2->getValue(); 3267 if (AP1.isNegative() == AP2.isNegative()) { 3268 APInt AP1Abs = C1->getValue().abs(); 3269 APInt AP2Abs = C2->getValue().abs(); 3270 if (AP1Abs.uge(AP2Abs)) { 3271 ConstantInt *C3 = Builder->getInt(AP1 - AP2); 3272 Value *NewAdd = Builder->CreateNSWAdd(A, C3); 3273 return new ICmpInst(Pred, NewAdd, C); 3274 } else { 3275 ConstantInt *C3 = Builder->getInt(AP2 - AP1); 3276 Value *NewAdd = Builder->CreateNSWAdd(C, C3); 3277 return new ICmpInst(Pred, A, NewAdd); 3278 } 3279 } 3280 } 3281 3282 3283 // Analyze the case when either Op0 or Op1 is a sub instruction. 3284 // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). 3285 A = nullptr; B = nullptr; C = nullptr; D = nullptr; 3286 if (BO0 && BO0->getOpcode() == Instruction::Sub) 3287 A = BO0->getOperand(0), B = BO0->getOperand(1); 3288 if (BO1 && BO1->getOpcode() == Instruction::Sub) 3289 C = BO1->getOperand(0), D = BO1->getOperand(1); 3290 3291 // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow. 3292 if (A == Op1 && NoOp0WrapProblem) 3293 return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B); 3294 3295 // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow. 3296 if (C == Op0 && NoOp1WrapProblem) 3297 return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType())); 3298 3299 // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow. 3300 if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem && 3301 // Try not to increase register pressure. 3302 BO0->hasOneUse() && BO1->hasOneUse()) 3303 return new ICmpInst(Pred, A, C); 3304 3305 // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow. 3306 if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem && 3307 // Try not to increase register pressure. 3308 BO0->hasOneUse() && BO1->hasOneUse()) 3309 return new ICmpInst(Pred, D, B); 3310 3311 // icmp (0-X) < cst --> x > -cst 3312 if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) { 3313 Value *X; 3314 if (match(BO0, m_Neg(m_Value(X)))) 3315 if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) 3316 if (!RHSC->isMinValue(/*isSigned=*/true)) 3317 return new ICmpInst(I.getSwappedPredicate(), X, 3318 ConstantExpr::getNeg(RHSC)); 3319 } 3320 3321 BinaryOperator *SRem = nullptr; 3322 // icmp (srem X, Y), Y 3323 if (BO0 && BO0->getOpcode() == Instruction::SRem && 3324 Op1 == BO0->getOperand(1)) 3325 SRem = BO0; 3326 // icmp Y, (srem X, Y) 3327 else if (BO1 && BO1->getOpcode() == Instruction::SRem && 3328 Op0 == BO1->getOperand(1)) 3329 SRem = BO1; 3330 if (SRem) { 3331 // We don't check hasOneUse to avoid increasing register pressure because 3332 // the value we use is the same value this instruction was already using. 3333 switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) { 3334 default: break; 3335 case ICmpInst::ICMP_EQ: 3336 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); 3337 case ICmpInst::ICMP_NE: 3338 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); 3339 case ICmpInst::ICMP_SGT: 3340 case ICmpInst::ICMP_SGE: 3341 return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1), 3342 Constant::getAllOnesValue(SRem->getType())); 3343 case ICmpInst::ICMP_SLT: 3344 case ICmpInst::ICMP_SLE: 3345 return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1), 3346 Constant::getNullValue(SRem->getType())); 3347 } 3348 } 3349 3350 if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && 3351 BO0->hasOneUse() && BO1->hasOneUse() && 3352 BO0->getOperand(1) == BO1->getOperand(1)) { 3353 switch (BO0->getOpcode()) { 3354 default: break; 3355 case Instruction::Add: 3356 case Instruction::Sub: 3357 case Instruction::Xor: 3358 if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b 3359 return new ICmpInst(I.getPredicate(), BO0->getOperand(0), 3360 BO1->getOperand(0)); 3361 // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b 3362 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { 3363 if (CI->getValue().isSignBit()) { 3364 ICmpInst::Predicate Pred = I.isSigned() 3365 ? I.getUnsignedPredicate() 3366 : I.getSignedPredicate(); 3367 return new ICmpInst(Pred, BO0->getOperand(0), 3368 BO1->getOperand(0)); 3369 } 3370 3371 if (CI->isMaxValue(true)) { 3372 ICmpInst::Predicate Pred = I.isSigned() 3373 ? I.getUnsignedPredicate() 3374 : I.getSignedPredicate(); 3375 Pred = I.getSwappedPredicate(Pred); 3376 return new ICmpInst(Pred, BO0->getOperand(0), 3377 BO1->getOperand(0)); 3378 } 3379 } 3380 break; 3381 case Instruction::Mul: 3382 if (!I.isEquality()) 3383 break; 3384 3385 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { 3386 // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask 3387 // Mask = -1 >> count-trailing-zeros(Cst). 3388 if (!CI->isZero() && !CI->isOne()) { 3389 const APInt &AP = CI->getValue(); 3390 ConstantInt *Mask = ConstantInt::get(I.getContext(), 3391 APInt::getLowBitsSet(AP.getBitWidth(), 3392 AP.getBitWidth() - 3393 AP.countTrailingZeros())); 3394 Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); 3395 Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); 3396 return new ICmpInst(I.getPredicate(), And1, And2); 3397 } 3398 } 3399 break; 3400 case Instruction::UDiv: 3401 case Instruction::LShr: 3402 if (I.isSigned()) 3403 break; 3404 // fall-through 3405 case Instruction::SDiv: 3406 case Instruction::AShr: 3407 if (!BO0->isExact() || !BO1->isExact()) 3408 break; 3409 return new ICmpInst(I.getPredicate(), BO0->getOperand(0), 3410 BO1->getOperand(0)); 3411 case Instruction::Shl: { 3412 bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); 3413 bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); 3414 if (!NUW && !NSW) 3415 break; 3416 if (!NSW && I.isSigned()) 3417 break; 3418 return new ICmpInst(I.getPredicate(), BO0->getOperand(0), 3419 BO1->getOperand(0)); 3420 } 3421 } 3422 } 3423 } 3424 3425 { Value *A, *B; 3426 // Transform (A & ~B) == 0 --> (A & B) != 0 3427 // and (A & ~B) != 0 --> (A & B) == 0 3428 // if A is a power of 2. 3429 if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && 3430 match(Op1, m_Zero()) && 3431 isKnownToBeAPowerOfTwo(A, false, 0, AC, &I, DT) && I.isEquality()) 3432 return new ICmpInst(I.getInversePredicate(), 3433 Builder->CreateAnd(A, B), 3434 Op1); 3435 3436 // ~x < ~y --> y < x 3437 // ~x < cst --> ~cst < x 3438 if (match(Op0, m_Not(m_Value(A)))) { 3439 if (match(Op1, m_Not(m_Value(B)))) 3440 return new ICmpInst(I.getPredicate(), B, A); 3441 if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) 3442 return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); 3443 } 3444 3445 // (a+b) <u a --> llvm.uadd.with.overflow. 3446 // (a+b) <u b --> llvm.uadd.with.overflow. 3447 if (I.getPredicate() == ICmpInst::ICMP_ULT && 3448 match(Op0, m_Add(m_Value(A), m_Value(B))) && 3449 (Op1 == A || Op1 == B)) 3450 if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) 3451 return R; 3452 3453 // a >u (a+b) --> llvm.uadd.with.overflow. 3454 // b >u (a+b) --> llvm.uadd.with.overflow. 3455 if (I.getPredicate() == ICmpInst::ICMP_UGT && 3456 match(Op1, m_Add(m_Value(A), m_Value(B))) && 3457 (Op0 == A || Op0 == B)) 3458 if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) 3459 return R; 3460 3461 // (zext a) * (zext b) --> llvm.umul.with.overflow. 3462 if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { 3463 if (Instruction *R = ProcessUMulZExtIdiom(I, Op0, Op1, *this)) 3464 return R; 3465 } 3466 if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { 3467 if (Instruction *R = ProcessUMulZExtIdiom(I, Op1, Op0, *this)) 3468 return R; 3469 } 3470 } 3471 3472 if (I.isEquality()) { 3473 Value *A, *B, *C, *D; 3474 3475 if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { 3476 if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 3477 Value *OtherVal = A == Op1 ? B : A; 3478 return new ICmpInst(I.getPredicate(), OtherVal, 3479 Constant::getNullValue(A->getType())); 3480 } 3481 3482 if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { 3483 // A^c1 == C^c2 --> A == C^(c1^c2) 3484 ConstantInt *C1, *C2; 3485 if (match(B, m_ConstantInt(C1)) && 3486 match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { 3487 Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue()); 3488 Value *Xor = Builder->CreateXor(C, NC); 3489 return new ICmpInst(I.getPredicate(), A, Xor); 3490 } 3491 3492 // A^B == A^D -> B == D 3493 if (A == C) return new ICmpInst(I.getPredicate(), B, D); 3494 if (A == D) return new ICmpInst(I.getPredicate(), B, C); 3495 if (B == C) return new ICmpInst(I.getPredicate(), A, D); 3496 if (B == D) return new ICmpInst(I.getPredicate(), A, C); 3497 } 3498 } 3499 3500 if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && 3501 (A == Op0 || B == Op0)) { 3502 // A == (A^B) -> B == 0 3503 Value *OtherVal = A == Op0 ? B : A; 3504 return new ICmpInst(I.getPredicate(), OtherVal, 3505 Constant::getNullValue(A->getType())); 3506 } 3507 3508 // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 3509 if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && 3510 match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { 3511 Value *X = nullptr, *Y = nullptr, *Z = nullptr; 3512 3513 if (A == C) { 3514 X = B; Y = D; Z = A; 3515 } else if (A == D) { 3516 X = B; Y = C; Z = A; 3517 } else if (B == C) { 3518 X = A; Y = D; Z = B; 3519 } else if (B == D) { 3520 X = A; Y = C; Z = B; 3521 } 3522 3523 if (X) { // Build (X^Y) & Z 3524 Op1 = Builder->CreateXor(X, Y); 3525 Op1 = Builder->CreateAnd(Op1, Z); 3526 I.setOperand(0, Op1); 3527 I.setOperand(1, Constant::getNullValue(Op1->getType())); 3528 return &I; 3529 } 3530 } 3531 3532 // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B) 3533 // and (B & (1<<X)-1) == (zext A) --> A == (trunc B) 3534 ConstantInt *Cst1; 3535 if ((Op0->hasOneUse() && 3536 match(Op0, m_ZExt(m_Value(A))) && 3537 match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) || 3538 (Op1->hasOneUse() && 3539 match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) && 3540 match(Op1, m_ZExt(m_Value(A))))) { 3541 APInt Pow2 = Cst1->getValue() + 1; 3542 if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) && 3543 Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth()) 3544 return new ICmpInst(I.getPredicate(), A, 3545 Builder->CreateTrunc(B, A->getType())); 3546 } 3547 3548 // (A >> C) == (B >> C) --> (A^B) u< (1 << C) 3549 // For lshr and ashr pairs. 3550 if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) && 3551 match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) || 3552 (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) && 3553 match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) { 3554 unsigned TypeBits = Cst1->getBitWidth(); 3555 unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); 3556 if (ShAmt < TypeBits && ShAmt != 0) { 3557 ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE 3558 ? ICmpInst::ICMP_UGE 3559 : ICmpInst::ICMP_ULT; 3560 Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); 3561 APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); 3562 return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal)); 3563 } 3564 } 3565 3566 // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to 3567 // "icmp (and X, mask), cst" 3568 uint64_t ShAmt = 0; 3569 if (Op0->hasOneUse() && 3570 match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), 3571 m_ConstantInt(ShAmt))))) && 3572 match(Op1, m_ConstantInt(Cst1)) && 3573 // Only do this when A has multiple uses. This is most important to do 3574 // when it exposes other optimizations. 3575 !A->hasOneUse()) { 3576 unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits(); 3577 3578 if (ShAmt < ASize) { 3579 APInt MaskV = 3580 APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); 3581 MaskV <<= ShAmt; 3582 3583 APInt CmpV = Cst1->getValue().zext(ASize); 3584 CmpV <<= ShAmt; 3585 3586 Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); 3587 return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); 3588 } 3589 } 3590 } 3591 3592 // The 'cmpxchg' instruction returns an aggregate containing the old value and 3593 // an i1 which indicates whether or not we successfully did the swap. 3594 // 3595 // Replace comparisons between the old value and the expected value with the 3596 // indicator that 'cmpxchg' returns. 3597 // 3598 // N.B. This transform is only valid when the 'cmpxchg' is not permitted to 3599 // spuriously fail. In those cases, the old value may equal the expected 3600 // value but it is possible for the swap to not occur. 3601 if (I.getPredicate() == ICmpInst::ICMP_EQ) 3602 if (auto *EVI = dyn_cast<ExtractValueInst>(Op0)) 3603 if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand())) 3604 if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 && 3605 !ACXI->isWeak()) 3606 return ExtractValueInst::Create(ACXI, 1); 3607 3608 { 3609 Value *X; ConstantInt *Cst; 3610 // icmp X+Cst, X 3611 if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X) 3612 return FoldICmpAddOpCst(I, X, Cst, I.getPredicate()); 3613 3614 // icmp X, X+Cst 3615 if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) 3616 return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate()); 3617 } 3618 return Changed ? &I : nullptr; 3619 } 3620 3621 /// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. 3622 Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, 3623 Instruction *LHSI, 3624 Constant *RHSC) { 3625 if (!isa<ConstantFP>(RHSC)) return nullptr; 3626 const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); 3627 3628 // Get the width of the mantissa. We don't want to hack on conversions that 3629 // might lose information from the integer, e.g. "i64 -> float" 3630 int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); 3631 if (MantissaWidth == -1) return nullptr; // Unknown. 3632 3633 IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); 3634 3635 // Check to see that the input is converted from an integer type that is small 3636 // enough that preserves all bits. TODO: check here for "known" sign bits. 3637 // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. 3638 unsigned InputSize = IntTy->getScalarSizeInBits(); 3639 3640 // If this is a uitofp instruction, we need an extra bit to hold the sign. 3641 bool LHSUnsigned = isa<UIToFPInst>(LHSI); 3642 if (LHSUnsigned) 3643 ++InputSize; 3644 3645 if (I.isEquality()) { 3646 FCmpInst::Predicate P = I.getPredicate(); 3647 bool IsExact = false; 3648 APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned); 3649 RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact); 3650 3651 // If the floating point constant isn't an integer value, we know if we will 3652 // ever compare equal / not equal to it. 3653 if (!IsExact) { 3654 // TODO: Can never be -0.0 and other non-representable values 3655 APFloat RHSRoundInt(RHS); 3656 RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven); 3657 if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) { 3658 if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ) 3659 return ReplaceInstUsesWith(I, Builder->getFalse()); 3660 3661 assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE); 3662 return ReplaceInstUsesWith(I, Builder->getTrue()); 3663 } 3664 } 3665 3666 // TODO: If the constant is exactly representable, is it always OK to do 3667 // equality compares as integer? 3668 } 3669 3670 // Comparisons with zero are a special case where we know we won't lose 3671 // information. 3672 bool IsCmpZero = RHS.isPosZero(); 3673 3674 // If the conversion would lose info, don't hack on this. 3675 if ((int)InputSize > MantissaWidth && !IsCmpZero) 3676 return nullptr; 3677 3678 // Otherwise, we can potentially simplify the comparison. We know that it 3679 // will always come through as an integer value and we know the constant is 3680 // not a NAN (it would have been previously simplified). 3681 assert(!RHS.isNaN() && "NaN comparison not already folded!"); 3682 3683 ICmpInst::Predicate Pred; 3684 switch (I.getPredicate()) { 3685 default: llvm_unreachable("Unexpected predicate!"); 3686 case FCmpInst::FCMP_UEQ: 3687 case FCmpInst::FCMP_OEQ: 3688 Pred = ICmpInst::ICMP_EQ; 3689 break; 3690 case FCmpInst::FCMP_UGT: 3691 case FCmpInst::FCMP_OGT: 3692 Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT; 3693 break; 3694 case FCmpInst::FCMP_UGE: 3695 case FCmpInst::FCMP_OGE: 3696 Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE; 3697 break; 3698 case FCmpInst::FCMP_ULT: 3699 case FCmpInst::FCMP_OLT: 3700 Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT; 3701 break; 3702 case FCmpInst::FCMP_ULE: 3703 case FCmpInst::FCMP_OLE: 3704 Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE; 3705 break; 3706 case FCmpInst::FCMP_UNE: 3707 case FCmpInst::FCMP_ONE: 3708 Pred = ICmpInst::ICMP_NE; 3709 break; 3710 case FCmpInst::FCMP_ORD: 3711 return ReplaceInstUsesWith(I, Builder->getTrue()); 3712 case FCmpInst::FCMP_UNO: 3713 return ReplaceInstUsesWith(I, Builder->getFalse()); 3714 } 3715 3716 // Now we know that the APFloat is a normal number, zero or inf. 3717 3718 // See if the FP constant is too large for the integer. For example, 3719 // comparing an i8 to 300.0. 3720 unsigned IntWidth = IntTy->getScalarSizeInBits(); 3721 3722 if (!LHSUnsigned) { 3723 // If the RHS value is > SignedMax, fold the comparison. This handles +INF 3724 // and large values. 3725 APFloat SMax(RHS.getSemantics()); 3726 SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true, 3727 APFloat::rmNearestTiesToEven); 3728 if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 3729 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || 3730 Pred == ICmpInst::ICMP_SLE) 3731 return ReplaceInstUsesWith(I, Builder->getTrue()); 3732 return ReplaceInstUsesWith(I, Builder->getFalse()); 3733 } 3734 } else { 3735 // If the RHS value is > UnsignedMax, fold the comparison. This handles 3736 // +INF and large values. 3737 APFloat UMax(RHS.getSemantics()); 3738 UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false, 3739 APFloat::rmNearestTiesToEven); 3740 if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 3741 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || 3742 Pred == ICmpInst::ICMP_ULE) 3743 return ReplaceInstUsesWith(I, Builder->getTrue()); 3744 return ReplaceInstUsesWith(I, Builder->getFalse()); 3745 } 3746 } 3747 3748 if (!LHSUnsigned) { 3749 // See if the RHS value is < SignedMin. 3750 APFloat SMin(RHS.getSemantics()); 3751 SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true, 3752 APFloat::rmNearestTiesToEven); 3753 if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 3754 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || 3755 Pred == ICmpInst::ICMP_SGE) 3756 return ReplaceInstUsesWith(I, Builder->getTrue()); 3757 return ReplaceInstUsesWith(I, Builder->getFalse()); 3758 } 3759 } else { 3760 // See if the RHS value is < UnsignedMin. 3761 APFloat SMin(RHS.getSemantics()); 3762 SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true, 3763 APFloat::rmNearestTiesToEven); 3764 if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0 3765 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT || 3766 Pred == ICmpInst::ICMP_UGE) 3767 return ReplaceInstUsesWith(I, Builder->getTrue()); 3768 return ReplaceInstUsesWith(I, Builder->getFalse()); 3769 } 3770 } 3771 3772 // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or 3773 // [0, UMAX], but it may still be fractional. See if it is fractional by 3774 // casting the FP value to the integer value and back, checking for equality. 3775 // Don't do this for zero, because -0.0 is not fractional. 3776 Constant *RHSInt = LHSUnsigned 3777 ? ConstantExpr::getFPToUI(RHSC, IntTy) 3778 : ConstantExpr::getFPToSI(RHSC, IntTy); 3779 if (!RHS.isZero()) { 3780 bool Equal = LHSUnsigned 3781 ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC 3782 : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; 3783 if (!Equal) { 3784 // If we had a comparison against a fractional value, we have to adjust 3785 // the compare predicate and sometimes the value. RHSC is rounded towards 3786 // zero at this point. 3787 switch (Pred) { 3788 default: llvm_unreachable("Unexpected integer comparison!"); 3789 case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true 3790 return ReplaceInstUsesWith(I, Builder->getTrue()); 3791 case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false 3792 return ReplaceInstUsesWith(I, Builder->getFalse()); 3793 case ICmpInst::ICMP_ULE: 3794 // (float)int <= 4.4 --> int <= 4 3795 // (float)int <= -4.4 --> false 3796 if (RHS.isNegative()) 3797 return ReplaceInstUsesWith(I, Builder->getFalse()); 3798 break; 3799 case ICmpInst::ICMP_SLE: 3800 // (float)int <= 4.4 --> int <= 4 3801 // (float)int <= -4.4 --> int < -4 3802 if (RHS.isNegative()) 3803 Pred = ICmpInst::ICMP_SLT; 3804 break; 3805 case ICmpInst::ICMP_ULT: 3806 // (float)int < -4.4 --> false 3807 // (float)int < 4.4 --> int <= 4 3808 if (RHS.isNegative()) 3809 return ReplaceInstUsesWith(I, Builder->getFalse()); 3810 Pred = ICmpInst::ICMP_ULE; 3811 break; 3812 case ICmpInst::ICMP_SLT: 3813 // (float)int < -4.4 --> int < -4 3814 // (float)int < 4.4 --> int <= 4 3815 if (!RHS.isNegative()) 3816 Pred = ICmpInst::ICMP_SLE; 3817 break; 3818 case ICmpInst::ICMP_UGT: 3819 // (float)int > 4.4 --> int > 4 3820 // (float)int > -4.4 --> true 3821 if (RHS.isNegative()) 3822 return ReplaceInstUsesWith(I, Builder->getTrue()); 3823 break; 3824 case ICmpInst::ICMP_SGT: 3825 // (float)int > 4.4 --> int > 4 3826 // (float)int > -4.4 --> int >= -4 3827 if (RHS.isNegative()) 3828 Pred = ICmpInst::ICMP_SGE; 3829 break; 3830 case ICmpInst::ICMP_UGE: 3831 // (float)int >= -4.4 --> true 3832 // (float)int >= 4.4 --> int > 4 3833 if (RHS.isNegative()) 3834 return ReplaceInstUsesWith(I, Builder->getTrue()); 3835 Pred = ICmpInst::ICMP_UGT; 3836 break; 3837 case ICmpInst::ICMP_SGE: 3838 // (float)int >= -4.4 --> int >= -4 3839 // (float)int >= 4.4 --> int > 4 3840 if (!RHS.isNegative()) 3841 Pred = ICmpInst::ICMP_SGT; 3842 break; 3843 } 3844 } 3845 } 3846 3847 // Lower this FP comparison into an appropriate integer version of the 3848 // comparison. 3849 return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt); 3850 } 3851 3852 Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { 3853 bool Changed = false; 3854 3855 /// Orders the operands of the compare so that they are listed from most 3856 /// complex to least complex. This puts constants before unary operators, 3857 /// before binary operators. 3858 if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { 3859 I.swapOperands(); 3860 Changed = true; 3861 } 3862 3863 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3864 3865 if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC)) 3866 return ReplaceInstUsesWith(I, V); 3867 3868 // Simplify 'fcmp pred X, X' 3869 if (Op0 == Op1) { 3870 switch (I.getPredicate()) { 3871 default: llvm_unreachable("Unknown predicate!"); 3872 case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) 3873 case FCmpInst::FCMP_ULT: // True if unordered or less than 3874 case FCmpInst::FCMP_UGT: // True if unordered or greater than 3875 case FCmpInst::FCMP_UNE: // True if unordered or not equal 3876 // Canonicalize these to be 'fcmp uno %X, 0.0'. 3877 I.setPredicate(FCmpInst::FCMP_UNO); 3878 I.setOperand(1, Constant::getNullValue(Op0->getType())); 3879 return &I; 3880 3881 case FCmpInst::FCMP_ORD: // True if ordered (no nans) 3882 case FCmpInst::FCMP_OEQ: // True if ordered and equal 3883 case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal 3884 case FCmpInst::FCMP_OLE: // True if ordered and less than or equal 3885 // Canonicalize these to be 'fcmp ord %X, 0.0'. 3886 I.setPredicate(FCmpInst::FCMP_ORD); 3887 I.setOperand(1, Constant::getNullValue(Op0->getType())); 3888 return &I; 3889 } 3890 } 3891 3892 // Handle fcmp with constant RHS 3893 if (Constant *RHSC = dyn_cast<Constant>(Op1)) { 3894 if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) 3895 switch (LHSI->getOpcode()) { 3896 case Instruction::FPExt: { 3897 // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless 3898 FPExtInst *LHSExt = cast<FPExtInst>(LHSI); 3899 ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC); 3900 if (!RHSF) 3901 break; 3902 3903 const fltSemantics *Sem; 3904 // FIXME: This shouldn't be here. 3905 if (LHSExt->getSrcTy()->isHalfTy()) 3906 Sem = &APFloat::IEEEhalf; 3907 else if (LHSExt->getSrcTy()->isFloatTy()) 3908 Sem = &APFloat::IEEEsingle; 3909 else if (LHSExt->getSrcTy()->isDoubleTy()) 3910 Sem = &APFloat::IEEEdouble; 3911 else if (LHSExt->getSrcTy()->isFP128Ty()) 3912 Sem = &APFloat::IEEEquad; 3913 else if (LHSExt->getSrcTy()->isX86_FP80Ty()) 3914 Sem = &APFloat::x87DoubleExtended; 3915 else if (LHSExt->getSrcTy()->isPPC_FP128Ty()) 3916 Sem = &APFloat::PPCDoubleDouble; 3917 else 3918 break; 3919 3920 bool Lossy; 3921 APFloat F = RHSF->getValueAPF(); 3922 F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy); 3923 3924 // Avoid lossy conversions and denormals. Zero is a special case 3925 // that's OK to convert. 3926 APFloat Fabs = F; 3927 Fabs.clearSign(); 3928 if (!Lossy && 3929 ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) != 3930 APFloat::cmpLessThan) || Fabs.isZero())) 3931 3932 return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), 3933 ConstantFP::get(RHSC->getContext(), F)); 3934 break; 3935 } 3936 case Instruction::PHI: 3937 // Only fold fcmp into the PHI if the phi and fcmp are in the same 3938 // block. If in the same block, we're encouraging jump threading. If 3939 // not, we are just pessimizing the code by making an i1 phi. 3940 if (LHSI->getParent() == I.getParent()) 3941 if (Instruction *NV = FoldOpIntoPhi(I)) 3942 return NV; 3943 break; 3944 case Instruction::SIToFP: 3945 case Instruction::UIToFP: 3946 if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC)) 3947 return NV; 3948 break; 3949 case Instruction::FSub: { 3950 // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C 3951 Value *Op; 3952 if (match(LHSI, m_FNeg(m_Value(Op)))) 3953 return new FCmpInst(I.getSwappedPredicate(), Op, 3954 ConstantExpr::getFNeg(RHSC)); 3955 break; 3956 } 3957 case Instruction::Load: 3958 if (GetElementPtrInst *GEP = 3959 dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { 3960 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) 3961 if (GV->isConstant() && GV->hasDefinitiveInitializer() && 3962 !cast<LoadInst>(LHSI)->isVolatile()) 3963 if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) 3964 return Res; 3965 } 3966 break; 3967 case Instruction::Call: { 3968 if (!RHSC->isNullValue()) 3969 break; 3970 3971 CallInst *CI = cast<CallInst>(LHSI); 3972 const Function *F = CI->getCalledFunction(); 3973 if (!F) 3974 break; 3975 3976 // Various optimization for fabs compared with zero. 3977 LibFunc::Func Func; 3978 if (F->getIntrinsicID() == Intrinsic::fabs || 3979 (TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && 3980 (Func == LibFunc::fabs || Func == LibFunc::fabsf || 3981 Func == LibFunc::fabsl))) { 3982 switch (I.getPredicate()) { 3983 default: 3984 break; 3985 // fabs(x) < 0 --> false 3986 case FCmpInst::FCMP_OLT: 3987 return ReplaceInstUsesWith(I, Builder->getFalse()); 3988 // fabs(x) > 0 --> x != 0 3989 case FCmpInst::FCMP_OGT: 3990 return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), RHSC); 3991 // fabs(x) <= 0 --> x == 0 3992 case FCmpInst::FCMP_OLE: 3993 return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), RHSC); 3994 // fabs(x) >= 0 --> !isnan(x) 3995 case FCmpInst::FCMP_OGE: 3996 return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), RHSC); 3997 // fabs(x) == 0 --> x == 0 3998 // fabs(x) != 0 --> x != 0 3999 case FCmpInst::FCMP_OEQ: 4000 case FCmpInst::FCMP_UEQ: 4001 case FCmpInst::FCMP_ONE: 4002 case FCmpInst::FCMP_UNE: 4003 return new FCmpInst(I.getPredicate(), CI->getArgOperand(0), RHSC); 4004 } 4005 } 4006 } 4007 } 4008 } 4009 4010 // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y 4011 Value *X, *Y; 4012 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y)))) 4013 return new FCmpInst(I.getSwappedPredicate(), X, Y); 4014 4015 // fcmp (fpext x), (fpext y) -> fcmp x, y 4016 if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0)) 4017 if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1)) 4018 if (LHSExt->getSrcTy() == RHSExt->getSrcTy()) 4019 return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), 4020 RHSExt->getOperand(0)); 4021 4022 return Changed ? &I : nullptr; 4023 } 4024