1 //===- InstCombineCalls.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the visitCall, visitInvoke, and visitCallBr functions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "InstCombineInternal.h" 14 #include "llvm/ADT/APFloat.h" 15 #include "llvm/ADT/APInt.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/None.h" 19 #include "llvm/ADT/Optional.h" 20 #include "llvm/ADT/STLFunctionalExtras.h" 21 #include "llvm/ADT/SmallBitVector.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/Analysis/AliasAnalysis.h" 25 #include "llvm/Analysis/AssumeBundleQueries.h" 26 #include "llvm/Analysis/AssumptionCache.h" 27 #include "llvm/Analysis/InstructionSimplify.h" 28 #include "llvm/Analysis/Loads.h" 29 #include "llvm/Analysis/MemoryBuiltins.h" 30 #include "llvm/Analysis/ValueTracking.h" 31 #include "llvm/Analysis/VectorUtils.h" 32 #include "llvm/IR/Attributes.h" 33 #include "llvm/IR/BasicBlock.h" 34 #include "llvm/IR/Constant.h" 35 #include "llvm/IR/Constants.h" 36 #include "llvm/IR/DataLayout.h" 37 #include "llvm/IR/DerivedTypes.h" 38 #include "llvm/IR/Function.h" 39 #include "llvm/IR/GlobalVariable.h" 40 #include "llvm/IR/InlineAsm.h" 41 #include "llvm/IR/InstrTypes.h" 42 #include "llvm/IR/Instruction.h" 43 #include "llvm/IR/Instructions.h" 44 #include "llvm/IR/IntrinsicInst.h" 45 #include "llvm/IR/Intrinsics.h" 46 #include "llvm/IR/IntrinsicsAArch64.h" 47 #include "llvm/IR/IntrinsicsAMDGPU.h" 48 #include "llvm/IR/IntrinsicsARM.h" 49 #include "llvm/IR/IntrinsicsHexagon.h" 50 #include "llvm/IR/LLVMContext.h" 51 #include "llvm/IR/Metadata.h" 52 #include "llvm/IR/PatternMatch.h" 53 #include "llvm/IR/Statepoint.h" 54 #include "llvm/IR/Type.h" 55 #include "llvm/IR/User.h" 56 #include "llvm/IR/Value.h" 57 #include "llvm/IR/ValueHandle.h" 58 #include "llvm/Support/AtomicOrdering.h" 59 #include "llvm/Support/Casting.h" 60 #include "llvm/Support/CommandLine.h" 61 #include "llvm/Support/Compiler.h" 62 #include "llvm/Support/Debug.h" 63 #include "llvm/Support/ErrorHandling.h" 64 #include "llvm/Support/KnownBits.h" 65 #include "llvm/Support/MathExtras.h" 66 #include "llvm/Support/raw_ostream.h" 67 #include "llvm/Transforms/InstCombine/InstCombiner.h" 68 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 69 #include "llvm/Transforms/Utils/Local.h" 70 #include "llvm/Transforms/Utils/SimplifyLibCalls.h" 71 #include <algorithm> 72 #include <cassert> 73 #include <cstdint> 74 #include <utility> 75 #include <vector> 76 77 #define DEBUG_TYPE "instcombine" 78 #include "llvm/Transforms/Utils/InstructionWorklist.h" 79 80 using namespace llvm; 81 using namespace PatternMatch; 82 83 STATISTIC(NumSimplified, "Number of library calls simplified"); 84 85 static cl::opt<unsigned> GuardWideningWindow( 86 "instcombine-guard-widening-window", 87 cl::init(3), 88 cl::desc("How wide an instruction window to bypass looking for " 89 "another guard")); 90 91 namespace llvm { 92 /// enable preservation of attributes in assume like: 93 /// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ] 94 extern cl::opt<bool> EnableKnowledgeRetention; 95 } // namespace llvm 96 97 /// Return the specified type promoted as it would be to pass though a va_arg 98 /// area. 99 static Type *getPromotedType(Type *Ty) { 100 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { 101 if (ITy->getBitWidth() < 32) 102 return Type::getInt32Ty(Ty->getContext()); 103 } 104 return Ty; 105 } 106 107 /// Recognize a memcpy/memmove from a trivially otherwise unused alloca. 108 /// TODO: This should probably be integrated with visitAllocSites, but that 109 /// requires a deeper change to allow either unread or unwritten objects. 110 static bool hasUndefSource(AnyMemTransferInst *MI) { 111 auto *Src = MI->getRawSource(); 112 while (isa<GetElementPtrInst>(Src) || isa<BitCastInst>(Src)) { 113 if (!Src->hasOneUse()) 114 return false; 115 Src = cast<Instruction>(Src)->getOperand(0); 116 } 117 return isa<AllocaInst>(Src) && Src->hasOneUse(); 118 } 119 120 Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { 121 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT); 122 MaybeAlign CopyDstAlign = MI->getDestAlign(); 123 if (!CopyDstAlign || *CopyDstAlign < DstAlign) { 124 MI->setDestAlignment(DstAlign); 125 return MI; 126 } 127 128 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT); 129 MaybeAlign CopySrcAlign = MI->getSourceAlign(); 130 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) { 131 MI->setSourceAlignment(SrcAlign); 132 return MI; 133 } 134 135 // If we have a store to a location which is known constant, we can conclude 136 // that the store must be storing the constant value (else the memory 137 // wouldn't be constant), and this must be a noop. 138 if (AA->pointsToConstantMemory(MI->getDest())) { 139 // Set the size of the copy to 0, it will be deleted on the next iteration. 140 MI->setLength(Constant::getNullValue(MI->getLength()->getType())); 141 return MI; 142 } 143 144 // If the source is provably undef, the memcpy/memmove doesn't do anything 145 // (unless the transfer is volatile). 146 if (hasUndefSource(MI) && !MI->isVolatile()) { 147 // Set the size of the copy to 0, it will be deleted on the next iteration. 148 MI->setLength(Constant::getNullValue(MI->getLength()->getType())); 149 return MI; 150 } 151 152 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with 153 // load/store. 154 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength()); 155 if (!MemOpLength) return nullptr; 156 157 // Source and destination pointer types are always "i8*" for intrinsic. See 158 // if the size is something we can handle with a single primitive load/store. 159 // A single load+store correctly handles overlapping memory in the memmove 160 // case. 161 uint64_t Size = MemOpLength->getLimitedValue(); 162 assert(Size && "0-sized memory transferring should be removed already."); 163 164 if (Size > 8 || (Size&(Size-1))) 165 return nullptr; // If not 1/2/4/8 bytes, exit. 166 167 // If it is an atomic and alignment is less than the size then we will 168 // introduce the unaligned memory access which will be later transformed 169 // into libcall in CodeGen. This is not evident performance gain so disable 170 // it now. 171 if (isa<AtomicMemTransferInst>(MI)) 172 if (*CopyDstAlign < Size || *CopySrcAlign < Size) 173 return nullptr; 174 175 // Use an integer load+store unless we can find something better. 176 unsigned SrcAddrSp = 177 cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace(); 178 unsigned DstAddrSp = 179 cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace(); 180 181 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); 182 Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); 183 Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp); 184 185 // If the memcpy has metadata describing the members, see if we can get the 186 // TBAA tag describing our copy. 187 MDNode *CopyMD = nullptr; 188 if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) { 189 CopyMD = M; 190 } else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) { 191 if (M->getNumOperands() == 3 && M->getOperand(0) && 192 mdconst::hasa<ConstantInt>(M->getOperand(0)) && 193 mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() && 194 M->getOperand(1) && 195 mdconst::hasa<ConstantInt>(M->getOperand(1)) && 196 mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() == 197 Size && 198 M->getOperand(2) && isa<MDNode>(M->getOperand(2))) 199 CopyMD = cast<MDNode>(M->getOperand(2)); 200 } 201 202 Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); 203 Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); 204 LoadInst *L = Builder.CreateLoad(IntType, Src); 205 // Alignment from the mem intrinsic will be better, so use it. 206 L->setAlignment(*CopySrcAlign); 207 if (CopyMD) 208 L->setMetadata(LLVMContext::MD_tbaa, CopyMD); 209 MDNode *LoopMemParallelMD = 210 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access); 211 if (LoopMemParallelMD) 212 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD); 213 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group); 214 if (AccessGroupMD) 215 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD); 216 217 StoreInst *S = Builder.CreateStore(L, Dest); 218 // Alignment from the mem intrinsic will be better, so use it. 219 S->setAlignment(*CopyDstAlign); 220 if (CopyMD) 221 S->setMetadata(LLVMContext::MD_tbaa, CopyMD); 222 if (LoopMemParallelMD) 223 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD); 224 if (AccessGroupMD) 225 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD); 226 227 if (auto *MT = dyn_cast<MemTransferInst>(MI)) { 228 // non-atomics can be volatile 229 L->setVolatile(MT->isVolatile()); 230 S->setVolatile(MT->isVolatile()); 231 } 232 if (isa<AtomicMemTransferInst>(MI)) { 233 // atomics have to be unordered 234 L->setOrdering(AtomicOrdering::Unordered); 235 S->setOrdering(AtomicOrdering::Unordered); 236 } 237 238 // Set the size of the copy to 0, it will be deleted on the next iteration. 239 MI->setLength(Constant::getNullValue(MemOpLength->getType())); 240 return MI; 241 } 242 243 Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) { 244 const Align KnownAlignment = 245 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); 246 MaybeAlign MemSetAlign = MI->getDestAlign(); 247 if (!MemSetAlign || *MemSetAlign < KnownAlignment) { 248 MI->setDestAlignment(KnownAlignment); 249 return MI; 250 } 251 252 // If we have a store to a location which is known constant, we can conclude 253 // that the store must be storing the constant value (else the memory 254 // wouldn't be constant), and this must be a noop. 255 if (AA->pointsToConstantMemory(MI->getDest())) { 256 // Set the size of the copy to 0, it will be deleted on the next iteration. 257 MI->setLength(Constant::getNullValue(MI->getLength()->getType())); 258 return MI; 259 } 260 261 // Extract the length and alignment and fill if they are constant. 262 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); 263 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); 264 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) 265 return nullptr; 266 const uint64_t Len = LenC->getLimitedValue(); 267 assert(Len && "0-sized memory setting should be removed already."); 268 const Align Alignment = assumeAligned(MI->getDestAlignment()); 269 270 // If it is an atomic and alignment is less than the size then we will 271 // introduce the unaligned memory access which will be later transformed 272 // into libcall in CodeGen. This is not evident performance gain so disable 273 // it now. 274 if (isa<AtomicMemSetInst>(MI)) 275 if (Alignment < Len) 276 return nullptr; 277 278 // memset(s,c,n) -> store s, c (for n=1,2,4,8) 279 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { 280 Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. 281 282 Value *Dest = MI->getDest(); 283 unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace(); 284 Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp); 285 Dest = Builder.CreateBitCast(Dest, NewDstPtrTy); 286 287 // Extract the fill value and store. 288 uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; 289 StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest, 290 MI->isVolatile()); 291 S->setAlignment(Alignment); 292 if (isa<AtomicMemSetInst>(MI)) 293 S->setOrdering(AtomicOrdering::Unordered); 294 295 // Set the size of the copy to 0, it will be deleted on the next iteration. 296 MI->setLength(Constant::getNullValue(LenC->getType())); 297 return MI; 298 } 299 300 return nullptr; 301 } 302 303 // TODO, Obvious Missing Transforms: 304 // * Narrow width by halfs excluding zero/undef lanes 305 Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) { 306 Value *LoadPtr = II.getArgOperand(0); 307 const Align Alignment = 308 cast<ConstantInt>(II.getArgOperand(1))->getAlignValue(); 309 310 // If the mask is all ones or undefs, this is a plain vector load of the 1st 311 // argument. 312 if (maskIsAllOneOrUndef(II.getArgOperand(2))) { 313 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment, 314 "unmaskedload"); 315 L->copyMetadata(II); 316 return L; 317 } 318 319 // If we can unconditionally load from this address, replace with a 320 // load/select idiom. TODO: use DT for context sensitive query 321 if (isDereferenceablePointer(LoadPtr, II.getType(), 322 II.getModule()->getDataLayout(), &II, nullptr)) { 323 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment, 324 "unmaskedload"); 325 LI->copyMetadata(II); 326 return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3)); 327 } 328 329 return nullptr; 330 } 331 332 // TODO, Obvious Missing Transforms: 333 // * Single constant active lane -> store 334 // * Narrow width by halfs excluding zero/undef lanes 335 Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { 336 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3)); 337 if (!ConstMask) 338 return nullptr; 339 340 // If the mask is all zeros, this instruction does nothing. 341 if (ConstMask->isNullValue()) 342 return eraseInstFromFunction(II); 343 344 // If the mask is all ones, this is a plain vector store of the 1st argument. 345 if (ConstMask->isAllOnesValue()) { 346 Value *StorePtr = II.getArgOperand(1); 347 Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue(); 348 StoreInst *S = 349 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment); 350 S->copyMetadata(II); 351 return S; 352 } 353 354 if (isa<ScalableVectorType>(ConstMask->getType())) 355 return nullptr; 356 357 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts 358 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask); 359 APInt UndefElts(DemandedElts.getBitWidth(), 0); 360 if (Value *V = 361 SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts)) 362 return replaceOperand(II, 0, V); 363 364 return nullptr; 365 } 366 367 // TODO, Obvious Missing Transforms: 368 // * Single constant active lane load -> load 369 // * Dereferenceable address & few lanes -> scalarize speculative load/selects 370 // * Adjacent vector addresses -> masked.load 371 // * Narrow width by halfs excluding zero/undef lanes 372 // * Vector incrementing address -> vector masked load 373 Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) { 374 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2)); 375 if (!ConstMask) 376 return nullptr; 377 378 // Vector splat address w/known mask -> scalar load 379 // Fold the gather to load the source vector first lane 380 // because it is reloading the same value each time 381 if (ConstMask->isAllOnesValue()) 382 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) { 383 auto *VecTy = cast<VectorType>(II.getType()); 384 const Align Alignment = 385 cast<ConstantInt>(II.getArgOperand(1))->getAlignValue(); 386 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr, 387 Alignment, "load.scalar"); 388 Value *Shuf = 389 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast"); 390 return replaceInstUsesWith(II, cast<Instruction>(Shuf)); 391 } 392 393 return nullptr; 394 } 395 396 // TODO, Obvious Missing Transforms: 397 // * Single constant active lane -> store 398 // * Adjacent vector addresses -> masked.store 399 // * Narrow store width by halfs excluding zero/undef lanes 400 // * Vector incrementing address -> vector masked store 401 Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { 402 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3)); 403 if (!ConstMask) 404 return nullptr; 405 406 // If the mask is all zeros, a scatter does nothing. 407 if (ConstMask->isNullValue()) 408 return eraseInstFromFunction(II); 409 410 // Vector splat address -> scalar store 411 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) { 412 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr 413 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) { 414 Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue(); 415 StoreInst *S = 416 new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, Alignment); 417 S->copyMetadata(II); 418 return S; 419 } 420 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector, 421 // lastlane), ptr 422 if (ConstMask->isAllOnesValue()) { 423 Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue(); 424 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType()); 425 ElementCount VF = WideLoadTy->getElementCount(); 426 Constant *EC = 427 ConstantInt::get(Builder.getInt32Ty(), VF.getKnownMinValue()); 428 Value *RunTimeVF = VF.isScalable() ? Builder.CreateVScale(EC) : EC; 429 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1)); 430 Value *Extract = 431 Builder.CreateExtractElement(II.getArgOperand(0), LastLane); 432 StoreInst *S = 433 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment); 434 S->copyMetadata(II); 435 return S; 436 } 437 } 438 if (isa<ScalableVectorType>(ConstMask->getType())) 439 return nullptr; 440 441 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts 442 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask); 443 APInt UndefElts(DemandedElts.getBitWidth(), 0); 444 if (Value *V = 445 SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts)) 446 return replaceOperand(II, 0, V); 447 if (Value *V = 448 SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts)) 449 return replaceOperand(II, 1, V); 450 451 return nullptr; 452 } 453 454 /// This function transforms launder.invariant.group and strip.invariant.group 455 /// like: 456 /// launder(launder(%x)) -> launder(%x) (the result is not the argument) 457 /// launder(strip(%x)) -> launder(%x) 458 /// strip(strip(%x)) -> strip(%x) (the result is not the argument) 459 /// strip(launder(%x)) -> strip(%x) 460 /// This is legal because it preserves the most recent information about 461 /// the presence or absence of invariant.group. 462 static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II, 463 InstCombinerImpl &IC) { 464 auto *Arg = II.getArgOperand(0); 465 auto *StrippedArg = Arg->stripPointerCasts(); 466 auto *StrippedInvariantGroupsArg = StrippedArg; 467 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) { 468 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group && 469 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group) 470 break; 471 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts(); 472 } 473 if (StrippedArg == StrippedInvariantGroupsArg) 474 return nullptr; // No launders/strips to remove. 475 476 Value *Result = nullptr; 477 478 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group) 479 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg); 480 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group) 481 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg); 482 else 483 llvm_unreachable( 484 "simplifyInvariantGroupIntrinsic only handles launder and strip"); 485 if (Result->getType()->getPointerAddressSpace() != 486 II.getType()->getPointerAddressSpace()) 487 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType()); 488 if (Result->getType() != II.getType()) 489 Result = IC.Builder.CreateBitCast(Result, II.getType()); 490 491 return cast<Instruction>(Result); 492 } 493 494 static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { 495 assert((II.getIntrinsicID() == Intrinsic::cttz || 496 II.getIntrinsicID() == Intrinsic::ctlz) && 497 "Expected cttz or ctlz intrinsic"); 498 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; 499 Value *Op0 = II.getArgOperand(0); 500 Value *Op1 = II.getArgOperand(1); 501 Value *X; 502 // ctlz(bitreverse(x)) -> cttz(x) 503 // cttz(bitreverse(x)) -> ctlz(x) 504 if (match(Op0, m_BitReverse(m_Value(X)))) { 505 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz; 506 Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType()); 507 return CallInst::Create(F, {X, II.getArgOperand(1)}); 508 } 509 510 if (II.getType()->isIntOrIntVectorTy(1)) { 511 // ctlz/cttz i1 Op0 --> not Op0 512 if (match(Op1, m_Zero())) 513 return BinaryOperator::CreateNot(Op0); 514 // If zero is poison, then the input can be assumed to be "true", so the 515 // instruction simplifies to "false". 516 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1"); 517 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType())); 518 } 519 520 // If the operand is a select with constant arm(s), try to hoist ctlz/cttz. 521 if (auto *Sel = dyn_cast<SelectInst>(Op0)) 522 if (Instruction *R = IC.FoldOpIntoSelect(II, Sel)) 523 return R; 524 525 if (IsTZ) { 526 // cttz(-x) -> cttz(x) 527 if (match(Op0, m_Neg(m_Value(X)))) 528 return IC.replaceOperand(II, 0, X); 529 530 // cttz(sext(x)) -> cttz(zext(x)) 531 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) { 532 auto *Zext = IC.Builder.CreateZExt(X, II.getType()); 533 auto *CttzZext = 534 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1); 535 return IC.replaceInstUsesWith(II, CttzZext); 536 } 537 538 // Zext doesn't change the number of trailing zeros, so narrow: 539 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'. 540 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) { 541 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X, 542 IC.Builder.getTrue()); 543 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType()); 544 return IC.replaceInstUsesWith(II, ZextCttz); 545 } 546 547 // cttz(abs(x)) -> cttz(x) 548 // cttz(nabs(x)) -> cttz(x) 549 Value *Y; 550 SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor; 551 if (SPF == SPF_ABS || SPF == SPF_NABS) 552 return IC.replaceOperand(II, 0, X); 553 554 if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) 555 return IC.replaceOperand(II, 0, X); 556 } 557 558 KnownBits Known = IC.computeKnownBits(Op0, 0, &II); 559 560 // Create a mask for bits above (ctlz) or below (cttz) the first known one. 561 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros() 562 : Known.countMaxLeadingZeros(); 563 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros() 564 : Known.countMinLeadingZeros(); 565 566 // If all bits above (ctlz) or below (cttz) the first known one are known 567 // zero, this value is constant. 568 // FIXME: This should be in InstSimplify because we're replacing an 569 // instruction with a constant. 570 if (PossibleZeros == DefiniteZeros) { 571 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros); 572 return IC.replaceInstUsesWith(II, C); 573 } 574 575 // If the input to cttz/ctlz is known to be non-zero, 576 // then change the 'ZeroIsPoison' parameter to 'true' 577 // because we know the zero behavior can't affect the result. 578 if (!Known.One.isZero() || 579 isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II, 580 &IC.getDominatorTree())) { 581 if (!match(II.getArgOperand(1), m_One())) 582 return IC.replaceOperand(II, 1, IC.Builder.getTrue()); 583 } 584 585 // Add range metadata since known bits can't completely reflect what we know. 586 // TODO: Handle splat vectors. 587 auto *IT = dyn_cast<IntegerType>(Op0->getType()); 588 if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) { 589 Metadata *LowAndHigh[] = { 590 ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)), 591 ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))}; 592 II.setMetadata(LLVMContext::MD_range, 593 MDNode::get(II.getContext(), LowAndHigh)); 594 return &II; 595 } 596 597 return nullptr; 598 } 599 600 static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) { 601 assert(II.getIntrinsicID() == Intrinsic::ctpop && 602 "Expected ctpop intrinsic"); 603 Type *Ty = II.getType(); 604 unsigned BitWidth = Ty->getScalarSizeInBits(); 605 Value *Op0 = II.getArgOperand(0); 606 Value *X, *Y; 607 608 // ctpop(bitreverse(x)) -> ctpop(x) 609 // ctpop(bswap(x)) -> ctpop(x) 610 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X)))) 611 return IC.replaceOperand(II, 0, X); 612 613 // ctpop(rot(x)) -> ctpop(x) 614 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) || 615 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) && 616 X == Y) 617 return IC.replaceOperand(II, 0, X); 618 619 // ctpop(x | -x) -> bitwidth - cttz(x, false) 620 if (Op0->hasOneUse() && 621 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) { 622 Function *F = 623 Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty); 624 auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()}); 625 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth)); 626 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz)); 627 } 628 629 // ctpop(~x & (x - 1)) -> cttz(x, false) 630 if (match(Op0, 631 m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) { 632 Function *F = 633 Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty); 634 return CallInst::Create(F, {X, IC.Builder.getFalse()}); 635 } 636 637 // Zext doesn't change the number of set bits, so narrow: 638 // ctpop (zext X) --> zext (ctpop X) 639 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) { 640 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X); 641 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty); 642 } 643 644 // If the operand is a select with constant arm(s), try to hoist ctpop. 645 if (auto *Sel = dyn_cast<SelectInst>(Op0)) 646 if (Instruction *R = IC.FoldOpIntoSelect(II, Sel)) 647 return R; 648 649 KnownBits Known(BitWidth); 650 IC.computeKnownBits(Op0, Known, 0, &II); 651 652 // If all bits are zero except for exactly one fixed bit, then the result 653 // must be 0 or 1, and we can get that answer by shifting to LSB: 654 // ctpop (X & 32) --> (X & 32) >> 5 655 if ((~Known.Zero).isPowerOf2()) 656 return BinaryOperator::CreateLShr( 657 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2())); 658 659 // FIXME: Try to simplify vectors of integers. 660 auto *IT = dyn_cast<IntegerType>(Ty); 661 if (!IT) 662 return nullptr; 663 664 // Add range metadata since known bits can't completely reflect what we know. 665 unsigned MinCount = Known.countMinPopulation(); 666 unsigned MaxCount = Known.countMaxPopulation(); 667 if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) { 668 Metadata *LowAndHigh[] = { 669 ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)), 670 ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))}; 671 II.setMetadata(LLVMContext::MD_range, 672 MDNode::get(II.getContext(), LowAndHigh)); 673 return &II; 674 } 675 676 return nullptr; 677 } 678 679 /// Convert a table lookup to shufflevector if the mask is constant. 680 /// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in 681 /// which case we could lower the shufflevector with rev64 instructions 682 /// as it's actually a byte reverse. 683 static Value *simplifyNeonTbl1(const IntrinsicInst &II, 684 InstCombiner::BuilderTy &Builder) { 685 // Bail out if the mask is not a constant. 686 auto *C = dyn_cast<Constant>(II.getArgOperand(1)); 687 if (!C) 688 return nullptr; 689 690 auto *VecTy = cast<FixedVectorType>(II.getType()); 691 unsigned NumElts = VecTy->getNumElements(); 692 693 // Only perform this transformation for <8 x i8> vector types. 694 if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8) 695 return nullptr; 696 697 int Indexes[8]; 698 699 for (unsigned I = 0; I < NumElts; ++I) { 700 Constant *COp = C->getAggregateElement(I); 701 702 if (!COp || !isa<ConstantInt>(COp)) 703 return nullptr; 704 705 Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue(); 706 707 // Make sure the mask indices are in range. 708 if ((unsigned)Indexes[I] >= NumElts) 709 return nullptr; 710 } 711 712 auto *V1 = II.getArgOperand(0); 713 auto *V2 = Constant::getNullValue(V1->getType()); 714 return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes)); 715 } 716 717 // Returns true iff the 2 intrinsics have the same operands, limiting the 718 // comparison to the first NumOperands. 719 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, 720 unsigned NumOperands) { 721 assert(I.arg_size() >= NumOperands && "Not enough operands"); 722 assert(E.arg_size() >= NumOperands && "Not enough operands"); 723 for (unsigned i = 0; i < NumOperands; i++) 724 if (I.getArgOperand(i) != E.getArgOperand(i)) 725 return false; 726 return true; 727 } 728 729 // Remove trivially empty start/end intrinsic ranges, i.e. a start 730 // immediately followed by an end (ignoring debuginfo or other 731 // start/end intrinsics in between). As this handles only the most trivial 732 // cases, tracking the nesting level is not needed: 733 // 734 // call @llvm.foo.start(i1 0) 735 // call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed 736 // call @llvm.foo.end(i1 0) 737 // call @llvm.foo.end(i1 0) ; &I 738 static bool 739 removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, 740 std::function<bool(const IntrinsicInst &)> IsStart) { 741 // We start from the end intrinsic and scan backwards, so that InstCombine 742 // has already processed (and potentially removed) all the instructions 743 // before the end intrinsic. 744 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend()); 745 for (; BI != BE; ++BI) { 746 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) { 747 if (I->isDebugOrPseudoInst() || 748 I->getIntrinsicID() == EndI.getIntrinsicID()) 749 continue; 750 if (IsStart(*I)) { 751 if (haveSameOperands(EndI, *I, EndI.arg_size())) { 752 IC.eraseInstFromFunction(*I); 753 IC.eraseInstFromFunction(EndI); 754 return true; 755 } 756 // Skip start intrinsics that don't pair with this end intrinsic. 757 continue; 758 } 759 } 760 break; 761 } 762 763 return false; 764 } 765 766 Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) { 767 removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) { 768 return I.getIntrinsicID() == Intrinsic::vastart || 769 I.getIntrinsicID() == Intrinsic::vacopy; 770 }); 771 return nullptr; 772 } 773 774 static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) { 775 assert(Call.arg_size() > 1 && "Need at least 2 args to swap"); 776 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1); 777 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) { 778 Call.setArgOperand(0, Arg1); 779 Call.setArgOperand(1, Arg0); 780 return &Call; 781 } 782 return nullptr; 783 } 784 785 /// Creates a result tuple for an overflow intrinsic \p II with a given 786 /// \p Result and a constant \p Overflow value. 787 static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result, 788 Constant *Overflow) { 789 Constant *V[] = {UndefValue::get(Result->getType()), Overflow}; 790 StructType *ST = cast<StructType>(II->getType()); 791 Constant *Struct = ConstantStruct::get(ST, V); 792 return InsertValueInst::Create(Struct, Result, 0); 793 } 794 795 Instruction * 796 InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) { 797 WithOverflowInst *WO = cast<WithOverflowInst>(II); 798 Value *OperationResult = nullptr; 799 Constant *OverflowResult = nullptr; 800 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(), 801 WO->getRHS(), *WO, OperationResult, OverflowResult)) 802 return createOverflowTuple(WO, OperationResult, OverflowResult); 803 return nullptr; 804 } 805 806 static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI, 807 const DataLayout &DL, AssumptionCache *AC, 808 DominatorTree *DT) { 809 KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT); 810 if (Known.isNonNegative()) 811 return false; 812 if (Known.isNegative()) 813 return true; 814 815 Value *X, *Y; 816 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y)))) 817 return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, CxtI, DL); 818 819 return isImpliedByDomCondition( 820 ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL); 821 } 822 823 /// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This 824 /// can trigger other combines. 825 static Instruction *moveAddAfterMinMax(IntrinsicInst *II, 826 InstCombiner::BuilderTy &Builder) { 827 Intrinsic::ID MinMaxID = II->getIntrinsicID(); 828 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin || 829 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) && 830 "Expected a min or max intrinsic"); 831 832 // TODO: Match vectors with undef elements, but undef may not propagate. 833 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1); 834 Value *X; 835 const APInt *C0, *C1; 836 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) || 837 !match(Op1, m_APInt(C1))) 838 return nullptr; 839 840 // Check for necessary no-wrap and overflow constraints. 841 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin; 842 auto *Add = cast<BinaryOperator>(Op0); 843 if ((IsSigned && !Add->hasNoSignedWrap()) || 844 (!IsSigned && !Add->hasNoUnsignedWrap())) 845 return nullptr; 846 847 // If the constant difference overflows, then instsimplify should reduce the 848 // min/max to the add or C1. 849 bool Overflow; 850 APInt CDiff = 851 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow); 852 assert(!Overflow && "Expected simplify of min/max"); 853 854 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0 855 // Note: the "mismatched" no-overflow setting does not propagate. 856 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff); 857 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC); 858 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1)) 859 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1)); 860 } 861 /// Match a sadd_sat or ssub_sat which is using min/max to clamp the value. 862 Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) { 863 Type *Ty = MinMax1.getType(); 864 865 // We are looking for a tree of: 866 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B)))) 867 // Where the min and max could be reversed 868 Instruction *MinMax2; 869 BinaryOperator *AddSub; 870 const APInt *MinValue, *MaxValue; 871 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) { 872 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue)))) 873 return nullptr; 874 } else if (match(&MinMax1, 875 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) { 876 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue)))) 877 return nullptr; 878 } else 879 return nullptr; 880 881 // Check that the constants clamp a saturate, and that the new type would be 882 // sensible to convert to. 883 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1) 884 return nullptr; 885 // In what bitwidth can this be treated as saturating arithmetics? 886 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1; 887 // FIXME: This isn't quite right for vectors, but using the scalar type is a 888 // good first approximation for what should be done there. 889 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth)) 890 return nullptr; 891 892 // Also make sure that the inner min/max and the add/sub have one use. 893 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse()) 894 return nullptr; 895 896 // Create the new type (which can be a vector type) 897 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth); 898 899 Intrinsic::ID IntrinsicID; 900 if (AddSub->getOpcode() == Instruction::Add) 901 IntrinsicID = Intrinsic::sadd_sat; 902 else if (AddSub->getOpcode() == Instruction::Sub) 903 IntrinsicID = Intrinsic::ssub_sat; 904 else 905 return nullptr; 906 907 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This 908 // is usually achieved via a sext from a smaller type. 909 if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) > 910 NewBitWidth || 911 ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth) 912 return nullptr; 913 914 // Finally create and return the sat intrinsic, truncated to the new type 915 Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy); 916 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy); 917 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy); 918 Value *Sat = Builder.CreateCall(F, {AT, BT}); 919 return CastInst::Create(Instruction::SExt, Sat, Ty); 920 } 921 922 923 /// If we have a clamp pattern like max (min X, 42), 41 -- where the output 924 /// can only be one of two possible constant values -- turn that into a select 925 /// of constants. 926 static Instruction *foldClampRangeOfTwo(IntrinsicInst *II, 927 InstCombiner::BuilderTy &Builder) { 928 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); 929 Value *X; 930 const APInt *C0, *C1; 931 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse()) 932 return nullptr; 933 934 CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; 935 switch (II->getIntrinsicID()) { 936 case Intrinsic::smax: 937 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1) 938 Pred = ICmpInst::ICMP_SGT; 939 break; 940 case Intrinsic::smin: 941 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1) 942 Pred = ICmpInst::ICMP_SLT; 943 break; 944 case Intrinsic::umax: 945 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1) 946 Pred = ICmpInst::ICMP_UGT; 947 break; 948 case Intrinsic::umin: 949 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1) 950 Pred = ICmpInst::ICMP_ULT; 951 break; 952 default: 953 llvm_unreachable("Expected min/max intrinsic"); 954 } 955 if (Pred == CmpInst::BAD_ICMP_PREDICATE) 956 return nullptr; 957 958 // max (min X, 42), 41 --> X > 41 ? 42 : 41 959 // min (max X, 42), 43 --> X < 43 ? 42 : 43 960 Value *Cmp = Builder.CreateICmp(Pred, X, I1); 961 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1); 962 } 963 964 /// If this min/max has a constant operand and an operand that is a matching 965 /// min/max with a constant operand, constant-fold the 2 constant operands. 966 static Instruction *reassociateMinMaxWithConstants(IntrinsicInst *II) { 967 Intrinsic::ID MinMaxID = II->getIntrinsicID(); 968 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0)); 969 if (!LHS || LHS->getIntrinsicID() != MinMaxID) 970 return nullptr; 971 972 Constant *C0, *C1; 973 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) || 974 !match(II->getArgOperand(1), m_ImmConstant(C1))) 975 return nullptr; 976 977 // max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC 978 ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID); 979 Constant *CondC = ConstantExpr::getICmp(Pred, C0, C1); 980 Constant *NewC = ConstantExpr::getSelect(CondC, C0, C1); 981 982 Module *Mod = II->getModule(); 983 Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType()); 984 return CallInst::Create(MinMax, {LHS->getArgOperand(0), NewC}); 985 } 986 987 /// If this min/max has a matching min/max operand with a constant, try to push 988 /// the constant operand into this instruction. This can enable more folds. 989 static Instruction * 990 reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, 991 InstCombiner::BuilderTy &Builder) { 992 // Match and capture a min/max operand candidate. 993 Value *X, *Y; 994 Constant *C; 995 Instruction *Inner; 996 if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd( 997 m_Instruction(Inner), 998 m_MaxOrMin(m_Value(X), m_ImmConstant(C)))), 999 m_Value(Y)))) 1000 return nullptr; 1001 1002 // The inner op must match. Check for constants to avoid infinite loops. 1003 Intrinsic::ID MinMaxID = II->getIntrinsicID(); 1004 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner); 1005 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID || 1006 match(X, m_ImmConstant()) || match(Y, m_ImmConstant())) 1007 return nullptr; 1008 1009 // max (max X, C), Y --> max (max X, Y), C 1010 Function *MinMax = 1011 Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType()); 1012 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y); 1013 NewInner->takeName(Inner); 1014 return CallInst::Create(MinMax, {NewInner, C}); 1015 } 1016 1017 /// Reduce a sequence of min/max intrinsics with a common operand. 1018 static Instruction *factorizeMinMaxTree(IntrinsicInst *II) { 1019 // Match 3 of the same min/max ops. Example: umin(umin(), umin()). 1020 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0)); 1021 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1)); 1022 Intrinsic::ID MinMaxID = II->getIntrinsicID(); 1023 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID || 1024 RHS->getIntrinsicID() != MinMaxID || 1025 (!LHS->hasOneUse() && !RHS->hasOneUse())) 1026 return nullptr; 1027 1028 Value *A = LHS->getArgOperand(0); 1029 Value *B = LHS->getArgOperand(1); 1030 Value *C = RHS->getArgOperand(0); 1031 Value *D = RHS->getArgOperand(1); 1032 1033 // Look for a common operand. 1034 Value *MinMaxOp = nullptr; 1035 Value *ThirdOp = nullptr; 1036 if (LHS->hasOneUse()) { 1037 // If the LHS is only used in this chain and the RHS is used outside of it, 1038 // reuse the RHS min/max because that will eliminate the LHS. 1039 if (D == A || C == A) { 1040 // min(min(a, b), min(c, a)) --> min(min(c, a), b) 1041 // min(min(a, b), min(a, d)) --> min(min(a, d), b) 1042 MinMaxOp = RHS; 1043 ThirdOp = B; 1044 } else if (D == B || C == B) { 1045 // min(min(a, b), min(c, b)) --> min(min(c, b), a) 1046 // min(min(a, b), min(b, d)) --> min(min(b, d), a) 1047 MinMaxOp = RHS; 1048 ThirdOp = A; 1049 } 1050 } else { 1051 assert(RHS->hasOneUse() && "Expected one-use operand"); 1052 // Reuse the LHS. This will eliminate the RHS. 1053 if (D == A || D == B) { 1054 // min(min(a, b), min(c, a)) --> min(min(a, b), c) 1055 // min(min(a, b), min(c, b)) --> min(min(a, b), c) 1056 MinMaxOp = LHS; 1057 ThirdOp = C; 1058 } else if (C == A || C == B) { 1059 // min(min(a, b), min(b, d)) --> min(min(a, b), d) 1060 // min(min(a, b), min(c, b)) --> min(min(a, b), d) 1061 MinMaxOp = LHS; 1062 ThirdOp = D; 1063 } 1064 } 1065 1066 if (!MinMaxOp || !ThirdOp) 1067 return nullptr; 1068 1069 Module *Mod = II->getModule(); 1070 Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType()); 1071 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp }); 1072 } 1073 1074 /// CallInst simplification. This mostly only handles folding of intrinsic 1075 /// instructions. For normal calls, it allows visitCallBase to do the heavy 1076 /// lifting. 1077 Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { 1078 // Don't try to simplify calls without uses. It will not do anything useful, 1079 // but will result in the following folds being skipped. 1080 if (!CI.use_empty()) 1081 if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI))) 1082 return replaceInstUsesWith(CI, V); 1083 1084 if (isFreeCall(&CI, &TLI)) 1085 return visitFree(CI); 1086 1087 // If the caller function (i.e. us, the function that contains this CallInst) 1088 // is nounwind, mark the call as nounwind, even if the callee isn't. 1089 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) { 1090 CI.setDoesNotThrow(); 1091 return &CI; 1092 } 1093 1094 IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); 1095 if (!II) return visitCallBase(CI); 1096 1097 // For atomic unordered mem intrinsics if len is not a positive or 1098 // not a multiple of element size then behavior is undefined. 1099 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(II)) 1100 if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(AMI->getLength())) 1101 if (NumBytes->getSExtValue() < 0 || 1102 (NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != 0)) { 1103 CreateNonTerminatorUnreachable(AMI); 1104 assert(AMI->getType()->isVoidTy() && 1105 "non void atomic unordered mem intrinsic"); 1106 return eraseInstFromFunction(*AMI); 1107 } 1108 1109 // Intrinsics cannot occur in an invoke or a callbr, so handle them here 1110 // instead of in visitCallBase. 1111 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) { 1112 bool Changed = false; 1113 1114 // memmove/cpy/set of zero bytes is a noop. 1115 if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) { 1116 if (NumBytes->isNullValue()) 1117 return eraseInstFromFunction(CI); 1118 1119 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) 1120 if (CI->getZExtValue() == 1) { 1121 // Replace the instruction with just byte operations. We would 1122 // transform other cases to loads/stores, but we don't know if 1123 // alignment is sufficient. 1124 } 1125 } 1126 1127 // No other transformations apply to volatile transfers. 1128 if (auto *M = dyn_cast<MemIntrinsic>(MI)) 1129 if (M->isVolatile()) 1130 return nullptr; 1131 1132 // If we have a memmove and the source operation is a constant global, 1133 // then the source and dest pointers can't alias, so we can change this 1134 // into a call to memcpy. 1135 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) { 1136 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource())) 1137 if (GVSrc->isConstant()) { 1138 Module *M = CI.getModule(); 1139 Intrinsic::ID MemCpyID = 1140 isa<AtomicMemMoveInst>(MMI) 1141 ? Intrinsic::memcpy_element_unordered_atomic 1142 : Intrinsic::memcpy; 1143 Type *Tys[3] = { CI.getArgOperand(0)->getType(), 1144 CI.getArgOperand(1)->getType(), 1145 CI.getArgOperand(2)->getType() }; 1146 CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys)); 1147 Changed = true; 1148 } 1149 } 1150 1151 if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(MI)) { 1152 // memmove(x,x,size) -> noop. 1153 if (MTI->getSource() == MTI->getDest()) 1154 return eraseInstFromFunction(CI); 1155 } 1156 1157 // If we can determine a pointer alignment that is bigger than currently 1158 // set, update the alignment. 1159 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) { 1160 if (Instruction *I = SimplifyAnyMemTransfer(MTI)) 1161 return I; 1162 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) { 1163 if (Instruction *I = SimplifyAnyMemSet(MSI)) 1164 return I; 1165 } 1166 1167 if (Changed) return II; 1168 } 1169 1170 // For fixed width vector result intrinsics, use the generic demanded vector 1171 // support. 1172 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) { 1173 auto VWidth = IIFVTy->getNumElements(); 1174 APInt UndefElts(VWidth, 0); 1175 APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); 1176 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) { 1177 if (V != II) 1178 return replaceInstUsesWith(*II, V); 1179 return II; 1180 } 1181 } 1182 1183 if (II->isCommutative()) { 1184 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI)) 1185 return NewCall; 1186 } 1187 1188 Intrinsic::ID IID = II->getIntrinsicID(); 1189 switch (IID) { 1190 case Intrinsic::objectsize: 1191 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false)) 1192 return replaceInstUsesWith(CI, V); 1193 return nullptr; 1194 case Intrinsic::abs: { 1195 Value *IIOperand = II->getArgOperand(0); 1196 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue(); 1197 1198 // abs(-x) -> abs(x) 1199 // TODO: Copy nsw if it was present on the neg? 1200 Value *X; 1201 if (match(IIOperand, m_Neg(m_Value(X)))) 1202 return replaceOperand(*II, 0, X); 1203 if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X))))) 1204 return replaceOperand(*II, 0, X); 1205 if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X)))) 1206 return replaceOperand(*II, 0, X); 1207 1208 if (Optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) { 1209 // abs(x) -> x if x >= 0 1210 if (!*Sign) 1211 return replaceInstUsesWith(*II, IIOperand); 1212 1213 // abs(x) -> -x if x < 0 1214 if (IntMinIsPoison) 1215 return BinaryOperator::CreateNSWNeg(IIOperand); 1216 return BinaryOperator::CreateNeg(IIOperand); 1217 } 1218 1219 // abs (sext X) --> zext (abs X*) 1220 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing. 1221 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) { 1222 Value *NarrowAbs = 1223 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse()); 1224 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType()); 1225 } 1226 1227 // Match a complicated way to check if a number is odd/even: 1228 // abs (srem X, 2) --> and X, 1 1229 const APInt *C; 1230 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2) 1231 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1)); 1232 1233 break; 1234 } 1235 case Intrinsic::umin: { 1236 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); 1237 // umin(x, 1) == zext(x != 0) 1238 if (match(I1, m_One())) { 1239 Value *Zero = Constant::getNullValue(I0->getType()); 1240 Value *Cmp = Builder.CreateICmpNE(I0, Zero); 1241 return CastInst::Create(Instruction::ZExt, Cmp, II->getType()); 1242 } 1243 LLVM_FALLTHROUGH; 1244 } 1245 case Intrinsic::umax: { 1246 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); 1247 Value *X, *Y; 1248 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) && 1249 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { 1250 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y); 1251 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType()); 1252 } 1253 Constant *C; 1254 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) && 1255 I0->hasOneUse()) { 1256 Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType()); 1257 if (ConstantExpr::getZExt(NarrowC, II->getType()) == C) { 1258 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC); 1259 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType()); 1260 } 1261 } 1262 // If both operands of unsigned min/max are sign-extended, it is still ok 1263 // to narrow the operation. 1264 LLVM_FALLTHROUGH; 1265 } 1266 case Intrinsic::smax: 1267 case Intrinsic::smin: { 1268 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); 1269 Value *X, *Y; 1270 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) && 1271 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { 1272 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y); 1273 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType()); 1274 } 1275 1276 Constant *C; 1277 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) && 1278 I0->hasOneUse()) { 1279 Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType()); 1280 if (ConstantExpr::getSExt(NarrowC, II->getType()) == C) { 1281 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC); 1282 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType()); 1283 } 1284 } 1285 1286 if (IID == Intrinsic::smax || IID == Intrinsic::smin) { 1287 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y) 1288 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y) 1289 // TODO: Canonicalize neg after min/max if I1 is constant. 1290 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) && 1291 (I0->hasOneUse() || I1->hasOneUse())) { 1292 Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID); 1293 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y); 1294 return BinaryOperator::CreateNSWNeg(InvMaxMin); 1295 } 1296 } 1297 1298 // If we can eliminate ~A and Y is free to invert: 1299 // max ~A, Y --> ~(min A, ~Y) 1300 // 1301 // Examples: 1302 // max ~A, ~Y --> ~(min A, Y) 1303 // max ~A, C --> ~(min A, ~C) 1304 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z)) 1305 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * { 1306 Value *A; 1307 if (match(X, m_OneUse(m_Not(m_Value(A)))) && 1308 !isFreeToInvert(A, A->hasOneUse()) && 1309 isFreeToInvert(Y, Y->hasOneUse())) { 1310 Value *NotY = Builder.CreateNot(Y); 1311 Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID); 1312 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY); 1313 return BinaryOperator::CreateNot(InvMaxMin); 1314 } 1315 return nullptr; 1316 }; 1317 1318 if (Instruction *I = moveNotAfterMinMax(I0, I1)) 1319 return I; 1320 if (Instruction *I = moveNotAfterMinMax(I1, I0)) 1321 return I; 1322 1323 if (Instruction *I = moveAddAfterMinMax(II, Builder)) 1324 return I; 1325 1326 // smax(X, -X) --> abs(X) 1327 // smin(X, -X) --> -abs(X) 1328 // umax(X, -X) --> -abs(X) 1329 // umin(X, -X) --> abs(X) 1330 if (isKnownNegation(I0, I1)) { 1331 // We can choose either operand as the input to abs(), but if we can 1332 // eliminate the only use of a value, that's better for subsequent 1333 // transforms/analysis. 1334 if (I0->hasOneUse() && !I1->hasOneUse()) 1335 std::swap(I0, I1); 1336 1337 // This is some variant of abs(). See if we can propagate 'nsw' to the abs 1338 // operation and potentially its negation. 1339 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true); 1340 Value *Abs = Builder.CreateBinaryIntrinsic( 1341 Intrinsic::abs, I0, 1342 ConstantInt::getBool(II->getContext(), IntMinIsPoison)); 1343 1344 // We don't have a "nabs" intrinsic, so negate if needed based on the 1345 // max/min operation. 1346 if (IID == Intrinsic::smin || IID == Intrinsic::umax) 1347 Abs = Builder.CreateNeg(Abs, "nabs", /* NUW */ false, IntMinIsPoison); 1348 return replaceInstUsesWith(CI, Abs); 1349 } 1350 1351 if (Instruction *Sel = foldClampRangeOfTwo(II, Builder)) 1352 return Sel; 1353 1354 if (Instruction *SAdd = matchSAddSubSat(*II)) 1355 return SAdd; 1356 1357 if (match(I1, m_ImmConstant())) 1358 if (auto *Sel = dyn_cast<SelectInst>(I0)) 1359 if (Instruction *R = FoldOpIntoSelect(*II, Sel)) 1360 return R; 1361 1362 if (Instruction *NewMinMax = reassociateMinMaxWithConstants(II)) 1363 return NewMinMax; 1364 1365 if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder)) 1366 return R; 1367 1368 if (Instruction *NewMinMax = factorizeMinMaxTree(II)) 1369 return NewMinMax; 1370 1371 break; 1372 } 1373 case Intrinsic::bswap: { 1374 Value *IIOperand = II->getArgOperand(0); 1375 Value *X = nullptr; 1376 1377 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as 1378 // inverse-shift-of-bswap: 1379 // bswap (shl X, C) --> lshr (bswap X), C 1380 // bswap (lshr X, C) --> shl (bswap X), C 1381 // TODO: Use knownbits to allow variable shift and non-splat vector match. 1382 BinaryOperator *BO; 1383 if (match(IIOperand, m_OneUse(m_BinOp(BO)))) { 1384 const APInt *C; 1385 if (match(BO, m_LogicalShift(m_Value(X), m_APIntAllowUndef(C))) && 1386 (*C & 7) == 0) { 1387 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X); 1388 BinaryOperator::BinaryOps InverseShift = 1389 BO->getOpcode() == Instruction::Shl ? Instruction::LShr 1390 : Instruction::Shl; 1391 return BinaryOperator::Create(InverseShift, NewSwap, BO->getOperand(1)); 1392 } 1393 } 1394 1395 KnownBits Known = computeKnownBits(IIOperand, 0, II); 1396 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8); 1397 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8); 1398 unsigned BW = Known.getBitWidth(); 1399 1400 // bswap(x) -> shift(x) if x has exactly one "active byte" 1401 if (BW - LZ - TZ == 8) { 1402 assert(LZ != TZ && "active byte cannot be in the middle"); 1403 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x 1404 return BinaryOperator::CreateNUWShl( 1405 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ)); 1406 // -> lshr(x) if the "active byte" is in the high part of x 1407 return BinaryOperator::CreateExactLShr( 1408 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ)); 1409 } 1410 1411 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) 1412 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { 1413 unsigned C = X->getType()->getScalarSizeInBits() - BW; 1414 Value *CV = ConstantInt::get(X->getType(), C); 1415 Value *V = Builder.CreateLShr(X, CV); 1416 return new TruncInst(V, IIOperand->getType()); 1417 } 1418 break; 1419 } 1420 case Intrinsic::masked_load: 1421 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II)) 1422 return replaceInstUsesWith(CI, SimplifiedMaskedOp); 1423 break; 1424 case Intrinsic::masked_store: 1425 return simplifyMaskedStore(*II); 1426 case Intrinsic::masked_gather: 1427 return simplifyMaskedGather(*II); 1428 case Intrinsic::masked_scatter: 1429 return simplifyMaskedScatter(*II); 1430 case Intrinsic::launder_invariant_group: 1431 case Intrinsic::strip_invariant_group: 1432 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this)) 1433 return replaceInstUsesWith(*II, SkippedBarrier); 1434 break; 1435 case Intrinsic::powi: 1436 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) { 1437 // 0 and 1 are handled in instsimplify 1438 // powi(x, -1) -> 1/x 1439 if (Power->isMinusOne()) 1440 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0), 1441 II->getArgOperand(0), II); 1442 // powi(x, 2) -> x*x 1443 if (Power->equalsInt(2)) 1444 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0), 1445 II->getArgOperand(0), II); 1446 1447 if (!Power->getValue()[0]) { 1448 Value *X; 1449 // If power is even: 1450 // powi(-x, p) -> powi(x, p) 1451 // powi(fabs(x), p) -> powi(x, p) 1452 // powi(copysign(x, y), p) -> powi(x, p) 1453 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) || 1454 match(II->getArgOperand(0), m_FAbs(m_Value(X))) || 1455 match(II->getArgOperand(0), 1456 m_Intrinsic<Intrinsic::copysign>(m_Value(X), m_Value()))) 1457 return replaceOperand(*II, 0, X); 1458 } 1459 } 1460 break; 1461 1462 case Intrinsic::cttz: 1463 case Intrinsic::ctlz: 1464 if (auto *I = foldCttzCtlz(*II, *this)) 1465 return I; 1466 break; 1467 1468 case Intrinsic::ctpop: 1469 if (auto *I = foldCtpop(*II, *this)) 1470 return I; 1471 break; 1472 1473 case Intrinsic::fshl: 1474 case Intrinsic::fshr: { 1475 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1); 1476 Type *Ty = II->getType(); 1477 unsigned BitWidth = Ty->getScalarSizeInBits(); 1478 Constant *ShAmtC; 1479 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC)) && 1480 !ShAmtC->containsConstantExpression()) { 1481 // Canonicalize a shift amount constant operand to modulo the bit-width. 1482 Constant *WidthC = ConstantInt::get(Ty, BitWidth); 1483 Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC); 1484 if (ModuloC != ShAmtC) 1485 return replaceOperand(*II, 2, ModuloC); 1486 1487 assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) == 1488 ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) && 1489 "Shift amount expected to be modulo bitwidth"); 1490 1491 // Canonicalize funnel shift right by constant to funnel shift left. This 1492 // is not entirely arbitrary. For historical reasons, the backend may 1493 // recognize rotate left patterns but miss rotate right patterns. 1494 if (IID == Intrinsic::fshr) { 1495 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) 1496 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC); 1497 Module *Mod = II->getModule(); 1498 Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty); 1499 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC }); 1500 } 1501 assert(IID == Intrinsic::fshl && 1502 "All funnel shifts by simple constants should go left"); 1503 1504 // fshl(X, 0, C) --> shl X, C 1505 // fshl(X, undef, C) --> shl X, C 1506 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef())) 1507 return BinaryOperator::CreateShl(Op0, ShAmtC); 1508 1509 // fshl(0, X, C) --> lshr X, (BW-C) 1510 // fshl(undef, X, C) --> lshr X, (BW-C) 1511 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef())) 1512 return BinaryOperator::CreateLShr(Op1, 1513 ConstantExpr::getSub(WidthC, ShAmtC)); 1514 1515 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form) 1516 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) { 1517 Module *Mod = II->getModule(); 1518 Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty); 1519 return CallInst::Create(Bswap, { Op0 }); 1520 } 1521 } 1522 1523 // Left or right might be masked. 1524 if (SimplifyDemandedInstructionBits(*II)) 1525 return &CI; 1526 1527 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth, 1528 // so only the low bits of the shift amount are demanded if the bitwidth is 1529 // a power-of-2. 1530 if (!isPowerOf2_32(BitWidth)) 1531 break; 1532 APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth)); 1533 KnownBits Op2Known(BitWidth); 1534 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known)) 1535 return &CI; 1536 break; 1537 } 1538 case Intrinsic::uadd_with_overflow: 1539 case Intrinsic::sadd_with_overflow: { 1540 if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) 1541 return I; 1542 1543 // Given 2 constant operands whose sum does not overflow: 1544 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1 1545 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1 1546 Value *X; 1547 const APInt *C0, *C1; 1548 Value *Arg0 = II->getArgOperand(0); 1549 Value *Arg1 = II->getArgOperand(1); 1550 bool IsSigned = IID == Intrinsic::sadd_with_overflow; 1551 bool HasNWAdd = IsSigned ? match(Arg0, m_NSWAdd(m_Value(X), m_APInt(C0))) 1552 : match(Arg0, m_NUWAdd(m_Value(X), m_APInt(C0))); 1553 if (HasNWAdd && match(Arg1, m_APInt(C1))) { 1554 bool Overflow; 1555 APInt NewC = 1556 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow); 1557 if (!Overflow) 1558 return replaceInstUsesWith( 1559 *II, Builder.CreateBinaryIntrinsic( 1560 IID, X, ConstantInt::get(Arg1->getType(), NewC))); 1561 } 1562 break; 1563 } 1564 1565 case Intrinsic::umul_with_overflow: 1566 case Intrinsic::smul_with_overflow: 1567 case Intrinsic::usub_with_overflow: 1568 if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) 1569 return I; 1570 break; 1571 1572 case Intrinsic::ssub_with_overflow: { 1573 if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) 1574 return I; 1575 1576 Constant *C; 1577 Value *Arg0 = II->getArgOperand(0); 1578 Value *Arg1 = II->getArgOperand(1); 1579 // Given a constant C that is not the minimum signed value 1580 // for an integer of a given bit width: 1581 // 1582 // ssubo X, C -> saddo X, -C 1583 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) { 1584 Value *NegVal = ConstantExpr::getNeg(C); 1585 // Build a saddo call that is equivalent to the discovered 1586 // ssubo call. 1587 return replaceInstUsesWith( 1588 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, 1589 Arg0, NegVal)); 1590 } 1591 1592 break; 1593 } 1594 1595 case Intrinsic::uadd_sat: 1596 case Intrinsic::sadd_sat: 1597 case Intrinsic::usub_sat: 1598 case Intrinsic::ssub_sat: { 1599 SaturatingInst *SI = cast<SaturatingInst>(II); 1600 Type *Ty = SI->getType(); 1601 Value *Arg0 = SI->getLHS(); 1602 Value *Arg1 = SI->getRHS(); 1603 1604 // Make use of known overflow information. 1605 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(), 1606 Arg0, Arg1, SI); 1607 switch (OR) { 1608 case OverflowResult::MayOverflow: 1609 break; 1610 case OverflowResult::NeverOverflows: 1611 if (SI->isSigned()) 1612 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1); 1613 else 1614 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1); 1615 case OverflowResult::AlwaysOverflowsLow: { 1616 unsigned BitWidth = Ty->getScalarSizeInBits(); 1617 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned()); 1618 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min)); 1619 } 1620 case OverflowResult::AlwaysOverflowsHigh: { 1621 unsigned BitWidth = Ty->getScalarSizeInBits(); 1622 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned()); 1623 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max)); 1624 } 1625 } 1626 1627 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN 1628 Constant *C; 1629 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) && 1630 C->isNotMinSignedValue()) { 1631 Value *NegVal = ConstantExpr::getNeg(C); 1632 return replaceInstUsesWith( 1633 *II, Builder.CreateBinaryIntrinsic( 1634 Intrinsic::sadd_sat, Arg0, NegVal)); 1635 } 1636 1637 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2)) 1638 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2)) 1639 // if Val and Val2 have the same sign 1640 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) { 1641 Value *X; 1642 const APInt *Val, *Val2; 1643 APInt NewVal; 1644 bool IsUnsigned = 1645 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat; 1646 if (Other->getIntrinsicID() == IID && 1647 match(Arg1, m_APInt(Val)) && 1648 match(Other->getArgOperand(0), m_Value(X)) && 1649 match(Other->getArgOperand(1), m_APInt(Val2))) { 1650 if (IsUnsigned) 1651 NewVal = Val->uadd_sat(*Val2); 1652 else if (Val->isNonNegative() == Val2->isNonNegative()) { 1653 bool Overflow; 1654 NewVal = Val->sadd_ov(*Val2, Overflow); 1655 if (Overflow) { 1656 // Both adds together may add more than SignedMaxValue 1657 // without saturating the final result. 1658 break; 1659 } 1660 } else { 1661 // Cannot fold saturated addition with different signs. 1662 break; 1663 } 1664 1665 return replaceInstUsesWith( 1666 *II, Builder.CreateBinaryIntrinsic( 1667 IID, X, ConstantInt::get(II->getType(), NewVal))); 1668 } 1669 } 1670 break; 1671 } 1672 1673 case Intrinsic::minnum: 1674 case Intrinsic::maxnum: 1675 case Intrinsic::minimum: 1676 case Intrinsic::maximum: { 1677 Value *Arg0 = II->getArgOperand(0); 1678 Value *Arg1 = II->getArgOperand(1); 1679 Value *X, *Y; 1680 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) && 1681 (Arg0->hasOneUse() || Arg1->hasOneUse())) { 1682 // If both operands are negated, invert the call and negate the result: 1683 // min(-X, -Y) --> -(max(X, Y)) 1684 // max(-X, -Y) --> -(min(X, Y)) 1685 Intrinsic::ID NewIID; 1686 switch (IID) { 1687 case Intrinsic::maxnum: 1688 NewIID = Intrinsic::minnum; 1689 break; 1690 case Intrinsic::minnum: 1691 NewIID = Intrinsic::maxnum; 1692 break; 1693 case Intrinsic::maximum: 1694 NewIID = Intrinsic::minimum; 1695 break; 1696 case Intrinsic::minimum: 1697 NewIID = Intrinsic::maximum; 1698 break; 1699 default: 1700 llvm_unreachable("unexpected intrinsic ID"); 1701 } 1702 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II); 1703 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall); 1704 FNeg->copyIRFlags(II); 1705 return FNeg; 1706 } 1707 1708 // m(m(X, C2), C1) -> m(X, C) 1709 const APFloat *C1, *C2; 1710 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) { 1711 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) && 1712 ((match(M->getArgOperand(0), m_Value(X)) && 1713 match(M->getArgOperand(1), m_APFloat(C2))) || 1714 (match(M->getArgOperand(1), m_Value(X)) && 1715 match(M->getArgOperand(0), m_APFloat(C2))))) { 1716 APFloat Res(0.0); 1717 switch (IID) { 1718 case Intrinsic::maxnum: 1719 Res = maxnum(*C1, *C2); 1720 break; 1721 case Intrinsic::minnum: 1722 Res = minnum(*C1, *C2); 1723 break; 1724 case Intrinsic::maximum: 1725 Res = maximum(*C1, *C2); 1726 break; 1727 case Intrinsic::minimum: 1728 Res = minimum(*C1, *C2); 1729 break; 1730 default: 1731 llvm_unreachable("unexpected intrinsic ID"); 1732 } 1733 Instruction *NewCall = Builder.CreateBinaryIntrinsic( 1734 IID, X, ConstantFP::get(Arg0->getType(), Res), II); 1735 // TODO: Conservatively intersecting FMF. If Res == C2, the transform 1736 // was a simplification (so Arg0 and its original flags could 1737 // propagate?) 1738 NewCall->andIRFlags(M); 1739 return replaceInstUsesWith(*II, NewCall); 1740 } 1741 } 1742 1743 // m((fpext X), (fpext Y)) -> fpext (m(X, Y)) 1744 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) && 1745 match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) && 1746 X->getType() == Y->getType()) { 1747 Value *NewCall = 1748 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName()); 1749 return new FPExtInst(NewCall, II->getType()); 1750 } 1751 1752 // max X, -X --> fabs X 1753 // min X, -X --> -(fabs X) 1754 // TODO: Remove one-use limitation? That is obviously better for max. 1755 // It would be an extra instruction for min (fnabs), but that is 1756 // still likely better for analysis and codegen. 1757 if ((match(Arg0, m_OneUse(m_FNeg(m_Value(X)))) && Arg1 == X) || 1758 (match(Arg1, m_OneUse(m_FNeg(m_Value(X)))) && Arg0 == X)) { 1759 Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II); 1760 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum) 1761 R = Builder.CreateFNegFMF(R, II); 1762 return replaceInstUsesWith(*II, R); 1763 } 1764 1765 break; 1766 } 1767 case Intrinsic::fmuladd: { 1768 // Canonicalize fast fmuladd to the separate fmul + fadd. 1769 if (II->isFast()) { 1770 BuilderTy::FastMathFlagGuard Guard(Builder); 1771 Builder.setFastMathFlags(II->getFastMathFlags()); 1772 Value *Mul = Builder.CreateFMul(II->getArgOperand(0), 1773 II->getArgOperand(1)); 1774 Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2)); 1775 Add->takeName(II); 1776 return replaceInstUsesWith(*II, Add); 1777 } 1778 1779 // Try to simplify the underlying FMul. 1780 if (Value *V = SimplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1), 1781 II->getFastMathFlags(), 1782 SQ.getWithInstruction(II))) { 1783 auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2)); 1784 FAdd->copyFastMathFlags(II); 1785 return FAdd; 1786 } 1787 1788 LLVM_FALLTHROUGH; 1789 } 1790 case Intrinsic::fma: { 1791 // fma fneg(x), fneg(y), z -> fma x, y, z 1792 Value *Src0 = II->getArgOperand(0); 1793 Value *Src1 = II->getArgOperand(1); 1794 Value *X, *Y; 1795 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) { 1796 replaceOperand(*II, 0, X); 1797 replaceOperand(*II, 1, Y); 1798 return II; 1799 } 1800 1801 // fma fabs(x), fabs(x), z -> fma x, x, z 1802 if (match(Src0, m_FAbs(m_Value(X))) && 1803 match(Src1, m_FAbs(m_Specific(X)))) { 1804 replaceOperand(*II, 0, X); 1805 replaceOperand(*II, 1, X); 1806 return II; 1807 } 1808 1809 // Try to simplify the underlying FMul. We can only apply simplifications 1810 // that do not require rounding. 1811 if (Value *V = SimplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1), 1812 II->getFastMathFlags(), 1813 SQ.getWithInstruction(II))) { 1814 auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2)); 1815 FAdd->copyFastMathFlags(II); 1816 return FAdd; 1817 } 1818 1819 // fma x, y, 0 -> fmul x, y 1820 // This is always valid for -0.0, but requires nsz for +0.0 as 1821 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own. 1822 if (match(II->getArgOperand(2), m_NegZeroFP()) || 1823 (match(II->getArgOperand(2), m_PosZeroFP()) && 1824 II->getFastMathFlags().noSignedZeros())) 1825 return BinaryOperator::CreateFMulFMF(Src0, Src1, II); 1826 1827 break; 1828 } 1829 case Intrinsic::copysign: { 1830 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1); 1831 if (SignBitMustBeZero(Sign, &TLI)) { 1832 // If we know that the sign argument is positive, reduce to FABS: 1833 // copysign Mag, +Sign --> fabs Mag 1834 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II); 1835 return replaceInstUsesWith(*II, Fabs); 1836 } 1837 // TODO: There should be a ValueTracking sibling like SignBitMustBeOne. 1838 const APFloat *C; 1839 if (match(Sign, m_APFloat(C)) && C->isNegative()) { 1840 // If we know that the sign argument is negative, reduce to FNABS: 1841 // copysign Mag, -Sign --> fneg (fabs Mag) 1842 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II); 1843 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II)); 1844 } 1845 1846 // Propagate sign argument through nested calls: 1847 // copysign Mag, (copysign ?, X) --> copysign Mag, X 1848 Value *X; 1849 if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X)))) 1850 return replaceOperand(*II, 1, X); 1851 1852 // Peek through changes of magnitude's sign-bit. This call rewrites those: 1853 // copysign (fabs X), Sign --> copysign X, Sign 1854 // copysign (fneg X), Sign --> copysign X, Sign 1855 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X)))) 1856 return replaceOperand(*II, 0, X); 1857 1858 break; 1859 } 1860 case Intrinsic::fabs: { 1861 Value *Cond, *TVal, *FVal; 1862 if (match(II->getArgOperand(0), 1863 m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) { 1864 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF 1865 if (isa<Constant>(TVal) && isa<Constant>(FVal)) { 1866 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal}); 1867 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal}); 1868 return SelectInst::Create(Cond, AbsT, AbsF); 1869 } 1870 // fabs (select Cond, -FVal, FVal) --> fabs FVal 1871 if (match(TVal, m_FNeg(m_Specific(FVal)))) 1872 return replaceOperand(*II, 0, FVal); 1873 // fabs (select Cond, TVal, -TVal) --> fabs TVal 1874 if (match(FVal, m_FNeg(m_Specific(TVal)))) 1875 return replaceOperand(*II, 0, TVal); 1876 } 1877 1878 LLVM_FALLTHROUGH; 1879 } 1880 case Intrinsic::ceil: 1881 case Intrinsic::floor: 1882 case Intrinsic::round: 1883 case Intrinsic::roundeven: 1884 case Intrinsic::nearbyint: 1885 case Intrinsic::rint: 1886 case Intrinsic::trunc: { 1887 Value *ExtSrc; 1888 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) { 1889 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x) 1890 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II); 1891 return new FPExtInst(NarrowII, II->getType()); 1892 } 1893 break; 1894 } 1895 case Intrinsic::cos: 1896 case Intrinsic::amdgcn_cos: { 1897 Value *X; 1898 Value *Src = II->getArgOperand(0); 1899 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) { 1900 // cos(-x) -> cos(x) 1901 // cos(fabs(x)) -> cos(x) 1902 return replaceOperand(*II, 0, X); 1903 } 1904 break; 1905 } 1906 case Intrinsic::sin: { 1907 Value *X; 1908 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) { 1909 // sin(-x) --> -sin(x) 1910 Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II); 1911 Instruction *FNeg = UnaryOperator::CreateFNeg(NewSin); 1912 FNeg->copyFastMathFlags(II); 1913 return FNeg; 1914 } 1915 break; 1916 } 1917 1918 case Intrinsic::arm_neon_vtbl1: 1919 case Intrinsic::aarch64_neon_tbl1: 1920 if (Value *V = simplifyNeonTbl1(*II, Builder)) 1921 return replaceInstUsesWith(*II, V); 1922 break; 1923 1924 case Intrinsic::arm_neon_vmulls: 1925 case Intrinsic::arm_neon_vmullu: 1926 case Intrinsic::aarch64_neon_smull: 1927 case Intrinsic::aarch64_neon_umull: { 1928 Value *Arg0 = II->getArgOperand(0); 1929 Value *Arg1 = II->getArgOperand(1); 1930 1931 // Handle mul by zero first: 1932 if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) { 1933 return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType())); 1934 } 1935 1936 // Check for constant LHS & RHS - in this case we just simplify. 1937 bool Zext = (IID == Intrinsic::arm_neon_vmullu || 1938 IID == Intrinsic::aarch64_neon_umull); 1939 VectorType *NewVT = cast<VectorType>(II->getType()); 1940 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) { 1941 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) { 1942 CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext); 1943 CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext); 1944 1945 return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1)); 1946 } 1947 1948 // Couldn't simplify - canonicalize constant to the RHS. 1949 std::swap(Arg0, Arg1); 1950 } 1951 1952 // Handle mul by one: 1953 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) 1954 if (ConstantInt *Splat = 1955 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue())) 1956 if (Splat->isOne()) 1957 return CastInst::CreateIntegerCast(Arg0, II->getType(), 1958 /*isSigned=*/!Zext); 1959 1960 break; 1961 } 1962 case Intrinsic::arm_neon_aesd: 1963 case Intrinsic::arm_neon_aese: 1964 case Intrinsic::aarch64_crypto_aesd: 1965 case Intrinsic::aarch64_crypto_aese: { 1966 Value *DataArg = II->getArgOperand(0); 1967 Value *KeyArg = II->getArgOperand(1); 1968 1969 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR 1970 Value *Data, *Key; 1971 if (match(KeyArg, m_ZeroInt()) && 1972 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) { 1973 replaceOperand(*II, 0, Data); 1974 replaceOperand(*II, 1, Key); 1975 return II; 1976 } 1977 break; 1978 } 1979 case Intrinsic::hexagon_V6_vandvrt: 1980 case Intrinsic::hexagon_V6_vandvrt_128B: { 1981 // Simplify Q -> V -> Q conversion. 1982 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) { 1983 Intrinsic::ID ID0 = Op0->getIntrinsicID(); 1984 if (ID0 != Intrinsic::hexagon_V6_vandqrt && 1985 ID0 != Intrinsic::hexagon_V6_vandqrt_128B) 1986 break; 1987 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1); 1988 uint64_t Bytes1 = computeKnownBits(Bytes, 0, Op0).One.getZExtValue(); 1989 uint64_t Mask1 = computeKnownBits(Mask, 0, II).One.getZExtValue(); 1990 // Check if every byte has common bits in Bytes and Mask. 1991 uint64_t C = Bytes1 & Mask1; 1992 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000)) 1993 return replaceInstUsesWith(*II, Op0->getArgOperand(0)); 1994 } 1995 break; 1996 } 1997 case Intrinsic::stackrestore: { 1998 enum class ClassifyResult { 1999 None, 2000 Alloca, 2001 StackRestore, 2002 CallWithSideEffects, 2003 }; 2004 auto Classify = [](const Instruction *I) { 2005 if (isa<AllocaInst>(I)) 2006 return ClassifyResult::Alloca; 2007 2008 if (auto *CI = dyn_cast<CallInst>(I)) { 2009 if (auto *II = dyn_cast<IntrinsicInst>(CI)) { 2010 if (II->getIntrinsicID() == Intrinsic::stackrestore) 2011 return ClassifyResult::StackRestore; 2012 2013 if (II->mayHaveSideEffects()) 2014 return ClassifyResult::CallWithSideEffects; 2015 } else { 2016 // Consider all non-intrinsic calls to be side effects 2017 return ClassifyResult::CallWithSideEffects; 2018 } 2019 } 2020 2021 return ClassifyResult::None; 2022 }; 2023 2024 // If the stacksave and the stackrestore are in the same BB, and there is 2025 // no intervening call, alloca, or stackrestore of a different stacksave, 2026 // remove the restore. This can happen when variable allocas are DCE'd. 2027 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) { 2028 if (SS->getIntrinsicID() == Intrinsic::stacksave && 2029 SS->getParent() == II->getParent()) { 2030 BasicBlock::iterator BI(SS); 2031 bool CannotRemove = false; 2032 for (++BI; &*BI != II; ++BI) { 2033 switch (Classify(&*BI)) { 2034 case ClassifyResult::None: 2035 // So far so good, look at next instructions. 2036 break; 2037 2038 case ClassifyResult::StackRestore: 2039 // If we found an intervening stackrestore for a different 2040 // stacksave, we can't remove the stackrestore. Otherwise, continue. 2041 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS) 2042 CannotRemove = true; 2043 break; 2044 2045 case ClassifyResult::Alloca: 2046 case ClassifyResult::CallWithSideEffects: 2047 // If we found an alloca, a non-intrinsic call, or an intrinsic 2048 // call with side effects, we can't remove the stackrestore. 2049 CannotRemove = true; 2050 break; 2051 } 2052 if (CannotRemove) 2053 break; 2054 } 2055 2056 if (!CannotRemove) 2057 return eraseInstFromFunction(CI); 2058 } 2059 } 2060 2061 // Scan down this block to see if there is another stack restore in the 2062 // same block without an intervening call/alloca. 2063 BasicBlock::iterator BI(II); 2064 Instruction *TI = II->getParent()->getTerminator(); 2065 bool CannotRemove = false; 2066 for (++BI; &*BI != TI; ++BI) { 2067 switch (Classify(&*BI)) { 2068 case ClassifyResult::None: 2069 // So far so good, look at next instructions. 2070 break; 2071 2072 case ClassifyResult::StackRestore: 2073 // If there is a stackrestore below this one, remove this one. 2074 return eraseInstFromFunction(CI); 2075 2076 case ClassifyResult::Alloca: 2077 case ClassifyResult::CallWithSideEffects: 2078 // If we found an alloca, a non-intrinsic call, or an intrinsic call 2079 // with side effects (such as llvm.stacksave and llvm.read_register), 2080 // we can't remove the stack restore. 2081 CannotRemove = true; 2082 break; 2083 } 2084 if (CannotRemove) 2085 break; 2086 } 2087 2088 // If the stack restore is in a return, resume, or unwind block and if there 2089 // are no allocas or calls between the restore and the return, nuke the 2090 // restore. 2091 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI))) 2092 return eraseInstFromFunction(CI); 2093 break; 2094 } 2095 case Intrinsic::lifetime_end: 2096 // Asan needs to poison memory to detect invalid access which is possible 2097 // even for empty lifetime range. 2098 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || 2099 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) || 2100 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress)) 2101 break; 2102 2103 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) { 2104 return I.getIntrinsicID() == Intrinsic::lifetime_start; 2105 })) 2106 return nullptr; 2107 break; 2108 case Intrinsic::assume: { 2109 Value *IIOperand = II->getArgOperand(0); 2110 SmallVector<OperandBundleDef, 4> OpBundles; 2111 II->getOperandBundlesAsDefs(OpBundles); 2112 2113 /// This will remove the boolean Condition from the assume given as 2114 /// argument and remove the assume if it becomes useless. 2115 /// always returns nullptr for use as a return values. 2116 auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * { 2117 assert(isa<AssumeInst>(Assume)); 2118 if (isAssumeWithEmptyBundle(*cast<AssumeInst>(II))) 2119 return eraseInstFromFunction(CI); 2120 replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext())); 2121 return nullptr; 2122 }; 2123 // Remove an assume if it is followed by an identical assume. 2124 // TODO: Do we need this? Unless there are conflicting assumptions, the 2125 // computeKnownBits(IIOperand) below here eliminates redundant assumes. 2126 Instruction *Next = II->getNextNonDebugInstruction(); 2127 if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand)))) 2128 return RemoveConditionFromAssume(Next); 2129 2130 // Canonicalize assume(a && b) -> assume(a); assume(b); 2131 // Note: New assumption intrinsics created here are registered by 2132 // the InstCombineIRInserter object. 2133 FunctionType *AssumeIntrinsicTy = II->getFunctionType(); 2134 Value *AssumeIntrinsic = II->getCalledOperand(); 2135 Value *A, *B; 2136 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) { 2137 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles, 2138 II->getName()); 2139 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName()); 2140 return eraseInstFromFunction(*II); 2141 } 2142 // assume(!(a || b)) -> assume(!a); assume(!b); 2143 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) { 2144 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, 2145 Builder.CreateNot(A), OpBundles, II->getName()); 2146 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, 2147 Builder.CreateNot(B), II->getName()); 2148 return eraseInstFromFunction(*II); 2149 } 2150 2151 // assume( (load addr) != null ) -> add 'nonnull' metadata to load 2152 // (if assume is valid at the load) 2153 CmpInst::Predicate Pred; 2154 Instruction *LHS; 2155 if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) && 2156 Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load && 2157 LHS->getType()->isPointerTy() && 2158 isValidAssumeForContext(II, LHS, &DT)) { 2159 MDNode *MD = MDNode::get(II->getContext(), None); 2160 LHS->setMetadata(LLVMContext::MD_nonnull, MD); 2161 return RemoveConditionFromAssume(II); 2162 2163 // TODO: apply nonnull return attributes to calls and invokes 2164 // TODO: apply range metadata for range check patterns? 2165 } 2166 2167 // Convert nonnull assume like: 2168 // %A = icmp ne i32* %PTR, null 2169 // call void @llvm.assume(i1 %A) 2170 // into 2171 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ] 2172 if (EnableKnowledgeRetention && 2173 match(IIOperand, m_Cmp(Pred, m_Value(A), m_Zero())) && 2174 Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) { 2175 if (auto *Replacement = buildAssumeFromKnowledge( 2176 {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) { 2177 2178 Replacement->insertBefore(Next); 2179 AC.registerAssumption(Replacement); 2180 return RemoveConditionFromAssume(II); 2181 } 2182 } 2183 2184 // Convert alignment assume like: 2185 // %B = ptrtoint i32* %A to i64 2186 // %C = and i64 %B, Constant 2187 // %D = icmp eq i64 %C, 0 2188 // call void @llvm.assume(i1 %D) 2189 // into 2190 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)] 2191 uint64_t AlignMask; 2192 if (EnableKnowledgeRetention && 2193 match(IIOperand, 2194 m_Cmp(Pred, m_And(m_Value(A), m_ConstantInt(AlignMask)), 2195 m_Zero())) && 2196 Pred == CmpInst::ICMP_EQ) { 2197 if (isPowerOf2_64(AlignMask + 1)) { 2198 uint64_t Offset = 0; 2199 match(A, m_Add(m_Value(A), m_ConstantInt(Offset))); 2200 if (match(A, m_PtrToInt(m_Value(A)))) { 2201 /// Note: this doesn't preserve the offset information but merges 2202 /// offset and alignment. 2203 /// TODO: we can generate a GEP instead of merging the alignment with 2204 /// the offset. 2205 RetainedKnowledge RK{Attribute::Alignment, 2206 (unsigned)MinAlign(Offset, AlignMask + 1), A}; 2207 if (auto *Replacement = 2208 buildAssumeFromKnowledge(RK, Next, &AC, &DT)) { 2209 2210 Replacement->insertAfter(II); 2211 AC.registerAssumption(Replacement); 2212 } 2213 return RemoveConditionFromAssume(II); 2214 } 2215 } 2216 } 2217 2218 /// Canonicalize Knowledge in operand bundles. 2219 if (EnableKnowledgeRetention && II->hasOperandBundles()) { 2220 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) { 2221 auto &BOI = II->bundle_op_info_begin()[Idx]; 2222 RetainedKnowledge RK = 2223 llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI); 2224 if (BOI.End - BOI.Begin > 2) 2225 continue; // Prevent reducing knowledge in an align with offset since 2226 // extracting a RetainedKnowledge form them looses offset 2227 // information 2228 RetainedKnowledge CanonRK = 2229 llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK, 2230 &getAssumptionCache(), 2231 &getDominatorTree()); 2232 if (CanonRK == RK) 2233 continue; 2234 if (!CanonRK) { 2235 if (BOI.End - BOI.Begin > 0) { 2236 Worklist.pushValue(II->op_begin()[BOI.Begin]); 2237 Value::dropDroppableUse(II->op_begin()[BOI.Begin]); 2238 } 2239 continue; 2240 } 2241 assert(RK.AttrKind == CanonRK.AttrKind); 2242 if (BOI.End - BOI.Begin > 0) 2243 II->op_begin()[BOI.Begin].set(CanonRK.WasOn); 2244 if (BOI.End - BOI.Begin > 1) 2245 II->op_begin()[BOI.Begin + 1].set(ConstantInt::get( 2246 Type::getInt64Ty(II->getContext()), CanonRK.ArgValue)); 2247 if (RK.WasOn) 2248 Worklist.pushValue(RK.WasOn); 2249 return II; 2250 } 2251 } 2252 2253 // If there is a dominating assume with the same condition as this one, 2254 // then this one is redundant, and should be removed. 2255 KnownBits Known(1); 2256 computeKnownBits(IIOperand, Known, 0, II); 2257 if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II))) 2258 return eraseInstFromFunction(*II); 2259 2260 // Update the cache of affected values for this assumption (we might be 2261 // here because we just simplified the condition). 2262 AC.updateAffectedValues(cast<AssumeInst>(II)); 2263 break; 2264 } 2265 case Intrinsic::experimental_guard: { 2266 // Is this guard followed by another guard? We scan forward over a small 2267 // fixed window of instructions to handle common cases with conditions 2268 // computed between guards. 2269 Instruction *NextInst = II->getNextNonDebugInstruction(); 2270 for (unsigned i = 0; i < GuardWideningWindow; i++) { 2271 // Note: Using context-free form to avoid compile time blow up 2272 if (!isSafeToSpeculativelyExecute(NextInst)) 2273 break; 2274 NextInst = NextInst->getNextNonDebugInstruction(); 2275 } 2276 Value *NextCond = nullptr; 2277 if (match(NextInst, 2278 m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) { 2279 Value *CurrCond = II->getArgOperand(0); 2280 2281 // Remove a guard that it is immediately preceded by an identical guard. 2282 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b). 2283 if (CurrCond != NextCond) { 2284 Instruction *MoveI = II->getNextNonDebugInstruction(); 2285 while (MoveI != NextInst) { 2286 auto *Temp = MoveI; 2287 MoveI = MoveI->getNextNonDebugInstruction(); 2288 Temp->moveBefore(II); 2289 } 2290 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond)); 2291 } 2292 eraseInstFromFunction(*NextInst); 2293 return II; 2294 } 2295 break; 2296 } 2297 case Intrinsic::experimental_vector_insert: { 2298 Value *Vec = II->getArgOperand(0); 2299 Value *SubVec = II->getArgOperand(1); 2300 Value *Idx = II->getArgOperand(2); 2301 auto *DstTy = dyn_cast<FixedVectorType>(II->getType()); 2302 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType()); 2303 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType()); 2304 2305 // Only canonicalize if the destination vector, Vec, and SubVec are all 2306 // fixed vectors. 2307 if (DstTy && VecTy && SubVecTy) { 2308 unsigned DstNumElts = DstTy->getNumElements(); 2309 unsigned VecNumElts = VecTy->getNumElements(); 2310 unsigned SubVecNumElts = SubVecTy->getNumElements(); 2311 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue(); 2312 2313 // An insert that entirely overwrites Vec with SubVec is a nop. 2314 if (VecNumElts == SubVecNumElts) 2315 return replaceInstUsesWith(CI, SubVec); 2316 2317 // Widen SubVec into a vector of the same width as Vec, since 2318 // shufflevector requires the two input vectors to be the same width. 2319 // Elements beyond the bounds of SubVec within the widened vector are 2320 // undefined. 2321 SmallVector<int, 8> WidenMask; 2322 unsigned i; 2323 for (i = 0; i != SubVecNumElts; ++i) 2324 WidenMask.push_back(i); 2325 for (; i != VecNumElts; ++i) 2326 WidenMask.push_back(UndefMaskElem); 2327 2328 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask); 2329 2330 SmallVector<int, 8> Mask; 2331 for (unsigned i = 0; i != IdxN; ++i) 2332 Mask.push_back(i); 2333 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i) 2334 Mask.push_back(i); 2335 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i) 2336 Mask.push_back(i); 2337 2338 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask); 2339 return replaceInstUsesWith(CI, Shuffle); 2340 } 2341 break; 2342 } 2343 case Intrinsic::experimental_vector_extract: { 2344 Value *Vec = II->getArgOperand(0); 2345 Value *Idx = II->getArgOperand(1); 2346 2347 auto *DstTy = dyn_cast<FixedVectorType>(II->getType()); 2348 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType()); 2349 2350 // Only canonicalize if the the destination vector and Vec are fixed 2351 // vectors. 2352 if (DstTy && VecTy) { 2353 unsigned DstNumElts = DstTy->getNumElements(); 2354 unsigned VecNumElts = VecTy->getNumElements(); 2355 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue(); 2356 2357 // Extracting the entirety of Vec is a nop. 2358 if (VecNumElts == DstNumElts) { 2359 replaceInstUsesWith(CI, Vec); 2360 return eraseInstFromFunction(CI); 2361 } 2362 2363 SmallVector<int, 8> Mask; 2364 for (unsigned i = 0; i != DstNumElts; ++i) 2365 Mask.push_back(IdxN + i); 2366 2367 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask); 2368 return replaceInstUsesWith(CI, Shuffle); 2369 } 2370 break; 2371 } 2372 case Intrinsic::experimental_vector_reverse: { 2373 Value *BO0, *BO1, *X, *Y; 2374 Value *Vec = II->getArgOperand(0); 2375 if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) { 2376 auto *OldBinOp = cast<BinaryOperator>(Vec); 2377 if (match(BO0, m_Intrinsic<Intrinsic::experimental_vector_reverse>( 2378 m_Value(X)))) { 2379 // rev(binop rev(X), rev(Y)) --> binop X, Y 2380 if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>( 2381 m_Value(Y)))) 2382 return replaceInstUsesWith(CI, 2383 BinaryOperator::CreateWithCopiedFlags( 2384 OldBinOp->getOpcode(), X, Y, OldBinOp, 2385 OldBinOp->getName(), II)); 2386 // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat 2387 if (isSplatValue(BO1)) 2388 return replaceInstUsesWith(CI, 2389 BinaryOperator::CreateWithCopiedFlags( 2390 OldBinOp->getOpcode(), X, BO1, 2391 OldBinOp, OldBinOp->getName(), II)); 2392 } 2393 // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y 2394 if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>( 2395 m_Value(Y))) && 2396 isSplatValue(BO0)) 2397 return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags( 2398 OldBinOp->getOpcode(), BO0, Y, 2399 OldBinOp, OldBinOp->getName(), II)); 2400 } 2401 // rev(unop rev(X)) --> unop X 2402 if (match(Vec, m_OneUse(m_UnOp( 2403 m_Intrinsic<Intrinsic::experimental_vector_reverse>( 2404 m_Value(X)))))) { 2405 auto *OldUnOp = cast<UnaryOperator>(Vec); 2406 auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags( 2407 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(), II); 2408 return replaceInstUsesWith(CI, NewUnOp); 2409 } 2410 break; 2411 } 2412 case Intrinsic::vector_reduce_or: 2413 case Intrinsic::vector_reduce_and: { 2414 // Canonicalize logical or/and reductions: 2415 // Or reduction for i1 is represented as: 2416 // %val = bitcast <ReduxWidth x i1> to iReduxWidth 2417 // %res = cmp ne iReduxWidth %val, 0 2418 // And reduction for i1 is represented as: 2419 // %val = bitcast <ReduxWidth x i1> to iReduxWidth 2420 // %res = cmp eq iReduxWidth %val, 11111 2421 Value *Arg = II->getArgOperand(0); 2422 Value *Vect; 2423 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) { 2424 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType())) 2425 if (FTy->getElementType() == Builder.getInt1Ty()) { 2426 Value *Res = Builder.CreateBitCast( 2427 Vect, Builder.getIntNTy(FTy->getNumElements())); 2428 if (IID == Intrinsic::vector_reduce_and) { 2429 Res = Builder.CreateICmpEQ( 2430 Res, ConstantInt::getAllOnesValue(Res->getType())); 2431 } else { 2432 assert(IID == Intrinsic::vector_reduce_or && 2433 "Expected or reduction."); 2434 Res = Builder.CreateIsNotNull(Res); 2435 } 2436 if (Arg != Vect) 2437 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res, 2438 II->getType()); 2439 return replaceInstUsesWith(CI, Res); 2440 } 2441 } 2442 LLVM_FALLTHROUGH; 2443 } 2444 case Intrinsic::vector_reduce_add: { 2445 if (IID == Intrinsic::vector_reduce_add) { 2446 // Convert vector_reduce_add(ZExt(<n x i1>)) to 2447 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)). 2448 // Convert vector_reduce_add(SExt(<n x i1>)) to 2449 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)). 2450 // Convert vector_reduce_add(<n x i1>) to 2451 // Trunc(ctpop(bitcast <n x i1> to in)). 2452 Value *Arg = II->getArgOperand(0); 2453 Value *Vect; 2454 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) { 2455 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType())) 2456 if (FTy->getElementType() == Builder.getInt1Ty()) { 2457 Value *V = Builder.CreateBitCast( 2458 Vect, Builder.getIntNTy(FTy->getNumElements())); 2459 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V); 2460 if (Res->getType() != II->getType()) 2461 Res = Builder.CreateZExtOrTrunc(Res, II->getType()); 2462 if (Arg != Vect && 2463 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt) 2464 Res = Builder.CreateNeg(Res); 2465 return replaceInstUsesWith(CI, Res); 2466 } 2467 } 2468 } 2469 LLVM_FALLTHROUGH; 2470 } 2471 case Intrinsic::vector_reduce_xor: { 2472 if (IID == Intrinsic::vector_reduce_xor) { 2473 // Exclusive disjunction reduction over the vector with 2474 // (potentially-extended) i1 element type is actually a 2475 // (potentially-extended) arithmetic `add` reduction over the original 2476 // non-extended value: 2477 // vector_reduce_xor(?ext(<n x i1>)) 2478 // --> 2479 // ?ext(vector_reduce_add(<n x i1>)) 2480 Value *Arg = II->getArgOperand(0); 2481 Value *Vect; 2482 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) { 2483 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType())) 2484 if (FTy->getElementType() == Builder.getInt1Ty()) { 2485 Value *Res = Builder.CreateAddReduce(Vect); 2486 if (Arg != Vect) 2487 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res, 2488 II->getType()); 2489 return replaceInstUsesWith(CI, Res); 2490 } 2491 } 2492 } 2493 LLVM_FALLTHROUGH; 2494 } 2495 case Intrinsic::vector_reduce_mul: { 2496 if (IID == Intrinsic::vector_reduce_mul) { 2497 // Multiplicative reduction over the vector with (potentially-extended) 2498 // i1 element type is actually a (potentially zero-extended) 2499 // logical `and` reduction over the original non-extended value: 2500 // vector_reduce_mul(?ext(<n x i1>)) 2501 // --> 2502 // zext(vector_reduce_and(<n x i1>)) 2503 Value *Arg = II->getArgOperand(0); 2504 Value *Vect; 2505 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) { 2506 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType())) 2507 if (FTy->getElementType() == Builder.getInt1Ty()) { 2508 Value *Res = Builder.CreateAndReduce(Vect); 2509 if (Res->getType() != II->getType()) 2510 Res = Builder.CreateZExt(Res, II->getType()); 2511 return replaceInstUsesWith(CI, Res); 2512 } 2513 } 2514 } 2515 LLVM_FALLTHROUGH; 2516 } 2517 case Intrinsic::vector_reduce_umin: 2518 case Intrinsic::vector_reduce_umax: { 2519 if (IID == Intrinsic::vector_reduce_umin || 2520 IID == Intrinsic::vector_reduce_umax) { 2521 // UMin/UMax reduction over the vector with (potentially-extended) 2522 // i1 element type is actually a (potentially-extended) 2523 // logical `and`/`or` reduction over the original non-extended value: 2524 // vector_reduce_u{min,max}(?ext(<n x i1>)) 2525 // --> 2526 // ?ext(vector_reduce_{and,or}(<n x i1>)) 2527 Value *Arg = II->getArgOperand(0); 2528 Value *Vect; 2529 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) { 2530 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType())) 2531 if (FTy->getElementType() == Builder.getInt1Ty()) { 2532 Value *Res = IID == Intrinsic::vector_reduce_umin 2533 ? Builder.CreateAndReduce(Vect) 2534 : Builder.CreateOrReduce(Vect); 2535 if (Arg != Vect) 2536 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res, 2537 II->getType()); 2538 return replaceInstUsesWith(CI, Res); 2539 } 2540 } 2541 } 2542 LLVM_FALLTHROUGH; 2543 } 2544 case Intrinsic::vector_reduce_smin: 2545 case Intrinsic::vector_reduce_smax: { 2546 if (IID == Intrinsic::vector_reduce_smin || 2547 IID == Intrinsic::vector_reduce_smax) { 2548 // SMin/SMax reduction over the vector with (potentially-extended) 2549 // i1 element type is actually a (potentially-extended) 2550 // logical `and`/`or` reduction over the original non-extended value: 2551 // vector_reduce_s{min,max}(<n x i1>) 2552 // --> 2553 // vector_reduce_{or,and}(<n x i1>) 2554 // and 2555 // vector_reduce_s{min,max}(sext(<n x i1>)) 2556 // --> 2557 // sext(vector_reduce_{or,and}(<n x i1>)) 2558 // and 2559 // vector_reduce_s{min,max}(zext(<n x i1>)) 2560 // --> 2561 // zext(vector_reduce_{and,or}(<n x i1>)) 2562 Value *Arg = II->getArgOperand(0); 2563 Value *Vect; 2564 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) { 2565 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType())) 2566 if (FTy->getElementType() == Builder.getInt1Ty()) { 2567 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd; 2568 if (Arg != Vect) 2569 ExtOpc = cast<CastInst>(Arg)->getOpcode(); 2570 Value *Res = ((IID == Intrinsic::vector_reduce_smin) == 2571 (ExtOpc == Instruction::CastOps::ZExt)) 2572 ? Builder.CreateAndReduce(Vect) 2573 : Builder.CreateOrReduce(Vect); 2574 if (Arg != Vect) 2575 Res = Builder.CreateCast(ExtOpc, Res, II->getType()); 2576 return replaceInstUsesWith(CI, Res); 2577 } 2578 } 2579 } 2580 LLVM_FALLTHROUGH; 2581 } 2582 case Intrinsic::vector_reduce_fmax: 2583 case Intrinsic::vector_reduce_fmin: 2584 case Intrinsic::vector_reduce_fadd: 2585 case Intrinsic::vector_reduce_fmul: { 2586 bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd && 2587 IID != Intrinsic::vector_reduce_fmul) || 2588 II->hasAllowReassoc(); 2589 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd || 2590 IID == Intrinsic::vector_reduce_fmul) 2591 ? 1 2592 : 0; 2593 Value *Arg = II->getArgOperand(ArgIdx); 2594 Value *V; 2595 ArrayRef<int> Mask; 2596 if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated || 2597 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) || 2598 !cast<ShuffleVectorInst>(Arg)->isSingleSource()) 2599 break; 2600 int Sz = Mask.size(); 2601 SmallBitVector UsedIndices(Sz); 2602 for (int Idx : Mask) { 2603 if (Idx == UndefMaskElem || UsedIndices.test(Idx)) 2604 break; 2605 UsedIndices.set(Idx); 2606 } 2607 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or 2608 // other changes. 2609 if (UsedIndices.all()) { 2610 replaceUse(II->getOperandUse(ArgIdx), V); 2611 return nullptr; 2612 } 2613 break; 2614 } 2615 default: { 2616 // Handle target specific intrinsics 2617 Optional<Instruction *> V = targetInstCombineIntrinsic(*II); 2618 if (V.hasValue()) 2619 return V.getValue(); 2620 break; 2621 } 2622 } 2623 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke 2624 // context, so it is handled in visitCallBase and we should trigger it. 2625 return visitCallBase(*II); 2626 } 2627 2628 // Fence instruction simplification 2629 Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) { 2630 auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction()); 2631 // This check is solely here to handle arbitrary target-dependent syncscopes. 2632 // TODO: Can remove if does not matter in practice. 2633 if (NFI && FI.isIdenticalTo(NFI)) 2634 return eraseInstFromFunction(FI); 2635 2636 // Returns true if FI1 is identical or stronger fence than FI2. 2637 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) { 2638 auto FI1SyncScope = FI1->getSyncScopeID(); 2639 // Consider same scope, where scope is global or single-thread. 2640 if (FI1SyncScope != FI2->getSyncScopeID() || 2641 (FI1SyncScope != SyncScope::System && 2642 FI1SyncScope != SyncScope::SingleThread)) 2643 return false; 2644 2645 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering()); 2646 }; 2647 if (NFI && isIdenticalOrStrongerFence(NFI, &FI)) 2648 return eraseInstFromFunction(FI); 2649 2650 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction())) 2651 if (isIdenticalOrStrongerFence(PFI, &FI)) 2652 return eraseInstFromFunction(FI); 2653 return nullptr; 2654 } 2655 2656 // InvokeInst simplification 2657 Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) { 2658 return visitCallBase(II); 2659 } 2660 2661 // CallBrInst simplification 2662 Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) { 2663 return visitCallBase(CBI); 2664 } 2665 2666 /// If this cast does not affect the value passed through the varargs area, we 2667 /// can eliminate the use of the cast. 2668 static bool isSafeToEliminateVarargsCast(const CallBase &Call, 2669 const DataLayout &DL, 2670 const CastInst *const CI, 2671 const int ix) { 2672 if (!CI->isLosslessCast()) 2673 return false; 2674 2675 // If this is a GC intrinsic, avoid munging types. We need types for 2676 // statepoint reconstruction in SelectionDAG. 2677 // TODO: This is probably something which should be expanded to all 2678 // intrinsics since the entire point of intrinsics is that 2679 // they are understandable by the optimizer. 2680 if (isa<GCStatepointInst>(Call) || isa<GCRelocateInst>(Call) || 2681 isa<GCResultInst>(Call)) 2682 return false; 2683 2684 // Opaque pointers are compatible with any byval types. 2685 PointerType *SrcTy = cast<PointerType>(CI->getOperand(0)->getType()); 2686 if (SrcTy->isOpaque()) 2687 return true; 2688 2689 // The size of ByVal or InAlloca arguments is derived from the type, so we 2690 // can't change to a type with a different size. If the size were 2691 // passed explicitly we could avoid this check. 2692 if (!Call.isPassPointeeByValueArgument(ix)) 2693 return true; 2694 2695 // The transform currently only handles type replacement for byval, not other 2696 // type-carrying attributes. 2697 if (!Call.isByValArgument(ix)) 2698 return false; 2699 2700 Type *SrcElemTy = SrcTy->getNonOpaquePointerElementType(); 2701 Type *DstElemTy = Call.getParamByValType(ix); 2702 if (!SrcElemTy->isSized() || !DstElemTy->isSized()) 2703 return false; 2704 if (DL.getTypeAllocSize(SrcElemTy) != DL.getTypeAllocSize(DstElemTy)) 2705 return false; 2706 return true; 2707 } 2708 2709 Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { 2710 if (!CI->getCalledFunction()) return nullptr; 2711 2712 // Skip optimizing notail and musttail calls so 2713 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants. 2714 // LibCallSimplifier::optimizeCall should try to preseve tail calls though. 2715 if (CI->isMustTailCall() || CI->isNoTailCall()) 2716 return nullptr; 2717 2718 auto InstCombineRAUW = [this](Instruction *From, Value *With) { 2719 replaceInstUsesWith(*From, With); 2720 }; 2721 auto InstCombineErase = [this](Instruction *I) { 2722 eraseInstFromFunction(*I); 2723 }; 2724 LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW, 2725 InstCombineErase); 2726 if (Value *With = Simplifier.optimizeCall(CI, Builder)) { 2727 ++NumSimplified; 2728 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With); 2729 } 2730 2731 return nullptr; 2732 } 2733 2734 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) { 2735 // Strip off at most one level of pointer casts, looking for an alloca. This 2736 // is good enough in practice and simpler than handling any number of casts. 2737 Value *Underlying = TrampMem->stripPointerCasts(); 2738 if (Underlying != TrampMem && 2739 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem)) 2740 return nullptr; 2741 if (!isa<AllocaInst>(Underlying)) 2742 return nullptr; 2743 2744 IntrinsicInst *InitTrampoline = nullptr; 2745 for (User *U : TrampMem->users()) { 2746 IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); 2747 if (!II) 2748 return nullptr; 2749 if (II->getIntrinsicID() == Intrinsic::init_trampoline) { 2750 if (InitTrampoline) 2751 // More than one init_trampoline writes to this value. Give up. 2752 return nullptr; 2753 InitTrampoline = II; 2754 continue; 2755 } 2756 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) 2757 // Allow any number of calls to adjust.trampoline. 2758 continue; 2759 return nullptr; 2760 } 2761 2762 // No call to init.trampoline found. 2763 if (!InitTrampoline) 2764 return nullptr; 2765 2766 // Check that the alloca is being used in the expected way. 2767 if (InitTrampoline->getOperand(0) != TrampMem) 2768 return nullptr; 2769 2770 return InitTrampoline; 2771 } 2772 2773 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, 2774 Value *TrampMem) { 2775 // Visit all the previous instructions in the basic block, and try to find a 2776 // init.trampoline which has a direct path to the adjust.trampoline. 2777 for (BasicBlock::iterator I = AdjustTramp->getIterator(), 2778 E = AdjustTramp->getParent()->begin(); 2779 I != E;) { 2780 Instruction *Inst = &*--I; 2781 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) 2782 if (II->getIntrinsicID() == Intrinsic::init_trampoline && 2783 II->getOperand(0) == TrampMem) 2784 return II; 2785 if (Inst->mayWriteToMemory()) 2786 return nullptr; 2787 } 2788 return nullptr; 2789 } 2790 2791 // Given a call to llvm.adjust.trampoline, find and return the corresponding 2792 // call to llvm.init.trampoline if the call to the trampoline can be optimized 2793 // to a direct call to a function. Otherwise return NULL. 2794 static IntrinsicInst *findInitTrampoline(Value *Callee) { 2795 Callee = Callee->stripPointerCasts(); 2796 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee); 2797 if (!AdjustTramp || 2798 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) 2799 return nullptr; 2800 2801 Value *TrampMem = AdjustTramp->getOperand(0); 2802 2803 if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem)) 2804 return IT; 2805 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem)) 2806 return IT; 2807 return nullptr; 2808 } 2809 2810 bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, 2811 const TargetLibraryInfo *TLI) { 2812 // Note: We only handle cases which can't be driven from generic attributes 2813 // here. So, for example, nonnull and noalias (which are common properties 2814 // of some allocation functions) are expected to be handled via annotation 2815 // of the respective allocator declaration with generic attributes. 2816 bool Changed = false; 2817 2818 if (isAllocationFn(&Call, TLI)) { 2819 uint64_t Size; 2820 ObjectSizeOpts Opts; 2821 if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) { 2822 // TODO: We really should just emit deref_or_null here and then 2823 // let the generic inference code combine that with nonnull. 2824 if (Call.hasRetAttr(Attribute::NonNull)) { 2825 Changed = !Call.hasRetAttr(Attribute::Dereferenceable); 2826 Call.addRetAttr( 2827 Attribute::getWithDereferenceableBytes(Call.getContext(), Size)); 2828 } else { 2829 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull); 2830 Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes( 2831 Call.getContext(), Size)); 2832 } 2833 } 2834 } 2835 2836 // Add alignment attribute if alignment is a power of two constant. 2837 Value *Alignment = getAllocAlignment(&Call, TLI); 2838 if (!Alignment) 2839 return Changed; 2840 2841 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment); 2842 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) { 2843 uint64_t AlignmentVal = AlignOpC->getZExtValue(); 2844 if (llvm::isPowerOf2_64(AlignmentVal)) { 2845 Align ExistingAlign = Call.getRetAlign().valueOrOne(); 2846 Align NewAlign = Align(AlignmentVal); 2847 if (NewAlign > ExistingAlign) { 2848 Call.addRetAttr( 2849 Attribute::getWithAlignment(Call.getContext(), NewAlign)); 2850 Changed = true; 2851 } 2852 } 2853 } 2854 return Changed; 2855 } 2856 2857 /// Improvements for call, callbr and invoke instructions. 2858 Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { 2859 bool Changed = annotateAnyAllocSite(Call, &TLI); 2860 2861 // Mark any parameters that are known to be non-null with the nonnull 2862 // attribute. This is helpful for inlining calls to functions with null 2863 // checks on their arguments. 2864 SmallVector<unsigned, 4> ArgNos; 2865 unsigned ArgNo = 0; 2866 2867 for (Value *V : Call.args()) { 2868 if (V->getType()->isPointerTy() && 2869 !Call.paramHasAttr(ArgNo, Attribute::NonNull) && 2870 isKnownNonZero(V, DL, 0, &AC, &Call, &DT)) 2871 ArgNos.push_back(ArgNo); 2872 ArgNo++; 2873 } 2874 2875 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly."); 2876 2877 if (!ArgNos.empty()) { 2878 AttributeList AS = Call.getAttributes(); 2879 LLVMContext &Ctx = Call.getContext(); 2880 AS = AS.addParamAttribute(Ctx, ArgNos, 2881 Attribute::get(Ctx, Attribute::NonNull)); 2882 Call.setAttributes(AS); 2883 Changed = true; 2884 } 2885 2886 // If the callee is a pointer to a function, attempt to move any casts to the 2887 // arguments of the call/callbr/invoke. 2888 Value *Callee = Call.getCalledOperand(); 2889 Function *CalleeF = dyn_cast<Function>(Callee); 2890 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) && 2891 transformConstExprCastCall(Call)) 2892 return nullptr; 2893 2894 if (CalleeF) { 2895 // Remove the convergent attr on calls when the callee is not convergent. 2896 if (Call.isConvergent() && !CalleeF->isConvergent() && 2897 !CalleeF->isIntrinsic()) { 2898 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call 2899 << "\n"); 2900 Call.setNotConvergent(); 2901 return &Call; 2902 } 2903 2904 // If the call and callee calling conventions don't match, and neither one 2905 // of the calling conventions is compatible with C calling convention 2906 // this call must be unreachable, as the call is undefined. 2907 if ((CalleeF->getCallingConv() != Call.getCallingConv() && 2908 !(CalleeF->getCallingConv() == llvm::CallingConv::C && 2909 TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) && 2910 !(Call.getCallingConv() == llvm::CallingConv::C && 2911 TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) && 2912 // Only do this for calls to a function with a body. A prototype may 2913 // not actually end up matching the implementation's calling conv for a 2914 // variety of reasons (e.g. it may be written in assembly). 2915 !CalleeF->isDeclaration()) { 2916 Instruction *OldCall = &Call; 2917 CreateNonTerminatorUnreachable(OldCall); 2918 // If OldCall does not return void then replaceInstUsesWith poison. 2919 // This allows ValueHandlers and custom metadata to adjust itself. 2920 if (!OldCall->getType()->isVoidTy()) 2921 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType())); 2922 if (isa<CallInst>(OldCall)) 2923 return eraseInstFromFunction(*OldCall); 2924 2925 // We cannot remove an invoke or a callbr, because it would change thexi 2926 // CFG, just change the callee to a null pointer. 2927 cast<CallBase>(OldCall)->setCalledFunction( 2928 CalleeF->getFunctionType(), 2929 Constant::getNullValue(CalleeF->getType())); 2930 return nullptr; 2931 } 2932 } 2933 2934 // Calling a null function pointer is undefined if a null address isn't 2935 // dereferenceable. 2936 if ((isa<ConstantPointerNull>(Callee) && 2937 !NullPointerIsDefined(Call.getFunction())) || 2938 isa<UndefValue>(Callee)) { 2939 // If Call does not return void then replaceInstUsesWith poison. 2940 // This allows ValueHandlers and custom metadata to adjust itself. 2941 if (!Call.getType()->isVoidTy()) 2942 replaceInstUsesWith(Call, PoisonValue::get(Call.getType())); 2943 2944 if (Call.isTerminator()) { 2945 // Can't remove an invoke or callbr because we cannot change the CFG. 2946 return nullptr; 2947 } 2948 2949 // This instruction is not reachable, just remove it. 2950 CreateNonTerminatorUnreachable(&Call); 2951 return eraseInstFromFunction(Call); 2952 } 2953 2954 if (IntrinsicInst *II = findInitTrampoline(Callee)) 2955 return transformCallThroughTrampoline(Call, *II); 2956 2957 // TODO: Drop this transform once opaque pointer transition is done. 2958 FunctionType *FTy = Call.getFunctionType(); 2959 if (FTy->isVarArg()) { 2960 int ix = FTy->getNumParams(); 2961 // See if we can optimize any arguments passed through the varargs area of 2962 // the call. 2963 for (auto I = Call.arg_begin() + FTy->getNumParams(), E = Call.arg_end(); 2964 I != E; ++I, ++ix) { 2965 CastInst *CI = dyn_cast<CastInst>(*I); 2966 if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) { 2967 replaceUse(*I, CI->getOperand(0)); 2968 2969 // Update the byval type to match the pointer type. 2970 // Not necessary for opaque pointers. 2971 PointerType *NewTy = cast<PointerType>(CI->getOperand(0)->getType()); 2972 if (!NewTy->isOpaque() && Call.isByValArgument(ix)) { 2973 Call.removeParamAttr(ix, Attribute::ByVal); 2974 Call.addParamAttr(ix, Attribute::getWithByValType( 2975 Call.getContext(), 2976 NewTy->getNonOpaquePointerElementType())); 2977 } 2978 Changed = true; 2979 } 2980 } 2981 } 2982 2983 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) { 2984 InlineAsm *IA = cast<InlineAsm>(Callee); 2985 if (!IA->canThrow()) { 2986 // Normal inline asm calls cannot throw - mark them 2987 // 'nounwind'. 2988 Call.setDoesNotThrow(); 2989 Changed = true; 2990 } 2991 } 2992 2993 // Try to optimize the call if possible, we require DataLayout for most of 2994 // this. None of these calls are seen as possibly dead so go ahead and 2995 // delete the instruction now. 2996 if (CallInst *CI = dyn_cast<CallInst>(&Call)) { 2997 Instruction *I = tryOptimizeCall(CI); 2998 // If we changed something return the result, etc. Otherwise let 2999 // the fallthrough check. 3000 if (I) return eraseInstFromFunction(*I); 3001 } 3002 3003 if (!Call.use_empty() && !Call.isMustTailCall()) 3004 if (Value *ReturnedArg = Call.getReturnedArgOperand()) { 3005 Type *CallTy = Call.getType(); 3006 Type *RetArgTy = ReturnedArg->getType(); 3007 if (RetArgTy->canLosslesslyBitCastTo(CallTy)) 3008 return replaceInstUsesWith( 3009 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy)); 3010 } 3011 3012 if (isAllocationFn(&Call, &TLI) && 3013 isAllocRemovable(&cast<CallBase>(Call), &TLI)) 3014 return visitAllocSite(Call); 3015 3016 // Handle intrinsics which can be used in both call and invoke context. 3017 switch (Call.getIntrinsicID()) { 3018 case Intrinsic::experimental_gc_statepoint: { 3019 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call); 3020 SmallPtrSet<Value *, 32> LiveGcValues; 3021 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { 3022 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); 3023 3024 // Remove the relocation if unused. 3025 if (GCR.use_empty()) { 3026 eraseInstFromFunction(GCR); 3027 continue; 3028 } 3029 3030 Value *DerivedPtr = GCR.getDerivedPtr(); 3031 Value *BasePtr = GCR.getBasePtr(); 3032 3033 // Undef is undef, even after relocation. 3034 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) { 3035 replaceInstUsesWith(GCR, UndefValue::get(GCR.getType())); 3036 eraseInstFromFunction(GCR); 3037 continue; 3038 } 3039 3040 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) { 3041 // The relocation of null will be null for most any collector. 3042 // TODO: provide a hook for this in GCStrategy. There might be some 3043 // weird collector this property does not hold for. 3044 if (isa<ConstantPointerNull>(DerivedPtr)) { 3045 // Use null-pointer of gc_relocate's type to replace it. 3046 replaceInstUsesWith(GCR, ConstantPointerNull::get(PT)); 3047 eraseInstFromFunction(GCR); 3048 continue; 3049 } 3050 3051 // isKnownNonNull -> nonnull attribute 3052 if (!GCR.hasRetAttr(Attribute::NonNull) && 3053 isKnownNonZero(DerivedPtr, DL, 0, &AC, &Call, &DT)) { 3054 GCR.addRetAttr(Attribute::NonNull); 3055 // We discovered new fact, re-check users. 3056 Worklist.pushUsersToWorkList(GCR); 3057 } 3058 } 3059 3060 // If we have two copies of the same pointer in the statepoint argument 3061 // list, canonicalize to one. This may let us common gc.relocates. 3062 if (GCR.getBasePtr() == GCR.getDerivedPtr() && 3063 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) { 3064 auto *OpIntTy = GCR.getOperand(2)->getType(); 3065 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex())); 3066 } 3067 3068 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p)) 3069 // Canonicalize on the type from the uses to the defs 3070 3071 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...) 3072 LiveGcValues.insert(BasePtr); 3073 LiveGcValues.insert(DerivedPtr); 3074 } 3075 Optional<OperandBundleUse> Bundle = 3076 GCSP.getOperandBundle(LLVMContext::OB_gc_live); 3077 unsigned NumOfGCLives = LiveGcValues.size(); 3078 if (!Bundle.hasValue() || NumOfGCLives == Bundle->Inputs.size()) 3079 break; 3080 // We can reduce the size of gc live bundle. 3081 DenseMap<Value *, unsigned> Val2Idx; 3082 std::vector<Value *> NewLiveGc; 3083 for (unsigned I = 0, E = Bundle->Inputs.size(); I < E; ++I) { 3084 Value *V = Bundle->Inputs[I]; 3085 if (Val2Idx.count(V)) 3086 continue; 3087 if (LiveGcValues.count(V)) { 3088 Val2Idx[V] = NewLiveGc.size(); 3089 NewLiveGc.push_back(V); 3090 } else 3091 Val2Idx[V] = NumOfGCLives; 3092 } 3093 // Update all gc.relocates 3094 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { 3095 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); 3096 Value *BasePtr = GCR.getBasePtr(); 3097 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives && 3098 "Missed live gc for base pointer"); 3099 auto *OpIntTy1 = GCR.getOperand(1)->getType(); 3100 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr])); 3101 Value *DerivedPtr = GCR.getDerivedPtr(); 3102 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives && 3103 "Missed live gc for derived pointer"); 3104 auto *OpIntTy2 = GCR.getOperand(2)->getType(); 3105 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr])); 3106 } 3107 // Create new statepoint instruction. 3108 OperandBundleDef NewBundle("gc-live", NewLiveGc); 3109 return CallBase::Create(&Call, NewBundle); 3110 } 3111 default: { break; } 3112 } 3113 3114 return Changed ? &Call : nullptr; 3115 } 3116 3117 /// If the callee is a constexpr cast of a function, attempt to move the cast to 3118 /// the arguments of the call/callbr/invoke. 3119 bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { 3120 auto *Callee = 3121 dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts()); 3122 if (!Callee) 3123 return false; 3124 3125 // If this is a call to a thunk function, don't remove the cast. Thunks are 3126 // used to transparently forward all incoming parameters and outgoing return 3127 // values, so it's important to leave the cast in place. 3128 if (Callee->hasFnAttribute("thunk")) 3129 return false; 3130 3131 // If this is a musttail call, the callee's prototype must match the caller's 3132 // prototype with the exception of pointee types. The code below doesn't 3133 // implement that, so we can't do this transform. 3134 // TODO: Do the transform if it only requires adding pointer casts. 3135 if (Call.isMustTailCall()) 3136 return false; 3137 3138 Instruction *Caller = &Call; 3139 const AttributeList &CallerPAL = Call.getAttributes(); 3140 3141 // Okay, this is a cast from a function to a different type. Unless doing so 3142 // would cause a type conversion of one of our arguments, change this call to 3143 // be a direct call with arguments casted to the appropriate types. 3144 FunctionType *FT = Callee->getFunctionType(); 3145 Type *OldRetTy = Caller->getType(); 3146 Type *NewRetTy = FT->getReturnType(); 3147 3148 // Check to see if we are changing the return type... 3149 if (OldRetTy != NewRetTy) { 3150 3151 if (NewRetTy->isStructTy()) 3152 return false; // TODO: Handle multiple return values. 3153 3154 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) { 3155 if (Callee->isDeclaration()) 3156 return false; // Cannot transform this return value. 3157 3158 if (!Caller->use_empty() && 3159 // void -> non-void is handled specially 3160 !NewRetTy->isVoidTy()) 3161 return false; // Cannot transform this return value. 3162 } 3163 3164 if (!CallerPAL.isEmpty() && !Caller->use_empty()) { 3165 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs()); 3166 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy))) 3167 return false; // Attribute not compatible with transformed value. 3168 } 3169 3170 // If the callbase is an invoke/callbr instruction, and the return value is 3171 // used by a PHI node in a successor, we cannot change the return type of 3172 // the call because there is no place to put the cast instruction (without 3173 // breaking the critical edge). Bail out in this case. 3174 if (!Caller->use_empty()) { 3175 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) 3176 for (User *U : II->users()) 3177 if (PHINode *PN = dyn_cast<PHINode>(U)) 3178 if (PN->getParent() == II->getNormalDest() || 3179 PN->getParent() == II->getUnwindDest()) 3180 return false; 3181 // FIXME: Be conservative for callbr to avoid a quadratic search. 3182 if (isa<CallBrInst>(Caller)) 3183 return false; 3184 } 3185 } 3186 3187 unsigned NumActualArgs = Call.arg_size(); 3188 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs); 3189 3190 // Prevent us turning: 3191 // declare void @takes_i32_inalloca(i32* inalloca) 3192 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0) 3193 // 3194 // into: 3195 // call void @takes_i32_inalloca(i32* null) 3196 // 3197 // Similarly, avoid folding away bitcasts of byval calls. 3198 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) || 3199 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated)) 3200 return false; 3201 3202 auto AI = Call.arg_begin(); 3203 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { 3204 Type *ParamTy = FT->getParamType(i); 3205 Type *ActTy = (*AI)->getType(); 3206 3207 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL)) 3208 return false; // Cannot transform this parameter value. 3209 3210 // Check if there are any incompatible attributes we cannot drop safely. 3211 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i)) 3212 .overlaps(AttributeFuncs::typeIncompatible( 3213 ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP))) 3214 return false; // Attribute not compatible with transformed value. 3215 3216 if (Call.isInAllocaArgument(i) || 3217 CallerPAL.hasParamAttr(i, Attribute::Preallocated)) 3218 return false; // Cannot transform to and from inalloca/preallocated. 3219 3220 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError)) 3221 return false; 3222 3223 // If the parameter is passed as a byval argument, then we have to have a 3224 // sized type and the sized type has to have the same size as the old type. 3225 if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) { 3226 PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); 3227 if (!ParamPTy) 3228 return false; 3229 3230 if (!ParamPTy->isOpaque()) { 3231 Type *ParamElTy = ParamPTy->getNonOpaquePointerElementType(); 3232 if (!ParamElTy->isSized()) 3233 return false; 3234 3235 Type *CurElTy = Call.getParamByValType(i); 3236 if (DL.getTypeAllocSize(CurElTy) != DL.getTypeAllocSize(ParamElTy)) 3237 return false; 3238 } 3239 } 3240 } 3241 3242 if (Callee->isDeclaration()) { 3243 // Do not delete arguments unless we have a function body. 3244 if (FT->getNumParams() < NumActualArgs && !FT->isVarArg()) 3245 return false; 3246 3247 // If the callee is just a declaration, don't change the varargsness of the 3248 // call. We don't want to introduce a varargs call where one doesn't 3249 // already exist. 3250 if (FT->isVarArg() != Call.getFunctionType()->isVarArg()) 3251 return false; 3252 3253 // If both the callee and the cast type are varargs, we still have to make 3254 // sure the number of fixed parameters are the same or we have the same 3255 // ABI issues as if we introduce a varargs call. 3256 if (FT->isVarArg() && Call.getFunctionType()->isVarArg() && 3257 FT->getNumParams() != Call.getFunctionType()->getNumParams()) 3258 return false; 3259 } 3260 3261 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && 3262 !CallerPAL.isEmpty()) { 3263 // In this case we have more arguments than the new function type, but we 3264 // won't be dropping them. Check that these extra arguments have attributes 3265 // that are compatible with being a vararg call argument. 3266 unsigned SRetIdx; 3267 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) && 3268 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams()) 3269 return false; 3270 } 3271 3272 // Okay, we decided that this is a safe thing to do: go ahead and start 3273 // inserting cast instructions as necessary. 3274 SmallVector<Value *, 8> Args; 3275 SmallVector<AttributeSet, 8> ArgAttrs; 3276 Args.reserve(NumActualArgs); 3277 ArgAttrs.reserve(NumActualArgs); 3278 3279 // Get any return attributes. 3280 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs()); 3281 3282 // If the return value is not being used, the type may not be compatible 3283 // with the existing attributes. Wipe out any problematic attributes. 3284 RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy)); 3285 3286 LLVMContext &Ctx = Call.getContext(); 3287 AI = Call.arg_begin(); 3288 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { 3289 Type *ParamTy = FT->getParamType(i); 3290 3291 Value *NewArg = *AI; 3292 if ((*AI)->getType() != ParamTy) 3293 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy); 3294 Args.push_back(NewArg); 3295 3296 // Add any parameter attributes except the ones incompatible with the new 3297 // type. Note that we made sure all incompatible ones are safe to drop. 3298 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible( 3299 ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP); 3300 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) && 3301 !ParamTy->isOpaquePointerTy()) { 3302 AttrBuilder AB(Ctx, CallerPAL.getParamAttrs(i).removeAttributes( 3303 Ctx, IncompatibleAttrs)); 3304 AB.addByValAttr(ParamTy->getNonOpaquePointerElementType()); 3305 ArgAttrs.push_back(AttributeSet::get(Ctx, AB)); 3306 } else { 3307 ArgAttrs.push_back( 3308 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs)); 3309 } 3310 } 3311 3312 // If the function takes more arguments than the call was taking, add them 3313 // now. 3314 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) { 3315 Args.push_back(Constant::getNullValue(FT->getParamType(i))); 3316 ArgAttrs.push_back(AttributeSet()); 3317 } 3318 3319 // If we are removing arguments to the function, emit an obnoxious warning. 3320 if (FT->getNumParams() < NumActualArgs) { 3321 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722 3322 if (FT->isVarArg()) { 3323 // Add all of the arguments in their promoted form to the arg list. 3324 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { 3325 Type *PTy = getPromotedType((*AI)->getType()); 3326 Value *NewArg = *AI; 3327 if (PTy != (*AI)->getType()) { 3328 // Must promote to pass through va_arg area! 3329 Instruction::CastOps opcode = 3330 CastInst::getCastOpcode(*AI, false, PTy, false); 3331 NewArg = Builder.CreateCast(opcode, *AI, PTy); 3332 } 3333 Args.push_back(NewArg); 3334 3335 // Add any parameter attributes. 3336 ArgAttrs.push_back(CallerPAL.getParamAttrs(i)); 3337 } 3338 } 3339 } 3340 3341 AttributeSet FnAttrs = CallerPAL.getFnAttrs(); 3342 3343 if (NewRetTy->isVoidTy()) 3344 Caller->setName(""); // Void type should not have a name. 3345 3346 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) && 3347 "missing argument attributes"); 3348 AttributeList NewCallerPAL = AttributeList::get( 3349 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs); 3350 3351 SmallVector<OperandBundleDef, 1> OpBundles; 3352 Call.getOperandBundlesAsDefs(OpBundles); 3353 3354 CallBase *NewCall; 3355 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { 3356 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(), 3357 II->getUnwindDest(), Args, OpBundles); 3358 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) { 3359 NewCall = Builder.CreateCallBr(Callee, CBI->getDefaultDest(), 3360 CBI->getIndirectDests(), Args, OpBundles); 3361 } else { 3362 NewCall = Builder.CreateCall(Callee, Args, OpBundles); 3363 cast<CallInst>(NewCall)->setTailCallKind( 3364 cast<CallInst>(Caller)->getTailCallKind()); 3365 } 3366 NewCall->takeName(Caller); 3367 NewCall->setCallingConv(Call.getCallingConv()); 3368 NewCall->setAttributes(NewCallerPAL); 3369 3370 // Preserve prof metadata if any. 3371 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof}); 3372 3373 // Insert a cast of the return type as necessary. 3374 Instruction *NC = NewCall; 3375 Value *NV = NC; 3376 if (OldRetTy != NV->getType() && !Caller->use_empty()) { 3377 if (!NV->getType()->isVoidTy()) { 3378 NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy); 3379 NC->setDebugLoc(Caller->getDebugLoc()); 3380 3381 // If this is an invoke/callbr instruction, we should insert it after the 3382 // first non-phi instruction in the normal successor block. 3383 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { 3384 BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt(); 3385 InsertNewInstBefore(NC, *I); 3386 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) { 3387 BasicBlock::iterator I = CBI->getDefaultDest()->getFirstInsertionPt(); 3388 InsertNewInstBefore(NC, *I); 3389 } else { 3390 // Otherwise, it's a call, just insert cast right after the call. 3391 InsertNewInstBefore(NC, *Caller); 3392 } 3393 Worklist.pushUsersToWorkList(*Caller); 3394 } else { 3395 NV = UndefValue::get(Caller->getType()); 3396 } 3397 } 3398 3399 if (!Caller->use_empty()) 3400 replaceInstUsesWith(*Caller, NV); 3401 else if (Caller->hasValueHandle()) { 3402 if (OldRetTy == NV->getType()) 3403 ValueHandleBase::ValueIsRAUWd(Caller, NV); 3404 else 3405 // We cannot call ValueIsRAUWd with a different type, and the 3406 // actual tracked value will disappear. 3407 ValueHandleBase::ValueIsDeleted(Caller); 3408 } 3409 3410 eraseInstFromFunction(*Caller); 3411 return true; 3412 } 3413 3414 /// Turn a call to a function created by init_trampoline / adjust_trampoline 3415 /// intrinsic pair into a direct call to the underlying function. 3416 Instruction * 3417 InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call, 3418 IntrinsicInst &Tramp) { 3419 Value *Callee = Call.getCalledOperand(); 3420 Type *CalleeTy = Callee->getType(); 3421 FunctionType *FTy = Call.getFunctionType(); 3422 AttributeList Attrs = Call.getAttributes(); 3423 3424 // If the call already has the 'nest' attribute somewhere then give up - 3425 // otherwise 'nest' would occur twice after splicing in the chain. 3426 if (Attrs.hasAttrSomewhere(Attribute::Nest)) 3427 return nullptr; 3428 3429 Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts()); 3430 FunctionType *NestFTy = NestF->getFunctionType(); 3431 3432 AttributeList NestAttrs = NestF->getAttributes(); 3433 if (!NestAttrs.isEmpty()) { 3434 unsigned NestArgNo = 0; 3435 Type *NestTy = nullptr; 3436 AttributeSet NestAttr; 3437 3438 // Look for a parameter marked with the 'nest' attribute. 3439 for (FunctionType::param_iterator I = NestFTy->param_begin(), 3440 E = NestFTy->param_end(); 3441 I != E; ++NestArgNo, ++I) { 3442 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo); 3443 if (AS.hasAttribute(Attribute::Nest)) { 3444 // Record the parameter type and any other attributes. 3445 NestTy = *I; 3446 NestAttr = AS; 3447 break; 3448 } 3449 } 3450 3451 if (NestTy) { 3452 std::vector<Value*> NewArgs; 3453 std::vector<AttributeSet> NewArgAttrs; 3454 NewArgs.reserve(Call.arg_size() + 1); 3455 NewArgAttrs.reserve(Call.arg_size()); 3456 3457 // Insert the nest argument into the call argument list, which may 3458 // mean appending it. Likewise for attributes. 3459 3460 { 3461 unsigned ArgNo = 0; 3462 auto I = Call.arg_begin(), E = Call.arg_end(); 3463 do { 3464 if (ArgNo == NestArgNo) { 3465 // Add the chain argument and attributes. 3466 Value *NestVal = Tramp.getArgOperand(2); 3467 if (NestVal->getType() != NestTy) 3468 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest"); 3469 NewArgs.push_back(NestVal); 3470 NewArgAttrs.push_back(NestAttr); 3471 } 3472 3473 if (I == E) 3474 break; 3475 3476 // Add the original argument and attributes. 3477 NewArgs.push_back(*I); 3478 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo)); 3479 3480 ++ArgNo; 3481 ++I; 3482 } while (true); 3483 } 3484 3485 // The trampoline may have been bitcast to a bogus type (FTy). 3486 // Handle this by synthesizing a new function type, equal to FTy 3487 // with the chain parameter inserted. 3488 3489 std::vector<Type*> NewTypes; 3490 NewTypes.reserve(FTy->getNumParams()+1); 3491 3492 // Insert the chain's type into the list of parameter types, which may 3493 // mean appending it. 3494 { 3495 unsigned ArgNo = 0; 3496 FunctionType::param_iterator I = FTy->param_begin(), 3497 E = FTy->param_end(); 3498 3499 do { 3500 if (ArgNo == NestArgNo) 3501 // Add the chain's type. 3502 NewTypes.push_back(NestTy); 3503 3504 if (I == E) 3505 break; 3506 3507 // Add the original type. 3508 NewTypes.push_back(*I); 3509 3510 ++ArgNo; 3511 ++I; 3512 } while (true); 3513 } 3514 3515 // Replace the trampoline call with a direct call. Let the generic 3516 // code sort out any function type mismatches. 3517 FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, 3518 FTy->isVarArg()); 3519 Constant *NewCallee = 3520 NestF->getType() == PointerType::getUnqual(NewFTy) ? 3521 NestF : ConstantExpr::getBitCast(NestF, 3522 PointerType::getUnqual(NewFTy)); 3523 AttributeList NewPAL = 3524 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(), 3525 Attrs.getRetAttrs(), NewArgAttrs); 3526 3527 SmallVector<OperandBundleDef, 1> OpBundles; 3528 Call.getOperandBundlesAsDefs(OpBundles); 3529 3530 Instruction *NewCaller; 3531 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) { 3532 NewCaller = InvokeInst::Create(NewFTy, NewCallee, 3533 II->getNormalDest(), II->getUnwindDest(), 3534 NewArgs, OpBundles); 3535 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv()); 3536 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL); 3537 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) { 3538 NewCaller = 3539 CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(), 3540 CBI->getIndirectDests(), NewArgs, OpBundles); 3541 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv()); 3542 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL); 3543 } else { 3544 NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles); 3545 cast<CallInst>(NewCaller)->setTailCallKind( 3546 cast<CallInst>(Call).getTailCallKind()); 3547 cast<CallInst>(NewCaller)->setCallingConv( 3548 cast<CallInst>(Call).getCallingConv()); 3549 cast<CallInst>(NewCaller)->setAttributes(NewPAL); 3550 } 3551 NewCaller->setDebugLoc(Call.getDebugLoc()); 3552 3553 return NewCaller; 3554 } 3555 } 3556 3557 // Replace the trampoline call with a direct call. Since there is no 'nest' 3558 // parameter, there is no need to adjust the argument list. Let the generic 3559 // code sort out any function type mismatches. 3560 Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy); 3561 Call.setCalledFunction(FTy, NewCallee); 3562 return &Call; 3563 } 3564