1 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the visit functions for load, store and alloca. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "InstCombine.h" 15 #include "llvm/ADT/Statistic.h" 16 #include "llvm/Analysis/Loads.h" 17 #include "llvm/IR/DataLayout.h" 18 #include "llvm/IR/IntrinsicInst.h" 19 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 20 #include "llvm/Transforms/Utils/Local.h" 21 using namespace llvm; 22 23 #define DEBUG_TYPE "instcombine" 24 25 STATISTIC(NumDeadStore, "Number of dead stores eliminated"); 26 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); 27 28 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to 29 /// some part of a constant global variable. This intentionally only accepts 30 /// constant expressions because we can't rewrite arbitrary instructions. 31 static bool pointsToConstantGlobal(Value *V) { 32 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 33 return GV->isConstant(); 34 35 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { 36 if (CE->getOpcode() == Instruction::BitCast || 37 CE->getOpcode() == Instruction::AddrSpaceCast || 38 CE->getOpcode() == Instruction::GetElementPtr) 39 return pointsToConstantGlobal(CE->getOperand(0)); 40 } 41 return false; 42 } 43 44 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) 45 /// pointer to an alloca. Ignore any reads of the pointer, return false if we 46 /// see any stores or other unknown uses. If we see pointer arithmetic, keep 47 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse 48 /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to 49 /// the alloca, and if the source pointer is a pointer to a constant global, we 50 /// can optimize this. 51 static bool 52 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, 53 SmallVectorImpl<Instruction *> &ToDelete) { 54 // We track lifetime intrinsics as we encounter them. If we decide to go 55 // ahead and replace the value with the global, this lets the caller quickly 56 // eliminate the markers. 57 58 SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect; 59 ValuesToInspect.push_back(std::make_pair(V, false)); 60 while (!ValuesToInspect.empty()) { 61 auto ValuePair = ValuesToInspect.pop_back_val(); 62 const bool IsOffset = ValuePair.second; 63 for (auto &U : ValuePair.first->uses()) { 64 Instruction *I = cast<Instruction>(U.getUser()); 65 66 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 67 // Ignore non-volatile loads, they are always ok. 68 if (!LI->isSimple()) return false; 69 continue; 70 } 71 72 if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) { 73 // If uses of the bitcast are ok, we are ok. 74 ValuesToInspect.push_back(std::make_pair(I, IsOffset)); 75 continue; 76 } 77 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { 78 // If the GEP has all zero indices, it doesn't offset the pointer. If it 79 // doesn't, it does. 80 ValuesToInspect.push_back( 81 std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices())); 82 continue; 83 } 84 85 if (CallSite CS = I) { 86 // If this is the function being called then we treat it like a load and 87 // ignore it. 88 if (CS.isCallee(&U)) 89 continue; 90 91 // Inalloca arguments are clobbered by the call. 92 unsigned ArgNo = CS.getArgumentNo(&U); 93 if (CS.isInAllocaArgument(ArgNo)) 94 return false; 95 96 // If this is a readonly/readnone call site, then we know it is just a 97 // load (but one that potentially returns the value itself), so we can 98 // ignore it if we know that the value isn't captured. 99 if (CS.onlyReadsMemory() && 100 (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) 101 continue; 102 103 // If this is being passed as a byval argument, the caller is making a 104 // copy, so it is only a read of the alloca. 105 if (CS.isByValArgument(ArgNo)) 106 continue; 107 } 108 109 // Lifetime intrinsics can be handled by the caller. 110 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 111 if (II->getIntrinsicID() == Intrinsic::lifetime_start || 112 II->getIntrinsicID() == Intrinsic::lifetime_end) { 113 assert(II->use_empty() && "Lifetime markers have no result to use!"); 114 ToDelete.push_back(II); 115 continue; 116 } 117 } 118 119 // If this is isn't our memcpy/memmove, reject it as something we can't 120 // handle. 121 MemTransferInst *MI = dyn_cast<MemTransferInst>(I); 122 if (!MI) 123 return false; 124 125 // If the transfer is using the alloca as a source of the transfer, then 126 // ignore it since it is a load (unless the transfer is volatile). 127 if (U.getOperandNo() == 1) { 128 if (MI->isVolatile()) return false; 129 continue; 130 } 131 132 // If we already have seen a copy, reject the second one. 133 if (TheCopy) return false; 134 135 // If the pointer has been offset from the start of the alloca, we can't 136 // safely handle this. 137 if (IsOffset) return false; 138 139 // If the memintrinsic isn't using the alloca as the dest, reject it. 140 if (U.getOperandNo() != 0) return false; 141 142 // If the source of the memcpy/move is not a constant global, reject it. 143 if (!pointsToConstantGlobal(MI->getSource())) 144 return false; 145 146 // Otherwise, the transform is safe. Remember the copy instruction. 147 TheCopy = MI; 148 } 149 } 150 return true; 151 } 152 153 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only 154 /// modified by a copy from a constant global. If we can prove this, we can 155 /// replace any uses of the alloca with uses of the global directly. 156 static MemTransferInst * 157 isOnlyCopiedFromConstantGlobal(AllocaInst *AI, 158 SmallVectorImpl<Instruction *> &ToDelete) { 159 MemTransferInst *TheCopy = nullptr; 160 if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete)) 161 return TheCopy; 162 return nullptr; 163 } 164 165 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { 166 // Ensure that the alloca array size argument has type intptr_t, so that 167 // any casting is exposed early. 168 if (DL) { 169 Type *IntPtrTy = DL->getIntPtrType(AI.getType()); 170 if (AI.getArraySize()->getType() != IntPtrTy) { 171 Value *V = Builder->CreateIntCast(AI.getArraySize(), 172 IntPtrTy, false); 173 AI.setOperand(0, V); 174 return &AI; 175 } 176 } 177 178 // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 179 if (AI.isArrayAllocation()) { // Check C != 1 180 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { 181 Type *NewTy = 182 ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); 183 AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName()); 184 New->setAlignment(AI.getAlignment()); 185 186 // Scan to the end of the allocation instructions, to skip over a block of 187 // allocas if possible...also skip interleaved debug info 188 // 189 BasicBlock::iterator It = New; 190 while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It; 191 192 // Now that I is pointing to the first non-allocation-inst in the block, 193 // insert our getelementptr instruction... 194 // 195 Type *IdxTy = DL 196 ? DL->getIntPtrType(AI.getType()) 197 : Type::getInt64Ty(AI.getContext()); 198 Value *NullIdx = Constant::getNullValue(IdxTy); 199 Value *Idx[2] = { NullIdx, NullIdx }; 200 Instruction *GEP = 201 GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub"); 202 InsertNewInstBefore(GEP, *It); 203 204 // Now make everything use the getelementptr instead of the original 205 // allocation. 206 return ReplaceInstUsesWith(AI, GEP); 207 } else if (isa<UndefValue>(AI.getArraySize())) { 208 return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); 209 } 210 } 211 212 if (DL && AI.getAllocatedType()->isSized()) { 213 // If the alignment is 0 (unspecified), assign it the preferred alignment. 214 if (AI.getAlignment() == 0) 215 AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType())); 216 217 // Move all alloca's of zero byte objects to the entry block and merge them 218 // together. Note that we only do this for alloca's, because malloc should 219 // allocate and return a unique pointer, even for a zero byte allocation. 220 if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) { 221 // For a zero sized alloca there is no point in doing an array allocation. 222 // This is helpful if the array size is a complicated expression not used 223 // elsewhere. 224 if (AI.isArrayAllocation()) { 225 AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1)); 226 return &AI; 227 } 228 229 // Get the first instruction in the entry block. 230 BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock(); 231 Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg(); 232 if (FirstInst != &AI) { 233 // If the entry block doesn't start with a zero-size alloca then move 234 // this one to the start of the entry block. There is no problem with 235 // dominance as the array size was forced to a constant earlier already. 236 AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst); 237 if (!EntryAI || !EntryAI->getAllocatedType()->isSized() || 238 DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { 239 AI.moveBefore(FirstInst); 240 return &AI; 241 } 242 243 // If the alignment of the entry block alloca is 0 (unspecified), 244 // assign it the preferred alignment. 245 if (EntryAI->getAlignment() == 0) 246 EntryAI->setAlignment( 247 DL->getPrefTypeAlignment(EntryAI->getAllocatedType())); 248 // Replace this zero-sized alloca with the one at the start of the entry 249 // block after ensuring that the address will be aligned enough for both 250 // types. 251 unsigned MaxAlign = std::max(EntryAI->getAlignment(), 252 AI.getAlignment()); 253 EntryAI->setAlignment(MaxAlign); 254 if (AI.getType() != EntryAI->getType()) 255 return new BitCastInst(EntryAI, AI.getType()); 256 return ReplaceInstUsesWith(AI, EntryAI); 257 } 258 } 259 } 260 261 if (AI.getAlignment()) { 262 // Check to see if this allocation is only modified by a memcpy/memmove from 263 // a constant global whose alignment is equal to or exceeds that of the 264 // allocation. If this is the case, we can change all users to use 265 // the constant global instead. This is commonly produced by the CFE by 266 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' 267 // is only subsequently read. 268 SmallVector<Instruction *, 4> ToDelete; 269 if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { 270 unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(), 271 AI.getAlignment(), 272 DL, AT, &AI, DT); 273 if (AI.getAlignment() <= SourceAlign) { 274 DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); 275 DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); 276 for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) 277 EraseInstFromFunction(*ToDelete[i]); 278 Constant *TheSrc = cast<Constant>(Copy->getSource()); 279 Constant *Cast 280 = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType()); 281 Instruction *NewI = ReplaceInstUsesWith(AI, Cast); 282 EraseInstFromFunction(*Copy); 283 ++NumGlobalCopies; 284 return NewI; 285 } 286 } 287 } 288 289 // At last, use the generic allocation site handler to aggressively remove 290 // unused allocas. 291 return visitAllocSite(AI); 292 } 293 294 295 /// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible. 296 static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, 297 const DataLayout *DL) { 298 User *CI = cast<User>(LI.getOperand(0)); 299 Value *CastOp = CI->getOperand(0); 300 301 PointerType *DestTy = cast<PointerType>(CI->getType()); 302 Type *DestPTy = DestTy->getElementType(); 303 if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { 304 305 // If the address spaces don't match, don't eliminate the cast. 306 if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) 307 return nullptr; 308 309 Type *SrcPTy = SrcTy->getElementType(); 310 311 if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || 312 DestPTy->isVectorTy()) { 313 // If the source is an array, the code below will not succeed. Check to 314 // see if a trivial 'gep P, 0, 0' will help matters. Only do this for 315 // constants. 316 if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) 317 if (Constant *CSrc = dyn_cast<Constant>(CastOp)) 318 if (ASrcTy->getNumElements() != 0) { 319 Type *IdxTy = DL 320 ? DL->getIntPtrType(SrcTy) 321 : Type::getInt64Ty(SrcTy->getContext()); 322 Value *Idx = Constant::getNullValue(IdxTy); 323 Value *Idxs[2] = { Idx, Idx }; 324 CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); 325 SrcTy = cast<PointerType>(CastOp->getType()); 326 SrcPTy = SrcTy->getElementType(); 327 } 328 329 if (IC.getDataLayout() && 330 (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || 331 SrcPTy->isVectorTy()) && 332 // Do not allow turning this into a load of an integer, which is then 333 // casted to a pointer, this pessimizes pointer analysis a lot. 334 (SrcPTy->isPtrOrPtrVectorTy() == 335 LI.getType()->isPtrOrPtrVectorTy()) && 336 IC.getDataLayout()->getTypeSizeInBits(SrcPTy) == 337 IC.getDataLayout()->getTypeSizeInBits(DestPTy)) { 338 339 // Okay, we are casting from one integer or pointer type to another of 340 // the same size. Instead of casting the pointer before the load, cast 341 // the result of the loaded value. 342 LoadInst *NewLoad = 343 IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); 344 NewLoad->setAlignment(LI.getAlignment()); 345 NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); 346 // Now cast the result of the load. 347 PointerType *OldTy = dyn_cast<PointerType>(NewLoad->getType()); 348 PointerType *NewTy = dyn_cast<PointerType>(LI.getType()); 349 if (OldTy && NewTy && 350 OldTy->getAddressSpace() != NewTy->getAddressSpace()) { 351 return new AddrSpaceCastInst(NewLoad, LI.getType()); 352 } 353 354 return new BitCastInst(NewLoad, LI.getType()); 355 } 356 } 357 } 358 return nullptr; 359 } 360 361 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { 362 Value *Op = LI.getOperand(0); 363 364 // Attempt to improve the alignment. 365 if (DL) { 366 unsigned KnownAlign = 367 getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()), 368 DL, AT, &LI, DT); 369 unsigned LoadAlign = LI.getAlignment(); 370 unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign : 371 DL->getABITypeAlignment(LI.getType()); 372 373 if (KnownAlign > EffectiveLoadAlign) 374 LI.setAlignment(KnownAlign); 375 else if (LoadAlign == 0) 376 LI.setAlignment(EffectiveLoadAlign); 377 } 378 379 // load (cast X) --> cast (load X) iff safe. 380 if (isa<CastInst>(Op)) 381 if (Instruction *Res = InstCombineLoadCast(*this, LI, DL)) 382 return Res; 383 384 // None of the following transforms are legal for volatile/atomic loads. 385 // FIXME: Some of it is okay for atomic loads; needs refactoring. 386 if (!LI.isSimple()) return nullptr; 387 388 // Do really simple store-to-load forwarding and load CSE, to catch cases 389 // where there are several consecutive memory accesses to the same location, 390 // separated by a few arithmetic operations. 391 BasicBlock::iterator BBI = &LI; 392 if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) 393 return ReplaceInstUsesWith(LI, AvailableVal); 394 395 // load(gep null, ...) -> unreachable 396 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { 397 const Value *GEPI0 = GEPI->getOperand(0); 398 // TODO: Consider a target hook for valid address spaces for this xform. 399 if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){ 400 // Insert a new store to null instruction before the load to indicate 401 // that this code is not reachable. We do this instead of inserting 402 // an unreachable instruction directly because we cannot modify the 403 // CFG. 404 new StoreInst(UndefValue::get(LI.getType()), 405 Constant::getNullValue(Op->getType()), &LI); 406 return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); 407 } 408 } 409 410 // load null/undef -> unreachable 411 // TODO: Consider a target hook for valid address spaces for this xform. 412 if (isa<UndefValue>(Op) || 413 (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) { 414 // Insert a new store to null instruction before the load to indicate that 415 // this code is not reachable. We do this instead of inserting an 416 // unreachable instruction directly because we cannot modify the CFG. 417 new StoreInst(UndefValue::get(LI.getType()), 418 Constant::getNullValue(Op->getType()), &LI); 419 return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); 420 } 421 422 // Instcombine load (constantexpr_cast global) -> cast (load global) 423 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) 424 if (CE->isCast()) 425 if (Instruction *Res = InstCombineLoadCast(*this, LI, DL)) 426 return Res; 427 428 if (Op->hasOneUse()) { 429 // Change select and PHI nodes to select values instead of addresses: this 430 // helps alias analysis out a lot, allows many others simplifications, and 431 // exposes redundancy in the code. 432 // 433 // Note that we cannot do the transformation unless we know that the 434 // introduced loads cannot trap! Something like this is valid as long as 435 // the condition is always false: load (select bool %C, int* null, int* %G), 436 // but it would not be valid if we transformed it to load from null 437 // unconditionally. 438 // 439 if (SelectInst *SI = dyn_cast<SelectInst>(Op)) { 440 // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). 441 unsigned Align = LI.getAlignment(); 442 if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) && 443 isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) { 444 LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), 445 SI->getOperand(1)->getName()+".val"); 446 LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), 447 SI->getOperand(2)->getName()+".val"); 448 V1->setAlignment(Align); 449 V2->setAlignment(Align); 450 return SelectInst::Create(SI->getCondition(), V1, V2); 451 } 452 453 // load (select (cond, null, P)) -> load P 454 if (Constant *C = dyn_cast<Constant>(SI->getOperand(1))) 455 if (C->isNullValue()) { 456 LI.setOperand(0, SI->getOperand(2)); 457 return &LI; 458 } 459 460 // load (select (cond, P, null)) -> load P 461 if (Constant *C = dyn_cast<Constant>(SI->getOperand(2))) 462 if (C->isNullValue()) { 463 LI.setOperand(0, SI->getOperand(1)); 464 return &LI; 465 } 466 } 467 } 468 return nullptr; 469 } 470 471 /// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P 472 /// when possible. This makes it generally easy to do alias analysis and/or 473 /// SROA/mem2reg of the memory object. 474 static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { 475 User *CI = cast<User>(SI.getOperand(1)); 476 Value *CastOp = CI->getOperand(0); 477 478 Type *DestPTy = CI->getType()->getPointerElementType(); 479 PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); 480 if (!SrcTy) return nullptr; 481 482 Type *SrcPTy = SrcTy->getElementType(); 483 484 if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) 485 return nullptr; 486 487 /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" 488 /// to its first element. This allows us to handle things like: 489 /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) 490 /// on 32-bit hosts. 491 SmallVector<Value*, 4> NewGEPIndices; 492 493 // If the source is an array, the code below will not succeed. Check to 494 // see if a trivial 'gep P, 0, 0' will help matters. Only do this for 495 // constants. 496 if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) { 497 // Index through pointer. 498 Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); 499 NewGEPIndices.push_back(Zero); 500 501 while (1) { 502 if (StructType *STy = dyn_cast<StructType>(SrcPTy)) { 503 if (!STy->getNumElements()) /* Struct can be empty {} */ 504 break; 505 NewGEPIndices.push_back(Zero); 506 SrcPTy = STy->getElementType(0); 507 } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { 508 NewGEPIndices.push_back(Zero); 509 SrcPTy = ATy->getElementType(); 510 } else { 511 break; 512 } 513 } 514 515 SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); 516 } 517 518 if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) 519 return nullptr; 520 521 // If the pointers point into different address spaces don't do the 522 // transformation. 523 if (SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace()) 524 return nullptr; 525 526 // If the pointers point to values of different sizes don't do the 527 // transformation. 528 if (!IC.getDataLayout() || 529 IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != 530 IC.getDataLayout()->getTypeSizeInBits(DestPTy)) 531 return nullptr; 532 533 // If the pointers point to pointers to different address spaces don't do the 534 // transformation. It is not safe to introduce an addrspacecast instruction in 535 // this case since, depending on the target, addrspacecast may not be a no-op 536 // cast. 537 if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() && 538 SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace()) 539 return nullptr; 540 541 // Okay, we are casting from one integer or pointer type to another of 542 // the same size. Instead of casting the pointer before 543 // the store, cast the value to be stored. 544 Value *NewCast; 545 Instruction::CastOps opcode = Instruction::BitCast; 546 Type* CastSrcTy = DestPTy; 547 Type* CastDstTy = SrcPTy; 548 if (CastDstTy->isPointerTy()) { 549 if (CastSrcTy->isIntegerTy()) 550 opcode = Instruction::IntToPtr; 551 } else if (CastDstTy->isIntegerTy()) { 552 if (CastSrcTy->isPointerTy()) 553 opcode = Instruction::PtrToInt; 554 } 555 556 // SIOp0 is a pointer to aggregate and this is a store to the first field, 557 // emit a GEP to index into its first field. 558 if (!NewGEPIndices.empty()) 559 CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); 560 561 Value *SIOp0 = SI.getOperand(0); 562 NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, 563 SIOp0->getName()+".c"); 564 SI.setOperand(0, NewCast); 565 SI.setOperand(1, CastOp); 566 return &SI; 567 } 568 569 /// equivalentAddressValues - Test if A and B will obviously have the same 570 /// value. This includes recognizing that %t0 and %t1 will have the same 571 /// value in code like this: 572 /// %t0 = getelementptr \@a, 0, 3 573 /// store i32 0, i32* %t0 574 /// %t1 = getelementptr \@a, 0, 3 575 /// %t2 = load i32* %t1 576 /// 577 static bool equivalentAddressValues(Value *A, Value *B) { 578 // Test if the values are trivially equivalent. 579 if (A == B) return true; 580 581 // Test if the values come form identical arithmetic instructions. 582 // This uses isIdenticalToWhenDefined instead of isIdenticalTo because 583 // its only used to compare two uses within the same basic block, which 584 // means that they'll always either have the same value or one of them 585 // will have an undefined value. 586 if (isa<BinaryOperator>(A) || 587 isa<CastInst>(A) || 588 isa<PHINode>(A) || 589 isa<GetElementPtrInst>(A)) 590 if (Instruction *BI = dyn_cast<Instruction>(B)) 591 if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) 592 return true; 593 594 // Otherwise they may not be equivalent. 595 return false; 596 } 597 598 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { 599 Value *Val = SI.getOperand(0); 600 Value *Ptr = SI.getOperand(1); 601 602 // Attempt to improve the alignment. 603 if (DL) { 604 unsigned KnownAlign = 605 getOrEnforceKnownAlignment(Ptr, DL->getPrefTypeAlignment(Val->getType()), 606 DL, AT, &SI, DT); 607 unsigned StoreAlign = SI.getAlignment(); 608 unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign : 609 DL->getABITypeAlignment(Val->getType()); 610 611 if (KnownAlign > EffectiveStoreAlign) 612 SI.setAlignment(KnownAlign); 613 else if (StoreAlign == 0) 614 SI.setAlignment(EffectiveStoreAlign); 615 } 616 617 // Don't hack volatile/atomic stores. 618 // FIXME: Some bits are legal for atomic stores; needs refactoring. 619 if (!SI.isSimple()) return nullptr; 620 621 // If the RHS is an alloca with a single use, zapify the store, making the 622 // alloca dead. 623 if (Ptr->hasOneUse()) { 624 if (isa<AllocaInst>(Ptr)) 625 return EraseInstFromFunction(SI); 626 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { 627 if (isa<AllocaInst>(GEP->getOperand(0))) { 628 if (GEP->getOperand(0)->hasOneUse()) 629 return EraseInstFromFunction(SI); 630 } 631 } 632 } 633 634 // Do really simple DSE, to catch cases where there are several consecutive 635 // stores to the same location, separated by a few arithmetic operations. This 636 // situation often occurs with bitfield accesses. 637 BasicBlock::iterator BBI = &SI; 638 for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; 639 --ScanInsts) { 640 --BBI; 641 // Don't count debug info directives, lest they affect codegen, 642 // and we skip pointer-to-pointer bitcasts, which are NOPs. 643 if (isa<DbgInfoIntrinsic>(BBI) || 644 (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { 645 ScanInsts++; 646 continue; 647 } 648 649 if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { 650 // Prev store isn't volatile, and stores to the same location? 651 if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), 652 SI.getOperand(1))) { 653 ++NumDeadStore; 654 ++BBI; 655 EraseInstFromFunction(*PrevSI); 656 continue; 657 } 658 break; 659 } 660 661 // If this is a load, we have to stop. However, if the loaded value is from 662 // the pointer we're loading and is producing the pointer we're storing, 663 // then *this* store is dead (X = load P; store X -> P). 664 if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { 665 if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && 666 LI->isSimple()) 667 return EraseInstFromFunction(SI); 668 669 // Otherwise, this is a load from some other location. Stores before it 670 // may not be dead. 671 break; 672 } 673 674 // Don't skip over loads or things that can modify memory. 675 if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) 676 break; 677 } 678 679 // store X, null -> turns into 'unreachable' in SimplifyCFG 680 if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { 681 if (!isa<UndefValue>(Val)) { 682 SI.setOperand(0, UndefValue::get(Val->getType())); 683 if (Instruction *U = dyn_cast<Instruction>(Val)) 684 Worklist.Add(U); // Dropped a use. 685 } 686 return nullptr; // Do not modify these! 687 } 688 689 // store undef, Ptr -> noop 690 if (isa<UndefValue>(Val)) 691 return EraseInstFromFunction(SI); 692 693 // If the pointer destination is a cast, see if we can fold the cast into the 694 // source instead. 695 if (isa<CastInst>(Ptr)) 696 if (Instruction *Res = InstCombineStoreToCast(*this, SI)) 697 return Res; 698 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) 699 if (CE->isCast()) 700 if (Instruction *Res = InstCombineStoreToCast(*this, SI)) 701 return Res; 702 703 704 // If this store is the last instruction in the basic block (possibly 705 // excepting debug info instructions), and if the block ends with an 706 // unconditional branch, try to move it to the successor block. 707 BBI = &SI; 708 do { 709 ++BBI; 710 } while (isa<DbgInfoIntrinsic>(BBI) || 711 (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())); 712 if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) 713 if (BI->isUnconditional()) 714 if (SimplifyStoreAtEndOfBlock(SI)) 715 return nullptr; // xform done! 716 717 return nullptr; 718 } 719 720 /// SimplifyStoreAtEndOfBlock - Turn things like: 721 /// if () { *P = v1; } else { *P = v2 } 722 /// into a phi node with a store in the successor. 723 /// 724 /// Simplify things like: 725 /// *P = v1; if () { *P = v2; } 726 /// into a phi node with a store in the successor. 727 /// 728 bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { 729 BasicBlock *StoreBB = SI.getParent(); 730 731 // Check to see if the successor block has exactly two incoming edges. If 732 // so, see if the other predecessor contains a store to the same location. 733 // if so, insert a PHI node (if needed) and move the stores down. 734 BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); 735 736 // Determine whether Dest has exactly two predecessors and, if so, compute 737 // the other predecessor. 738 pred_iterator PI = pred_begin(DestBB); 739 BasicBlock *P = *PI; 740 BasicBlock *OtherBB = nullptr; 741 742 if (P != StoreBB) 743 OtherBB = P; 744 745 if (++PI == pred_end(DestBB)) 746 return false; 747 748 P = *PI; 749 if (P != StoreBB) { 750 if (OtherBB) 751 return false; 752 OtherBB = P; 753 } 754 if (++PI != pred_end(DestBB)) 755 return false; 756 757 // Bail out if all the relevant blocks aren't distinct (this can happen, 758 // for example, if SI is in an infinite loop) 759 if (StoreBB == DestBB || OtherBB == DestBB) 760 return false; 761 762 // Verify that the other block ends in a branch and is not otherwise empty. 763 BasicBlock::iterator BBI = OtherBB->getTerminator(); 764 BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); 765 if (!OtherBr || BBI == OtherBB->begin()) 766 return false; 767 768 // If the other block ends in an unconditional branch, check for the 'if then 769 // else' case. there is an instruction before the branch. 770 StoreInst *OtherStore = nullptr; 771 if (OtherBr->isUnconditional()) { 772 --BBI; 773 // Skip over debugging info. 774 while (isa<DbgInfoIntrinsic>(BBI) || 775 (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { 776 if (BBI==OtherBB->begin()) 777 return false; 778 --BBI; 779 } 780 // If this isn't a store, isn't a store to the same location, or is not the 781 // right kind of store, bail out. 782 OtherStore = dyn_cast<StoreInst>(BBI); 783 if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || 784 !SI.isSameOperationAs(OtherStore)) 785 return false; 786 } else { 787 // Otherwise, the other block ended with a conditional branch. If one of the 788 // destinations is StoreBB, then we have the if/then case. 789 if (OtherBr->getSuccessor(0) != StoreBB && 790 OtherBr->getSuccessor(1) != StoreBB) 791 return false; 792 793 // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an 794 // if/then triangle. See if there is a store to the same ptr as SI that 795 // lives in OtherBB. 796 for (;; --BBI) { 797 // Check to see if we find the matching store. 798 if ((OtherStore = dyn_cast<StoreInst>(BBI))) { 799 if (OtherStore->getOperand(1) != SI.getOperand(1) || 800 !SI.isSameOperationAs(OtherStore)) 801 return false; 802 break; 803 } 804 // If we find something that may be using or overwriting the stored 805 // value, or if we run out of instructions, we can't do the xform. 806 if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() || 807 BBI == OtherBB->begin()) 808 return false; 809 } 810 811 // In order to eliminate the store in OtherBr, we have to 812 // make sure nothing reads or overwrites the stored value in 813 // StoreBB. 814 for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { 815 // FIXME: This should really be AA driven. 816 if (I->mayReadFromMemory() || I->mayWriteToMemory()) 817 return false; 818 } 819 } 820 821 // Insert a PHI node now if we need it. 822 Value *MergedVal = OtherStore->getOperand(0); 823 if (MergedVal != SI.getOperand(0)) { 824 PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); 825 PN->addIncoming(SI.getOperand(0), SI.getParent()); 826 PN->addIncoming(OtherStore->getOperand(0), OtherBB); 827 MergedVal = InsertNewInstBefore(PN, DestBB->front()); 828 } 829 830 // Advance to a place where it is safe to insert the new store and 831 // insert it. 832 BBI = DestBB->getFirstInsertionPt(); 833 StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), 834 SI.isVolatile(), 835 SI.getAlignment(), 836 SI.getOrdering(), 837 SI.getSynchScope()); 838 InsertNewInstBefore(NewSI, *BBI); 839 NewSI->setDebugLoc(OtherStore->getDebugLoc()); 840 841 // If the two stores had AA tags, merge them. 842 AAMDNodes AATags; 843 SI.getAAMetadata(AATags); 844 if (AATags) { 845 OtherStore->getAAMetadata(AATags, /* Merge = */ true); 846 NewSI->setAAMetadata(AATags); 847 } 848 849 // Nuke the old stores. 850 EraseInstFromFunction(SI); 851 EraseInstFromFunction(*OtherStore); 852 return true; 853 } 854