//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumDeadStore, "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable. This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::AddrSpaceCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  }
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca. Ignore any reads of the pointer, and return false if
/// we see any stores or other unknown uses. If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses. If we see a memcpy/memmove that targets a pointer to the alloca
/// that has not been offset, and if the source pointer is a pointer to a
/// constant global, we can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  // We track lifetime intrinsics as we encounter them. If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
  ValuesToInspect.push_back(std::make_pair(V, false));
  while (!ValuesToInspect.empty()) {
    auto ValuePair = ValuesToInspect.pop_back_val();
    const bool IsOffset = ValuePair.second;
    for (auto &U : ValuePair.first->uses()) {
      Instruction *I = cast<Instruction>(U.getUser());

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        // Ignore non-volatile loads; they are always ok.
        if (!LI->isSimple()) return false;
        continue;
      }

      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
        // If uses of the bitcast are ok, we are ok.
        ValuesToInspect.push_back(std::make_pair(I, IsOffset));
        continue;
      }
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
        // If the GEP has all zero indices, it doesn't offset the pointer;
        // otherwise it does.
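        // For example (illustrative, not from the original source):
        // "getelementptr %p, i32 0, i32 0" leaves IsOffset unchanged, while
        // "getelementptr %p, i32 0, i32 1" forces it to true.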
        ValuesToInspect.push_back(
            std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
        continue;
      }

      if (CallSite CS = I) {
        // If this is the function being called, then we treat it like a load
        // and ignore it.
        if (CS.isCallee(&U))
          continue;

        // Inalloca arguments are clobbered by the call.
        unsigned ArgNo = CS.getArgumentNo(&U);
        if (CS.isInAllocaArgument(ArgNo))
          return false;

        // If this is a readonly/readnone call site, then we know it is just a
        // load (but one that potentially returns the value itself), so we can
        // ignore it if we know that the value isn't captured.
        if (CS.onlyReadsMemory() &&
            (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
          continue;

        // If this is being passed as a byval argument, the caller is making a
        // copy, so it is only a read of the alloca.
        if (CS.isByValArgument(ArgNo))
          continue;
      }

      // Lifetime intrinsics can be handled by the caller.
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
            II->getIntrinsicID() == Intrinsic::lifetime_end) {
          assert(II->use_empty() && "Lifetime markers have no result to use!");
          ToDelete.push_back(II);
          continue;
        }
      }

      // If this isn't our memcpy/memmove, reject it as something we can't
      // handle.
      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
      if (!MI)
        return false;

      // If the transfer is using the alloca as a source of the transfer, then
      // ignore it since it is a load (unless the transfer is volatile).
      if (U.getOperandNo() == 1) {
        if (MI->isVolatile()) return false;
        continue;
      }

      // If we have already seen a copy, reject the second one.
      if (TheCopy) return false;

      // If the pointer has been offset from the start of the alloca, we can't
      // safely handle this.
      if (IsOffset) return false;

      // If the memintrinsic isn't using the alloca as the dest, reject it.
      if (U.getOperandNo() != 0) return false;

      // If the source of the memcpy/memmove is not a constant global, reject
      // it.
      if (!pointsToConstantGlobal(MI->getSource()))
        return false;

      // Otherwise, the transform is safe. Remember the copy instruction.
      TheCopy = MI;
    }
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is
/// only modified by a copy from a constant global. If we can prove this, we
/// can replace any uses of the alloca with uses of the global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = nullptr;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return nullptr;
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
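  // Illustrative example (added for clarity; assumes a 64-bit target where
  // intptr_t is i64 and a hypothetical i8 array size %n):
  //   %a = alloca i32, i8 %n
  // becomes
  //   %n.cast = zext i8 %n to i64
  //   %a = alloca i32, i64 %n.cast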
  if (DL) {
    Type *IntPtrTy = DL->getIntPtrType(AI.getType());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(),
                                        IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert "alloca Ty, C" (where C is a constant != 1) into "alloca [C x Ty], 1".
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy =
          ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block
      // of allocas if possible... also skip interleaved debug info.
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that It points to the first non-allocation instruction in the
      // block, insert our getelementptr instruction.
      Type *IdxTy = DL
                        ? DL->getIntPtrType(AI.getType())
                        : Type::getInt64Ty(AI.getContext());
      Value *NullIdx = Constant::getNullValue(IdxTy);
      Value *Idx[2] = { NullIdx, NullIdx };
      Instruction *GEP =
          GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (DL && AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all allocas of zero-byte objects to the entry block and merge them
    // together. Note that we only do this for allocas, because malloc should
    // allocate and return a unique pointer, even for a zero-byte allocation.
    if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero-sized alloca there is no point in doing an array
      // allocation. This is helpful if the array size is a complicated
      // expression not used elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block. There is no problem with
        // dominance as the array size was already forced to a constant.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
              DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the
        // entry block after ensuring that the address will be aligned enough
        // for both types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove
    // from a constant global whose alignment is equal to or exceeds that of
    // the allocation. If this is the case, we can change all users to use the
    // constant global instead. This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if
    // 'A' is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
                                                        AI.getAlignment(), DL);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast =
            ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // Finally, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}


/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const DataLayout *DL) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return nullptr;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
        DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed. Check to
      // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Type *IdxTy = DL ? DL->getIntPtrType(SrcTy)
                             : Type::getInt64Ty(SrcTy->getContext());
            Value *Idx = Constant::getNullValue(IdxTy);
            Value *Idxs[2] = { Idx, Idx };
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getDataLayout() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
           SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // cast to a pointer; this pessimizes pointer analysis a lot.
          (SrcPTy->isPtrOrPtrVectorTy() ==
           LI.getType()->isPtrOrPtrVectorTy()) &&
          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
              IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size. Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
            IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        PointerType *OldTy = dyn_cast<PointerType>(NewLoad->getType());
        PointerType *NewTy = dyn_cast<PointerType>(LI.getType());
        if (OldTy && NewTy &&
            OldTy->getAddressSpace() != NewTy->getAddressSpace()) {
          return new AddrSpaceCastInst(NewLoad, LI.getType());
        }

        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return nullptr;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
        getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()),
                                   DL);
    unsigned LoadAlign = LI.getAlignment();
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
        DL->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of them are okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return nullptr;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load (gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store-to-null instruction before the load to indicate
      // that this code is not reachable. We do this instead of inserting an
      // unreachable instruction directly because we cannot modify the CFG.
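      // Illustrative sketch (assumed IR, not taken from this file):
      //   %p = getelementptr i32* null, i32 1
      //   %v = load i32* %p
      // gets a "store i32 undef, i32* null" inserted before the load, and all
      // uses of %v are replaced with undef.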
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store-to-null instruction before the load to indicate that
    // this code is not reachable. We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap! Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return nullptr;
}

/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible. This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = CI->getType()->getPointerElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (!SrcTy) return nullptr;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return nullptr;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.
  /// This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source is an array or struct, the code below will not succeed.
  // Check to see if a trivial 'gep P, 0, 0' will help matters. Only do this
  // for constants.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return nullptr;

  // If the pointers point into different address spaces, don't do the
  // transformation.
  if (SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace())
    return nullptr;

  // If the pointers point to values of different sizes, don't do the
  // transformation.
  if (!IC.getDataLayout() ||
      IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
          IC.getDataLayout()->getTypeSizeInBits(DestPTy))
    return nullptr;

  // If the pointers point to pointers in different address spaces, don't do
  // the transformation. It is not safe to introduce an addrspacecast
  // instruction in this case since, depending on the target, addrspacecast
  // may not be a no-op cast.
  if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() &&
      SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace())
    return nullptr;

  // Okay, we are casting from one integer or pointer type to another of the
  // same size. Instead of casting the pointer before the store, cast the
  // value to be stored.
  Value *NewCast;
  Instruction::CastOps opcode = Instruction::BitCast;
  Type *CastSrcTy = DestPTy;
  Type *CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (CastSrcTy->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // If CastOp points to an aggregate and this is a store to its first scalar
  // element, emit a GEP to index down to that element.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  Value *SIOp0 = SI.getOperand(0);
  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because it's
  // only used to compare two uses within the same basic block, which means
  // that they'll always either have the same value or one of them will have
  // an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
        getOrEnforceKnownAlignment(Ptr, DL->getPrefTypeAlignment(Val->getType()),
                                   DL);
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
        DL->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return nullptr;

  // If the RHS is an alloca with a single use, zap the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations.
  // This situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen, and skip
    // pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop. However, if the value being stored
    // is exactly this load from the pointer we're storing to, then *this*
    // store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location. Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return nullptr;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into
  // the source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;

  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move the store to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return nullptr;  // xform done!

  return nullptr;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2; }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges. If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = nullptr;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if
  // then else' case: there is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not
    // the right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of
    // the destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle. See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBB, we have to make sure
    // nothing reads or overwrites the stored value in StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If the two stores had AA tags, merge them.
  AAMDNodes AATags;
  SI.getAAMetadata(AATags);
  if (AATags) {
    OtherStore->getAAMetadata(AATags, /* Merge = */ true);
    NewSI->setAAMetadata(AATags);
  }

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}