//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::AddrSpaceCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  }
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, and return false if
/// we see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove that targets an unoffset pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete,
                               bool IsOffset = false) {
  // We track lifetime intrinsics as we encounter them.  If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  for (Use &U : V->uses()) {
    Instruction *I = cast<Instruction>(U.getUser());

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      // Simple (non-volatile, non-atomic) loads are just reads; ignore them.
      if (!LI->isSimple()) return false;
      continue;
    }

    if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
      // If uses of the bitcast are ok, we are ok.
      if (!isOnlyCopiedFromConstantGlobal(I, TheCopy, ToDelete, IsOffset))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
      // doesn't, it does.
      if (!isOnlyCopiedFromConstantGlobal(
              GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    if (CallSite CS = I) {
      // If this is the function being called then we treat it like a load and
      // ignore it.
      if (CS.isCallee(&U))
        continue;

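      // Otherwise the pointer is being passed as a call argument; the checks
      // below only accept uses where the callee can merely read it.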
      // Inalloca arguments are clobbered by the call.
      unsigned ArgNo = CS.getArgumentNo(&U);
      if (CS.isInAllocaArgument(ArgNo))
        return false;

      // If this is a readonly/readnone call site, then we know it is just a
      // load (but one that potentially returns the value itself), so we can
      // ignore it if we know that the value isn't captured.
      if (CS.onlyReadsMemory() &&
          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
        continue;

      // If this is being passed as a byval argument, the caller is making a
      // copy, so it is only a read of the alloca.
      if (CS.isByValArgument(ArgNo))
        continue;
    }

    // Lifetime intrinsics can be handled by the caller.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        assert(II->use_empty() && "Lifetime markers have no result to use!");
        ToDelete.push_back(II);
        continue;
      }
    }

    // If this isn't our memcpy/memmove, reject it as something we can't
    // handle.
    MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
    if (!MI)
      return false;

    // If the transfer is using the alloca as its source, then ignore it since
    // it is a load (unless the transfer is volatile).
    if (U.getOperandNo() == 1) {
      if (MI->isVolatile()) return false;
      continue;
    }

    // If we have already seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (IsOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (U.getOperandNo() != 0) return false;

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!pointsToConstantGlobal(MI->getSource()))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
/// modified by a copy from a constant global.  If we can prove this, we can
/// replace any uses of the alloca with uses of the global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = nullptr;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return nullptr;
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
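  // (For example, with 64-bit pointers an i8 element count would be
  //  zero-extended to i64 here, making the cast visible to later folds.)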
  if (DL) {
    Type *IntPtrTy = DL->getIntPtrType(AI.getType());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(),
                                        IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: 'alloca Ty, C' (where C is a constant != 1) into 'alloca [C x Ty], 1'.
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible... also skip interleaved debug info.
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that It is pointing to the first non-allocation-inst in the block,
      // insert our getelementptr instruction...
      //
      Type *IdxTy = DL
                  ? DL->getIntPtrType(AI.getType())
                  : Type::getInt64Ty(AI.getContext());
      Value *NullIdx = Constant::getNullValue(IdxTy);
      Value *Idx[2] = { NullIdx, NullIdx };
      Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (DL && AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all allocas of zero-byte objects to the entry block and merge them
    // together.  Note that we only do this for allocas, because malloc should
    // allocate and return a unique pointer, even for a zero-byte allocation.
    if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero-sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block.  There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

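        // Otherwise the entry block already begins with a zero-size alloca;
        // fold this alloca into it below.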
        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
            DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
                                                        AI.getAlignment(), DL);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast
          = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // Finally, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}


/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const DataLayout *DL) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return nullptr;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
        DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Type *IdxTy = DL
                        ? DL->getIntPtrType(SrcTy)
                        : Type::getInt64Ty(SrcTy->getContext());
            Value *Idx = Constant::getNullValue(IdxTy);
            Value *Idxs[2] = { Idx, Idx };
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getDataLayout() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
           SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // cast to a pointer; this pessimizes pointer analysis a lot.
          (SrcPTy->isPtrOrPtrVectorTy() ==
           LI.getType()->isPtrOrPtrVectorTy()) &&
          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
          IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        PointerType *OldTy = dyn_cast<PointerType>(NewLoad->getType());
        PointerType *NewTy = dyn_cast<PointerType>(LI.getType());
        if (OldTy && NewTy &&
            OldTy->getAddressSpace() != NewTy->getAddressSpace()) {
          return new AddrSpaceCastInst(NewLoad, LI.getType());
        }

        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return nullptr;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()), DL);
    unsigned LoadAlign = LI.getAlignment();
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      DL->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return nullptr;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
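      // (SimplifyCFG later turns a store to null into an 'unreachable'.)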
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return nullptr;
}

/// InstCombineStoreToCast - Fold 'store V, (cast P)' -> 'store (cast V), P'
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (!SrcTy) return nullptr;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return nullptr;

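  // For example (illustrative, assuming 64-bit pointers), this fold rewrites
  //   %q = bitcast i8** %p to i64*
  //   store i64 %v, i64* %q
  // into
  //   %v.c = inttoptr i64 %v to i8*
  //   store i8* %v.c, i8** %p
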
  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source is an array, the code below will not succeed.  Check to
  // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
  // constants.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return nullptr;

  // If the pointers point into different address spaces, don't do the
  // transformation.
  if (SrcTy->getAddressSpace() !=
      cast<PointerType>(CI->getType())->getAddressSpace())
    return nullptr;

  // If the pointers point to values of different sizes, don't do the
  // transformation.
  if (!IC.getDataLayout() ||
      IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
      IC.getDataLayout()->getTypeSizeInBits(DestPTy))
    return nullptr;

  // If the pointers point to pointers to different address spaces, don't do the
  // transformation.  It is not safe to introduce an addrspacecast instruction
  // in this case since, depending on the target, addrspacecast may not be a
  // no-op cast.
  if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() &&
      SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace())
    return nullptr;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before
  // the store, cast the value to be stored.
  Value *NewCast;
  Instruction::CastOps opcode = Instruction::BitCast;
  Type *CastSrcTy = DestPTy;
  Type *CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (CastSrcTy->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // If SIOp0 is a pointer to an aggregate and this is a store to the first
  // field, emit a GEP to index into its first field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  Value *SIOp0 = SI.getOperand(0);
  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value.  This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Ptr, DL->getPrefTypeAlignment(Val->getType()),
                                 DL);
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      DL->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return nullptr;

  // If the RHS is an alloca with a single use, zap the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations.
  // This situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen; also skip
    // pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the value being stored
    // was loaded from the same pointer we're storing to (X = load P; store X,
    // P), then *this* store is dead.
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return nullptr;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;


  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return nullptr;  // xform done!

  return nullptr;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = nullptr;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if the relevant blocks aren't all distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case; we already know there is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
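  // OtherStore will be set to the store in OtherBB that matches SI, if any.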
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch.  If one of
    // the destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to DestBB and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBB, we have to make sure nothing
    // reads or overwrites the stored value in StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If both stores have a TBAA tag, give the new store the most generic of the
  // two.
  if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
    if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag,
                               OtherStore->getMetadata(LLVMContext::MD_tbaa))))
      NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);


  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}