#include "llvm/Transforms/Utils/VNCoercion.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "vncoerce"
namespace llvm {
namespace VNCoercion {

/// Return true if coerceAvailableValueToLoadType will succeed.
///
/// Coercion is possible when neither side is a first-class aggregate, the
/// stored value is at least as wide (in bits) as the load, and the two types
/// agree on non-integral-pointer-ness — we must never introduce
/// ptrtoint/inttoptr casts for non-integral pointers.
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
                                     const DataLayout &DL) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
      StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
    return false;

  // The store has to be at least as big as the load.
  if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
    return false;

  // Don't coerce non-integral pointers to integers or vice versa.
  if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
      DL.isNonIntegralPointerType(LoadTy))
    return false;

  return true;
}

/// Coerce the available value \p StoredVal to type \p LoadedTy.
///
/// \p Helper abstracts the instruction/constant duality: it is either an
/// IRBuilder<> (T = Value, emits real instructions) or a ConstantFolder
/// (T = Constant, folds everything at compile time).
/// Precondition: canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL).
template <class T, class HelperClass>
static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
                                               HelperClass &Helper,
                                               const DataLayout &DL) {
  assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
         "precondition violation - materialization can't fail");
  // Simplify a constant stored value up front so the size/type tests below
  // see the folded form.
  if (auto *C = dyn_cast<Constant>(StoredVal))
    if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
      StoredVal = FoldedStoredVal;

  // If this is already the right type, just return it.
  Type *StoredValTy = StoredVal->getType();

  uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
  uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoredValSize == LoadedValSize) {
    // Pointer to Pointer -> use bitcast.
    if (StoredValTy->getScalarType()->isPointerTy() &&
        LoadedTy->getScalarType()->isPointerTy()) {
      StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
    } else {
      // Convert source pointers to integers, which can be bitcast.
      if (StoredValTy->getScalarType()->isPointerTy()) {
        StoredValTy = DL.getIntPtrType(StoredValTy);
        StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
      }

      Type *TypeToCastTo = LoadedTy;
      if (TypeToCastTo->getScalarType()->isPointerTy())
        TypeToCastTo = DL.getIntPtrType(TypeToCastTo);

      if (StoredValTy != TypeToCastTo)
        StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);

      // Cast to pointer if the load needs a pointer type.
      if (LoadedTy->getScalarType()->isPointerTy())
        StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
    }

    // Fold away any constant-expression casts we just created.
    if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
      if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
        StoredVal = FoldedStoredVal;

    return StoredVal;
  }
  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it. If the available value is too small, then we
  // can't do anything.
  assert(StoredValSize >= LoadedValSize &&
         "canCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->getScalarType()->isPointerTy()) {
    StoredValTy = DL.getIntPtrType(StoredValTy);
    StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
    StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (DL.isBigEndian()) {
    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
                        DL.getTypeStoreSizeInBits(LoadedTy);
    StoredVal = Helper.CreateLShr(
        StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
  }

  // Truncate the integer to the right size now.
  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
  StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy);

  if (LoadedTy != NewIntTy) {
    // If the result is a pointer, inttoptr.
    if (LoadedTy->getScalarType()->isPointerTy())
      StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
    else
      // Otherwise, bitcast.
      StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
  }

  // Final fold in case the above sequence produced a foldable constant.
  if (auto *C = dyn_cast<Constant>(StoredVal))
    if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
      StoredVal = FoldedStoredVal;

  return StoredVal;
}

/// If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace.
/// IRB is IRBuilder used to insert new instructions.
///
/// If we can't do it, return null.
Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
                                      IRBuilder<> &IRB, const DataLayout &DL) {
  return coerceAvailableValueToLoadTypeHelper(StoredVal, LoadedTy, IRB, DL);
}

/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering memory write (store, memset, memcpy, memmove).
/// This means that the write *may* provide bits used by the load but we can't
/// be sure because the pointers don't must-alias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const DataLayout &DL) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
    return -1;

  // Strip both pointers down to a common base plus constant byte offsets;
  // without a shared base we cannot reason about the overlap at all.
  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase =
      GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias. AA must have gotten confused.
  // FIXME: Study to see if/when this happens. One case is forwarding a memset
  // to a load from the base of the memset.

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load. In this case, they really don't alias at all, AA
  // must have gotten confused.
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);

  // Both access sizes must be whole bytes (bitwise | of the two remainders
  // is nonzero iff either size has a sub-byte part).
  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
  LoadSize /= 8;

  // Disjoint ranges: the one that starts first also ends at or before the
  // other's start.
  bool isAAFailure = false;
  if (StoreOffset < LoadOffset)
    isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset;
  else
    isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset;

  if (isAAFailure)
    return -1;

  // If the Load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it. We could do something crazy in the future
  // (issue a smaller load then merge the bits in) but this seems unlikely to be
  // valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset + StoreSize < LoadOffset + LoadSize)
    return -1;

  // Okay, we can do this transformation. Return the number of bytes into the
  // store that the load is.
  return LoadOffset - StoreOffset;
}

/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
/// Returns -1 on failure, else the byte offset of the load within the store.
int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
                                   StoreInst *DepSI, const DataLayout &DL) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepSI->getValueOperand()->getType()->isStructTy() ||
      DepSI->getValueOperand()->getType()->isArrayTy())
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize =
      DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
                                        DL);
}

/// This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
/// Returns -1 on failure, else the byte offset of the load within DepLI.
int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
                                  const DataLayout &DL) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
    return -1;

  Value *DepPtr = DepLI->getPointerOperand();
  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
  int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
  if (R != -1)
    return R;

  // If we have a load/load clobber an DepLI can be widened to cover this load,
  // then we should widen it!
  int64_t LoadOffs = 0;
  const Value *LoadBase =
      GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);

  // Ask MemoryDependenceAnalysis how wide DepLI could legally be made; a
  // result of 0 means widening is not possible.
  unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
      LoadBase, LoadOffs, LoadSize, DepLI);
  if (Size == 0)
    return -1;

  // Check non-obvious conditions enforced by MDA which we rely on for being
  // able to materialize this potentially available value
  assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
  assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");

  // Re-analyze against the hypothetical widened load (Size is in bytes).
  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL);
}

/// Memdep query of a load clobbered by a mem intrinsic (memset/memcpy/
/// memmove). Returns -1 on failure, else the byte offset of the load within
/// the intrinsic's written range.
int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
                                     MemIntrinsic *MI, const DataLayout &DL) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (!SizeCst)
    return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset..
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, DL);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory. In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (!Src)
    return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
  if (!GV || !GV->isConstant())
    return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                              MemSizeInBits, DL);
  if (Offset == -1)
    return Offset;

  unsigned AS = Src->getType()->getPointerAddressSpace();
  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate: bitcast to i8*, GEP by Offset bytes,
  // bitcast to the load's pointer type, then try to fold the load.
  Src =
      ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
  Constant *OffsetCst =
      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
                                       OffsetCst);
  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
  if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
    return Offset;
  return -1;
}

/// Extract from \p SrcVal (the stored value) the sub-value that starts
/// \p Offset bytes in and feeds a load of type \p LoadTy. \p Helper is either
/// an IRBuilder<> or a ConstantFolder (see
/// coerceAvailableValueToLoadTypeHelper).
template <class T, class HelperClass>
static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
                                     HelperClass &Helper,
                                     const DataLayout &DL) {
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  // If two pointers are in the same address space, they have the same size,
  // so we don't need to do any truncation, etc. This avoids introducing
  // ptrtoint instructions for pointers that may be non-integral.
  if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() &&
      cast<PointerType>(SrcVal->getType())->getAddressSpace() ==
          cast<PointerType>(LoadTy)->getAddressSpace()) {
    return SrcVal;
  }

  // Sizes in bytes, rounded up to cover sub-byte-sized types.
  uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
  // Compute which bits of the stored value are being used by the load. Convert
  // to an integer type to start with.
  if (SrcVal->getType()->getScalarType()->isPointerTy())
    SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (DL.isLittleEndian())
    ShiftAmt = Offset * 8;
  else
    ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
  if (ShiftAmt)
    SrcVal = Helper.CreateLShr(SrcVal,
                               ConstantInt::get(SrcVal->getType(), ShiftAmt));

  // Drop the high bits we don't need.
  if (LoadSize != StoreSize)
    SrcVal = Helper.CreateTruncOrBitCast(SrcVal,
                                         IntegerType::get(Ctx, LoadSize * 8));
  return SrcVal;
}

/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering store. This means that the store provides bits used by
/// the load but the pointers don't must-alias. Check this case to see if
/// there is anything more we can do before we give up.
Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
                            Instruction *InsertPt, const DataLayout &DL) {

  // Extract the relevant bytes, then coerce them to LoadTy, emitting any
  // needed instructions at InsertPt.
  IRBuilder<> Builder(InsertPt);
  SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
  return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, Builder, DL);
}

/// Constant-only variant of getStoreValueForLoad: no instructions are
/// emitted; everything is folded via ConstantFolder.
Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset,
                                       Type *LoadTy, const DataLayout &DL) {
  ConstantFolder F;
  SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, F, DL);
  return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, F, DL);
}

/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering load. This means that the load *may* provide bits used
/// by the load but we can't be sure because the pointers don't must-alias.
/// Check this case to see if there is anything more we can do before we give
/// up.
Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
                           Instruction *InsertPt, const DataLayout &DL) {
  // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
  // widen SrcVal out to a larger load.
  unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
  if (Offset + LoadSize > SrcValStoreSize) {
    // These invariants were established by analyzeLoadFromClobberingLoad /
    // MDA before this widening path is reachable.
    assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
    assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
    // If we have a load/load clobber an DepLI can be widened to cover this
    // load, then we should widen it to the next power of 2 size big enough!
    unsigned NewLoadSize = Offset + LoadSize;
    if (!isPowerOf2_32(NewLoadSize))
      NewLoadSize = NextPowerOf2(NewLoadSize);

    Value *PtrVal = SrcVal->getPointerOperand();
    // Insert the new load after the old load. This ensures that subsequent
    // memdep queries will find the new load. We can't easily remove the old
    // load completely because it is already in the value numbering table.
    IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
    Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
    DestPTy =
        PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace());
    Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
    PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
    NewLoad->takeName(SrcVal);
    // Keep the original alignment: the wider load still starts at the same
    // address.
    NewLoad->setAlignment(SrcVal->getAlignment());

    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
    DEBUG(dbgs() << "TO: " << *NewLoad << "\n");

    // Replace uses of the original load with the wider load. On a big endian
    // system, we need to shift down to get the relevant bits.
    Value *RV = NewLoad;
    if (DL.isBigEndian())
      RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
    RV = Builder.CreateTrunc(RV, SrcVal->getType());
    SrcVal->replaceAllUsesWith(RV);

    SrcVal = NewLoad;
  }

  // From here on the (possibly widened) SrcVal fully covers the load.
  return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
}

/// Constant variant of getLoadValueForLoad. Widening is impossible for a
/// constant, so out-of-range offsets simply return null.
Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
                                      Type *LoadTy, const DataLayout &DL) {
  unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
  if (Offset + LoadSize > SrcValStoreSize)
    return nullptr;
  return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
}

/// Materialize the value a load of type \p LoadTy reads out of the mem
/// intrinsic \p SrcInst at byte \p Offset. \p Helper is either an IRBuilder<>
/// or a ConstantFolder (see coerceAvailableValueToLoadTypeHelper).
template <class T, class HelperClass>
T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset,
                                Type *LoadTy, HelperClass &Helper,
                                const DataLayout &DL) {
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8;

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    T *Val = cast<T>(MSI->getValue());
    if (LoadSize != 1)
      Val =
          Helper.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
    T *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet * 2 <= LoadSize) {
        T *ShVal = Helper.CreateShl(
            Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
        Val = Helper.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      T *ShVal = Helper.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
      Val = Helper.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return coerceAvailableValueToLoadTypeHelper(Val, LoadTy, Helper, DL);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());
  unsigned AS = Src->getType()->getPointerAddressSpace();

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate. Same i8* + GEP + bitcast dance as in
  // analyzeLoadFromClobberingMemInst, which already validated foldability.
  Src =
      ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
  Constant *OffsetCst =
      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
                                       OffsetCst);
  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
  return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
}

/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
476 Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, 477 Type *LoadTy, Instruction *InsertPt, 478 const DataLayout &DL) { 479 IRBuilder<> Builder(InsertPt); 480 return getMemInstValueForLoadHelper<Value, IRBuilder<>>(SrcInst, Offset, 481 LoadTy, Builder, DL); 482 } 483 484 Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, 485 Type *LoadTy, const DataLayout &DL) { 486 // The only case analyzeLoadFromClobberingMemInst cannot be converted to a 487 // constant is when it's a memset of a non-constant. 488 if (auto *MSI = dyn_cast<MemSetInst>(SrcInst)) 489 if (!isa<Constant>(MSI->getValue())) 490 return nullptr; 491 ConstantFolder F; 492 return getMemInstValueForLoadHelper<Constant, ConstantFolder>(SrcInst, Offset, 493 LoadTy, F, DL); 494 } 495 } // namespace VNCoercion 496 } // namespace llvm 497