1 #include "llvm/Transforms/Utils/VNCoercion.h" 2 #include "llvm/Analysis/AliasAnalysis.h" 3 #include "llvm/Analysis/ConstantFolding.h" 4 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 5 #include "llvm/Analysis/ValueTracking.h" 6 #include "llvm/IR/IRBuilder.h" 7 #include "llvm/IR/IntrinsicInst.h" 8 #include "llvm/Support/Debug.h" 9 10 #define DEBUG_TYPE "vncoerce" 11 namespace llvm { 12 namespace VNCoercion { 13 14 /// Return true if coerceAvailableValueToLoadType will succeed. 15 bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, 16 const DataLayout &DL) { 17 // If the loaded or stored value is an first class array or struct, don't try 18 // to transform them. We need to be able to bitcast to integer. 19 if (LoadTy->isStructTy() || LoadTy->isArrayTy() || 20 StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy()) 21 return false; 22 23 // The store has to be at least as big as the load. 24 if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy)) 25 return false; 26 27 // Don't coerce non-integral pointers to integers or vice versa. 28 if (DL.isNonIntegralPointerType(StoredVal->getType()) != 29 DL.isNonIntegralPointerType(LoadTy)) 30 return false; 31 32 return true; 33 } 34 35 template <class T, class HelperClass> 36 static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, 37 HelperClass &Helper, 38 const DataLayout &DL) { 39 assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) && 40 "precondition violation - materialization can't fail"); 41 if (auto *C = dyn_cast<Constant>(StoredVal)) 42 if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) 43 StoredVal = FoldedStoredVal; 44 45 // If this is already the right type, just return it. 46 Type *StoredValTy = StoredVal->getType(); 47 48 uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy); 49 uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy); 50 51 // If the store and reload are the same size, we can always reuse it. 52 if (StoredValSize == LoadedValSize) { 53 // Pointer to Pointer -> use bitcast. 54 if (StoredValTy->getScalarType()->isPointerTy() && 55 LoadedTy->getScalarType()->isPointerTy()) { 56 StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy); 57 } else { 58 // Convert source pointers to integers, which can be bitcast. 59 if (StoredValTy->getScalarType()->isPointerTy()) { 60 StoredValTy = DL.getIntPtrType(StoredValTy); 61 StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); 62 } 63 64 Type *TypeToCastTo = LoadedTy; 65 if (TypeToCastTo->getScalarType()->isPointerTy()) 66 TypeToCastTo = DL.getIntPtrType(TypeToCastTo); 67 68 if (StoredValTy != TypeToCastTo) 69 StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo); 70 71 // Cast to pointer if the load needs a pointer type. 72 if (LoadedTy->getScalarType()->isPointerTy()) 73 StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); 74 } 75 76 if (auto *C = dyn_cast<ConstantExpr>(StoredVal)) 77 if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) 78 StoredVal = FoldedStoredVal; 79 80 return StoredVal; 81 } 82 // If the loaded value is smaller than the available value, then we can 83 // extract out a piece from it. If the available value is too small, then we 84 // can't do anything. 85 assert(StoredValSize >= LoadedValSize && 86 "canCoerceMustAliasedValueToLoad fail"); 87 88 // Convert source pointers to integers, which can be manipulated. 89 if (StoredValTy->getScalarType()->isPointerTy()) { 90 StoredValTy = DL.getIntPtrType(StoredValTy); 91 StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); 92 } 93 94 // Convert vectors and fp to integer, which can be manipulated. 95 if (!StoredValTy->isIntegerTy()) { 96 StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize); 97 StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy); 98 } 99 100 // If this is a big-endian system, we need to shift the value down to the low 101 // bits so that a truncate will work. 102 if (DL.isBigEndian()) { 103 uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) - 104 DL.getTypeStoreSizeInBits(LoadedTy); 105 StoredVal = Helper.CreateLShr( 106 StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt)); 107 } 108 109 // Truncate the integer to the right size now. 110 Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize); 111 StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy); 112 113 if (LoadedTy != NewIntTy) { 114 // If the result is a pointer, inttoptr. 115 if (LoadedTy->getScalarType()->isPointerTy()) 116 StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); 117 else 118 // Otherwise, bitcast. 119 StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy); 120 } 121 122 if (auto *C = dyn_cast<Constant>(StoredVal)) 123 if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) 124 StoredVal = FoldedStoredVal; 125 126 return StoredVal; 127 } 128 129 /// If we saw a store of a value to memory, and 130 /// then a load from a must-aliased pointer of a different type, try to coerce 131 /// the stored value. LoadedTy is the type of the load we want to replace. 132 /// IRB is IRBuilder used to insert new instructions. 133 /// 134 /// If we can't do it, return null. 135 Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy, 136 IRBuilder<> &IRB, const DataLayout &DL) { 137 return coerceAvailableValueToLoadTypeHelper(StoredVal, LoadedTy, IRB, DL); 138 } 139 140 /// This function is called when we have a memdep query of a load that ends up 141 /// being a clobbering memory write (store, memset, memcpy, memmove). This 142 /// means that the write *may* provide bits used by the load but we can't be 143 /// sure because the pointers don't must-alias. 144 /// 145 /// Check this case to see if there is anything more we can do before we give 146 /// up. This returns -1 if we have to give up, or a byte number in the stored 147 /// value of the piece that feeds the load. 148 static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, 149 Value *WritePtr, 150 uint64_t WriteSizeInBits, 151 const DataLayout &DL) { 152 // If the loaded or stored value is a first class array or struct, don't try 153 // to transform them. We need to be able to bitcast to integer. 154 if (LoadTy->isStructTy() || LoadTy->isArrayTy()) 155 return -1; 156 157 int64_t StoreOffset = 0, LoadOffset = 0; 158 Value *StoreBase = 159 GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL); 160 Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL); 161 if (StoreBase != LoadBase) 162 return -1; 163 164 // If the load and store are to the exact same address, they should have been 165 // a must alias. AA must have gotten confused. 166 // FIXME: Study to see if/when this happens. One case is forwarding a memset 167 // to a load from the base of the memset. 168 169 // If the load and store don't overlap at all, the store doesn't provide 170 // anything to the load. In this case, they really don't alias at all, AA 171 // must have gotten confused. 172 uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy); 173 174 if ((WriteSizeInBits & 7) | (LoadSize & 7)) 175 return -1; 176 uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes. 177 LoadSize /= 8; 178 179 bool isAAFailure = false; 180 if (StoreOffset < LoadOffset) 181 isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset; 182 else 183 isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset; 184 185 if (isAAFailure) 186 return -1; 187 188 // If the Load isn't completely contained within the stored bits, we don't 189 // have all the bits to feed it. We could do something crazy in the future 190 // (issue a smaller load then merge the bits in) but this seems unlikely to be 191 // valuable. 192 if (StoreOffset > LoadOffset || 193 StoreOffset + StoreSize < LoadOffset + LoadSize) 194 return -1; 195 196 // Okay, we can do this transformation. Return the number of bytes into the 197 // store that the load is. 198 return LoadOffset - StoreOffset; 199 } 200 201 /// This function is called when we have a 202 /// memdep query of a load that ends up being a clobbering store. 203 int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, 204 StoreInst *DepSI, const DataLayout &DL) { 205 // Cannot handle reading from store of first-class aggregate yet. 206 if (DepSI->getValueOperand()->getType()->isStructTy() || 207 DepSI->getValueOperand()->getType()->isArrayTy()) 208 return -1; 209 210 Value *StorePtr = DepSI->getPointerOperand(); 211 uint64_t StoreSize = 212 DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()); 213 return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize, 214 DL); 215 } 216 217 /// This function is called when we have a 218 /// memdep query of a load that ends up being clobbered by another load. See if 219 /// the other load can feed into the second load. 220 int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, 221 const DataLayout &DL) { 222 // Cannot handle reading from store of first-class aggregate yet. 223 if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) 224 return -1; 225 226 Value *DepPtr = DepLI->getPointerOperand(); 227 uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()); 228 int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL); 229 if (R != -1) 230 return R; 231 232 // If we have a load/load clobber an DepLI can be widened to cover this load, 233 // then we should widen it! 234 int64_t LoadOffs = 0; 235 const Value *LoadBase = 236 GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL); 237 unsigned LoadSize = DL.getTypeStoreSize(LoadTy); 238 239 unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize( 240 LoadBase, LoadOffs, LoadSize, DepLI); 241 if (Size == 0) 242 return -1; 243 244 // Check non-obvious conditions enforced by MDA which we rely on for being 245 // able to materialize this potentially available value 246 assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!"); 247 assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load"); 248 249 return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL); 250 } 251 252 int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, 253 MemIntrinsic *MI, const DataLayout &DL) { 254 // If the mem operation is a non-constant size, we can't handle it. 255 ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); 256 if (!SizeCst) 257 return -1; 258 uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8; 259 260 // If this is memset, we just need to see if the offset is valid in the size 261 // of the memset.. 262 if (MI->getIntrinsicID() == Intrinsic::memset) 263 return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), 264 MemSizeInBits, DL); 265 266 // If we have a memcpy/memmove, the only case we can handle is if this is a 267 // copy from constant memory. In that case, we can read directly from the 268 // constant memory. 269 MemTransferInst *MTI = cast<MemTransferInst>(MI); 270 271 Constant *Src = dyn_cast<Constant>(MTI->getSource()); 272 if (!Src) 273 return -1; 274 275 GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL)); 276 if (!GV || !GV->isConstant()) 277 return -1; 278 279 // See if the access is within the bounds of the transfer. 280 int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), 281 MemSizeInBits, DL); 282 if (Offset == -1) 283 return Offset; 284 285 unsigned AS = Src->getType()->getPointerAddressSpace(); 286 // Otherwise, see if we can constant fold a load from the constant with the 287 // offset applied as appropriate. 288 Src = 289 ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS)); 290 Constant *OffsetCst = 291 ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); 292 Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, 293 OffsetCst); 294 Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); 295 if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL)) 296 return Offset; 297 return -1; 298 } 299 300 template <class T, class HelperClass> 301 static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy, 302 HelperClass &Helper, 303 const DataLayout &DL) { 304 LLVMContext &Ctx = SrcVal->getType()->getContext(); 305 306 uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; 307 uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8; 308 // Compute which bits of the stored value are being used by the load. Convert 309 // to an integer type to start with. 310 if (SrcVal->getType()->getScalarType()->isPointerTy()) 311 SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); 312 if (!SrcVal->getType()->isIntegerTy()) 313 SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); 314 315 // Shift the bits to the least significant depending on endianness. 316 unsigned ShiftAmt; 317 if (DL.isLittleEndian()) 318 ShiftAmt = Offset * 8; 319 else 320 ShiftAmt = (StoreSize - LoadSize - Offset) * 8; 321 if (ShiftAmt) 322 SrcVal = Helper.CreateLShr(SrcVal, 323 ConstantInt::get(SrcVal->getType(), ShiftAmt)); 324 325 if (LoadSize != StoreSize) 326 SrcVal = Helper.CreateTruncOrBitCast(SrcVal, 327 IntegerType::get(Ctx, LoadSize * 8)); 328 return SrcVal; 329 } 330 331 /// This function is called when we have a memdep query of a load that ends up 332 /// being a clobbering store. This means that the store provides bits used by 333 /// the load but the pointers don't must-alias. Check this case to see if 334 /// there is anything more we can do before we give up. 335 Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, 336 Instruction *InsertPt, const DataLayout &DL) { 337 338 IRBuilder<> Builder(InsertPt); 339 SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL); 340 return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, Builder, DL); 341 } 342 343 Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset, 344 Type *LoadTy, const DataLayout &DL) { 345 ConstantFolder F; 346 SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, F, DL); 347 return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, F, DL); 348 } 349 350 /// This function is called when we have a memdep query of a load that ends up 351 /// being a clobbering load. This means that the load *may* provide bits used 352 /// by the load but we can't be sure because the pointers don't must-alias. 353 /// Check this case to see if there is anything more we can do before we give 354 /// up. 355 Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy, 356 Instruction *InsertPt, const DataLayout &DL) { 357 // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to 358 // widen SrcVal out to a larger load. 359 unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType()); 360 unsigned LoadSize = DL.getTypeStoreSize(LoadTy); 361 if (Offset + LoadSize > SrcValStoreSize) { 362 assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!"); 363 assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load"); 364 // If we have a load/load clobber an DepLI can be widened to cover this 365 // load, then we should widen it to the next power of 2 size big enough! 366 unsigned NewLoadSize = Offset + LoadSize; 367 if (!isPowerOf2_32(NewLoadSize)) 368 NewLoadSize = NextPowerOf2(NewLoadSize); 369 370 Value *PtrVal = SrcVal->getPointerOperand(); 371 // Insert the new load after the old load. This ensures that subsequent 372 // memdep queries will find the new load. We can't easily remove the old 373 // load completely because it is already in the value numbering table. 374 IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); 375 Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8); 376 DestPTy = 377 PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace()); 378 Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); 379 PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); 380 LoadInst *NewLoad = Builder.CreateLoad(PtrVal); 381 NewLoad->takeName(SrcVal); 382 NewLoad->setAlignment(SrcVal->getAlignment()); 383 384 DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); 385 DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); 386 387 // Replace uses of the original load with the wider load. On a big endian 388 // system, we need to shift down to get the relevant bits. 389 Value *RV = NewLoad; 390 if (DL.isBigEndian()) 391 RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8); 392 RV = Builder.CreateTrunc(RV, SrcVal->getType()); 393 SrcVal->replaceAllUsesWith(RV); 394 395 SrcVal = NewLoad; 396 } 397 398 return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL); 399 } 400 401 Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset, 402 Type *LoadTy, const DataLayout &DL) { 403 unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType()); 404 unsigned LoadSize = DL.getTypeStoreSize(LoadTy); 405 if (Offset + LoadSize > SrcValStoreSize) 406 return nullptr; 407 return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL); 408 } 409 410 template <class T, class HelperClass> 411 T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset, 412 Type *LoadTy, HelperClass &Helper, 413 const DataLayout &DL) { 414 LLVMContext &Ctx = LoadTy->getContext(); 415 uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8; 416 417 // We know that this method is only called when the mem transfer fully 418 // provides the bits for the load. 419 if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) { 420 // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and 421 // independently of what the offset is. 422 T *Val = cast<T>(MSI->getValue()); 423 if (LoadSize != 1) 424 Val = 425 Helper.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8)); 426 T *OneElt = Val; 427 428 // Splat the value out to the right number of bits. 429 for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) { 430 // If we can double the number of bytes set, do it. 431 if (NumBytesSet * 2 <= LoadSize) { 432 T *ShVal = Helper.CreateShl( 433 Val, ConstantInt::get(Val->getType(), NumBytesSet * 8)); 434 Val = Helper.CreateOr(Val, ShVal); 435 NumBytesSet <<= 1; 436 continue; 437 } 438 439 // Otherwise insert one byte at a time. 440 T *ShVal = Helper.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8)); 441 Val = Helper.CreateOr(OneElt, ShVal); 442 ++NumBytesSet; 443 } 444 445 return coerceAvailableValueToLoadTypeHelper(Val, LoadTy, Helper, DL); 446 } 447 448 // Otherwise, this is a memcpy/memmove from a constant global. 449 MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); 450 Constant *Src = cast<Constant>(MTI->getSource()); 451 unsigned AS = Src->getType()->getPointerAddressSpace(); 452 453 // Otherwise, see if we can constant fold a load from the constant with the 454 // offset applied as appropriate. 455 Src = 456 ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS)); 457 Constant *OffsetCst = 458 ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); 459 Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, 460 OffsetCst); 461 Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); 462 return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL); 463 } 464 465 /// This function is called when we have a 466 /// memdep query of a load that ends up being a clobbering mem intrinsic. 467 Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, 468 Type *LoadTy, Instruction *InsertPt, 469 const DataLayout &DL) { 470 IRBuilder<> Builder(InsertPt); 471 return getMemInstValueForLoadHelper<Value, IRBuilder<>>(SrcInst, Offset, 472 LoadTy, Builder, DL); 473 } 474 475 Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, 476 Type *LoadTy, const DataLayout &DL) { 477 // The only case analyzeLoadFromClobberingMemInst cannot be converted to a 478 // constant is when it's a memset of a non-constant. 479 if (auto *MSI = dyn_cast<MemSetInst>(SrcInst)) 480 if (!isa<Constant>(MSI->getValue())) 481 return nullptr; 482 ConstantFolder F; 483 return getMemInstValueForLoadHelper<Constant, ConstantFolder>(SrcInst, Offset, 484 LoadTy, F, DL); 485 } 486 } // namespace VNCoercion 487 } // namespace llvm 488