1 //===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" 11 #include "llvm/Analysis/TargetTransformInfo.h" 12 #include "llvm/IR/IRBuilder.h" 13 #include "llvm/IR/IntrinsicInst.h" 14 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 15 16 using namespace llvm; 17 18 static unsigned getLoopOperandSizeInBytes(Type *Type) { 19 if (VectorType *VTy = dyn_cast<VectorType>(Type)) { 20 return VTy->getBitWidth() / 8; 21 } 22 23 return Type->getPrimitiveSizeInBits() / 8; 24 } 25 26 void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, 27 Value *DstAddr, ConstantInt *CopyLen, 28 unsigned SrcAlign, unsigned DestAlign, 29 bool SrcIsVolatile, bool DstIsVolatile, 30 const TargetTransformInfo &TTI) { 31 // No need to expand zero length copies. 32 if (CopyLen->isZero()) 33 return; 34 35 BasicBlock *PreLoopBB = InsertBefore->getParent(); 36 BasicBlock *PostLoopBB = nullptr; 37 Function *ParentFunc = PreLoopBB->getParent(); 38 LLVMContext &Ctx = PreLoopBB->getContext(); 39 40 Type *TypeOfCopyLen = CopyLen->getType(); 41 Type *LoopOpType = 42 TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); 43 44 unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); 45 uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; 46 47 unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); 48 unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); 49 50 if (LoopEndCount != 0) { 51 // Split 52 PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); 53 BasicBlock *LoopBB = 54 BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); 55 PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); 56 57 IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); 58 59 // Cast the Src and Dst pointers to pointers to the loop operand type (if 60 // needed). 61 PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); 62 PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); 63 if (SrcAddr->getType() != SrcOpType) { 64 SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); 65 } 66 if (DstAddr->getType() != DstOpType) { 67 DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); 68 } 69 70 IRBuilder<> LoopBuilder(LoopBB); 71 PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); 72 LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); 73 // Loop Body 74 Value *SrcGEP = 75 LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); 76 Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); 77 Value *DstGEP = 78 LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); 79 LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); 80 81 Value *NewIndex = 82 LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); 83 LoopIndex->addIncoming(NewIndex, LoopBB); 84 85 // Create the loop branch condition. 86 Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount); 87 LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI), 88 LoopBB, PostLoopBB); 89 } 90 91 uint64_t BytesCopied = LoopEndCount * LoopOpSize; 92 uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied; 93 if (RemainingBytes) { 94 IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() 95 : InsertBefore); 96 97 // Update the alignment based on the copy size used in the loop body. 98 SrcAlign = std::min(SrcAlign, LoopOpSize); 99 DestAlign = std::min(DestAlign, LoopOpSize); 100 101 SmallVector<Type *, 5> RemainingOps; 102 TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes, 103 SrcAlign, DestAlign); 104 105 for (auto OpTy : RemainingOps) { 106 // Calaculate the new index 107 unsigned OperandSize = getLoopOperandSizeInBytes(OpTy); 108 uint64_t GepIndex = BytesCopied / OperandSize; 109 assert(GepIndex * OperandSize == BytesCopied && 110 "Division should have no Remainder!"); 111 // Cast source to operand type and load 112 PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS); 113 Value *CastedSrc = SrcAddr->getType() == SrcPtrType 114 ? SrcAddr 115 : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); 116 Value *SrcGEP = RBuilder.CreateInBoundsGEP( 117 OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); 118 Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile); 119 120 // Cast destination to operand type and store. 121 PointerType *DstPtrType = PointerType::get(OpTy, DstAS); 122 Value *CastedDst = DstAddr->getType() == DstPtrType 123 ? DstAddr 124 : RBuilder.CreateBitCast(DstAddr, DstPtrType); 125 Value *DstGEP = RBuilder.CreateInBoundsGEP( 126 OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); 127 RBuilder.CreateStore(Load, DstGEP, DstIsVolatile); 128 129 BytesCopied += OperandSize; 130 } 131 } 132 assert(BytesCopied == CopyLen->getZExtValue() && 133 "Bytes copied should match size in the call!"); 134 } 135 136 void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, 137 Value *SrcAddr, Value *DstAddr, 138 Value *CopyLen, unsigned SrcAlign, 139 unsigned DestAlign, bool SrcIsVolatile, 140 bool DstIsVolatile, 141 const TargetTransformInfo &TTI) { 142 BasicBlock *PreLoopBB = InsertBefore->getParent(); 143 BasicBlock *PostLoopBB = 144 PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion"); 145 146 Function *ParentFunc = PreLoopBB->getParent(); 147 LLVMContext &Ctx = PreLoopBB->getContext(); 148 149 Type *LoopOpType = 150 TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); 151 unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); 152 153 IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); 154 155 unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); 156 unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); 157 PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); 158 PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); 159 if (SrcAddr->getType() != SrcOpType) { 160 SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); 161 } 162 if (DstAddr->getType() != DstOpType) { 163 DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); 164 } 165 166 // Calculate the loop trip count, and remaining bytes to copy after the loop. 167 Type *CopyLenType = CopyLen->getType(); 168 IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType); 169 assert(ILengthType && 170 "expected size argument to memcpy to be an integer type!"); 171 Type *Int8Type = Type::getInt8Ty(Ctx); 172 bool LoopOpIsInt8 = LoopOpType == Int8Type; 173 ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); 174 Value *RuntimeLoopCount = LoopOpIsInt8 ? 175 CopyLen : 176 PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); 177 BasicBlock *LoopBB = 178 BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB); 179 IRBuilder<> LoopBuilder(LoopBB); 180 181 PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index"); 182 LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); 183 184 Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); 185 Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); 186 Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); 187 LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); 188 189 Value *NewIndex = 190 LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); 191 LoopIndex->addIncoming(NewIndex, LoopBB); 192 193 if (!LoopOpIsInt8) { 194 // Add in the 195 Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); 196 Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); 197 198 // Loop body for the residual copy. 199 BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual", 200 PreLoopBB->getParent(), 201 PostLoopBB); 202 // Residual loop header. 203 BasicBlock *ResHeaderBB = BasicBlock::Create( 204 Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr); 205 206 // Need to update the pre-loop basic block to branch to the correct place. 207 // branch to the main loop if the count is non-zero, branch to the residual 208 // loop if the copy size is smaller then 1 iteration of the main loop but 209 // non-zero and finally branch to after the residual loop if the memcpy 210 // size is zero. 211 ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); 212 PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), 213 LoopBB, ResHeaderBB); 214 PreLoopBB->getTerminator()->eraseFromParent(); 215 216 LoopBuilder.CreateCondBr( 217 LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, 218 ResHeaderBB); 219 220 // Determine if we need to branch to the residual loop or bypass it. 221 IRBuilder<> RHBuilder(ResHeaderBB); 222 RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero), 223 ResLoopBB, PostLoopBB); 224 225 // Copy the residual with single byte load/store loop. 226 IRBuilder<> ResBuilder(ResLoopBB); 227 PHINode *ResidualIndex = 228 ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index"); 229 ResidualIndex->addIncoming(Zero, ResHeaderBB); 230 231 Value *SrcAsInt8 = 232 ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS)); 233 Value *DstAsInt8 = 234 ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS)); 235 Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); 236 Value *SrcGEP = 237 ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset); 238 Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile); 239 Value *DstGEP = 240 ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset); 241 ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile); 242 243 Value *ResNewIndex = 244 ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U)); 245 ResidualIndex->addIncoming(ResNewIndex, ResLoopBB); 246 247 // Create the loop branch condition. 248 ResBuilder.CreateCondBr( 249 ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB, 250 PostLoopBB); 251 } else { 252 // In this case the loop operand type was a byte, and there is no need for a 253 // residual loop to copy the remaining memory after the main loop. 254 // We do however need to patch up the control flow by creating the 255 // terminators for the preloop block and the memcpy loop. 256 ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); 257 PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), 258 LoopBB, PostLoopBB); 259 PreLoopBB->getTerminator()->eraseFromParent(); 260 LoopBuilder.CreateCondBr( 261 LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, 262 PostLoopBB); 263 } 264 } 265 266 // Lower memmove to IR. memmove is required to correctly copy overlapping memory 267 // regions; therefore, it has to check the relative positions of the source and 268 // destination pointers and choose the copy direction accordingly. 269 // 270 // The code below is an IR rendition of this C function: 271 // 272 // void* memmove(void* dst, const void* src, size_t n) { 273 // unsigned char* d = dst; 274 // const unsigned char* s = src; 275 // if (s < d) { 276 // // copy backwards 277 // while (n--) { 278 // d[n] = s[n]; 279 // } 280 // } else { 281 // // copy forward 282 // for (size_t i = 0; i < n; ++i) { 283 // d[i] = s[i]; 284 // } 285 // } 286 // return dst; 287 // } 288 static void createMemMoveLoop(Instruction *InsertBefore, 289 Value *SrcAddr, Value *DstAddr, Value *CopyLen, 290 unsigned SrcAlign, unsigned DestAlign, 291 bool SrcIsVolatile, bool DstIsVolatile) { 292 Type *TypeOfCopyLen = CopyLen->getType(); 293 BasicBlock *OrigBB = InsertBefore->getParent(); 294 Function *F = OrigBB->getParent(); 295 296 // Create the a comparison of src and dst, based on which we jump to either 297 // the forward-copy part of the function (if src >= dst) or the backwards-copy 298 // part (if src < dst). 299 // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else 300 // structure. Its block terminators (unconditional branches) are replaced by 301 // the appropriate conditional branches when the loop is built. 302 ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT, 303 SrcAddr, DstAddr, "compare_src_dst"); 304 Instruction *ThenTerm, *ElseTerm; 305 SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm, 306 &ElseTerm); 307 308 // Each part of the function consists of two blocks: 309 // copy_backwards: used to skip the loop when n == 0 310 // copy_backwards_loop: the actual backwards loop BB 311 // copy_forward: used to skip the loop when n == 0 312 // copy_forward_loop: the actual forward loop BB 313 BasicBlock *CopyBackwardsBB = ThenTerm->getParent(); 314 CopyBackwardsBB->setName("copy_backwards"); 315 BasicBlock *CopyForwardBB = ElseTerm->getParent(); 316 CopyForwardBB->setName("copy_forward"); 317 BasicBlock *ExitBB = InsertBefore->getParent(); 318 ExitBB->setName("memmove_done"); 319 320 // Initial comparison of n == 0 that lets us skip the loops altogether. Shared 321 // between both backwards and forward copy clauses. 322 ICmpInst *CompareN = 323 new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen, 324 ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0"); 325 326 // Copying backwards. 327 BasicBlock *LoopBB = 328 BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB); 329 IRBuilder<> LoopBuilder(LoopBB); 330 PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); 331 Value *IndexPtr = LoopBuilder.CreateSub( 332 LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); 333 Value *Element = LoopBuilder.CreateLoad( 334 LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element"); 335 LoopBuilder.CreateStore(Element, 336 LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr)); 337 LoopBuilder.CreateCondBr( 338 LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), 339 ExitBB, LoopBB); 340 LoopPhi->addIncoming(IndexPtr, LoopBB); 341 LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); 342 BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); 343 ThenTerm->eraseFromParent(); 344 345 // Copying forward. 346 BasicBlock *FwdLoopBB = 347 BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB); 348 IRBuilder<> FwdLoopBuilder(FwdLoopBB); 349 PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); 350 Value *FwdElement = FwdLoopBuilder.CreateLoad( 351 FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element"); 352 FwdLoopBuilder.CreateStore( 353 FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi)); 354 Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( 355 FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); 356 FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), 357 ExitBB, FwdLoopBB); 358 FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); 359 FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); 360 361 BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); 362 ElseTerm->eraseFromParent(); 363 } 364 365 static void createMemSetLoop(Instruction *InsertBefore, 366 Value *DstAddr, Value *CopyLen, Value *SetValue, 367 unsigned Align, bool IsVolatile) { 368 Type *TypeOfCopyLen = CopyLen->getType(); 369 BasicBlock *OrigBB = InsertBefore->getParent(); 370 Function *F = OrigBB->getParent(); 371 BasicBlock *NewBB = 372 OrigBB->splitBasicBlock(InsertBefore, "split"); 373 BasicBlock *LoopBB 374 = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB); 375 376 IRBuilder<> Builder(OrigBB->getTerminator()); 377 378 // Cast pointer to the type of value getting stored 379 unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); 380 DstAddr = Builder.CreateBitCast(DstAddr, 381 PointerType::get(SetValue->getType(), dstAS)); 382 383 Builder.CreateCondBr( 384 Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, 385 LoopBB); 386 OrigBB->getTerminator()->eraseFromParent(); 387 388 IRBuilder<> LoopBuilder(LoopBB); 389 PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); 390 LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); 391 392 LoopBuilder.CreateStore( 393 SetValue, 394 LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), 395 IsVolatile); 396 397 Value *NewIndex = 398 LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); 399 LoopIndex->addIncoming(NewIndex, LoopBB); 400 401 LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, 402 NewBB); 403 } 404 405 void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, 406 const TargetTransformInfo &TTI) { 407 if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) { 408 createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy, 409 /* SrcAddr */ Memcpy->getRawSource(), 410 /* DstAddr */ Memcpy->getRawDest(), 411 /* CopyLen */ CI, 412 /* SrcAlign */ Memcpy->getSourceAlignment(), 413 /* DestAlign */ Memcpy->getDestAlignment(), 414 /* SrcIsVolatile */ Memcpy->isVolatile(), 415 /* DstIsVolatile */ Memcpy->isVolatile(), 416 /* TargetTransformInfo */ TTI); 417 } else { 418 createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy, 419 /* SrcAddr */ Memcpy->getRawSource(), 420 /* DstAddr */ Memcpy->getRawDest(), 421 /* CopyLen */ Memcpy->getLength(), 422 /* SrcAlign */ Memcpy->getSourceAlignment(), 423 /* DestAlign */ Memcpy->getDestAlignment(), 424 /* SrcIsVolatile */ Memcpy->isVolatile(), 425 /* DstIsVolatile */ Memcpy->isVolatile(), 426 /* TargetTransfomrInfo */ TTI); 427 } 428 } 429 430 void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) { 431 createMemMoveLoop(/* InsertBefore */ Memmove, 432 /* SrcAddr */ Memmove->getRawSource(), 433 /* DstAddr */ Memmove->getRawDest(), 434 /* CopyLen */ Memmove->getLength(), 435 /* SrcAlign */ Memmove->getSourceAlignment(), 436 /* DestAlign */ Memmove->getDestAlignment(), 437 /* SrcIsVolatile */ Memmove->isVolatile(), 438 /* DstIsVolatile */ Memmove->isVolatile()); 439 } 440 441 void llvm::expandMemSetAsLoop(MemSetInst *Memset) { 442 createMemSetLoop(/* InsertBefore */ Memset, 443 /* DstAddr */ Memset->getRawDest(), 444 /* CopyLen */ Memset->getLength(), 445 /* SetValue */ Memset->getValue(), 446 /* Alignment */ Memset->getDestAlignment(), 447 Memset->isVolatile()); 448 } 449