//===-- AtomicExpandPass.cpp - Expand atomic instructions ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// appropriate (intrinsic-based) ldrex/strex loops.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

#define DEBUG_TYPE "atomic-expand"

namespace {
  class AtomicExpand : public FunctionPass {
    const TargetMachine *TM;

  public:
    static char ID; // Pass identification, replacement for typeid
    explicit AtomicExpand(const TargetMachine *TM = nullptr)
        : FunctionPass(ID), TM(TM) {
      initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;
    bool expandAtomicInsts(Function &F);

    bool expandAtomicLoad(LoadInst *LI);
    bool expandAtomicStore(StoreInst *SI);
    bool expandAtomicRMW(AtomicRMWInst *AI);
    bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);

    AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
    void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
  };
}

char AtomicExpand::ID = 0;
char &llvm::AtomicExpandID = AtomicExpand::ID;
INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand",
                   "Expand Atomic calls in terms of either load-linked & "
                   "store-conditional or cmpxchg",
                   false, false)

FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
  return new AtomicExpand(TM);
}
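// For orientation, a sketch of how a target would opt in to this expansion:
// the pass is scheduled early in the target's IR pipeline. The hook and class
// names below are illustrative (an ARM-style PassConfig is assumed) and vary
// by target:
//
//   void ARMPassConfig::addIRPasses() {
//     addPass(createAtomicExpandPass(TM));
//     ...
//   }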
bool AtomicExpand::runOnFunction(Function &F) {
  if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand())
    return false;

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather
  // a list of all atomic instructions before we start.
  for (BasicBlock &BB : F)
    for (Instruction &Inst : BB) {
      if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
          (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
          (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
        AtomicInsts.push_back(&Inst);
    }

  bool MadeChange = false;
  for (Instruction *Inst : AtomicInsts) {
    if (!TM->getSubtargetImpl()->getTargetLowering()->shouldExpandAtomicInIR(
            Inst))
      continue;

    if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
      MadeChange |= expandAtomicRMW(AI);
    else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
      MadeChange |= expandAtomicCmpXchg(CI);
    else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
      MadeChange |= expandAtomicLoad(LI);
    else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
      MadeChange |= expandAtomicStore(SI);
    else
      llvm_unreachable("Unknown atomic instruction");
  }

  return MadeChange;
}

bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
  // Load instructions don't actually need a leading fence, even in the
  // SequentiallyConsistent case.
  AtomicOrdering MemOpOrder =
      TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic()
          ? Monotonic
          : LI->getOrdering();

  // The only 64-bit load guaranteed to be single-copy atomic by the ARM
  // architecture is an ldrexd (A3.5.3).
  IRBuilder<> Builder(LI);
  Value *Val = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
      Builder, LI->getPointerOperand(), MemOpOrder);

  insertTrailingFence(Builder, LI->getOrdering());

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
  // The only atomic 64-bit store on ARM is an strexd that succeeds, which
  // means we need a loop and the entire instruction is essentially an
  // "atomicrmw xchg" that ignores the value loaded.
  IRBuilder<> Builder(SI);
  AtomicRMWInst *AI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
                              SI->getValueOperand(), SI->getOrdering());
  SI->eraseFromParent();

  // Now that we have an appropriate swap instruction, lower it as usual.
  return expandAtomicRMW(AI);
}
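// To make the store rewrite above concrete, a minimal before/after sketch of
// the IR (the type and ordering are illustrative):
//
//   store atomic i32 %val, i32* %ptr seq_cst, align 4
//
// becomes
//
//   atomicrmw xchg i32* %ptr, i32 %val seq_cst
//
// whose result is unused, and which expandAtomicRMW below then lowers into a
// load-linked/store-conditional loop.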
bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
  AtomicOrdering Order = AI->getOrdering();
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     fence?
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp ne i32 %stored, 0
  //     br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
  // atomicrmw.end:
  //     fence?
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
      Builder, Addr, MemOpOrder);

  Value *NewVal;
  switch (AI->getOperation()) {
  case AtomicRMWInst::Xchg:
    NewVal = AI->getValOperand();
    break;
  case AtomicRMWInst::Add:
    NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Sub:
    NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::And:
    NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Nand:
    NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
                               "new");
    break;
  case AtomicRMWInst::Or:
    NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Xor:
    NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Max:
    NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Min:
    NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::UMax:
    NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::UMin:
    NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  default:
    llvm_unreachable("Unknown atomic op");
  }

  Value *StoreSuccess =
      TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
          Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  insertTrailingFence(Builder, Order);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();

  return true;
}
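// For reference, a concrete instance of the expansion implemented above, as
// it might look on ARM. This is a sketch: the calls emitted are whatever the
// target's emitLoadLinked/emitStoreConditional produce, assumed here to be
// the ARM ldrex/strex intrinsics, with explicit fences for seq_cst:
//
//   atomicrmw add i32* %ptr, i32 1 seq_cst
//
// becomes roughly
//
//   fence release
// atomicrmw.start:
//   %loaded = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
//   %new = add i32 %loaded, 1
//   %stored = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %ptr)
//   %try_again = icmp ne i32 %stored, 0
//   br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
// atomicrmw.end:
//   fence seq_cst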
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: cmpxchg iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     fence?
  // cmpxchg.start:
  //     %loaded = @load.linked(%addr)
  //     %should_store = icmp eq %loaded, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.trystore:
  //     %stored = @store_conditional(%new, %addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.start/%cmpxchg.failure (strong/weak)
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
  auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);

  // This grabs the DebugLoc from CI.
  IRBuilder<> Builder(CI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
      Builder, Addr, MemOpOrder);
  Value *ShouldStore =
      Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);

  Builder.SetInsertPoint(TryStoreBB);
  Value *StoreSuccess =
      TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
          Builder, CI->getNewValOperand(), Addr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : LoopBB);

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  insertTrailingFence(Builder, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(FailureBB);
  insertTrailingFence(Builder, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.

  // Set up the builder so we can create any PHIs we need.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now that we're no longer iterating through
  // them.
  for (auto EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has
    // happened, so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}
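// Summarising the two fence helpers below: when the target asks for explicit
// fences (getInsertFencesForAtomic()), the exclusive operations themselves
// are emitted Monotonic and the requested ordering is recovered around them:
//
//   monotonic:                 op
//   acquire:                   op; fence acquire
//   release:    fence release; op
//   acq_rel:    fence release; op; fence acquire
//   seq_cst:    fence release; op; fence seq_cst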
AtomicOrdering AtomicExpand::insertLeadingFence(IRBuilder<> &Builder,
                                                AtomicOrdering Ord) {
  if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
    return Ord;

  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
    Builder.CreateFence(Release);

  // The exclusive operations don't need any barrier if we're adding separate
  // fences.
  return Monotonic;
}

void AtomicExpand::insertTrailingFence(IRBuilder<> &Builder,
                                       AtomicOrdering Ord) {
  if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
    return;

  if (Ord == Acquire || Ord == AcquireRelease)
    Builder.CreateFence(Acquire);
  else if (Ord == SequentiallyConsistent)
    Builder.CreateFence(SequentiallyConsistent);
}
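// For quick experimentation, the pass can be run in isolation through opt,
// along these lines (the triple and input file name are illustrative):
//
//   opt -S -mtriple=armv7-apple-ios7.0 -atomic-expand input.ll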