1 //===-- ImplicitNullChecks.cpp - Fold null checks into memory accesses ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass turns explicit null checks of the form
11 //
12 //   test %r10, %r10
13 //   je throw_npe
14 //   movl (%r10), %esi
15 //   ...
16 //
17 // to
18 //
19 //   faulting_load_op("movl (%r10), %esi", throw_npe)
20 //   ...
21 //
22 // With the help of a runtime that understands the .fault_maps section,
23 // faulting_load_op branches to throw_npe if executing movl (%r10), %esi incurs
24 // a page fault.
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "llvm/ADT/DenseSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/CodeGen/Passes.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineMemOperand.h"
34 #include "llvm/CodeGen/MachineOperand.h"
35 #include "llvm/CodeGen/MachineFunctionPass.h"
36 #include "llvm/CodeGen/MachineInstrBuilder.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/MachineModuleInfo.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/LLVMContext.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Debug.h"
44 #include "llvm/Target/TargetSubtargetInfo.h"
45 #include "llvm/Target/TargetInstrInfo.h"
46 
47 using namespace llvm;
48 
49 static cl::opt<int> PageSize("imp-null-check-page-size",
50                              cl::desc("The page size of the target in bytes"),
51                              cl::init(4096));
52 
53 #define DEBUG_TYPE "implicit-null-checks"
54 
55 STATISTIC(NumImplicitNullChecks,
56           "Number of explicit null checks made implicit");
57 
58 namespace {
59 
60 class ImplicitNullChecks : public MachineFunctionPass {
61   /// Represents one null check that can be made implicit.
62   struct NullCheck {
63     // The memory operation the null check can be folded into.
64     MachineInstr *MemOperation;
65 
66     // The instruction actually doing the null check (Ptr != 0).
67     MachineInstr *CheckOperation;
68 
69     // The block the check resides in.
70     MachineBasicBlock *CheckBlock;
71 
72     // The block branched to if the pointer is non-null.
73     MachineBasicBlock *NotNullSucc;
74 
75     // The block branched to if the pointer is null.
76     MachineBasicBlock *NullSucc;
77 
78     NullCheck()
79         : MemOperation(), CheckOperation(), CheckBlock(), NotNullSucc(),
80           NullSucc() {}
81 
82     explicit NullCheck(MachineInstr *memOperation, MachineInstr *checkOperation,
83                        MachineBasicBlock *checkBlock,
84                        MachineBasicBlock *notNullSucc,
85                        MachineBasicBlock *nullSucc)
86         : MemOperation(memOperation), CheckOperation(checkOperation),
87           CheckBlock(checkBlock), NotNullSucc(notNullSucc), NullSucc(nullSucc) {
88     }
89   };
90 
91   const TargetInstrInfo *TII = nullptr;
92   const TargetRegisterInfo *TRI = nullptr;
93   MachineModuleInfo *MMI = nullptr;
94 
95   bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
96                                  SmallVectorImpl<NullCheck> &NullCheckList);
97   MachineInstr *insertFaultingLoad(MachineInstr *LoadMI, MachineBasicBlock *MBB,
98                                    MCSymbol *HandlerLabel);
99   void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
100 
101 public:
102   static char ID;
103 
104   ImplicitNullChecks() : MachineFunctionPass(ID) {
105     initializeImplicitNullChecksPass(*PassRegistry::getPassRegistry());
106   }
107 
108   bool runOnMachineFunction(MachineFunction &MF) override;
109 
110   MachineFunctionProperties getRequiredProperties() const override {
111     return MachineFunctionProperties().set(
112         MachineFunctionProperties::Property::AllVRegsAllocated);
113   }
114 };
115 
116 /// \brief Detect re-ordering hazards and dependencies.
117 ///
118 /// This class keeps track of defs and uses, and can be queried if a given
119 /// machine instruction can be re-ordered from after the machine instructions
120 /// seen so far to before them.
121 class HazardDetector {
122   DenseSet<unsigned> RegDefs;
123   DenseSet<unsigned> RegUses;
124   const TargetRegisterInfo &TRI;
125   bool hasSeenClobber;
126 
127 public:
128   explicit HazardDetector(const TargetRegisterInfo &TRI) :
129     TRI(TRI), hasSeenClobber(false) {}
130 
131   /// \brief Make a note of \p MI for later queries to isSafeToHoist.
132   ///
133   /// May clobber this HazardDetector instance.  \see isClobbered.
134   void rememberInstruction(MachineInstr *MI);
135 
136   /// \brief Return true if it is safe to hoist \p MI from after all the
137   /// instructions seen so far (via rememberInstruction) to before it.
138   bool isSafeToHoist(MachineInstr *MI);
139 
140   /// \brief Return true if this instance of HazardDetector has been clobbered
141   /// (i.e. has no more useful information).
142   ///
143   /// A HazardDetecter is clobbered when it sees a construct it cannot
144   /// understand, and it would have to return a conservative answer for all
145   /// future queries.  Having a separate clobbered state lets the client code
146   /// bail early, without making queries about all of the future instructions
147   /// (which would have returned the most conservative answer anyway).
148   ///
149   /// Calling rememberInstruction or isSafeToHoist on a clobbered HazardDetector
150   /// is an error.
151   bool isClobbered() { return hasSeenClobber; }
152 };
153 }
154 
155 
156 void HazardDetector::rememberInstruction(MachineInstr *MI) {
157   assert(!isClobbered() &&
158          "Don't add instructions to a clobbered hazard detector");
159 
160   if (MI->mayStore() || MI->hasUnmodeledSideEffects()) {
161     hasSeenClobber = true;
162     return;
163   }
164 
165   for (auto *MMO : MI->memoperands()) {
166     // Right now we don't want to worry about LLVM's memory model.
167     if (!MMO->isUnordered()) {
168       hasSeenClobber = true;
169       return;
170     }
171   }
172 
173   for (auto &MO : MI->operands()) {
174     if (!MO.isReg() || !MO.getReg())
175       continue;
176 
177     if (MO.isDef())
178       RegDefs.insert(MO.getReg());
179     else
180       RegUses.insert(MO.getReg());
181   }
182 }
183 
184 bool HazardDetector::isSafeToHoist(MachineInstr *MI) {
185   assert(!isClobbered() && "isSafeToHoist cannot do anything useful!");
186 
187   // Right now we don't want to worry about LLVM's memory model.  This can be
188   // made more precise later.
189   for (auto *MMO : MI->memoperands())
190     if (!MMO->isUnordered())
191       return false;
192 
193   for (auto &MO : MI->operands()) {
194     if (MO.isReg() && MO.getReg()) {
195       for (unsigned Reg : RegDefs)
196         if (TRI.regsOverlap(Reg, MO.getReg()))
197           return false;  // We found a write-after-write or read-after-write
198 
199       if (MO.isDef())
200         for (unsigned Reg : RegUses)
201           if (TRI.regsOverlap(Reg, MO.getReg()))
202             return false;  // We found a write-after-read
203     }
204   }
205 
206   return true;
207 }
208 
209 bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
210   TII = MF.getSubtarget().getInstrInfo();
211   TRI = MF.getRegInfo().getTargetRegisterInfo();
212   MMI = &MF.getMMI();
213 
214   SmallVector<NullCheck, 16> NullCheckList;
215 
216   for (auto &MBB : MF)
217     analyzeBlockForNullChecks(MBB, NullCheckList);
218 
219   if (!NullCheckList.empty())
220     rewriteNullChecks(NullCheckList);
221 
222   return !NullCheckList.empty();
223 }
224 
225 /// Analyze MBB to check if its terminating branch can be turned into an
226 /// implicit null check.  If yes, append a description of the said null check to
227 /// NullCheckList and return true, else return false.
228 bool ImplicitNullChecks::analyzeBlockForNullChecks(
229     MachineBasicBlock &MBB, SmallVectorImpl<NullCheck> &NullCheckList) {
230   typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
231 
232   MDNode *BranchMD = nullptr;
233   if (auto *BB = MBB.getBasicBlock())
234     BranchMD = BB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit);
235 
236   if (!BranchMD)
237     return false;
238 
239   MachineBranchPredicate MBP;
240 
241   if (TII->AnalyzeBranchPredicate(MBB, MBP, true))
242     return false;
243 
244   // Is the predicate comparing an integer to zero?
245   if (!(MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
246         (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
247          MBP.Predicate == MachineBranchPredicate::PRED_EQ)))
248     return false;
249 
250   // If we cannot erase the test instruction itself, then making the null check
251   // implicit does not buy us much.
252   if (!MBP.SingleUseCondition)
253     return false;
254 
255   MachineBasicBlock *NotNullSucc, *NullSucc;
256 
257   if (MBP.Predicate == MachineBranchPredicate::PRED_NE) {
258     NotNullSucc = MBP.TrueDest;
259     NullSucc = MBP.FalseDest;
260   } else {
261     NotNullSucc = MBP.FalseDest;
262     NullSucc = MBP.TrueDest;
263   }
264 
265   // We handle the simplest case for now.  We can potentially do better by using
266   // the machine dominator tree.
267   if (NotNullSucc->pred_size() != 1)
268     return false;
269 
270   // Starting with a code fragment like:
271   //
272   //   test %RAX, %RAX
273   //   jne LblNotNull
274   //
275   //  LblNull:
276   //   callq throw_NullPointerException
277   //
278   //  LblNotNull:
279   //   Inst0
280   //   Inst1
281   //   ...
282   //   Def = Load (%RAX + <offset>)
283   //   ...
284   //
285   //
286   // we want to end up with
287   //
288   //   Def = FaultingLoad (%RAX + <offset>), LblNull
289   //   jmp LblNotNull ;; explicit or fallthrough
290   //
291   //  LblNotNull:
292   //   Inst0
293   //   Inst1
294   //   ...
295   //
296   //  LblNull:
297   //   callq throw_NullPointerException
298   //
299   //
300   // To see why this is legal, consider the two possibilities:
301   //
302   //  1. %RAX is null: since we constrain <offset> to be less than PageSize, the
303   //     load instruction dereferences the null page, causing a segmentation
304   //     fault.
305   //
306   //  2. %RAX is not null: in this case we know that the load cannot fault, as
307   //     otherwise the load would've faulted in the original program too and the
308   //     original program would've been undefined.
309   //
310   // This reasoning cannot be extended to justify hoisting through arbitrary
311   // control flow.  For instance, in the example below (in pseudo-C)
312   //
313   //    if (ptr == null) { throw_npe(); unreachable; }
314   //    if (some_cond) { return 42; }
315   //    v = ptr->field;  // LD
316   //    ...
317   //
318   // we cannot (without code duplication) use the load marked "LD" to null check
319   // ptr -- clause (2) above does not apply in this case.  In the above program
320   // the safety of ptr->field can be dependent on some_cond; and, for instance,
321   // ptr could be some non-null invalid reference that never gets loaded from
322   // because some_cond is always true.
323 
324   unsigned PointerReg = MBP.LHS.getReg();
325 
326   HazardDetector HD(*TRI);
327 
328   for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE;
329        ++MII) {
330     MachineInstr *MI = &*MII;
331     unsigned BaseReg;
332     int64_t Offset;
333     if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
334       if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg &&
335           Offset < PageSize && MI->getDesc().getNumDefs() <= 1 &&
336           HD.isSafeToHoist(MI)) {
337         NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc,
338                                    NullSucc);
339         return true;
340       }
341 
342     HD.rememberInstruction(MI);
343     if (HD.isClobbered())
344       return false;
345   }
346 
347   return false;
348 }
349 
350 /// Wrap a machine load instruction, LoadMI, into a FAULTING_LOAD_OP machine
351 /// instruction.  The FAULTING_LOAD_OP instruction does the same load as LoadMI
352 /// (defining the same register), and branches to HandlerLabel if the load
353 /// faults.  The FAULTING_LOAD_OP instruction is inserted at the end of MBB.
354 MachineInstr *ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
355                                                      MachineBasicBlock *MBB,
356                                                      MCSymbol *HandlerLabel) {
357   const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for
358                                  // all targets.
359 
360   DebugLoc DL;
361   unsigned NumDefs = LoadMI->getDesc().getNumDefs();
362   assert(NumDefs <= 1 && "other cases unhandled!");
363 
364   unsigned DefReg = NoRegister;
365   if (NumDefs != 0) {
366     DefReg = LoadMI->defs().begin()->getReg();
367     assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 &&
368            "expected exactly one def!");
369   }
370 
371   auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg)
372                  .addSym(HandlerLabel)
373                  .addImm(LoadMI->getOpcode());
374 
375   for (auto &MO : LoadMI->uses())
376     MIB.addOperand(MO);
377 
378   MIB.setMemRefs(LoadMI->memoperands_begin(), LoadMI->memoperands_end());
379 
380   return MIB;
381 }
382 
383 /// Rewrite the null checks in NullCheckList into implicit null checks.
384 void ImplicitNullChecks::rewriteNullChecks(
385     ArrayRef<ImplicitNullChecks::NullCheck> NullCheckList) {
386   DebugLoc DL;
387 
388   for (auto &NC : NullCheckList) {
389     MCSymbol *HandlerLabel = MMI->getContext().createTempSymbol();
390 
391     // Remove the conditional branch dependent on the null check.
392     unsigned BranchesRemoved = TII->RemoveBranch(*NC.CheckBlock);
393     (void)BranchesRemoved;
394     assert(BranchesRemoved > 0 && "expected at least one branch!");
395 
396     // Insert a faulting load where the conditional branch was originally.  We
397     // check earlier ensures that this bit of code motion is legal.  We do not
398     // touch the successors list for any basic block since we haven't changed
399     // control flow, we've just made it implicit.
400     insertFaultingLoad(NC.MemOperation, NC.CheckBlock, HandlerLabel);
401     NC.MemOperation->eraseFromParent();
402     NC.CheckOperation->eraseFromParent();
403 
404     // Insert an *unconditional* branch to not-null successor.
405     TII->InsertBranch(*NC.CheckBlock, NC.NotNullSucc, nullptr, /*Cond=*/None,
406                       DL);
407 
408     // Emit the HandlerLabel as an EH_LABEL.
409     BuildMI(*NC.NullSucc, NC.NullSucc->begin(), DL,
410             TII->get(TargetOpcode::EH_LABEL)).addSym(HandlerLabel);
411 
412     NumImplicitNullChecks++;
413   }
414 }
415 
416 char ImplicitNullChecks::ID = 0;
417 char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID;
418 INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks",
419                       "Implicit null checks", false, false)
420 INITIALIZE_PASS_END(ImplicitNullChecks, "implicit-null-checks",
421                     "Implicit null checks", false, false)
422