1 //===- PoisonChecking.cpp - -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implements a transform pass which instruments IR such that poison semantics
10 // are made explicit.  That is, it provides a (possibly partial) executable
11 // semantics for every instruction w.r.t. poison as specified in the LLVM
12 // LangRef.  There are obvious parallels to the sanitizer tools, but this pass
13 // is focused purely on the semantics of LLVM IR, not any particular source
14 // language.   If you're looking for something to see if your C/C++ contains
15 // UB, this is not it.
16 //
17 // The rewritten semantics of each instruction will include the following
18 // components:
19 //
20 // 1) The original instruction, unmodified.
21 // 2) A propagation rule which translates dynamic information about the poison
22 //    state of each input to whether the dynamic output of the instruction
23 //    produces poison.
24 // 3) A flag validation rule which validates any poison producing flags on the
25 //    instruction itself (e.g. checks for overflow on nsw).
26 // 4) A check rule which traps (to a handler function) if this instruction must
27 //    execute undefined behavior given the poison state of it's inputs.
28 //
29 // At the moment, the UB detection is done in a best effort manner; that is,
30 // the resulting code may produce a false negative result (not report UB when
31 // it actually exists according to the LangRef spec), but should never produce
32 // a false positive (report UB where it doesn't exist).  The intention is to
33 // eventually support a "strict" mode which never dynamically reports a false
34 // negative at the cost of rejecting some valid inputs to translation.
35 //
36 // Use cases for this pass include:
37 // - Understanding (and testing!) the implications of the definition of poison
38 //   from the LangRef.
39 // - Validating the output of a IR fuzzer to ensure that all programs produced
40 //   are well defined on the specific input used.
41 // - Finding/confirming poison specific miscompiles by checking the poison
42 //   status of an input/IR pair is the same before and after an optimization
43 //   transform.
44 // - Checking that a bugpoint reduction does not introduce UB which didn't
45 //   exist in the original program being reduced.
46 //
47 // The major sources of inaccuracy are currently:
48 // - Most validation rules not yet implemented for instructions with poison
49 //   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
50 // - UB which is control dependent on a branch on poison is not yet
51 //   reported. Currently, only data flow dependence is modeled.
52 // - Poison which is propagated through memory is not modeled.  As such,
53 //   storing poison to memory and then reloading it will cause a false negative
54 //   as we consider the reloaded value to not be poisoned.
55 // - Poison propagation across function boundaries is not modeled.  At the
56 //   moment, all arguments and return values are assumed not to be poison.
57 // - Undef is not modeled.  In particular, the optimizer's freedom to pick
58 //   concrete values for undef bits so as to maximize potential for producing
59 //   poison is not modeled.
60 //
61 //===----------------------------------------------------------------------===//
62 
63 #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
64 #include "llvm/ADT/DenseMap.h"
65 #include "llvm/ADT/Statistic.h"
66 #include "llvm/Analysis/MemoryBuiltins.h"
67 #include "llvm/Analysis/ValueTracking.h"
68 #include "llvm/IR/IRBuilder.h"
69 #include "llvm/IR/InstVisitor.h"
70 #include "llvm/IR/IntrinsicInst.h"
71 #include "llvm/IR/PatternMatch.h"
72 #include "llvm/Support/CommandLine.h"
73 #include "llvm/Support/Debug.h"
74 
75 using namespace llvm;
76 
77 #define DEBUG_TYPE "poison-checking"
78 
79 static cl::opt<bool>
80 LocalCheck("poison-checking-function-local",
81            cl::init(false),
82            cl::desc("Check that returns are non-poison (for testing)"));
83 
84 
85 static bool isConstantFalse(Value* V) {
86   assert(V->getType()->isIntegerTy(1));
87   if (auto *CI = dyn_cast<ConstantInt>(V))
88     return CI->isZero();
89   return false;
90 }
91 
92 static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
93   if (Ops.size() == 0)
94     return B.getFalse();
95   unsigned i = 0;
96   for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
97   if (i == Ops.size())
98     return B.getFalse();
99   Value *Accum = Ops[i++];
100   for (; i < Ops.size(); i++)
101     if (!isConstantFalse(Ops[i]))
102       Accum = B.CreateOr(Accum, Ops[i]);
103   return Accum;
104 }
105 
106 static void generateCreationChecksForBinOp(Instruction &I,
107                                            SmallVectorImpl<Value*> &Checks) {
108   assert(isa<BinaryOperator>(I));
109 
110   IRBuilder<> B(&I);
111   Value *LHS = I.getOperand(0);
112   Value *RHS = I.getOperand(1);
113   switch (I.getOpcode()) {
114   default:
115     return;
116   case Instruction::Add: {
117     if (I.hasNoSignedWrap()) {
118       auto *OverflowOp =
119         B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
120       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
121     }
122     if (I.hasNoUnsignedWrap()) {
123       auto *OverflowOp =
124         B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
125       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
126     }
127     break;
128   }
129   case Instruction::Sub: {
130     if (I.hasNoSignedWrap()) {
131       auto *OverflowOp =
132         B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
133       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
134     }
135     if (I.hasNoUnsignedWrap()) {
136       auto *OverflowOp =
137         B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
138       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
139     }
140     break;
141   }
142   case Instruction::Mul: {
143     if (I.hasNoSignedWrap()) {
144       auto *OverflowOp =
145         B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
146       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
147     }
148     if (I.hasNoUnsignedWrap()) {
149       auto *OverflowOp =
150         B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
151       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
152     }
153     break;
154   }
155   case Instruction::UDiv: {
156     if (I.isExact()) {
157       auto *Check =
158         B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
159                      ConstantInt::get(LHS->getType(), 0));
160       Checks.push_back(Check);
161     }
162     break;
163   }
164   case Instruction::SDiv: {
165     if (I.isExact()) {
166       auto *Check =
167         B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
168                      ConstantInt::get(LHS->getType(), 0));
169       Checks.push_back(Check);
170     }
171     break;
172   }
173   case Instruction::AShr:
174   case Instruction::LShr:
175   case Instruction::Shl: {
176     Value *ShiftCheck =
177       B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
178                    ConstantInt::get(RHS->getType(),
179                                     LHS->getType()->getScalarSizeInBits()));
180     Checks.push_back(ShiftCheck);
181     break;
182   }
183   };
184 }
185 
186 /// Given an instruction which can produce poison on non-poison inputs
187 /// (i.e. canCreatePoison returns true), generate runtime checks to produce
188 /// boolean indicators of when poison would result.
189 static void generateCreationChecks(Instruction &I,
190                                    SmallVectorImpl<Value*> &Checks) {
191   IRBuilder<> B(&I);
192   if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
193     generateCreationChecksForBinOp(I, Checks);
194 
195   // Handle non-binops seperately
196   switch (I.getOpcode()) {
197   default:
198     // Note there are a couple of missing cases here, once implemented, this
199     // should become an llvm_unreachable.
200     break;
201   case Instruction::ExtractElement: {
202     Value *Vec = I.getOperand(0);
203     auto *VecVTy = cast<VectorType>(Vec->getType());
204     if (VecVTy->isScalable())
205       break;
206     Value *Idx = I.getOperand(1);
207     unsigned NumElts = VecVTy->getNumElements();
208     Value *Check =
209       B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
210                    ConstantInt::get(Idx->getType(), NumElts));
211     Checks.push_back(Check);
212     break;
213   }
214   case Instruction::InsertElement: {
215     Value *Vec = I.getOperand(0);
216     auto *VecVTy = cast<VectorType>(Vec->getType());
217     if (VecVTy->isScalable())
218       break;
219     Value *Idx = I.getOperand(2);
220     unsigned NumElts = VecVTy->getNumElements();
221     Value *Check =
222       B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
223                    ConstantInt::get(Idx->getType(), NumElts));
224     Checks.push_back(Check);
225     break;
226   }
227   };
228 }
229 
230 static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
231   auto Itr = ValToPoison.find(V);
232   if (Itr != ValToPoison.end())
233     return Itr->second;
234   if (isa<Constant>(V)) {
235     return ConstantInt::getFalse(V->getContext());
236   }
237   // Return false for unknwon values - this implements a non-strict mode where
238   // unhandled IR constructs are simply considered to never produce poison.  At
239   // some point in the future, we probably want a "strict mode" for testing if
240   // nothing else.
241   return ConstantInt::getFalse(V->getContext());
242 }
243 
244 static void CreateAssert(IRBuilder<> &B, Value *Cond) {
245   assert(Cond->getType()->isIntegerTy(1));
246   if (auto *CI = dyn_cast<ConstantInt>(Cond))
247     if (CI->isAllOnesValue())
248       return;
249 
250   Module *M = B.GetInsertBlock()->getModule();
251   M->getOrInsertFunction("__poison_checker_assert",
252                          Type::getVoidTy(M->getContext()),
253                          Type::getInt1Ty(M->getContext()));
254   Function *TrapFunc = M->getFunction("__poison_checker_assert");
255   B.CreateCall(TrapFunc, Cond);
256 }
257 
258 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
259   assert(Cond->getType()->isIntegerTy(1));
260   CreateAssert(B, B.CreateNot(Cond));
261 }
262 
263 static bool rewrite(Function &F) {
264   auto * const Int1Ty = Type::getInt1Ty(F.getContext());
265 
266   DenseMap<Value *, Value *> ValToPoison;
267 
268   for (BasicBlock &BB : F)
269     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
270       auto *OldPHI = cast<PHINode>(&*I);
271       auto *NewPHI = PHINode::Create(Int1Ty,
272                                      OldPHI->getNumIncomingValues());
273       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
274         NewPHI->addIncoming(UndefValue::get(Int1Ty),
275                             OldPHI->getIncomingBlock(i));
276       NewPHI->insertBefore(OldPHI);
277       ValToPoison[OldPHI] = NewPHI;
278     }
279 
280   for (BasicBlock &BB : F)
281     for (Instruction &I : BB) {
282       if (isa<PHINode>(I)) continue;
283 
284       IRBuilder<> B(cast<Instruction>(&I));
285 
286       // Note: There are many more sources of documented UB, but this pass only
287       // attempts to find UB triggered by propagation of poison.
288       if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
289         CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
290 
291       if (LocalCheck)
292         if (auto *RI = dyn_cast<ReturnInst>(&I))
293           if (RI->getNumOperands() != 0) {
294             Value *Op = RI->getOperand(0);
295             CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
296           }
297 
298       SmallVector<Value*, 4> Checks;
299       if (propagatesFullPoison(&I))
300         for (Value *V : I.operands())
301           Checks.push_back(getPoisonFor(ValToPoison, V));
302 
303       if (canCreatePoison(&I))
304         generateCreationChecks(I, Checks);
305       ValToPoison[&I] = buildOrChain(B, Checks);
306     }
307 
308   for (BasicBlock &BB : F)
309     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
310       auto *OldPHI = cast<PHINode>(&*I);
311       if (!ValToPoison.count(OldPHI))
312         continue; // skip the newly inserted phis
313       auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
314       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
315         auto *OldVal = OldPHI->getIncomingValue(i);
316         NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
317       }
318     }
319   return true;
320 }
321 
322 
323 PreservedAnalyses PoisonCheckingPass::run(Module &M,
324                                           ModuleAnalysisManager &AM) {
325   bool Changed = false;
326   for (auto &F : M)
327     Changed |= rewrite(F);
328 
329   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
330 }
331 
332 PreservedAnalyses PoisonCheckingPass::run(Function &F,
333                                           FunctionAnalysisManager &AM) {
334   return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
335 }
336 
337 
338 /* Major TODO Items:
339    - Control dependent poison UB
340    - Strict mode - (i.e. must analyze every operand)
341      - Poison through memory
342      - Function ABIs
343      - Full coverage of intrinsics, etc.. (ouch)
344 
345    Instructions w/Unclear Semantics:
346    - shufflevector - It would seem reasonable for an out of bounds mask element
347      to produce poison, but the LangRef does not state.
348    - and/or - It would seem reasonable for poison to propagate from both
349      arguments, but LangRef doesn't state and propagatesFullPoison doesn't
350      include these two.
351    - all binary ops w/vector operands - The likely interpretation would be that
352      any element overflowing should produce poison for the entire result, but
353      the LangRef does not state.
354    - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems
355      strange that only certian flags should be documented as producing poison.
356 
357    Cases of clear poison semantics not yet implemented:
358    - Exact flags on ashr/lshr produce poison
359    - NSW/NUW flags on shl produce poison
360    - Inbounds flag on getelementptr produce poison
361    - fptosi/fptoui (out of bounds input) produce poison
362    - Scalable vector types for insertelement/extractelement
363    - Floating point binary ops w/fmf nnan/noinfs flags produce poison
364  */
365