1 //===- PoisonChecking.cpp - -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implements a transform pass which instruments IR such that poison semantics
10 // are made explicit.  That is, it provides a (possibly partial) executable
11 // semantics for every instruction w.r.t. poison as specified in the LLVM
12 // LangRef.  There are obvious parallels to the sanitizer tools, but this pass
13 // is focused purely on the semantics of LLVM IR, not any particular source
14 // language.   If you're looking for something to see if your C/C++ contains
15 // UB, this is not it.
16 //
17 // The rewritten semantics of each instruction will include the following
18 // components:
19 //
20 // 1) The original instruction, unmodified.
21 // 2) A propagation rule which translates dynamic information about the poison
22 //    state of each input to whether the dynamic output of the instruction
23 //    produces poison.
24 // 3) A flag validation rule which validates any poison producing flags on the
25 //    instruction itself (e.g. checks for overflow on nsw).
26 // 4) A check rule which traps (to a handler function) if this instruction must
27 //    execute undefined behavior given the poison state of it's inputs.
28 //
29 // At the moment, the UB detection is done in a best effort manner; that is,
30 // the resulting code may produce a false negative result (not report UB when
31 // it actually exists according to the LangRef spec), but should never produce
32 // a false positive (report UB where it doesn't exist).  The intention is to
33 // eventually support a "strict" mode which never dynamically reports a false
34 // negative at the cost of rejecting some valid inputs to translation.
35 //
36 // Use cases for this pass include:
37 // - Understanding (and testing!) the implications of the definition of poison
38 //   from the LangRef.
39 // - Validating the output of a IR fuzzer to ensure that all programs produced
40 //   are well defined on the specific input used.
41 // - Finding/confirming poison specific miscompiles by checking the poison
42 //   status of an input/IR pair is the same before and after an optimization
43 //   transform.
44 // - Checking that a bugpoint reduction does not introduce UB which didn't
45 //   exist in the original program being reduced.
46 //
47 // The major sources of inaccuracy are currently:
48 // - Most validation rules not yet implemented for instructions with poison
49 //   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
50 // - UB which is control dependent on a branch on poison is not yet
51 //   reported. Currently, only data flow dependence is modeled.
52 // - Poison which is propagated through memory is not modeled.  As such,
53 //   storing poison to memory and then reloading it will cause a false negative
54 //   as we consider the reloaded value to not be poisoned.
55 // - Poison propagation across function boundaries is not modeled.  At the
56 //   moment, all arguments and return values are assumed not to be poison.
57 // - Undef is not modeled.  In particular, the optimizer's freedom to pick
58 //   concrete values for undef bits so as to maximize potential for producing
59 //   poison is not modeled.
60 //
61 //===----------------------------------------------------------------------===//
62 
63 #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
64 #include "llvm/ADT/DenseMap.h"
65 #include "llvm/ADT/Statistic.h"
66 #include "llvm/Analysis/MemoryBuiltins.h"
67 #include "llvm/Analysis/ValueTracking.h"
68 #include "llvm/IR/InstVisitor.h"
69 #include "llvm/IR/IntrinsicInst.h"
70 #include "llvm/IR/IRBuilder.h"
71 #include "llvm/IR/PatternMatch.h"
72 #include "llvm/Support/Debug.h"
73 
74 using namespace llvm;
75 
76 #define DEBUG_TYPE "poison-checking"
77 
78 static cl::opt<bool>
79 LocalCheck("poison-checking-function-local",
80            cl::init(false),
81            cl::desc("Check that returns are non-poison (for testing)"));
82 
83 
84 static bool isConstantFalse(Value* V) {
85   assert(V->getType()->isIntegerTy(1));
86   if (auto *CI = dyn_cast<ConstantInt>(V))
87     return CI->isZero();
88   return false;
89 }
90 
91 static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
92   if (Ops.size() == 0)
93     return B.getFalse();
94   unsigned i = 0;
95   for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
96   if (i == Ops.size())
97     return B.getFalse();
98   Value *Accum = Ops[i++];
99   for (; i < Ops.size(); i++)
100     if (!isConstantFalse(Ops[i]))
101       Accum = B.CreateOr(Accum, Ops[i]);
102   return Accum;
103 }
104 
105 static void generatePoisonChecksForBinOp(Instruction &I,
106                                          SmallVector<Value*, 2> &Checks) {
107   assert(isa<BinaryOperator>(I));
108 
109   IRBuilder<> B(&I);
110   Value *LHS = I.getOperand(0);
111   Value *RHS = I.getOperand(1);
112   switch (I.getOpcode()) {
113   default:
114     return;
115   case Instruction::Add: {
116     if (I.hasNoSignedWrap()) {
117       auto *OverflowOp =
118         B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
119       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
120     }
121     if (I.hasNoUnsignedWrap()) {
122       auto *OverflowOp =
123         B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
124       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
125     }
126     break;
127   }
128   case Instruction::Sub: {
129     if (I.hasNoSignedWrap()) {
130       auto *OverflowOp =
131         B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
132       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
133     }
134     if (I.hasNoUnsignedWrap()) {
135       auto *OverflowOp =
136         B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
137       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
138     }
139     break;
140   }
141   case Instruction::Mul: {
142     if (I.hasNoSignedWrap()) {
143       auto *OverflowOp =
144         B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
145       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
146     }
147     if (I.hasNoUnsignedWrap()) {
148       auto *OverflowOp =
149         B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
150       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
151     }
152     break;
153   }
154   };
155 }
156 
157 static Value* generatePoisonChecks(Instruction &I) {
158   IRBuilder<> B(&I);
159   SmallVector<Value*, 2> Checks;
160   if (isa<BinaryOperator>(I))
161     generatePoisonChecksForBinOp(I, Checks);
162   return buildOrChain(B, Checks);
163 }
164 
165 static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
166   auto Itr = ValToPoison.find(V);
167   if (Itr != ValToPoison.end())
168     return Itr->second;
169   if (isa<Constant>(V)) {
170     return ConstantInt::getFalse(V->getContext());
171   }
172   // Return false for unknwon values - this implements a non-strict mode where
173   // unhandled IR constructs are simply considered to never produce poison.  At
174   // some point in the future, we probably want a "strict mode" for testing if
175   // nothing else.
176   return ConstantInt::getFalse(V->getContext());
177 }
178 
179 static void CreateAssert(IRBuilder<> &B, Value *Cond) {
180   assert(Cond->getType()->isIntegerTy(1));
181   if (auto *CI = dyn_cast<ConstantInt>(Cond))
182     if (CI->isAllOnesValue())
183       return;
184 
185   Module *M = B.GetInsertBlock()->getModule();
186   M->getOrInsertFunction("__poison_checker_assert",
187                          Type::getVoidTy(M->getContext()),
188                          Type::getInt1Ty(M->getContext()));
189   Function *TrapFunc = M->getFunction("__poison_checker_assert");
190   B.CreateCall(TrapFunc, Cond);
191 }
192 
193 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
194   assert(Cond->getType()->isIntegerTy(1));
195   CreateAssert(B, B.CreateNot(Cond));
196 }
197 
198 static bool rewrite(Function &F) {
199   auto * const Int1Ty = Type::getInt1Ty(F.getContext());
200 
201   DenseMap<Value *, Value *> ValToPoison;
202 
203   for (BasicBlock &BB : F)
204     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
205       auto *OldPHI = cast<PHINode>(&*I);
206       auto *NewPHI = PHINode::Create(Int1Ty,
207                                      OldPHI->getNumIncomingValues());
208       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
209         NewPHI->addIncoming(UndefValue::get(Int1Ty),
210                             OldPHI->getIncomingBlock(i));
211       NewPHI->insertBefore(OldPHI);
212       ValToPoison[OldPHI] = NewPHI;
213     }
214 
215   for (BasicBlock &BB : F)
216     for (Instruction &I : BB) {
217       if (isa<PHINode>(I)) continue;
218 
219       IRBuilder<> B(cast<Instruction>(&I));
220       if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
221         CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
222 
223       if (LocalCheck)
224         if (auto *RI = dyn_cast<ReturnInst>(&I))
225           if (RI->getNumOperands() != 0) {
226             Value *Op = RI->getOperand(0);
227             CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
228           }
229 
230       SmallVector<Value*, 4> Checks;
231       if (propagatesFullPoison(&I))
232         for (Value *V : I.operands())
233           Checks.push_back(getPoisonFor(ValToPoison, V));
234 
235       if (auto *Check = generatePoisonChecks(I))
236         Checks.push_back(Check);
237       ValToPoison[&I] = buildOrChain(B, Checks);
238     }
239 
240   for (BasicBlock &BB : F)
241     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
242       auto *OldPHI = cast<PHINode>(&*I);
243       if (!ValToPoison.count(OldPHI))
244         continue; // skip the newly inserted phis
245       auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
246       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
247         auto *OldVal = OldPHI->getIncomingValue(i);
248         NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
249       }
250     }
251   return true;
252 }
253 
254 
255 PreservedAnalyses PoisonCheckingPass::run(Module &M,
256                                           ModuleAnalysisManager &AM) {
257   bool Changed = false;
258   for (auto &F : M)
259     Changed |= rewrite(F);
260 
261   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
262 }
263 
264 PreservedAnalyses PoisonCheckingPass::run(Function &F,
265                                           FunctionAnalysisManager &AM) {
266   return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
267 }
268 
269 
270 /* Major TODO Items:
271    - Control dependent poison UB
272    - Strict mode - (i.e. must analyze every operand)
273      - Poison through memory
274      - Function ABIs
275 
276    Minor TODO items:
277    - Add propagation rules for and/or instructions
278    - Add hasPoisonFlags predicate to ValueTracking
279    - Add poison check rules for:
280      - exact flags, out of bounds operands
281      - inbounds (can't be strict due to unknown allocation sizes)
282      - fmf and fp casts
283  */
284