1 //===- PoisonChecking.cpp - -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implements a transform pass which instruments IR such that poison semantics 10 // are made explicit. That is, it provides a (possibly partial) executable 11 // semantics for every instruction w.r.t. poison as specified in the LLVM 12 // LangRef. There are obvious parallels to the sanitizer tools, but this pass 13 // is focused purely on the semantics of LLVM IR, not any particular source 14 // language. If you're looking for something to see if your C/C++ contains 15 // UB, this is not it. 16 // 17 // The rewritten semantics of each instruction will include the following 18 // components: 19 // 20 // 1) The original instruction, unmodified. 21 // 2) A propagation rule which translates dynamic information about the poison 22 // state of each input to whether the dynamic output of the instruction 23 // produces poison. 24 // 3) A flag validation rule which validates any poison producing flags on the 25 // instruction itself (e.g. checks for overflow on nsw). 26 // 4) A check rule which traps (to a handler function) if this instruction must 27 // execute undefined behavior given the poison state of it's inputs. 28 // 29 // At the moment, the UB detection is done in a best effort manner; that is, 30 // the resulting code may produce a false negative result (not report UB when 31 // it actually exists according to the LangRef spec), but should never produce 32 // a false positive (report UB where it doesn't exist). The intention is to 33 // eventually support a "strict" mode which never dynamically reports a false 34 // negative at the cost of rejecting some valid inputs to translation. 35 // 36 // Use cases for this pass include: 37 // - Understanding (and testing!) the implications of the definition of poison 38 // from the LangRef. 39 // - Validating the output of a IR fuzzer to ensure that all programs produced 40 // are well defined on the specific input used. 41 // - Finding/confirming poison specific miscompiles by checking the poison 42 // status of an input/IR pair is the same before and after an optimization 43 // transform. 44 // - Checking that a bugpoint reduction does not introduce UB which didn't 45 // exist in the original program being reduced. 46 // 47 // The major sources of inaccuracy are currently: 48 // - Most validation rules not yet implemented for instructions with poison 49 // relavant flags. At the moment, only nsw/nuw on add/sub are supported. 50 // - UB which is control dependent on a branch on poison is not yet 51 // reported. Currently, only data flow dependence is modeled. 52 // - Poison which is propagated through memory is not modeled. As such, 53 // storing poison to memory and then reloading it will cause a false negative 54 // as we consider the reloaded value to not be poisoned. 55 // - Poison propagation across function boundaries is not modeled. At the 56 // moment, all arguments and return values are assumed not to be poison. 57 // - Undef is not modeled. In particular, the optimizer's freedom to pick 58 // concrete values for undef bits so as to maximize potential for producing 59 // poison is not modeled. 60 // 61 //===----------------------------------------------------------------------===// 62 63 #include "llvm/Transforms/Instrumentation/PoisonChecking.h" 64 #include "llvm/ADT/DenseMap.h" 65 #include "llvm/ADT/Statistic.h" 66 #include "llvm/Analysis/MemoryBuiltins.h" 67 #include "llvm/Analysis/ValueTracking.h" 68 #include "llvm/IR/InstVisitor.h" 69 #include "llvm/IR/IntrinsicInst.h" 70 #include "llvm/IR/IRBuilder.h" 71 #include "llvm/IR/PatternMatch.h" 72 #include "llvm/Support/Debug.h" 73 74 using namespace llvm; 75 76 #define DEBUG_TYPE "poison-checking" 77 78 static cl::opt<bool> 79 LocalCheck("poison-checking-function-local", 80 cl::init(false), 81 cl::desc("Check that returns are non-poison (for testing)")); 82 83 84 static bool isConstantFalse(Value* V) { 85 assert(V->getType()->isIntegerTy(1)); 86 if (auto *CI = dyn_cast<ConstantInt>(V)) 87 return CI->isZero(); 88 return false; 89 } 90 91 static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { 92 if (Ops.size() == 0) 93 return B.getFalse(); 94 unsigned i = 0; 95 for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {} 96 if (i == Ops.size()) 97 return B.getFalse(); 98 Value *Accum = Ops[i++]; 99 for (; i < Ops.size(); i++) 100 if (!isConstantFalse(Ops[i])) 101 Accum = B.CreateOr(Accum, Ops[i]); 102 return Accum; 103 } 104 105 static void generatePoisonChecksForBinOp(Instruction &I, 106 SmallVector<Value*, 2> &Checks) { 107 assert(isa<BinaryOperator>(I)); 108 109 IRBuilder<> B(&I); 110 Value *LHS = I.getOperand(0); 111 Value *RHS = I.getOperand(1); 112 switch (I.getOpcode()) { 113 default: 114 return; 115 case Instruction::Add: { 116 if (I.hasNoSignedWrap()) { 117 auto *OverflowOp = 118 B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS); 119 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 120 } 121 if (I.hasNoUnsignedWrap()) { 122 auto *OverflowOp = 123 B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS); 124 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 125 } 126 break; 127 } 128 case Instruction::Sub: { 129 if (I.hasNoSignedWrap()) { 130 auto *OverflowOp = 131 B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS); 132 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 133 } 134 if (I.hasNoUnsignedWrap()) { 135 auto *OverflowOp = 136 B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS); 137 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 138 } 139 break; 140 } 141 case Instruction::Mul: { 142 if (I.hasNoSignedWrap()) { 143 auto *OverflowOp = 144 B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS); 145 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 146 } 147 if (I.hasNoUnsignedWrap()) { 148 auto *OverflowOp = 149 B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS); 150 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 151 } 152 break; 153 } 154 }; 155 } 156 157 static Value* generatePoisonChecks(Instruction &I) { 158 IRBuilder<> B(&I); 159 SmallVector<Value*, 2> Checks; 160 if (isa<BinaryOperator>(I)) 161 generatePoisonChecksForBinOp(I, Checks); 162 return buildOrChain(B, Checks); 163 } 164 165 static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { 166 auto Itr = ValToPoison.find(V); 167 if (Itr != ValToPoison.end()) 168 return Itr->second; 169 if (isa<Constant>(V)) { 170 return ConstantInt::getFalse(V->getContext()); 171 } 172 // Return false for unknwon values - this implements a non-strict mode where 173 // unhandled IR constructs are simply considered to never produce poison. At 174 // some point in the future, we probably want a "strict mode" for testing if 175 // nothing else. 176 return ConstantInt::getFalse(V->getContext()); 177 } 178 179 static void CreateAssert(IRBuilder<> &B, Value *Cond) { 180 assert(Cond->getType()->isIntegerTy(1)); 181 if (auto *CI = dyn_cast<ConstantInt>(Cond)) 182 if (CI->isAllOnesValue()) 183 return; 184 185 Module *M = B.GetInsertBlock()->getModule(); 186 M->getOrInsertFunction("__poison_checker_assert", 187 Type::getVoidTy(M->getContext()), 188 Type::getInt1Ty(M->getContext())); 189 Function *TrapFunc = M->getFunction("__poison_checker_assert"); 190 B.CreateCall(TrapFunc, Cond); 191 } 192 193 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { 194 assert(Cond->getType()->isIntegerTy(1)); 195 CreateAssert(B, B.CreateNot(Cond)); 196 } 197 198 static bool rewrite(Function &F) { 199 auto * const Int1Ty = Type::getInt1Ty(F.getContext()); 200 201 DenseMap<Value *, Value *> ValToPoison; 202 203 for (BasicBlock &BB : F) 204 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { 205 auto *OldPHI = cast<PHINode>(&*I); 206 auto *NewPHI = PHINode::Create(Int1Ty, 207 OldPHI->getNumIncomingValues()); 208 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) 209 NewPHI->addIncoming(UndefValue::get(Int1Ty), 210 OldPHI->getIncomingBlock(i)); 211 NewPHI->insertBefore(OldPHI); 212 ValToPoison[OldPHI] = NewPHI; 213 } 214 215 for (BasicBlock &BB : F) 216 for (Instruction &I : BB) { 217 if (isa<PHINode>(I)) continue; 218 219 IRBuilder<> B(cast<Instruction>(&I)); 220 if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I))) 221 CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); 222 223 if (LocalCheck) 224 if (auto *RI = dyn_cast<ReturnInst>(&I)) 225 if (RI->getNumOperands() != 0) { 226 Value *Op = RI->getOperand(0); 227 CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); 228 } 229 230 SmallVector<Value*, 4> Checks; 231 if (propagatesFullPoison(&I)) 232 for (Value *V : I.operands()) 233 Checks.push_back(getPoisonFor(ValToPoison, V)); 234 235 if (auto *Check = generatePoisonChecks(I)) 236 Checks.push_back(Check); 237 ValToPoison[&I] = buildOrChain(B, Checks); 238 } 239 240 for (BasicBlock &BB : F) 241 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { 242 auto *OldPHI = cast<PHINode>(&*I); 243 if (!ValToPoison.count(OldPHI)) 244 continue; // skip the newly inserted phis 245 auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]); 246 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) { 247 auto *OldVal = OldPHI->getIncomingValue(i); 248 NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal)); 249 } 250 } 251 return true; 252 } 253 254 255 PreservedAnalyses PoisonCheckingPass::run(Module &M, 256 ModuleAnalysisManager &AM) { 257 bool Changed = false; 258 for (auto &F : M) 259 Changed |= rewrite(F); 260 261 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 262 } 263 264 PreservedAnalyses PoisonCheckingPass::run(Function &F, 265 FunctionAnalysisManager &AM) { 266 return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); 267 } 268 269 270 /* Major TODO Items: 271 - Control dependent poison UB 272 - Strict mode - (i.e. must analyze every operand) 273 - Poison through memory 274 - Function ABIs 275 276 Minor TODO items: 277 - Add propagation rules for and/or instructions 278 - Add hasPoisonFlags predicate to ValueTracking 279 - Add poison check rules for: 280 - exact flags, out of bounds operands 281 - inbounds (can't be strict due to unknown allocation sizes) 282 - fmf and fp casts 283 */ 284