1 //===- PoisonChecking.cpp - -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implements a transform pass which instruments IR such that poison semantics 10 // are made explicit. That is, it provides a (possibly partial) executable 11 // semantics for every instruction w.r.t. poison as specified in the LLVM 12 // LangRef. There are obvious parallels to the sanitizer tools, but this pass 13 // is focused purely on the semantics of LLVM IR, not any particular source 14 // language. If you're looking for something to see if your C/C++ contains 15 // UB, this is not it. 16 // 17 // The rewritten semantics of each instruction will include the following 18 // components: 19 // 20 // 1) The original instruction, unmodified. 21 // 2) A propagation rule which translates dynamic information about the poison 22 // state of each input to whether the dynamic output of the instruction 23 // produces poison. 24 // 3) A flag validation rule which validates any poison producing flags on the 25 // instruction itself (e.g. checks for overflow on nsw). 26 // 4) A check rule which traps (to a handler function) if this instruction must 27 // execute undefined behavior given the poison state of it's inputs. 28 // 29 // At the moment, the UB detection is done in a best effort manner; that is, 30 // the resulting code may produce a false negative result (not report UB when 31 // it actually exists according to the LangRef spec), but should never produce 32 // a false positive (report UB where it doesn't exist). The intention is to 33 // eventually support a "strict" mode which never dynamically reports a false 34 // negative at the cost of rejecting some valid inputs to translation. 35 // 36 // Use cases for this pass include: 37 // - Understanding (and testing!) the implications of the definition of poison 38 // from the LangRef. 39 // - Validating the output of a IR fuzzer to ensure that all programs produced 40 // are well defined on the specific input used. 41 // - Finding/confirming poison specific miscompiles by checking the poison 42 // status of an input/IR pair is the same before and after an optimization 43 // transform. 44 // - Checking that a bugpoint reduction does not introduce UB which didn't 45 // exist in the original program being reduced. 46 // 47 // The major sources of inaccuracy are currently: 48 // - Most validation rules not yet implemented for instructions with poison 49 // relavant flags. At the moment, only nsw/nuw on add/sub are supported. 50 // - UB which is control dependent on a branch on poison is not yet 51 // reported. Currently, only data flow dependence is modeled. 52 // - Poison which is propagated through memory is not modeled. As such, 53 // storing poison to memory and then reloading it will cause a false negative 54 // as we consider the reloaded value to not be poisoned. 55 // - Poison propagation across function boundaries is not modeled. At the 56 // moment, all arguments and return values are assumed not to be poison. 57 // - Undef is not modeled. In particular, the optimizer's freedom to pick 58 // concrete values for undef bits so as to maximize potential for producing 59 // poison is not modeled. 60 // 61 //===----------------------------------------------------------------------===// 62 63 #include "llvm/Transforms/Instrumentation/PoisonChecking.h" 64 #include "llvm/ADT/DenseMap.h" 65 #include "llvm/ADT/Statistic.h" 66 #include "llvm/Analysis/MemoryBuiltins.h" 67 #include "llvm/Analysis/ValueTracking.h" 68 #include "llvm/IR/IRBuilder.h" 69 #include "llvm/IR/InstVisitor.h" 70 #include "llvm/IR/IntrinsicInst.h" 71 #include "llvm/IR/PatternMatch.h" 72 #include "llvm/Support/CommandLine.h" 73 #include "llvm/Support/Debug.h" 74 75 using namespace llvm; 76 77 #define DEBUG_TYPE "poison-checking" 78 79 static cl::opt<bool> 80 LocalCheck("poison-checking-function-local", 81 cl::init(false), 82 cl::desc("Check that returns are non-poison (for testing)")); 83 84 85 static bool isConstantFalse(Value* V) { 86 assert(V->getType()->isIntegerTy(1)); 87 if (auto *CI = dyn_cast<ConstantInt>(V)) 88 return CI->isZero(); 89 return false; 90 } 91 92 static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { 93 if (Ops.size() == 0) 94 return B.getFalse(); 95 unsigned i = 0; 96 for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {} 97 if (i == Ops.size()) 98 return B.getFalse(); 99 Value *Accum = Ops[i++]; 100 for (; i < Ops.size(); i++) 101 if (!isConstantFalse(Ops[i])) 102 Accum = B.CreateOr(Accum, Ops[i]); 103 return Accum; 104 } 105 106 static void generateCreationChecksForBinOp(Instruction &I, 107 SmallVectorImpl<Value*> &Checks) { 108 assert(isa<BinaryOperator>(I)); 109 110 IRBuilder<> B(&I); 111 Value *LHS = I.getOperand(0); 112 Value *RHS = I.getOperand(1); 113 switch (I.getOpcode()) { 114 default: 115 return; 116 case Instruction::Add: { 117 if (I.hasNoSignedWrap()) { 118 auto *OverflowOp = 119 B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS); 120 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 121 } 122 if (I.hasNoUnsignedWrap()) { 123 auto *OverflowOp = 124 B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS); 125 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 126 } 127 break; 128 } 129 case Instruction::Sub: { 130 if (I.hasNoSignedWrap()) { 131 auto *OverflowOp = 132 B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS); 133 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 134 } 135 if (I.hasNoUnsignedWrap()) { 136 auto *OverflowOp = 137 B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS); 138 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 139 } 140 break; 141 } 142 case Instruction::Mul: { 143 if (I.hasNoSignedWrap()) { 144 auto *OverflowOp = 145 B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS); 146 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 147 } 148 if (I.hasNoUnsignedWrap()) { 149 auto *OverflowOp = 150 B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS); 151 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 152 } 153 break; 154 } 155 case Instruction::UDiv: { 156 if (I.isExact()) { 157 auto *Check = 158 B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS), 159 ConstantInt::get(LHS->getType(), 0)); 160 Checks.push_back(Check); 161 } 162 break; 163 } 164 case Instruction::SDiv: { 165 if (I.isExact()) { 166 auto *Check = 167 B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS), 168 ConstantInt::get(LHS->getType(), 0)); 169 Checks.push_back(Check); 170 } 171 break; 172 } 173 case Instruction::AShr: 174 case Instruction::LShr: 175 case Instruction::Shl: { 176 Value *ShiftCheck = 177 B.CreateICmp(ICmpInst::ICMP_UGE, RHS, 178 ConstantInt::get(RHS->getType(), 179 LHS->getType()->getScalarSizeInBits())); 180 Checks.push_back(ShiftCheck); 181 break; 182 } 183 }; 184 } 185 186 /// Given an instruction which can produce poison on non-poison inputs 187 /// (i.e. canCreatePoison returns true), generate runtime checks to produce 188 /// boolean indicators of when poison would result. 189 static void generateCreationChecks(Instruction &I, 190 SmallVectorImpl<Value*> &Checks) { 191 IRBuilder<> B(&I); 192 if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy()) 193 generateCreationChecksForBinOp(I, Checks); 194 195 // Handle non-binops seperately 196 switch (I.getOpcode()) { 197 default: 198 // Note there are a couple of missing cases here, once implemented, this 199 // should become an llvm_unreachable. 200 break; 201 case Instruction::ExtractElement: { 202 Value *Vec = I.getOperand(0); 203 auto *VecVTy = cast<VectorType>(Vec->getType()); 204 if (VecVTy->isScalable()) 205 break; 206 Value *Idx = I.getOperand(1); 207 unsigned NumElts = VecVTy->getNumElements(); 208 Value *Check = 209 B.CreateICmp(ICmpInst::ICMP_UGE, Idx, 210 ConstantInt::get(Idx->getType(), NumElts)); 211 Checks.push_back(Check); 212 break; 213 } 214 case Instruction::InsertElement: { 215 Value *Vec = I.getOperand(0); 216 auto *VecVTy = cast<VectorType>(Vec->getType()); 217 if (VecVTy->isScalable()) 218 break; 219 Value *Idx = I.getOperand(2); 220 unsigned NumElts = VecVTy->getNumElements(); 221 Value *Check = 222 B.CreateICmp(ICmpInst::ICMP_UGE, Idx, 223 ConstantInt::get(Idx->getType(), NumElts)); 224 Checks.push_back(Check); 225 break; 226 } 227 }; 228 } 229 230 static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { 231 auto Itr = ValToPoison.find(V); 232 if (Itr != ValToPoison.end()) 233 return Itr->second; 234 if (isa<Constant>(V)) { 235 return ConstantInt::getFalse(V->getContext()); 236 } 237 // Return false for unknwon values - this implements a non-strict mode where 238 // unhandled IR constructs are simply considered to never produce poison. At 239 // some point in the future, we probably want a "strict mode" for testing if 240 // nothing else. 241 return ConstantInt::getFalse(V->getContext()); 242 } 243 244 static void CreateAssert(IRBuilder<> &B, Value *Cond) { 245 assert(Cond->getType()->isIntegerTy(1)); 246 if (auto *CI = dyn_cast<ConstantInt>(Cond)) 247 if (CI->isAllOnesValue()) 248 return; 249 250 Module *M = B.GetInsertBlock()->getModule(); 251 M->getOrInsertFunction("__poison_checker_assert", 252 Type::getVoidTy(M->getContext()), 253 Type::getInt1Ty(M->getContext())); 254 Function *TrapFunc = M->getFunction("__poison_checker_assert"); 255 B.CreateCall(TrapFunc, Cond); 256 } 257 258 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { 259 assert(Cond->getType()->isIntegerTy(1)); 260 CreateAssert(B, B.CreateNot(Cond)); 261 } 262 263 static bool rewrite(Function &F) { 264 auto * const Int1Ty = Type::getInt1Ty(F.getContext()); 265 266 DenseMap<Value *, Value *> ValToPoison; 267 268 for (BasicBlock &BB : F) 269 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { 270 auto *OldPHI = cast<PHINode>(&*I); 271 auto *NewPHI = PHINode::Create(Int1Ty, 272 OldPHI->getNumIncomingValues()); 273 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) 274 NewPHI->addIncoming(UndefValue::get(Int1Ty), 275 OldPHI->getIncomingBlock(i)); 276 NewPHI->insertBefore(OldPHI); 277 ValToPoison[OldPHI] = NewPHI; 278 } 279 280 for (BasicBlock &BB : F) 281 for (Instruction &I : BB) { 282 if (isa<PHINode>(I)) continue; 283 284 IRBuilder<> B(cast<Instruction>(&I)); 285 286 // Note: There are many more sources of documented UB, but this pass only 287 // attempts to find UB triggered by propagation of poison. 288 if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I))) 289 CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); 290 291 if (LocalCheck) 292 if (auto *RI = dyn_cast<ReturnInst>(&I)) 293 if (RI->getNumOperands() != 0) { 294 Value *Op = RI->getOperand(0); 295 CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); 296 } 297 298 SmallVector<Value*, 4> Checks; 299 if (propagatesFullPoison(&I)) 300 for (Value *V : I.operands()) 301 Checks.push_back(getPoisonFor(ValToPoison, V)); 302 303 if (canCreatePoison(&I)) 304 generateCreationChecks(I, Checks); 305 ValToPoison[&I] = buildOrChain(B, Checks); 306 } 307 308 for (BasicBlock &BB : F) 309 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { 310 auto *OldPHI = cast<PHINode>(&*I); 311 if (!ValToPoison.count(OldPHI)) 312 continue; // skip the newly inserted phis 313 auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]); 314 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) { 315 auto *OldVal = OldPHI->getIncomingValue(i); 316 NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal)); 317 } 318 } 319 return true; 320 } 321 322 323 PreservedAnalyses PoisonCheckingPass::run(Module &M, 324 ModuleAnalysisManager &AM) { 325 bool Changed = false; 326 for (auto &F : M) 327 Changed |= rewrite(F); 328 329 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 330 } 331 332 PreservedAnalyses PoisonCheckingPass::run(Function &F, 333 FunctionAnalysisManager &AM) { 334 return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); 335 } 336 337 338 /* Major TODO Items: 339 - Control dependent poison UB 340 - Strict mode - (i.e. must analyze every operand) 341 - Poison through memory 342 - Function ABIs 343 - Full coverage of intrinsics, etc.. (ouch) 344 345 Instructions w/Unclear Semantics: 346 - shufflevector - It would seem reasonable for an out of bounds mask element 347 to produce poison, but the LangRef does not state. 348 - and/or - It would seem reasonable for poison to propagate from both 349 arguments, but LangRef doesn't state and propagatesFullPoison doesn't 350 include these two. 351 - all binary ops w/vector operands - The likely interpretation would be that 352 any element overflowing should produce poison for the entire result, but 353 the LangRef does not state. 354 - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems 355 strange that only certian flags should be documented as producing poison. 356 357 Cases of clear poison semantics not yet implemented: 358 - Exact flags on ashr/lshr produce poison 359 - NSW/NUW flags on shl produce poison 360 - Inbounds flag on getelementptr produce poison 361 - fptosi/fptoui (out of bounds input) produce poison 362 - Scalable vector types for insertelement/extractelement 363 - Floating point binary ops w/fmf nnan/noinfs flags produce poison 364 */ 365