1 //===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass statically checks for common and easily-identified constructs 11 // which produce undefined or likely unintended behavior in LLVM IR. 12 // 13 // It is not a guarantee of correctness, in two ways. First, it isn't 14 // comprehensive. There are checks which could be done statically which are 15 // not yet implemented. Some of these are indicated by TODO comments, but 16 // those aren't comprehensive either. Second, many conditions cannot be 17 // checked statically. This pass does no dynamic instrumentation, so it 18 // can't check for all possible problems. 19 // 20 // Another limitation is that it assumes all code will be executed. A store 21 // through a null pointer in a basic block which is never reached is harmless, 22 // but this pass will warn about it anyway. This is the main reason why most 23 // of these checks live here instead of in the Verifier pass. 24 // 25 // Optimization passes may make conditions that this pass checks for more or 26 // less obvious. If an optimization pass appears to be introducing a warning, 27 // it may be that the optimization pass is merely exposing an existing 28 // condition in the code. 29 // 30 // This code may be run before instcombine. In many cases, instcombine checks 31 // for the same kinds of things and turns instructions with undefined behavior 32 // into unreachable (or equivalent). Because of this, this pass makes some 33 // effort to look through bitcasts and so on. 34 // 35 //===----------------------------------------------------------------------===// 36 37 #include "llvm/Analysis/Lint.h" 38 #include "llvm/ADT/STLExtras.h" 39 #include "llvm/ADT/SmallSet.h" 40 #include "llvm/Analysis/AliasAnalysis.h" 41 #include "llvm/Analysis/AssumptionCache.h" 42 #include "llvm/Analysis/ConstantFolding.h" 43 #include "llvm/Analysis/InstructionSimplify.h" 44 #include "llvm/Analysis/Loads.h" 45 #include "llvm/Analysis/Passes.h" 46 #include "llvm/Analysis/TargetLibraryInfo.h" 47 #include "llvm/Analysis/ValueTracking.h" 48 #include "llvm/IR/CallSite.h" 49 #include "llvm/IR/DataLayout.h" 50 #include "llvm/IR/Dominators.h" 51 #include "llvm/IR/Function.h" 52 #include "llvm/IR/InstVisitor.h" 53 #include "llvm/IR/IntrinsicInst.h" 54 #include "llvm/IR/LegacyPassManager.h" 55 #include "llvm/Pass.h" 56 #include "llvm/Support/Debug.h" 57 #include "llvm/Support/raw_ostream.h" 58 using namespace llvm; 59 60 namespace { 61 namespace MemRef { 62 static const unsigned Read = 1; 63 static const unsigned Write = 2; 64 static const unsigned Callee = 4; 65 static const unsigned Branchee = 8; 66 } 67 68 class Lint : public FunctionPass, public InstVisitor<Lint> { 69 friend class InstVisitor<Lint>; 70 71 void visitFunction(Function &F); 72 73 void visitCallSite(CallSite CS); 74 void visitMemoryReference(Instruction &I, Value *Ptr, 75 uint64_t Size, unsigned Align, 76 Type *Ty, unsigned Flags); 77 void visitEHBeginCatch(IntrinsicInst *II); 78 void visitEHEndCatch(IntrinsicInst *II); 79 80 void visitCallInst(CallInst &I); 81 void visitInvokeInst(InvokeInst &I); 82 void visitReturnInst(ReturnInst &I); 83 void visitLoadInst(LoadInst &I); 84 void visitStoreInst(StoreInst &I); 85 void visitXor(BinaryOperator &I); 86 void visitSub(BinaryOperator &I); 87 void visitLShr(BinaryOperator &I); 88 void visitAShr(BinaryOperator &I); 89 void visitShl(BinaryOperator &I); 90 void visitSDiv(BinaryOperator &I); 91 void visitUDiv(BinaryOperator &I); 92 void visitSRem(BinaryOperator &I); 93 void visitURem(BinaryOperator &I); 94 void visitAllocaInst(AllocaInst &I); 95 void visitVAArgInst(VAArgInst &I); 96 void visitIndirectBrInst(IndirectBrInst &I); 97 void visitExtractElementInst(ExtractElementInst &I); 98 void visitInsertElementInst(InsertElementInst &I); 99 void visitUnreachableInst(UnreachableInst &I); 100 101 Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const; 102 Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, 103 SmallPtrSetImpl<Value *> &Visited) const; 104 105 public: 106 Module *Mod; 107 AliasAnalysis *AA; 108 AssumptionCache *AC; 109 DominatorTree *DT; 110 TargetLibraryInfo *TLI; 111 112 std::string Messages; 113 raw_string_ostream MessagesStr; 114 115 static char ID; // Pass identification, replacement for typeid 116 Lint() : FunctionPass(ID), MessagesStr(Messages) { 117 initializeLintPass(*PassRegistry::getPassRegistry()); 118 } 119 120 bool runOnFunction(Function &F) override; 121 122 void getAnalysisUsage(AnalysisUsage &AU) const override { 123 AU.setPreservesAll(); 124 AU.addRequired<AliasAnalysis>(); 125 AU.addRequired<AssumptionCacheTracker>(); 126 AU.addRequired<TargetLibraryInfoWrapperPass>(); 127 AU.addRequired<DominatorTreeWrapperPass>(); 128 } 129 void print(raw_ostream &O, const Module *M) const override {} 130 131 void WriteValues(ArrayRef<const Value *> Vs) { 132 for (const Value *V : Vs) { 133 if (!V) 134 continue; 135 if (isa<Instruction>(V)) { 136 MessagesStr << *V << '\n'; 137 } else { 138 V->printAsOperand(MessagesStr, true, Mod); 139 MessagesStr << '\n'; 140 } 141 } 142 } 143 144 /// \brief A check failed, so printout out the condition and the message. 145 /// 146 /// This provides a nice place to put a breakpoint if you want to see why 147 /// something is not correct. 148 void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } 149 150 /// \brief A check failed (with values to print). 151 /// 152 /// This calls the Message-only version so that the above is easier to set 153 /// a breakpoint on. 154 template <typename T1, typename... Ts> 155 void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) { 156 CheckFailed(Message); 157 WriteValues({V1, Vs...}); 158 } 159 }; 160 } 161 162 char Lint::ID = 0; 163 INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", 164 false, true) 165 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 166 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 167 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 168 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 169 INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", 170 false, true) 171 172 // Assert - We know that cond should be true, if not print an error message. 173 #define Assert(C, ...) \ 174 do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0) 175 176 // Lint::run - This is the main Analysis entry point for a 177 // function. 178 // 179 bool Lint::runOnFunction(Function &F) { 180 Mod = F.getParent(); 181 AA = &getAnalysis<AliasAnalysis>(); 182 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 183 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 184 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); 185 visit(F); 186 dbgs() << MessagesStr.str(); 187 Messages.clear(); 188 return false; 189 } 190 191 void Lint::visitFunction(Function &F) { 192 // This isn't undefined behavior, it's just a little unusual, and it's a 193 // fairly common mistake to neglect to name a function. 194 Assert(F.hasName() || F.hasLocalLinkage(), 195 "Unusual: Unnamed function with non-local linkage", &F); 196 197 // TODO: Check for irreducible control flow. 198 } 199 200 void Lint::visitCallSite(CallSite CS) { 201 Instruction &I = *CS.getInstruction(); 202 Value *Callee = CS.getCalledValue(); 203 const DataLayout &DL = CS->getModule()->getDataLayout(); 204 205 visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr, 206 MemRef::Callee); 207 208 if (Function *F = dyn_cast<Function>(findValue(Callee, DL, 209 /*OffsetOk=*/false))) { 210 Assert(CS.getCallingConv() == F->getCallingConv(), 211 "Undefined behavior: Caller and callee calling convention differ", 212 &I); 213 214 FunctionType *FT = F->getFunctionType(); 215 unsigned NumActualArgs = CS.arg_size(); 216 217 Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs 218 : FT->getNumParams() == NumActualArgs, 219 "Undefined behavior: Call argument count mismatches callee " 220 "argument count", 221 &I); 222 223 Assert(FT->getReturnType() == I.getType(), 224 "Undefined behavior: Call return type mismatches " 225 "callee return type", 226 &I); 227 228 // Check argument types (in case the callee was casted) and attributes. 229 // TODO: Verify that caller and callee attributes are compatible. 230 Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); 231 CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); 232 for (; AI != AE; ++AI) { 233 Value *Actual = *AI; 234 if (PI != PE) { 235 Argument *Formal = PI++; 236 Assert(Formal->getType() == Actual->getType(), 237 "Undefined behavior: Call argument type mismatches " 238 "callee parameter type", 239 &I); 240 241 // Check that noalias arguments don't alias other arguments. This is 242 // not fully precise because we don't know the sizes of the dereferenced 243 // memory regions. 244 if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) 245 for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) 246 if (AI != BI && (*BI)->getType()->isPointerTy()) { 247 AliasResult Result = AA->alias(*AI, *BI); 248 Assert(Result != MustAlias && Result != PartialAlias, 249 "Unusual: noalias argument aliases another argument", &I); 250 } 251 252 // Check that an sret argument points to valid memory. 253 if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { 254 Type *Ty = 255 cast<PointerType>(Formal->getType())->getElementType(); 256 visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), 257 DL.getABITypeAlignment(Ty), Ty, 258 MemRef::Read | MemRef::Write); 259 } 260 } 261 } 262 } 263 264 if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) 265 for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); 266 AI != AE; ++AI) { 267 Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true); 268 Assert(!isa<AllocaInst>(Obj), 269 "Undefined behavior: Call with \"tail\" keyword references " 270 "alloca", 271 &I); 272 } 273 274 275 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) 276 switch (II->getIntrinsicID()) { 277 default: break; 278 279 // TODO: Check more intrinsics 280 281 case Intrinsic::memcpy: { 282 MemCpyInst *MCI = cast<MemCpyInst>(&I); 283 // TODO: If the size is known, use it. 284 visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize, 285 MCI->getAlignment(), nullptr, MemRef::Write); 286 visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize, 287 MCI->getAlignment(), nullptr, MemRef::Read); 288 289 // Check that the memcpy arguments don't overlap. The AliasAnalysis API 290 // isn't expressive enough for what we really want to do. Known partial 291 // overlap is not distinguished from the case where nothing is known. 292 uint64_t Size = 0; 293 if (const ConstantInt *Len = 294 dyn_cast<ConstantInt>(findValue(MCI->getLength(), DL, 295 /*OffsetOk=*/false))) 296 if (Len->getValue().isIntN(32)) 297 Size = Len->getValue().getZExtValue(); 298 Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != 299 MustAlias, 300 "Undefined behavior: memcpy source and destination overlap", &I); 301 break; 302 } 303 case Intrinsic::memmove: { 304 MemMoveInst *MMI = cast<MemMoveInst>(&I); 305 // TODO: If the size is known, use it. 306 visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize, 307 MMI->getAlignment(), nullptr, MemRef::Write); 308 visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize, 309 MMI->getAlignment(), nullptr, MemRef::Read); 310 break; 311 } 312 case Intrinsic::memset: { 313 MemSetInst *MSI = cast<MemSetInst>(&I); 314 // TODO: If the size is known, use it. 315 visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize, 316 MSI->getAlignment(), nullptr, MemRef::Write); 317 break; 318 } 319 320 case Intrinsic::vastart: 321 Assert(I.getParent()->getParent()->isVarArg(), 322 "Undefined behavior: va_start called in a non-varargs function", 323 &I); 324 325 visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, 326 nullptr, MemRef::Read | MemRef::Write); 327 break; 328 case Intrinsic::vacopy: 329 visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, 330 nullptr, MemRef::Write); 331 visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0, 332 nullptr, MemRef::Read); 333 break; 334 case Intrinsic::vaend: 335 visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, 336 nullptr, MemRef::Read | MemRef::Write); 337 break; 338 339 case Intrinsic::stackrestore: 340 // Stackrestore doesn't read or write memory, but it sets the 341 // stack pointer, which the compiler may read from or write to 342 // at any time, so check it for both readability and writeability. 343 visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, 344 nullptr, MemRef::Read | MemRef::Write); 345 break; 346 347 case Intrinsic::eh_begincatch: 348 visitEHBeginCatch(II); 349 break; 350 case Intrinsic::eh_endcatch: 351 visitEHEndCatch(II); 352 break; 353 } 354 } 355 356 void Lint::visitCallInst(CallInst &I) { 357 return visitCallSite(&I); 358 } 359 360 void Lint::visitInvokeInst(InvokeInst &I) { 361 return visitCallSite(&I); 362 } 363 364 void Lint::visitReturnInst(ReturnInst &I) { 365 Function *F = I.getParent()->getParent(); 366 Assert(!F->doesNotReturn(), 367 "Unusual: Return statement in function with noreturn attribute", &I); 368 369 if (Value *V = I.getReturnValue()) { 370 Value *Obj = 371 findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true); 372 Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I); 373 } 374 } 375 376 // TODO: Check that the reference is in bounds. 377 // TODO: Check readnone/readonly function attributes. 378 void Lint::visitMemoryReference(Instruction &I, 379 Value *Ptr, uint64_t Size, unsigned Align, 380 Type *Ty, unsigned Flags) { 381 // If no memory is being referenced, it doesn't matter if the pointer 382 // is valid. 383 if (Size == 0) 384 return; 385 386 Value *UnderlyingObject = 387 findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true); 388 Assert(!isa<ConstantPointerNull>(UnderlyingObject), 389 "Undefined behavior: Null pointer dereference", &I); 390 Assert(!isa<UndefValue>(UnderlyingObject), 391 "Undefined behavior: Undef pointer dereference", &I); 392 Assert(!isa<ConstantInt>(UnderlyingObject) || 393 !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(), 394 "Unusual: All-ones pointer dereference", &I); 395 Assert(!isa<ConstantInt>(UnderlyingObject) || 396 !cast<ConstantInt>(UnderlyingObject)->isOne(), 397 "Unusual: Address one pointer dereference", &I); 398 399 if (Flags & MemRef::Write) { 400 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) 401 Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory", 402 &I); 403 Assert(!isa<Function>(UnderlyingObject) && 404 !isa<BlockAddress>(UnderlyingObject), 405 "Undefined behavior: Write to text section", &I); 406 } 407 if (Flags & MemRef::Read) { 408 Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body", 409 &I); 410 Assert(!isa<BlockAddress>(UnderlyingObject), 411 "Undefined behavior: Load from block address", &I); 412 } 413 if (Flags & MemRef::Callee) { 414 Assert(!isa<BlockAddress>(UnderlyingObject), 415 "Undefined behavior: Call to block address", &I); 416 } 417 if (Flags & MemRef::Branchee) { 418 Assert(!isa<Constant>(UnderlyingObject) || 419 isa<BlockAddress>(UnderlyingObject), 420 "Undefined behavior: Branch to non-blockaddress", &I); 421 } 422 423 // Check for buffer overflows and misalignment. 424 // Only handles memory references that read/write something simple like an 425 // alloca instruction or a global variable. 426 auto &DL = I.getModule()->getDataLayout(); 427 int64_t Offset = 0; 428 if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) { 429 // OK, so the access is to a constant offset from Ptr. Check that Ptr is 430 // something we can handle and if so extract the size of this base object 431 // along with its alignment. 432 uint64_t BaseSize = MemoryLocation::UnknownSize; 433 unsigned BaseAlign = 0; 434 435 if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { 436 Type *ATy = AI->getAllocatedType(); 437 if (!AI->isArrayAllocation() && ATy->isSized()) 438 BaseSize = DL.getTypeAllocSize(ATy); 439 BaseAlign = AI->getAlignment(); 440 if (BaseAlign == 0 && ATy->isSized()) 441 BaseAlign = DL.getABITypeAlignment(ATy); 442 } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { 443 // If the global may be defined differently in another compilation unit 444 // then don't warn about funky memory accesses. 445 if (GV->hasDefinitiveInitializer()) { 446 Type *GTy = GV->getType()->getElementType(); 447 if (GTy->isSized()) 448 BaseSize = DL.getTypeAllocSize(GTy); 449 BaseAlign = GV->getAlignment(); 450 if (BaseAlign == 0 && GTy->isSized()) 451 BaseAlign = DL.getABITypeAlignment(GTy); 452 } 453 } 454 455 // Accesses from before the start or after the end of the object are not 456 // defined. 457 Assert(Size == MemoryLocation::UnknownSize || 458 BaseSize == MemoryLocation::UnknownSize || 459 (Offset >= 0 && Offset + Size <= BaseSize), 460 "Undefined behavior: Buffer overflow", &I); 461 462 // Accesses that say that the memory is more aligned than it is are not 463 // defined. 464 if (Align == 0 && Ty && Ty->isSized()) 465 Align = DL.getABITypeAlignment(Ty); 466 Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), 467 "Undefined behavior: Memory reference address is misaligned", &I); 468 } 469 } 470 471 void Lint::visitLoadInst(LoadInst &I) { 472 visitMemoryReference(I, I.getPointerOperand(), 473 AA->getTypeStoreSize(I.getType()), I.getAlignment(), 474 I.getType(), MemRef::Read); 475 } 476 477 void Lint::visitStoreInst(StoreInst &I) { 478 visitMemoryReference(I, I.getPointerOperand(), 479 AA->getTypeStoreSize(I.getOperand(0)->getType()), 480 I.getAlignment(), 481 I.getOperand(0)->getType(), MemRef::Write); 482 } 483 484 void Lint::visitXor(BinaryOperator &I) { 485 Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), 486 "Undefined result: xor(undef, undef)", &I); 487 } 488 489 void Lint::visitSub(BinaryOperator &I) { 490 Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), 491 "Undefined result: sub(undef, undef)", &I); 492 } 493 494 void Lint::visitLShr(BinaryOperator &I) { 495 if (ConstantInt *CI = dyn_cast<ConstantInt>( 496 findValue(I.getOperand(1), I.getModule()->getDataLayout(), 497 /*OffsetOk=*/false))) 498 Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 499 "Undefined result: Shift count out of range", &I); 500 } 501 502 void Lint::visitAShr(BinaryOperator &I) { 503 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue( 504 I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) 505 Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 506 "Undefined result: Shift count out of range", &I); 507 } 508 509 void Lint::visitShl(BinaryOperator &I) { 510 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue( 511 I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) 512 Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 513 "Undefined result: Shift count out of range", &I); 514 } 515 516 static bool 517 allPredsCameFromLandingPad(BasicBlock *BB, 518 SmallSet<BasicBlock *, 4> &VisitedBlocks) { 519 VisitedBlocks.insert(BB); 520 if (BB->isLandingPad()) 521 return true; 522 // If we find a block with no predecessors, the search failed. 523 if (pred_empty(BB)) 524 return false; 525 for (BasicBlock *Pred : predecessors(BB)) { 526 if (VisitedBlocks.count(Pred)) 527 continue; 528 if (!allPredsCameFromLandingPad(Pred, VisitedBlocks)) 529 return false; 530 } 531 return true; 532 } 533 534 static bool 535 allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin, 536 IntrinsicInst **SecondBeginCatch, 537 SmallSet<BasicBlock *, 4> &VisitedBlocks) { 538 VisitedBlocks.insert(BB); 539 for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) { 540 IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I); 541 if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) 542 return true; 543 // If we find another begincatch while looking for an endcatch, 544 // that's also an error. 545 if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) { 546 *SecondBeginCatch = IC; 547 return false; 548 } 549 } 550 551 // If we reach a block with no successors while searching, the 552 // search has failed. 553 if (succ_empty(BB)) 554 return false; 555 // Otherwise, search all of the successors. 556 for (BasicBlock *Succ : successors(BB)) { 557 if (VisitedBlocks.count(Succ)) 558 continue; 559 if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch, 560 VisitedBlocks)) 561 return false; 562 } 563 return true; 564 } 565 566 void Lint::visitEHBeginCatch(IntrinsicInst *II) { 567 // The checks in this function make a potentially dubious assumption about 568 // the CFG, namely that any block involved in a catch is only used for the 569 // catch. This will very likely be true of IR generated by a front end, 570 // but it may cease to be true, for example, if the IR is run through a 571 // pass which combines similar blocks. 572 // 573 // In general, if we encounter a block the isn't dominated by the catch 574 // block while we are searching the catch block's successors for a call 575 // to end catch intrinsic, then it is possible that it will be legal for 576 // a path through this block to never reach a call to llvm.eh.endcatch. 577 // An analogous statement could be made about our search for a landing 578 // pad among the catch block's predecessors. 579 // 580 // What is actually required is that no path is possible at runtime that 581 // reaches a call to llvm.eh.begincatch without having previously visited 582 // a landingpad instruction and that no path is possible at runtime that 583 // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch 584 // (mentally adjusting for the fact that in reality these calls will be 585 // removed before code generation). 586 // 587 // Because this is a lint check, we take a pessimistic approach and warn if 588 // the control flow is potentially incorrect. 589 590 SmallSet<BasicBlock *, 4> VisitedBlocks; 591 BasicBlock *CatchBB = II->getParent(); 592 593 // The begin catch must occur in a landing pad block or all paths 594 // to it must have come from a landing pad. 595 Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks), 596 "llvm.eh.begincatch may be reachable without passing a landingpad", 597 II); 598 599 // Reset the visited block list. 600 VisitedBlocks.clear(); 601 602 IntrinsicInst *SecondBeginCatch = nullptr; 603 604 // This has to be called before it is asserted. Otherwise, the first assert 605 // below can never be hit. 606 bool EndCatchFound = allSuccessorsReachEndCatch( 607 CatchBB, std::next(static_cast<BasicBlock::iterator>(II)), 608 &SecondBeginCatch, VisitedBlocks); 609 Assert( 610 SecondBeginCatch == nullptr, 611 "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch", 612 II, SecondBeginCatch); 613 Assert(EndCatchFound, 614 "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch", 615 II); 616 } 617 618 static bool allPredCameFromBeginCatch( 619 BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin, 620 IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) { 621 VisitedBlocks.insert(BB); 622 // Look for a begincatch in this block. 623 for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE; 624 ++RI) { 625 IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI); 626 if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) 627 return true; 628 // If we find another end catch before we find a begin catch, that's 629 // an error. 630 if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) { 631 *SecondEndCatch = IC; 632 return false; 633 } 634 // If we encounter a landingpad instruction, the search failed. 635 if (isa<LandingPadInst>(*RI)) 636 return false; 637 } 638 // If while searching we find a block with no predeccesors, 639 // the search failed. 640 if (pred_empty(BB)) 641 return false; 642 // Search any predecessors we haven't seen before. 643 for (BasicBlock *Pred : predecessors(BB)) { 644 if (VisitedBlocks.count(Pred)) 645 continue; 646 if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch, 647 VisitedBlocks)) 648 return false; 649 } 650 return true; 651 } 652 653 void Lint::visitEHEndCatch(IntrinsicInst *II) { 654 // The check in this function makes a potentially dubious assumption about 655 // the CFG, namely that any block involved in a catch is only used for the 656 // catch. This will very likely be true of IR generated by a front end, 657 // but it may cease to be true, for example, if the IR is run through a 658 // pass which combines similar blocks. 659 // 660 // In general, if we encounter a block the isn't post-dominated by the 661 // end catch block while we are searching the end catch block's predecessors 662 // for a call to the begin catch intrinsic, then it is possible that it will 663 // be legal for a path to reach the end catch block without ever having 664 // called llvm.eh.begincatch. 665 // 666 // What is actually required is that no path is possible at runtime that 667 // reaches a call to llvm.eh.endcatch without having previously visited 668 // a call to llvm.eh.begincatch (mentally adjusting for the fact that in 669 // reality these calls will be removed before code generation). 670 // 671 // Because this is a lint check, we take a pessimistic approach and warn if 672 // the control flow is potentially incorrect. 673 674 BasicBlock *EndCatchBB = II->getParent(); 675 676 // Alls paths to the end catch call must pass through a begin catch call. 677 678 // If llvm.eh.begincatch wasn't called in the current block, we'll use this 679 // lambda to recursively look for it in predecessors. 680 SmallSet<BasicBlock *, 4> VisitedBlocks; 681 IntrinsicInst *SecondEndCatch = nullptr; 682 683 // This has to be called before it is asserted. Otherwise, the first assert 684 // below can never be hit. 685 bool BeginCatchFound = 686 allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II), 687 &SecondEndCatch, VisitedBlocks); 688 Assert( 689 SecondEndCatch == nullptr, 690 "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch", 691 II, SecondEndCatch); 692 Assert(BeginCatchFound, 693 "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch", 694 II); 695 } 696 697 static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, 698 AssumptionCache *AC) { 699 // Assume undef could be zero. 700 if (isa<UndefValue>(V)) 701 return true; 702 703 VectorType *VecTy = dyn_cast<VectorType>(V->getType()); 704 if (!VecTy) { 705 unsigned BitWidth = V->getType()->getIntegerBitWidth(); 706 APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); 707 computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, 708 dyn_cast<Instruction>(V), DT); 709 return KnownZero.isAllOnesValue(); 710 } 711 712 // Per-component check doesn't work with zeroinitializer 713 Constant *C = dyn_cast<Constant>(V); 714 if (!C) 715 return false; 716 717 if (C->isZeroValue()) 718 return true; 719 720 // For a vector, KnownZero will only be true if all values are zero, so check 721 // this per component 722 unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth(); 723 for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) { 724 Constant *Elem = C->getAggregateElement(I); 725 if (isa<UndefValue>(Elem)) 726 return true; 727 728 APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); 729 computeKnownBits(Elem, KnownZero, KnownOne, DL); 730 if (KnownZero.isAllOnesValue()) 731 return true; 732 } 733 734 return false; 735 } 736 737 void Lint::visitSDiv(BinaryOperator &I) { 738 Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 739 "Undefined behavior: Division by zero", &I); 740 } 741 742 void Lint::visitUDiv(BinaryOperator &I) { 743 Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 744 "Undefined behavior: Division by zero", &I); 745 } 746 747 void Lint::visitSRem(BinaryOperator &I) { 748 Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 749 "Undefined behavior: Division by zero", &I); 750 } 751 752 void Lint::visitURem(BinaryOperator &I) { 753 Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 754 "Undefined behavior: Division by zero", &I); 755 } 756 757 void Lint::visitAllocaInst(AllocaInst &I) { 758 if (isa<ConstantInt>(I.getArraySize())) 759 // This isn't undefined behavior, it's just an obvious pessimization. 760 Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), 761 "Pessimization: Static alloca outside of entry block", &I); 762 763 // TODO: Check for an unusual size (MSB set?) 764 } 765 766 void Lint::visitVAArgInst(VAArgInst &I) { 767 visitMemoryReference(I, I.getOperand(0), MemoryLocation::UnknownSize, 0, 768 nullptr, MemRef::Read | MemRef::Write); 769 } 770 771 void Lint::visitIndirectBrInst(IndirectBrInst &I) { 772 visitMemoryReference(I, I.getAddress(), MemoryLocation::UnknownSize, 0, 773 nullptr, MemRef::Branchee); 774 775 Assert(I.getNumDestinations() != 0, 776 "Undefined behavior: indirectbr with no destinations", &I); 777 } 778 779 void Lint::visitExtractElementInst(ExtractElementInst &I) { 780 if (ConstantInt *CI = dyn_cast<ConstantInt>( 781 findValue(I.getIndexOperand(), I.getModule()->getDataLayout(), 782 /*OffsetOk=*/false))) 783 Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), 784 "Undefined result: extractelement index out of range", &I); 785 } 786 787 void Lint::visitInsertElementInst(InsertElementInst &I) { 788 if (ConstantInt *CI = dyn_cast<ConstantInt>( 789 findValue(I.getOperand(2), I.getModule()->getDataLayout(), 790 /*OffsetOk=*/false))) 791 Assert(CI->getValue().ult(I.getType()->getNumElements()), 792 "Undefined result: insertelement index out of range", &I); 793 } 794 795 void Lint::visitUnreachableInst(UnreachableInst &I) { 796 // This isn't undefined behavior, it's merely suspicious. 797 Assert(&I == I.getParent()->begin() || 798 std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(), 799 "Unusual: unreachable immediately preceded by instruction without " 800 "side effects", 801 &I); 802 } 803 804 /// findValue - Look through bitcasts and simple memory reference patterns 805 /// to identify an equivalent, but more informative, value. If OffsetOk 806 /// is true, look through getelementptrs with non-zero offsets too. 807 /// 808 /// Most analysis passes don't require this logic, because instcombine 809 /// will simplify most of these kinds of things away. But it's a goal of 810 /// this Lint pass to be useful even on non-optimized IR. 811 Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const { 812 SmallPtrSet<Value *, 4> Visited; 813 return findValueImpl(V, DL, OffsetOk, Visited); 814 } 815 816 /// findValueImpl - Implementation helper for findValue. 817 Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, 818 SmallPtrSetImpl<Value *> &Visited) const { 819 // Detect self-referential values. 820 if (!Visited.insert(V).second) 821 return UndefValue::get(V->getType()); 822 823 // TODO: Look through sext or zext cast, when the result is known to 824 // be interpreted as signed or unsigned, respectively. 825 // TODO: Look through eliminable cast pairs. 826 // TODO: Look through calls with unique return values. 827 // TODO: Look through vector insert/extract/shuffle. 828 V = OffsetOk ? GetUnderlyingObject(V, DL) : V->stripPointerCasts(); 829 if (LoadInst *L = dyn_cast<LoadInst>(V)) { 830 BasicBlock::iterator BBI = L; 831 BasicBlock *BB = L->getParent(); 832 SmallPtrSet<BasicBlock *, 4> VisitedBlocks; 833 for (;;) { 834 if (!VisitedBlocks.insert(BB).second) 835 break; 836 if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), 837 BB, BBI, 6, AA)) 838 return findValueImpl(U, DL, OffsetOk, Visited); 839 if (BBI != BB->begin()) break; 840 BB = BB->getUniquePredecessor(); 841 if (!BB) break; 842 BBI = BB->end(); 843 } 844 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 845 if (Value *W = PN->hasConstantValue()) 846 if (W != V) 847 return findValueImpl(W, DL, OffsetOk, Visited); 848 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 849 if (CI->isNoopCast(DL)) 850 return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited); 851 } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { 852 if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), 853 Ex->getIndices())) 854 if (W != V) 855 return findValueImpl(W, DL, OffsetOk, Visited); 856 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { 857 // Same as above, but for ConstantExpr instead of Instruction. 858 if (Instruction::isCast(CE->getOpcode())) { 859 if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), 860 CE->getOperand(0)->getType(), CE->getType(), 861 DL.getIntPtrType(V->getType()))) 862 return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited); 863 } else if (CE->getOpcode() == Instruction::ExtractValue) { 864 ArrayRef<unsigned> Indices = CE->getIndices(); 865 if (Value *W = FindInsertedValue(CE->getOperand(0), Indices)) 866 if (W != V) 867 return findValueImpl(W, DL, OffsetOk, Visited); 868 } 869 } 870 871 // As a last resort, try SimplifyInstruction or constant folding. 872 if (Instruction *Inst = dyn_cast<Instruction>(V)) { 873 if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC)) 874 return findValueImpl(W, DL, OffsetOk, Visited); 875 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { 876 if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI)) 877 if (W != V) 878 return findValueImpl(W, DL, OffsetOk, Visited); 879 } 880 881 return V; 882 } 883 884 //===----------------------------------------------------------------------===// 885 // Implement the public interfaces to this file... 886 //===----------------------------------------------------------------------===// 887 888 FunctionPass *llvm::createLintPass() { 889 return new Lint(); 890 } 891 892 /// lintFunction - Check a function for errors, printing messages on stderr. 893 /// 894 void llvm::lintFunction(const Function &f) { 895 Function &F = const_cast<Function&>(f); 896 assert(!F.isDeclaration() && "Cannot lint external functions"); 897 898 legacy::FunctionPassManager FPM(F.getParent()); 899 Lint *V = new Lint(); 900 FPM.add(V); 901 FPM.run(F); 902 } 903 904 /// lintModule - Check a module for errors, printing messages on stderr. 905 /// 906 void llvm::lintModule(const Module &M) { 907 legacy::PassManager PM; 908 Lint *V = new Lint(); 909 PM.add(V); 910 PM.run(const_cast<Module&>(M)); 911 } 912