1 //===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass statically checks for common and easily-identified constructs 11 // which produce undefined or likely unintended behavior in LLVM IR. 12 // 13 // It is not a guarantee of correctness, in two ways. First, it isn't 14 // comprehensive. There are checks which could be done statically which are 15 // not yet implemented. Some of these are indicated by TODO comments, but 16 // those aren't comprehensive either. Second, many conditions cannot be 17 // checked statically. This pass does no dynamic instrumentation, so it 18 // can't check for all possible problems. 19 // 20 // Another limitation is that it assumes all code will be executed. A store 21 // through a null pointer in a basic block which is never reached is harmless, 22 // but this pass will warn about it anyway. This is the main reason why most 23 // of these checks live here instead of in the Verifier pass. 24 // 25 // Optimization passes may make conditions that this pass checks for more or 26 // less obvious. If an optimization pass appears to be introducing a warning, 27 // it may be that the optimization pass is merely exposing an existing 28 // condition in the code. 29 // 30 // This code may be run before instcombine. In many cases, instcombine checks 31 // for the same kinds of things and turns instructions with undefined behavior 32 // into unreachable (or equivalent). Because of this, this pass makes some 33 // effort to look through bitcasts and so on. 34 // 35 //===----------------------------------------------------------------------===// 36 37 #include "llvm/Analysis/Lint.h" 38 #include "llvm/ADT/STLExtras.h" 39 #include "llvm/ADT/SmallSet.h" 40 #include "llvm/Analysis/AliasAnalysis.h" 41 #include "llvm/Analysis/AssumptionCache.h" 42 #include "llvm/Analysis/ConstantFolding.h" 43 #include "llvm/Analysis/InstructionSimplify.h" 44 #include "llvm/Analysis/Loads.h" 45 #include "llvm/Analysis/Passes.h" 46 #include "llvm/Analysis/TargetLibraryInfo.h" 47 #include "llvm/Analysis/ValueTracking.h" 48 #include "llvm/IR/CallSite.h" 49 #include "llvm/IR/DataLayout.h" 50 #include "llvm/IR/Dominators.h" 51 #include "llvm/IR/Function.h" 52 #include "llvm/IR/InstVisitor.h" 53 #include "llvm/IR/IntrinsicInst.h" 54 #include "llvm/IR/LegacyPassManager.h" 55 #include "llvm/Pass.h" 56 #include "llvm/Support/Debug.h" 57 #include "llvm/Support/raw_ostream.h" 58 using namespace llvm; 59 60 namespace { 61 namespace MemRef { 62 static unsigned Read = 1; 63 static unsigned Write = 2; 64 static unsigned Callee = 4; 65 static unsigned Branchee = 8; 66 } 67 68 class Lint : public FunctionPass, public InstVisitor<Lint> { 69 friend class InstVisitor<Lint>; 70 71 void visitFunction(Function &F); 72 73 void visitCallSite(CallSite CS); 74 void visitMemoryReference(Instruction &I, Value *Ptr, 75 uint64_t Size, unsigned Align, 76 Type *Ty, unsigned Flags); 77 void visitEHBeginCatch(IntrinsicInst *II); 78 void visitEHEndCatch(IntrinsicInst *II); 79 80 void visitCallInst(CallInst &I); 81 void visitInvokeInst(InvokeInst &I); 82 void visitReturnInst(ReturnInst &I); 83 void visitLoadInst(LoadInst &I); 84 void visitStoreInst(StoreInst &I); 85 void visitXor(BinaryOperator &I); 86 void visitSub(BinaryOperator &I); 87 void visitLShr(BinaryOperator &I); 88 void visitAShr(BinaryOperator &I); 89 void visitShl(BinaryOperator &I); 90 void visitSDiv(BinaryOperator &I); 91 void visitUDiv(BinaryOperator &I); 92 void visitSRem(BinaryOperator &I); 93 void visitURem(BinaryOperator &I); 94 void visitAllocaInst(AllocaInst &I); 95 void visitVAArgInst(VAArgInst &I); 96 void visitIndirectBrInst(IndirectBrInst &I); 97 void visitExtractElementInst(ExtractElementInst &I); 98 void visitInsertElementInst(InsertElementInst &I); 99 void visitUnreachableInst(UnreachableInst &I); 100 101 Value *findValue(Value *V, bool OffsetOk) const; 102 Value *findValueImpl(Value *V, bool OffsetOk, 103 SmallPtrSetImpl<Value *> &Visited) const; 104 105 public: 106 Module *Mod; 107 AliasAnalysis *AA; 108 AssumptionCache *AC; 109 DominatorTree *DT; 110 const DataLayout *DL; 111 TargetLibraryInfo *TLI; 112 113 std::string Messages; 114 raw_string_ostream MessagesStr; 115 116 static char ID; // Pass identification, replacement for typeid 117 Lint() : FunctionPass(ID), MessagesStr(Messages) { 118 initializeLintPass(*PassRegistry::getPassRegistry()); 119 } 120 121 bool runOnFunction(Function &F) override; 122 123 void getAnalysisUsage(AnalysisUsage &AU) const override { 124 AU.setPreservesAll(); 125 AU.addRequired<AliasAnalysis>(); 126 AU.addRequired<AssumptionCacheTracker>(); 127 AU.addRequired<TargetLibraryInfoWrapperPass>(); 128 AU.addRequired<DominatorTreeWrapperPass>(); 129 } 130 void print(raw_ostream &O, const Module *M) const override {} 131 132 void WriteValue(const Value *V) { 133 if (!V) return; 134 if (isa<Instruction>(V)) { 135 MessagesStr << *V << '\n'; 136 } else { 137 V->printAsOperand(MessagesStr, true, Mod); 138 MessagesStr << '\n'; 139 } 140 } 141 142 // CheckFailed - A check failed, so print out the condition and the message 143 // that failed. This provides a nice place to put a breakpoint if you want 144 // to see why something is not correct. 145 void CheckFailed(const Twine &Message, 146 const Value *V1 = nullptr, const Value *V2 = nullptr, 147 const Value *V3 = nullptr, const Value *V4 = nullptr) { 148 MessagesStr << Message.str() << "\n"; 149 WriteValue(V1); 150 WriteValue(V2); 151 WriteValue(V3); 152 WriteValue(V4); 153 } 154 }; 155 } 156 157 char Lint::ID = 0; 158 INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", 159 false, true) 160 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 161 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 162 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 163 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 164 INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", 165 false, true) 166 167 // Assert - We know that cond should be true, if not print an error message. 168 #define Assert(C, M) \ 169 do { if (!(C)) { CheckFailed(M); return; } } while (0) 170 #define Assert1(C, M, V1) \ 171 do { if (!(C)) { CheckFailed(M, V1); return; } } while (0) 172 #define Assert2(C, M, V1, V2) \ 173 do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0) 174 #define Assert3(C, M, V1, V2, V3) \ 175 do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0) 176 #define Assert4(C, M, V1, V2, V3, V4) \ 177 do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0) 178 179 // Lint::run - This is the main Analysis entry point for a 180 // function. 181 // 182 bool Lint::runOnFunction(Function &F) { 183 Mod = F.getParent(); 184 AA = &getAnalysis<AliasAnalysis>(); 185 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 186 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 187 DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); 188 DL = DLP ? &DLP->getDataLayout() : nullptr; 189 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); 190 visit(F); 191 dbgs() << MessagesStr.str(); 192 Messages.clear(); 193 return false; 194 } 195 196 void Lint::visitFunction(Function &F) { 197 // This isn't undefined behavior, it's just a little unusual, and it's a 198 // fairly common mistake to neglect to name a function. 199 Assert1(F.hasName() || F.hasLocalLinkage(), 200 "Unusual: Unnamed function with non-local linkage", &F); 201 202 // TODO: Check for irreducible control flow. 203 } 204 205 void Lint::visitCallSite(CallSite CS) { 206 Instruction &I = *CS.getInstruction(); 207 Value *Callee = CS.getCalledValue(); 208 209 visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize, 210 0, nullptr, MemRef::Callee); 211 212 if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { 213 Assert1(CS.getCallingConv() == F->getCallingConv(), 214 "Undefined behavior: Caller and callee calling convention differ", 215 &I); 216 217 FunctionType *FT = F->getFunctionType(); 218 unsigned NumActualArgs = CS.arg_size(); 219 220 Assert1(FT->isVarArg() ? 221 FT->getNumParams() <= NumActualArgs : 222 FT->getNumParams() == NumActualArgs, 223 "Undefined behavior: Call argument count mismatches callee " 224 "argument count", &I); 225 226 Assert1(FT->getReturnType() == I.getType(), 227 "Undefined behavior: Call return type mismatches " 228 "callee return type", &I); 229 230 // Check argument types (in case the callee was casted) and attributes. 231 // TODO: Verify that caller and callee attributes are compatible. 232 Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); 233 CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); 234 for (; AI != AE; ++AI) { 235 Value *Actual = *AI; 236 if (PI != PE) { 237 Argument *Formal = PI++; 238 Assert1(Formal->getType() == Actual->getType(), 239 "Undefined behavior: Call argument type mismatches " 240 "callee parameter type", &I); 241 242 // Check that noalias arguments don't alias other arguments. This is 243 // not fully precise because we don't know the sizes of the dereferenced 244 // memory regions. 245 if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) 246 for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) 247 if (AI != BI && (*BI)->getType()->isPointerTy()) { 248 AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI); 249 Assert1(Result != AliasAnalysis::MustAlias && 250 Result != AliasAnalysis::PartialAlias, 251 "Unusual: noalias argument aliases another argument", &I); 252 } 253 254 // Check that an sret argument points to valid memory. 255 if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { 256 Type *Ty = 257 cast<PointerType>(Formal->getType())->getElementType(); 258 visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), 259 DL ? DL->getABITypeAlignment(Ty) : 0, 260 Ty, MemRef::Read | MemRef::Write); 261 } 262 } 263 } 264 } 265 266 if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) 267 for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); 268 AI != AE; ++AI) { 269 Value *Obj = findValue(*AI, /*OffsetOk=*/true); 270 Assert1(!isa<AllocaInst>(Obj), 271 "Undefined behavior: Call with \"tail\" keyword references " 272 "alloca", &I); 273 } 274 275 276 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) 277 switch (II->getIntrinsicID()) { 278 default: break; 279 280 // TODO: Check more intrinsics 281 282 case Intrinsic::memcpy: { 283 MemCpyInst *MCI = cast<MemCpyInst>(&I); 284 // TODO: If the size is known, use it. 285 visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize, 286 MCI->getAlignment(), nullptr, 287 MemRef::Write); 288 visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize, 289 MCI->getAlignment(), nullptr, 290 MemRef::Read); 291 292 // Check that the memcpy arguments don't overlap. The AliasAnalysis API 293 // isn't expressive enough for what we really want to do. Known partial 294 // overlap is not distinguished from the case where nothing is known. 295 uint64_t Size = 0; 296 if (const ConstantInt *Len = 297 dyn_cast<ConstantInt>(findValue(MCI->getLength(), 298 /*OffsetOk=*/false))) 299 if (Len->getValue().isIntN(32)) 300 Size = Len->getValue().getZExtValue(); 301 Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != 302 AliasAnalysis::MustAlias, 303 "Undefined behavior: memcpy source and destination overlap", &I); 304 break; 305 } 306 case Intrinsic::memmove: { 307 MemMoveInst *MMI = cast<MemMoveInst>(&I); 308 // TODO: If the size is known, use it. 309 visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize, 310 MMI->getAlignment(), nullptr, 311 MemRef::Write); 312 visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize, 313 MMI->getAlignment(), nullptr, 314 MemRef::Read); 315 break; 316 } 317 case Intrinsic::memset: { 318 MemSetInst *MSI = cast<MemSetInst>(&I); 319 // TODO: If the size is known, use it. 320 visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize, 321 MSI->getAlignment(), nullptr, 322 MemRef::Write); 323 break; 324 } 325 326 case Intrinsic::vastart: 327 Assert1(I.getParent()->getParent()->isVarArg(), 328 "Undefined behavior: va_start called in a non-varargs function", 329 &I); 330 331 visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, 332 0, nullptr, MemRef::Read | MemRef::Write); 333 break; 334 case Intrinsic::vacopy: 335 visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, 336 0, nullptr, MemRef::Write); 337 visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize, 338 0, nullptr, MemRef::Read); 339 break; 340 case Intrinsic::vaend: 341 visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, 342 0, nullptr, MemRef::Read | MemRef::Write); 343 break; 344 345 case Intrinsic::stackrestore: 346 // Stackrestore doesn't read or write memory, but it sets the 347 // stack pointer, which the compiler may read from or write to 348 // at any time, so check it for both readability and writeability. 349 visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, 350 0, nullptr, MemRef::Read | MemRef::Write); 351 break; 352 353 case Intrinsic::eh_begincatch: 354 visitEHBeginCatch(II); 355 break; 356 case Intrinsic::eh_endcatch: 357 visitEHEndCatch(II); 358 break; 359 } 360 } 361 362 void Lint::visitCallInst(CallInst &I) { 363 return visitCallSite(&I); 364 } 365 366 void Lint::visitInvokeInst(InvokeInst &I) { 367 return visitCallSite(&I); 368 } 369 370 void Lint::visitReturnInst(ReturnInst &I) { 371 Function *F = I.getParent()->getParent(); 372 Assert1(!F->doesNotReturn(), 373 "Unusual: Return statement in function with noreturn attribute", 374 &I); 375 376 if (Value *V = I.getReturnValue()) { 377 Value *Obj = findValue(V, /*OffsetOk=*/true); 378 Assert1(!isa<AllocaInst>(Obj), 379 "Unusual: Returning alloca value", &I); 380 } 381 } 382 383 // TODO: Check that the reference is in bounds. 384 // TODO: Check readnone/readonly function attributes. 385 void Lint::visitMemoryReference(Instruction &I, 386 Value *Ptr, uint64_t Size, unsigned Align, 387 Type *Ty, unsigned Flags) { 388 // If no memory is being referenced, it doesn't matter if the pointer 389 // is valid. 390 if (Size == 0) 391 return; 392 393 Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); 394 Assert1(!isa<ConstantPointerNull>(UnderlyingObject), 395 "Undefined behavior: Null pointer dereference", &I); 396 Assert1(!isa<UndefValue>(UnderlyingObject), 397 "Undefined behavior: Undef pointer dereference", &I); 398 Assert1(!isa<ConstantInt>(UnderlyingObject) || 399 !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(), 400 "Unusual: All-ones pointer dereference", &I); 401 Assert1(!isa<ConstantInt>(UnderlyingObject) || 402 !cast<ConstantInt>(UnderlyingObject)->isOne(), 403 "Unusual: Address one pointer dereference", &I); 404 405 if (Flags & MemRef::Write) { 406 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) 407 Assert1(!GV->isConstant(), 408 "Undefined behavior: Write to read-only memory", &I); 409 Assert1(!isa<Function>(UnderlyingObject) && 410 !isa<BlockAddress>(UnderlyingObject), 411 "Undefined behavior: Write to text section", &I); 412 } 413 if (Flags & MemRef::Read) { 414 Assert1(!isa<Function>(UnderlyingObject), 415 "Unusual: Load from function body", &I); 416 Assert1(!isa<BlockAddress>(UnderlyingObject), 417 "Undefined behavior: Load from block address", &I); 418 } 419 if (Flags & MemRef::Callee) { 420 Assert1(!isa<BlockAddress>(UnderlyingObject), 421 "Undefined behavior: Call to block address", &I); 422 } 423 if (Flags & MemRef::Branchee) { 424 Assert1(!isa<Constant>(UnderlyingObject) || 425 isa<BlockAddress>(UnderlyingObject), 426 "Undefined behavior: Branch to non-blockaddress", &I); 427 } 428 429 // Check for buffer overflows and misalignment. 430 // Only handles memory references that read/write something simple like an 431 // alloca instruction or a global variable. 432 int64_t Offset = 0; 433 if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) { 434 // OK, so the access is to a constant offset from Ptr. Check that Ptr is 435 // something we can handle and if so extract the size of this base object 436 // along with its alignment. 437 uint64_t BaseSize = AliasAnalysis::UnknownSize; 438 unsigned BaseAlign = 0; 439 440 if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { 441 Type *ATy = AI->getAllocatedType(); 442 if (DL && !AI->isArrayAllocation() && ATy->isSized()) 443 BaseSize = DL->getTypeAllocSize(ATy); 444 BaseAlign = AI->getAlignment(); 445 if (DL && BaseAlign == 0 && ATy->isSized()) 446 BaseAlign = DL->getABITypeAlignment(ATy); 447 } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { 448 // If the global may be defined differently in another compilation unit 449 // then don't warn about funky memory accesses. 450 if (GV->hasDefinitiveInitializer()) { 451 Type *GTy = GV->getType()->getElementType(); 452 if (DL && GTy->isSized()) 453 BaseSize = DL->getTypeAllocSize(GTy); 454 BaseAlign = GV->getAlignment(); 455 if (DL && BaseAlign == 0 && GTy->isSized()) 456 BaseAlign = DL->getABITypeAlignment(GTy); 457 } 458 } 459 460 // Accesses from before the start or after the end of the object are not 461 // defined. 462 Assert1(Size == AliasAnalysis::UnknownSize || 463 BaseSize == AliasAnalysis::UnknownSize || 464 (Offset >= 0 && Offset + Size <= BaseSize), 465 "Undefined behavior: Buffer overflow", &I); 466 467 // Accesses that say that the memory is more aligned than it is are not 468 // defined. 469 if (DL && Align == 0 && Ty && Ty->isSized()) 470 Align = DL->getABITypeAlignment(Ty); 471 Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), 472 "Undefined behavior: Memory reference address is misaligned", &I); 473 } 474 } 475 476 void Lint::visitLoadInst(LoadInst &I) { 477 visitMemoryReference(I, I.getPointerOperand(), 478 AA->getTypeStoreSize(I.getType()), I.getAlignment(), 479 I.getType(), MemRef::Read); 480 } 481 482 void Lint::visitStoreInst(StoreInst &I) { 483 visitMemoryReference(I, I.getPointerOperand(), 484 AA->getTypeStoreSize(I.getOperand(0)->getType()), 485 I.getAlignment(), 486 I.getOperand(0)->getType(), MemRef::Write); 487 } 488 489 void Lint::visitXor(BinaryOperator &I) { 490 Assert1(!isa<UndefValue>(I.getOperand(0)) || 491 !isa<UndefValue>(I.getOperand(1)), 492 "Undefined result: xor(undef, undef)", &I); 493 } 494 495 void Lint::visitSub(BinaryOperator &I) { 496 Assert1(!isa<UndefValue>(I.getOperand(0)) || 497 !isa<UndefValue>(I.getOperand(1)), 498 "Undefined result: sub(undef, undef)", &I); 499 } 500 501 void Lint::visitLShr(BinaryOperator &I) { 502 if (ConstantInt *CI = 503 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 504 Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 505 "Undefined result: Shift count out of range", &I); 506 } 507 508 void Lint::visitAShr(BinaryOperator &I) { 509 if (ConstantInt *CI = 510 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 511 Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 512 "Undefined result: Shift count out of range", &I); 513 } 514 515 void Lint::visitShl(BinaryOperator &I) { 516 if (ConstantInt *CI = 517 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 518 Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 519 "Undefined result: Shift count out of range", &I); 520 } 521 522 static bool 523 allPredsCameFromLandingPad(BasicBlock *BB, 524 SmallSet<BasicBlock *, 4> &VisitedBlocks) { 525 VisitedBlocks.insert(BB); 526 if (BB->isLandingPad()) 527 return true; 528 // If we find a block with no predecessors, the search failed. 529 if (pred_empty(BB)) 530 return false; 531 for (BasicBlock *Pred : predecessors(BB)) { 532 if (VisitedBlocks.count(Pred)) 533 continue; 534 if (!allPredsCameFromLandingPad(Pred, VisitedBlocks)) 535 return false; 536 } 537 return true; 538 } 539 540 static bool 541 allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin, 542 IntrinsicInst **SecondBeginCatch, 543 SmallSet<BasicBlock *, 4> &VisitedBlocks) { 544 VisitedBlocks.insert(BB); 545 for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) { 546 IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I); 547 if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) 548 return true; 549 // If we find another begincatch while looking for an endcatch, 550 // that's also an error. 551 if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) { 552 *SecondBeginCatch = IC; 553 return false; 554 } 555 } 556 557 // If we reach a block with no successors while searching, the 558 // search has failed. 559 if (succ_empty(BB)) 560 return false; 561 // Otherwise, search all of the successors. 562 for (BasicBlock *Succ : successors(BB)) { 563 if (VisitedBlocks.count(Succ)) 564 continue; 565 if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch, 566 VisitedBlocks)) 567 return false; 568 } 569 return true; 570 } 571 572 void Lint::visitEHBeginCatch(IntrinsicInst *II) { 573 // The checks in this function make a potentially dubious assumption about 574 // the CFG, namely that any block involved in a catch is only used for the 575 // catch. This will very likely be true of IR generated by a front end, 576 // but it may cease to be true, for example, if the IR is run through a 577 // pass which combines similar blocks. 578 // 579 // In general, if we encounter a block the isn't dominated by the catch 580 // block while we are searching the catch block's successors for a call 581 // to end catch intrinsic, then it is possible that it will be legal for 582 // a path through this block to never reach a call to llvm.eh.endcatch. 583 // An analogous statement could be made about our search for a landing 584 // pad among the catch block's predecessors. 585 // 586 // What is actually required is that no path is possible at runtime that 587 // reaches a call to llvm.eh.begincatch without having previously visited 588 // a landingpad instruction and that no path is possible at runtime that 589 // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch 590 // (mentally adjusting for the fact that in reality these calls will be 591 // removed before code generation). 592 // 593 // Because this is a lint check, we take a pessimistic approach and warn if 594 // the control flow is potentially incorrect. 595 596 SmallSet<BasicBlock *, 4> VisitedBlocks; 597 BasicBlock *CatchBB = II->getParent(); 598 599 // The begin catch must occur in a landing pad block or all paths 600 // to it must have come from a landing pad. 601 Assert1(allPredsCameFromLandingPad(CatchBB, VisitedBlocks), 602 "llvm.eh.begincatch may be reachable without passing a landingpad", 603 II); 604 605 // Reset the visited block list. 606 VisitedBlocks.clear(); 607 608 IntrinsicInst *SecondBeginCatch = nullptr; 609 610 // This has to be called before it is asserted. Otherwise, the first assert 611 // below can never be hit. 612 bool EndCatchFound = allSuccessorsReachEndCatch( 613 CatchBB, std::next(static_cast<BasicBlock::iterator>(II)), 614 &SecondBeginCatch, VisitedBlocks); 615 Assert2( 616 SecondBeginCatch == nullptr, 617 "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch", 618 II, SecondBeginCatch); 619 Assert1(EndCatchFound, 620 "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch", 621 II); 622 } 623 624 static bool allPredCameFromBeginCatch( 625 BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin, 626 IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) { 627 VisitedBlocks.insert(BB); 628 // Look for a begincatch in this block. 629 for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE; 630 ++RI) { 631 IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI); 632 if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) 633 return true; 634 // If we find another end catch before we find a begin catch, that's 635 // an error. 636 if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) { 637 *SecondEndCatch = IC; 638 return false; 639 } 640 // If we encounter a landingpad instruction, the search failed. 641 if (isa<LandingPadInst>(*RI)) 642 return false; 643 } 644 // If while searching we find a block with no predeccesors, 645 // the search failed. 646 if (pred_empty(BB)) 647 return false; 648 // Search any predecessors we haven't seen before. 649 for (BasicBlock *Pred : predecessors(BB)) { 650 if (VisitedBlocks.count(Pred)) 651 continue; 652 if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch, 653 VisitedBlocks)) 654 return false; 655 } 656 return true; 657 } 658 659 void Lint::visitEHEndCatch(IntrinsicInst *II) { 660 // The check in this function makes a potentially dubious assumption about 661 // the CFG, namely that any block involved in a catch is only used for the 662 // catch. This will very likely be true of IR generated by a front end, 663 // but it may cease to be true, for example, if the IR is run through a 664 // pass which combines similar blocks. 665 // 666 // In general, if we encounter a block the isn't post-dominated by the 667 // end catch block while we are searching the end catch block's predecessors 668 // for a call to the begin catch intrinsic, then it is possible that it will 669 // be legal for a path to reach the end catch block without ever having 670 // called llvm.eh.begincatch. 671 // 672 // What is actually required is that no path is possible at runtime that 673 // reaches a call to llvm.eh.endcatch without having previously visited 674 // a call to llvm.eh.begincatch (mentally adjusting for the fact that in 675 // reality these calls will be removed before code generation). 676 // 677 // Because this is a lint check, we take a pessimistic approach and warn if 678 // the control flow is potentially incorrect. 679 680 BasicBlock *EndCatchBB = II->getParent(); 681 682 // Alls paths to the end catch call must pass through a begin catch call. 683 684 // If llvm.eh.begincatch wasn't called in the current block, we'll use this 685 // lambda to recursively look for it in predecessors. 686 SmallSet<BasicBlock *, 4> VisitedBlocks; 687 IntrinsicInst *SecondEndCatch = nullptr; 688 689 // This has to be called before it is asserted. Otherwise, the first assert 690 // below can never be hit. 691 bool BeginCatchFound = 692 allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II), 693 &SecondEndCatch, VisitedBlocks); 694 Assert2( 695 SecondEndCatch == nullptr, 696 "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch", 697 II, SecondEndCatch); 698 Assert1( 699 BeginCatchFound, 700 "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch", 701 II); 702 } 703 704 static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT, 705 AssumptionCache *AC) { 706 // Assume undef could be zero. 707 if (isa<UndefValue>(V)) 708 return true; 709 710 VectorType *VecTy = dyn_cast<VectorType>(V->getType()); 711 if (!VecTy) { 712 unsigned BitWidth = V->getType()->getIntegerBitWidth(); 713 APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); 714 computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, 715 dyn_cast<Instruction>(V), DT); 716 return KnownZero.isAllOnesValue(); 717 } 718 719 // Per-component check doesn't work with zeroinitializer 720 Constant *C = dyn_cast<Constant>(V); 721 if (!C) 722 return false; 723 724 if (C->isZeroValue()) 725 return true; 726 727 // For a vector, KnownZero will only be true if all values are zero, so check 728 // this per component 729 unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth(); 730 for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) { 731 Constant *Elem = C->getAggregateElement(I); 732 if (isa<UndefValue>(Elem)) 733 return true; 734 735 APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); 736 computeKnownBits(Elem, KnownZero, KnownOne, DL); 737 if (KnownZero.isAllOnesValue()) 738 return true; 739 } 740 741 return false; 742 } 743 744 void Lint::visitSDiv(BinaryOperator &I) { 745 Assert1(!isZero(I.getOperand(1), DL, DT, AC), 746 "Undefined behavior: Division by zero", &I); 747 } 748 749 void Lint::visitUDiv(BinaryOperator &I) { 750 Assert1(!isZero(I.getOperand(1), DL, DT, AC), 751 "Undefined behavior: Division by zero", &I); 752 } 753 754 void Lint::visitSRem(BinaryOperator &I) { 755 Assert1(!isZero(I.getOperand(1), DL, DT, AC), 756 "Undefined behavior: Division by zero", &I); 757 } 758 759 void Lint::visitURem(BinaryOperator &I) { 760 Assert1(!isZero(I.getOperand(1), DL, DT, AC), 761 "Undefined behavior: Division by zero", &I); 762 } 763 764 void Lint::visitAllocaInst(AllocaInst &I) { 765 if (isa<ConstantInt>(I.getArraySize())) 766 // This isn't undefined behavior, it's just an obvious pessimization. 767 Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), 768 "Pessimization: Static alloca outside of entry block", &I); 769 770 // TODO: Check for an unusual size (MSB set?) 771 } 772 773 void Lint::visitVAArgInst(VAArgInst &I) { 774 visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 775 nullptr, MemRef::Read | MemRef::Write); 776 } 777 778 void Lint::visitIndirectBrInst(IndirectBrInst &I) { 779 visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 780 nullptr, MemRef::Branchee); 781 782 Assert1(I.getNumDestinations() != 0, 783 "Undefined behavior: indirectbr with no destinations", &I); 784 } 785 786 void Lint::visitExtractElementInst(ExtractElementInst &I) { 787 if (ConstantInt *CI = 788 dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), 789 /*OffsetOk=*/false))) 790 Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), 791 "Undefined result: extractelement index out of range", &I); 792 } 793 794 void Lint::visitInsertElementInst(InsertElementInst &I) { 795 if (ConstantInt *CI = 796 dyn_cast<ConstantInt>(findValue(I.getOperand(2), 797 /*OffsetOk=*/false))) 798 Assert1(CI->getValue().ult(I.getType()->getNumElements()), 799 "Undefined result: insertelement index out of range", &I); 800 } 801 802 void Lint::visitUnreachableInst(UnreachableInst &I) { 803 // This isn't undefined behavior, it's merely suspicious. 804 Assert1(&I == I.getParent()->begin() || 805 std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(), 806 "Unusual: unreachable immediately preceded by instruction without " 807 "side effects", &I); 808 } 809 810 /// findValue - Look through bitcasts and simple memory reference patterns 811 /// to identify an equivalent, but more informative, value. If OffsetOk 812 /// is true, look through getelementptrs with non-zero offsets too. 813 /// 814 /// Most analysis passes don't require this logic, because instcombine 815 /// will simplify most of these kinds of things away. But it's a goal of 816 /// this Lint pass to be useful even on non-optimized IR. 817 Value *Lint::findValue(Value *V, bool OffsetOk) const { 818 SmallPtrSet<Value *, 4> Visited; 819 return findValueImpl(V, OffsetOk, Visited); 820 } 821 822 /// findValueImpl - Implementation helper for findValue. 823 Value *Lint::findValueImpl(Value *V, bool OffsetOk, 824 SmallPtrSetImpl<Value *> &Visited) const { 825 // Detect self-referential values. 826 if (!Visited.insert(V).second) 827 return UndefValue::get(V->getType()); 828 829 // TODO: Look through sext or zext cast, when the result is known to 830 // be interpreted as signed or unsigned, respectively. 831 // TODO: Look through eliminable cast pairs. 832 // TODO: Look through calls with unique return values. 833 // TODO: Look through vector insert/extract/shuffle. 834 V = OffsetOk ? GetUnderlyingObject(V, DL) : V->stripPointerCasts(); 835 if (LoadInst *L = dyn_cast<LoadInst>(V)) { 836 BasicBlock::iterator BBI = L; 837 BasicBlock *BB = L->getParent(); 838 SmallPtrSet<BasicBlock *, 4> VisitedBlocks; 839 for (;;) { 840 if (!VisitedBlocks.insert(BB).second) 841 break; 842 if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), 843 BB, BBI, 6, AA)) 844 return findValueImpl(U, OffsetOk, Visited); 845 if (BBI != BB->begin()) break; 846 BB = BB->getUniquePredecessor(); 847 if (!BB) break; 848 BBI = BB->end(); 849 } 850 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 851 if (Value *W = PN->hasConstantValue()) 852 if (W != V) 853 return findValueImpl(W, OffsetOk, Visited); 854 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 855 if (CI->isNoopCast(DL)) 856 return findValueImpl(CI->getOperand(0), OffsetOk, Visited); 857 } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { 858 if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), 859 Ex->getIndices())) 860 if (W != V) 861 return findValueImpl(W, OffsetOk, Visited); 862 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { 863 // Same as above, but for ConstantExpr instead of Instruction. 864 if (Instruction::isCast(CE->getOpcode())) { 865 if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), 866 CE->getOperand(0)->getType(), 867 CE->getType(), 868 DL ? DL->getIntPtrType(V->getType()) : 869 Type::getInt64Ty(V->getContext()))) 870 return findValueImpl(CE->getOperand(0), OffsetOk, Visited); 871 } else if (CE->getOpcode() == Instruction::ExtractValue) { 872 ArrayRef<unsigned> Indices = CE->getIndices(); 873 if (Value *W = FindInsertedValue(CE->getOperand(0), Indices)) 874 if (W != V) 875 return findValueImpl(W, OffsetOk, Visited); 876 } 877 } 878 879 // As a last resort, try SimplifyInstruction or constant folding. 880 if (Instruction *Inst = dyn_cast<Instruction>(V)) { 881 if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC)) 882 return findValueImpl(W, OffsetOk, Visited); 883 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { 884 if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI)) 885 if (W != V) 886 return findValueImpl(W, OffsetOk, Visited); 887 } 888 889 return V; 890 } 891 892 //===----------------------------------------------------------------------===// 893 // Implement the public interfaces to this file... 894 //===----------------------------------------------------------------------===// 895 896 FunctionPass *llvm::createLintPass() { 897 return new Lint(); 898 } 899 900 /// lintFunction - Check a function for errors, printing messages on stderr. 901 /// 902 void llvm::lintFunction(const Function &f) { 903 Function &F = const_cast<Function&>(f); 904 assert(!F.isDeclaration() && "Cannot lint external functions"); 905 906 legacy::FunctionPassManager FPM(F.getParent()); 907 Lint *V = new Lint(); 908 FPM.add(V); 909 FPM.run(F); 910 } 911 912 /// lintModule - Check a module for errors, printing messages on stderr. 913 /// 914 void llvm::lintModule(const Module &M) { 915 legacy::PassManager PM; 916 Lint *V = new Lint(); 917 PM.add(V); 918 PM.run(const_cast<Module&>(M)); 919 } 920