1 //===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass statically checks for common and easily-identified constructs 10 // which produce undefined or likely unintended behavior in LLVM IR. 11 // 12 // It is not a guarantee of correctness, in two ways. First, it isn't 13 // comprehensive. There are checks which could be done statically which are 14 // not yet implemented. Some of these are indicated by TODO comments, but 15 // those aren't comprehensive either. Second, many conditions cannot be 16 // checked statically. This pass does no dynamic instrumentation, so it 17 // can't check for all possible problems. 18 // 19 // Another limitation is that it assumes all code will be executed. A store 20 // through a null pointer in a basic block which is never reached is harmless, 21 // but this pass will warn about it anyway. This is the main reason why most 22 // of these checks live here instead of in the Verifier pass. 23 // 24 // Optimization passes may make conditions that this pass checks for more or 25 // less obvious. If an optimization pass appears to be introducing a warning, 26 // it may be that the optimization pass is merely exposing an existing 27 // condition in the code. 28 // 29 // This code may be run before instcombine. In many cases, instcombine checks 30 // for the same kinds of things and turns instructions with undefined behavior 31 // into unreachable (or equivalent). Because of this, this pass makes some 32 // effort to look through bitcasts and so on. 33 // 34 //===----------------------------------------------------------------------===// 35 36 #include "llvm/Analysis/Lint.h" 37 #include "llvm/ADT/APInt.h" 38 #include "llvm/ADT/ArrayRef.h" 39 #include "llvm/ADT/SmallPtrSet.h" 40 #include "llvm/ADT/Twine.h" 41 #include "llvm/Analysis/AliasAnalysis.h" 42 #include "llvm/Analysis/AssumptionCache.h" 43 #include "llvm/Analysis/ConstantFolding.h" 44 #include "llvm/Analysis/InstructionSimplify.h" 45 #include "llvm/Analysis/Loads.h" 46 #include "llvm/Analysis/MemoryLocation.h" 47 #include "llvm/Analysis/Passes.h" 48 #include "llvm/Analysis/TargetLibraryInfo.h" 49 #include "llvm/Analysis/ValueTracking.h" 50 #include "llvm/IR/Argument.h" 51 #include "llvm/IR/BasicBlock.h" 52 #include "llvm/IR/Constant.h" 53 #include "llvm/IR/Constants.h" 54 #include "llvm/IR/DataLayout.h" 55 #include "llvm/IR/DerivedTypes.h" 56 #include "llvm/IR/Dominators.h" 57 #include "llvm/IR/Function.h" 58 #include "llvm/IR/GlobalVariable.h" 59 #include "llvm/IR/InstVisitor.h" 60 #include "llvm/IR/InstrTypes.h" 61 #include "llvm/IR/Instruction.h" 62 #include "llvm/IR/Instructions.h" 63 #include "llvm/IR/IntrinsicInst.h" 64 #include "llvm/IR/LegacyPassManager.h" 65 #include "llvm/IR/Module.h" 66 #include "llvm/IR/PassManager.h" 67 #include "llvm/IR/Type.h" 68 #include "llvm/IR/Value.h" 69 #include "llvm/InitializePasses.h" 70 #include "llvm/Pass.h" 71 #include "llvm/Support/Casting.h" 72 #include "llvm/Support/Debug.h" 73 #include "llvm/Support/KnownBits.h" 74 #include "llvm/Support/MathExtras.h" 75 #include "llvm/Support/raw_ostream.h" 76 #include <cassert> 77 #include <cstdint> 78 #include <iterator> 79 #include <string> 80 81 using namespace llvm; 82 83 namespace { 84 namespace MemRef { 85 static const unsigned Read = 1; 86 static const unsigned Write = 2; 87 static const unsigned Callee = 4; 88 static const unsigned Branchee = 8; 89 } // end namespace MemRef 90 91 class Lint : public InstVisitor<Lint> { 92 friend class InstVisitor<Lint>; 93 94 void visitFunction(Function &F); 95 96 void visitCallBase(CallBase &CB); 97 void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, 98 MaybeAlign Alignment, Type *Ty, unsigned Flags); 99 void visitEHBeginCatch(IntrinsicInst *II); 100 void visitEHEndCatch(IntrinsicInst *II); 101 102 void visitReturnInst(ReturnInst &I); 103 void visitLoadInst(LoadInst &I); 104 void visitStoreInst(StoreInst &I); 105 void visitXor(BinaryOperator &I); 106 void visitSub(BinaryOperator &I); 107 void visitLShr(BinaryOperator &I); 108 void visitAShr(BinaryOperator &I); 109 void visitShl(BinaryOperator &I); 110 void visitSDiv(BinaryOperator &I); 111 void visitUDiv(BinaryOperator &I); 112 void visitSRem(BinaryOperator &I); 113 void visitURem(BinaryOperator &I); 114 void visitAllocaInst(AllocaInst &I); 115 void visitVAArgInst(VAArgInst &I); 116 void visitIndirectBrInst(IndirectBrInst &I); 117 void visitExtractElementInst(ExtractElementInst &I); 118 void visitInsertElementInst(InsertElementInst &I); 119 void visitUnreachableInst(UnreachableInst &I); 120 121 Value *findValue(Value *V, bool OffsetOk) const; 122 Value *findValueImpl(Value *V, bool OffsetOk, 123 SmallPtrSetImpl<Value *> &Visited) const; 124 125 public: 126 Module *Mod; 127 const DataLayout *DL; 128 AliasAnalysis *AA; 129 AssumptionCache *AC; 130 DominatorTree *DT; 131 TargetLibraryInfo *TLI; 132 133 std::string Messages; 134 raw_string_ostream MessagesStr; 135 136 Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA, 137 AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI) 138 : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI), 139 MessagesStr(Messages) {} 140 141 void WriteValues(ArrayRef<const Value *> Vs) { 142 for (const Value *V : Vs) { 143 if (!V) 144 continue; 145 if (isa<Instruction>(V)) { 146 MessagesStr << *V << '\n'; 147 } else { 148 V->printAsOperand(MessagesStr, true, Mod); 149 MessagesStr << '\n'; 150 } 151 } 152 } 153 154 /// A check failed, so printout out the condition and the message. 155 /// 156 /// This provides a nice place to put a breakpoint if you want to see why 157 /// something is not correct. 158 void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } 159 160 /// A check failed (with values to print). 161 /// 162 /// This calls the Message-only version so that the above is easier to set 163 /// a breakpoint on. 164 template <typename T1, typename... Ts> 165 void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) { 166 CheckFailed(Message); 167 WriteValues({V1, Vs...}); 168 } 169 }; 170 } // end anonymous namespace 171 172 // Assert - We know that cond should be true, if not print an error message. 173 #define Assert(C, ...) \ 174 do { \ 175 if (!(C)) { \ 176 CheckFailed(__VA_ARGS__); \ 177 return; \ 178 } \ 179 } while (false) 180 181 void Lint::visitFunction(Function &F) { 182 // This isn't undefined behavior, it's just a little unusual, and it's a 183 // fairly common mistake to neglect to name a function. 184 Assert(F.hasName() || F.hasLocalLinkage(), 185 "Unusual: Unnamed function with non-local linkage", &F); 186 187 // TODO: Check for irreducible control flow. 188 } 189 190 void Lint::visitCallBase(CallBase &I) { 191 Value *Callee = I.getCalledOperand(); 192 193 visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, None, nullptr, 194 MemRef::Callee); 195 196 if (Function *F = dyn_cast<Function>(findValue(Callee, 197 /*OffsetOk=*/false))) { 198 Assert(I.getCallingConv() == F->getCallingConv(), 199 "Undefined behavior: Caller and callee calling convention differ", 200 &I); 201 202 FunctionType *FT = F->getFunctionType(); 203 unsigned NumActualArgs = I.arg_size(); 204 205 Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs 206 : FT->getNumParams() == NumActualArgs, 207 "Undefined behavior: Call argument count mismatches callee " 208 "argument count", 209 &I); 210 211 Assert(FT->getReturnType() == I.getType(), 212 "Undefined behavior: Call return type mismatches " 213 "callee return type", 214 &I); 215 216 // Check argument types (in case the callee was casted) and attributes. 217 // TODO: Verify that caller and callee attributes are compatible. 218 Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); 219 auto AI = I.arg_begin(), AE = I.arg_end(); 220 for (; AI != AE; ++AI) { 221 Value *Actual = *AI; 222 if (PI != PE) { 223 Argument *Formal = &*PI++; 224 Assert(Formal->getType() == Actual->getType(), 225 "Undefined behavior: Call argument type mismatches " 226 "callee parameter type", 227 &I); 228 229 // Check that noalias arguments don't alias other arguments. This is 230 // not fully precise because we don't know the sizes of the dereferenced 231 // memory regions. 232 if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) { 233 AttributeList PAL = I.getAttributes(); 234 unsigned ArgNo = 0; 235 for (auto BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) { 236 // Skip ByVal arguments since they will be memcpy'd to the callee's 237 // stack so we're not really passing the pointer anyway. 238 if (PAL.hasParamAttribute(ArgNo, Attribute::ByVal)) 239 continue; 240 // If both arguments are readonly, they have no dependence. 241 if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo)) 242 continue; 243 if (AI != BI && (*BI)->getType()->isPointerTy()) { 244 AliasResult Result = AA->alias(*AI, *BI); 245 Assert(Result != MustAlias && Result != PartialAlias, 246 "Unusual: noalias argument aliases another argument", &I); 247 } 248 } 249 } 250 251 // Check that an sret argument points to valid memory. 252 if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { 253 Type *Ty = cast<PointerType>(Formal->getType())->getElementType(); 254 visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), 255 DL->getABITypeAlign(Ty), Ty, 256 MemRef::Read | MemRef::Write); 257 } 258 } 259 } 260 } 261 262 if (const auto *CI = dyn_cast<CallInst>(&I)) { 263 if (CI->isTailCall()) { 264 const AttributeList &PAL = CI->getAttributes(); 265 unsigned ArgNo = 0; 266 for (Value *Arg : I.args()) { 267 // Skip ByVal arguments since they will be memcpy'd to the callee's 268 // stack anyway. 269 if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal)) 270 continue; 271 Value *Obj = findValue(Arg, /*OffsetOk=*/true); 272 Assert(!isa<AllocaInst>(Obj), 273 "Undefined behavior: Call with \"tail\" keyword references " 274 "alloca", 275 &I); 276 } 277 } 278 } 279 280 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) 281 switch (II->getIntrinsicID()) { 282 default: 283 break; 284 285 // TODO: Check more intrinsics 286 287 case Intrinsic::memcpy: { 288 MemCpyInst *MCI = cast<MemCpyInst>(&I); 289 // TODO: If the size is known, use it. 290 visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize, 291 MCI->getDestAlign(), nullptr, MemRef::Write); 292 visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize, 293 MCI->getSourceAlign(), nullptr, MemRef::Read); 294 295 // Check that the memcpy arguments don't overlap. The AliasAnalysis API 296 // isn't expressive enough for what we really want to do. Known partial 297 // overlap is not distinguished from the case where nothing is known. 298 auto Size = LocationSize::unknown(); 299 if (const ConstantInt *Len = 300 dyn_cast<ConstantInt>(findValue(MCI->getLength(), 301 /*OffsetOk=*/false))) 302 if (Len->getValue().isIntN(32)) 303 Size = LocationSize::precise(Len->getValue().getZExtValue()); 304 Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != 305 MustAlias, 306 "Undefined behavior: memcpy source and destination overlap", &I); 307 break; 308 } 309 case Intrinsic::memcpy_inline: { 310 MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I); 311 const uint64_t Size = MCII->getLength()->getValue().getLimitedValue(); 312 visitMemoryReference(I, MCII->getDest(), Size, MCII->getDestAlign(), 313 nullptr, MemRef::Write); 314 visitMemoryReference(I, MCII->getSource(), Size, MCII->getSourceAlign(), 315 nullptr, MemRef::Read); 316 317 // Check that the memcpy arguments don't overlap. The AliasAnalysis API 318 // isn't expressive enough for what we really want to do. Known partial 319 // overlap is not distinguished from the case where nothing is known. 320 const LocationSize LS = LocationSize::precise(Size); 321 Assert(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != MustAlias, 322 "Undefined behavior: memcpy source and destination overlap", &I); 323 break; 324 } 325 case Intrinsic::memmove: { 326 MemMoveInst *MMI = cast<MemMoveInst>(&I); 327 // TODO: If the size is known, use it. 328 visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize, 329 MMI->getDestAlign(), nullptr, MemRef::Write); 330 visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize, 331 MMI->getSourceAlign(), nullptr, MemRef::Read); 332 break; 333 } 334 case Intrinsic::memset: { 335 MemSetInst *MSI = cast<MemSetInst>(&I); 336 // TODO: If the size is known, use it. 337 visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize, 338 MSI->getDestAlign(), nullptr, MemRef::Write); 339 break; 340 } 341 342 case Intrinsic::vastart: 343 Assert(I.getParent()->getParent()->isVarArg(), 344 "Undefined behavior: va_start called in a non-varargs function", 345 &I); 346 347 visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize, 348 None, nullptr, MemRef::Read | MemRef::Write); 349 break; 350 case Intrinsic::vacopy: 351 visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize, 352 None, nullptr, MemRef::Write); 353 visitMemoryReference(I, I.getArgOperand(1), MemoryLocation::UnknownSize, 354 None, nullptr, MemRef::Read); 355 break; 356 case Intrinsic::vaend: 357 visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize, 358 None, nullptr, MemRef::Read | MemRef::Write); 359 break; 360 361 case Intrinsic::stackrestore: 362 // Stackrestore doesn't read or write memory, but it sets the 363 // stack pointer, which the compiler may read from or write to 364 // at any time, so check it for both readability and writeability. 365 visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize, 366 None, nullptr, MemRef::Read | MemRef::Write); 367 break; 368 } 369 } 370 371 void Lint::visitReturnInst(ReturnInst &I) { 372 Function *F = I.getParent()->getParent(); 373 Assert(!F->doesNotReturn(), 374 "Unusual: Return statement in function with noreturn attribute", &I); 375 376 if (Value *V = I.getReturnValue()) { 377 Value *Obj = findValue(V, /*OffsetOk=*/true); 378 Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I); 379 } 380 } 381 382 // TODO: Check that the reference is in bounds. 383 // TODO: Check readnone/readonly function attributes. 384 void Lint::visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, 385 MaybeAlign Align, Type *Ty, unsigned Flags) { 386 // If no memory is being referenced, it doesn't matter if the pointer 387 // is valid. 388 if (Size == 0) 389 return; 390 391 Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); 392 Assert(!isa<ConstantPointerNull>(UnderlyingObject), 393 "Undefined behavior: Null pointer dereference", &I); 394 Assert(!isa<UndefValue>(UnderlyingObject), 395 "Undefined behavior: Undef pointer dereference", &I); 396 Assert(!isa<ConstantInt>(UnderlyingObject) || 397 !cast<ConstantInt>(UnderlyingObject)->isMinusOne(), 398 "Unusual: All-ones pointer dereference", &I); 399 Assert(!isa<ConstantInt>(UnderlyingObject) || 400 !cast<ConstantInt>(UnderlyingObject)->isOne(), 401 "Unusual: Address one pointer dereference", &I); 402 403 if (Flags & MemRef::Write) { 404 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) 405 Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory", 406 &I); 407 Assert(!isa<Function>(UnderlyingObject) && 408 !isa<BlockAddress>(UnderlyingObject), 409 "Undefined behavior: Write to text section", &I); 410 } 411 if (Flags & MemRef::Read) { 412 Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body", 413 &I); 414 Assert(!isa<BlockAddress>(UnderlyingObject), 415 "Undefined behavior: Load from block address", &I); 416 } 417 if (Flags & MemRef::Callee) { 418 Assert(!isa<BlockAddress>(UnderlyingObject), 419 "Undefined behavior: Call to block address", &I); 420 } 421 if (Flags & MemRef::Branchee) { 422 Assert(!isa<Constant>(UnderlyingObject) || 423 isa<BlockAddress>(UnderlyingObject), 424 "Undefined behavior: Branch to non-blockaddress", &I); 425 } 426 427 // Check for buffer overflows and misalignment. 428 // Only handles memory references that read/write something simple like an 429 // alloca instruction or a global variable. 430 int64_t Offset = 0; 431 if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) { 432 // OK, so the access is to a constant offset from Ptr. Check that Ptr is 433 // something we can handle and if so extract the size of this base object 434 // along with its alignment. 435 uint64_t BaseSize = MemoryLocation::UnknownSize; 436 MaybeAlign BaseAlign; 437 438 if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { 439 Type *ATy = AI->getAllocatedType(); 440 if (!AI->isArrayAllocation() && ATy->isSized()) 441 BaseSize = DL->getTypeAllocSize(ATy); 442 BaseAlign = AI->getAlign(); 443 } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { 444 // If the global may be defined differently in another compilation unit 445 // then don't warn about funky memory accesses. 446 if (GV->hasDefinitiveInitializer()) { 447 Type *GTy = GV->getValueType(); 448 if (GTy->isSized()) 449 BaseSize = DL->getTypeAllocSize(GTy); 450 BaseAlign = GV->getAlign(); 451 if (!BaseAlign && GTy->isSized()) 452 BaseAlign = DL->getABITypeAlign(GTy); 453 } 454 } 455 456 // Accesses from before the start or after the end of the object are not 457 // defined. 458 Assert(Size == MemoryLocation::UnknownSize || 459 BaseSize == MemoryLocation::UnknownSize || 460 (Offset >= 0 && Offset + Size <= BaseSize), 461 "Undefined behavior: Buffer overflow", &I); 462 463 // Accesses that say that the memory is more aligned than it is are not 464 // defined. 465 if (!Align && Ty && Ty->isSized()) 466 Align = DL->getABITypeAlign(Ty); 467 if (BaseAlign && Align) 468 Assert(*Align <= commonAlignment(*BaseAlign, Offset), 469 "Undefined behavior: Memory reference address is misaligned", &I); 470 } 471 } 472 473 void Lint::visitLoadInst(LoadInst &I) { 474 visitMemoryReference(I, I.getPointerOperand(), 475 DL->getTypeStoreSize(I.getType()), I.getAlign(), 476 I.getType(), MemRef::Read); 477 } 478 479 void Lint::visitStoreInst(StoreInst &I) { 480 visitMemoryReference(I, I.getPointerOperand(), 481 DL->getTypeStoreSize(I.getOperand(0)->getType()), 482 I.getAlign(), I.getOperand(0)->getType(), MemRef::Write); 483 } 484 485 void Lint::visitXor(BinaryOperator &I) { 486 Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), 487 "Undefined result: xor(undef, undef)", &I); 488 } 489 490 void Lint::visitSub(BinaryOperator &I) { 491 Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), 492 "Undefined result: sub(undef, undef)", &I); 493 } 494 495 void Lint::visitLShr(BinaryOperator &I) { 496 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1), 497 /*OffsetOk=*/false))) 498 Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 499 "Undefined result: Shift count out of range", &I); 500 } 501 502 void Lint::visitAShr(BinaryOperator &I) { 503 if (ConstantInt *CI = 504 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 505 Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 506 "Undefined result: Shift count out of range", &I); 507 } 508 509 void Lint::visitShl(BinaryOperator &I) { 510 if (ConstantInt *CI = 511 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 512 Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 513 "Undefined result: Shift count out of range", &I); 514 } 515 516 static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, 517 AssumptionCache *AC) { 518 // Assume undef could be zero. 519 if (isa<UndefValue>(V)) 520 return true; 521 522 VectorType *VecTy = dyn_cast<VectorType>(V->getType()); 523 if (!VecTy) { 524 KnownBits Known = 525 computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT); 526 return Known.isZero(); 527 } 528 529 // Per-component check doesn't work with zeroinitializer 530 Constant *C = dyn_cast<Constant>(V); 531 if (!C) 532 return false; 533 534 if (C->isZeroValue()) 535 return true; 536 537 // For a vector, KnownZero will only be true if all values are zero, so check 538 // this per component 539 for (unsigned I = 0, N = cast<FixedVectorType>(VecTy)->getNumElements(); 540 I != N; ++I) { 541 Constant *Elem = C->getAggregateElement(I); 542 if (isa<UndefValue>(Elem)) 543 return true; 544 545 KnownBits Known = computeKnownBits(Elem, DL); 546 if (Known.isZero()) 547 return true; 548 } 549 550 return false; 551 } 552 553 void Lint::visitSDiv(BinaryOperator &I) { 554 Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 555 "Undefined behavior: Division by zero", &I); 556 } 557 558 void Lint::visitUDiv(BinaryOperator &I) { 559 Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 560 "Undefined behavior: Division by zero", &I); 561 } 562 563 void Lint::visitSRem(BinaryOperator &I) { 564 Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 565 "Undefined behavior: Division by zero", &I); 566 } 567 568 void Lint::visitURem(BinaryOperator &I) { 569 Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 570 "Undefined behavior: Division by zero", &I); 571 } 572 573 void Lint::visitAllocaInst(AllocaInst &I) { 574 if (isa<ConstantInt>(I.getArraySize())) 575 // This isn't undefined behavior, it's just an obvious pessimization. 576 Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), 577 "Pessimization: Static alloca outside of entry block", &I); 578 579 // TODO: Check for an unusual size (MSB set?) 580 } 581 582 void Lint::visitVAArgInst(VAArgInst &I) { 583 visitMemoryReference(I, I.getOperand(0), MemoryLocation::UnknownSize, None, 584 nullptr, MemRef::Read | MemRef::Write); 585 } 586 587 void Lint::visitIndirectBrInst(IndirectBrInst &I) { 588 visitMemoryReference(I, I.getAddress(), MemoryLocation::UnknownSize, None, 589 nullptr, MemRef::Branchee); 590 591 Assert(I.getNumDestinations() != 0, 592 "Undefined behavior: indirectbr with no destinations", &I); 593 } 594 595 void Lint::visitExtractElementInst(ExtractElementInst &I) { 596 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), 597 /*OffsetOk=*/false))) 598 Assert( 599 CI->getValue().ult( 600 cast<FixedVectorType>(I.getVectorOperandType())->getNumElements()), 601 "Undefined result: extractelement index out of range", &I); 602 } 603 604 void Lint::visitInsertElementInst(InsertElementInst &I) { 605 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2), 606 /*OffsetOk=*/false))) 607 Assert(CI->getValue().ult( 608 cast<FixedVectorType>(I.getType())->getNumElements()), 609 "Undefined result: insertelement index out of range", &I); 610 } 611 612 void Lint::visitUnreachableInst(UnreachableInst &I) { 613 // This isn't undefined behavior, it's merely suspicious. 614 Assert(&I == &I.getParent()->front() || 615 std::prev(I.getIterator())->mayHaveSideEffects(), 616 "Unusual: unreachable immediately preceded by instruction without " 617 "side effects", 618 &I); 619 } 620 621 /// findValue - Look through bitcasts and simple memory reference patterns 622 /// to identify an equivalent, but more informative, value. If OffsetOk 623 /// is true, look through getelementptrs with non-zero offsets too. 624 /// 625 /// Most analysis passes don't require this logic, because instcombine 626 /// will simplify most of these kinds of things away. But it's a goal of 627 /// this Lint pass to be useful even on non-optimized IR. 628 Value *Lint::findValue(Value *V, bool OffsetOk) const { 629 SmallPtrSet<Value *, 4> Visited; 630 return findValueImpl(V, OffsetOk, Visited); 631 } 632 633 /// findValueImpl - Implementation helper for findValue. 634 Value *Lint::findValueImpl(Value *V, bool OffsetOk, 635 SmallPtrSetImpl<Value *> &Visited) const { 636 // Detect self-referential values. 637 if (!Visited.insert(V).second) 638 return UndefValue::get(V->getType()); 639 640 // TODO: Look through sext or zext cast, when the result is known to 641 // be interpreted as signed or unsigned, respectively. 642 // TODO: Look through eliminable cast pairs. 643 // TODO: Look through calls with unique return values. 644 // TODO: Look through vector insert/extract/shuffle. 645 V = OffsetOk ? getUnderlyingObject(V) : V->stripPointerCasts(); 646 if (LoadInst *L = dyn_cast<LoadInst>(V)) { 647 BasicBlock::iterator BBI = L->getIterator(); 648 BasicBlock *BB = L->getParent(); 649 SmallPtrSet<BasicBlock *, 4> VisitedBlocks; 650 for (;;) { 651 if (!VisitedBlocks.insert(BB).second) 652 break; 653 if (Value *U = 654 FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) 655 return findValueImpl(U, OffsetOk, Visited); 656 if (BBI != BB->begin()) 657 break; 658 BB = BB->getUniquePredecessor(); 659 if (!BB) 660 break; 661 BBI = BB->end(); 662 } 663 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 664 if (Value *W = PN->hasConstantValue()) 665 return findValueImpl(W, OffsetOk, Visited); 666 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 667 if (CI->isNoopCast(*DL)) 668 return findValueImpl(CI->getOperand(0), OffsetOk, Visited); 669 } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { 670 if (Value *W = 671 FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) 672 if (W != V) 673 return findValueImpl(W, OffsetOk, Visited); 674 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { 675 // Same as above, but for ConstantExpr instead of Instruction. 676 if (Instruction::isCast(CE->getOpcode())) { 677 if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), 678 CE->getOperand(0)->getType(), CE->getType(), 679 *DL)) 680 return findValueImpl(CE->getOperand(0), OffsetOk, Visited); 681 } else if (CE->getOpcode() == Instruction::ExtractValue) { 682 ArrayRef<unsigned> Indices = CE->getIndices(); 683 if (Value *W = FindInsertedValue(CE->getOperand(0), Indices)) 684 if (W != V) 685 return findValueImpl(W, OffsetOk, Visited); 686 } 687 } 688 689 // As a last resort, try SimplifyInstruction or constant folding. 690 if (Instruction *Inst = dyn_cast<Instruction>(V)) { 691 if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC})) 692 return findValueImpl(W, OffsetOk, Visited); 693 } else if (auto *C = dyn_cast<Constant>(V)) { 694 Value *W = ConstantFoldConstant(C, *DL, TLI); 695 if (W != V) 696 return findValueImpl(W, OffsetOk, Visited); 697 } 698 699 return V; 700 } 701 702 PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) { 703 auto *Mod = F.getParent(); 704 auto *DL = &F.getParent()->getDataLayout(); 705 auto *AA = &AM.getResult<AAManager>(F); 706 auto *AC = &AM.getResult<AssumptionAnalysis>(F); 707 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); 708 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F); 709 Lint L(Mod, DL, AA, AC, DT, TLI); 710 L.visit(F); 711 dbgs() << L.MessagesStr.str(); 712 return PreservedAnalyses::all(); 713 } 714 715 class LintLegacyPass : public FunctionPass { 716 public: 717 static char ID; // Pass identification, replacement for typeid 718 LintLegacyPass() : FunctionPass(ID) { 719 initializeLintLegacyPassPass(*PassRegistry::getPassRegistry()); 720 } 721 722 bool runOnFunction(Function &F) override; 723 724 void getAnalysisUsage(AnalysisUsage &AU) const override { 725 AU.setPreservesAll(); 726 AU.addRequired<AAResultsWrapperPass>(); 727 AU.addRequired<AssumptionCacheTracker>(); 728 AU.addRequired<TargetLibraryInfoWrapperPass>(); 729 AU.addRequired<DominatorTreeWrapperPass>(); 730 } 731 void print(raw_ostream &O, const Module *M) const override {} 732 }; 733 734 char LintLegacyPass::ID = 0; 735 INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", 736 false, true) 737 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 738 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 739 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 740 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 741 INITIALIZE_PASS_END(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", 742 false, true) 743 744 bool LintLegacyPass::runOnFunction(Function &F) { 745 auto *Mod = F.getParent(); 746 auto *DL = &F.getParent()->getDataLayout(); 747 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 748 auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 749 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 750 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 751 Lint L(Mod, DL, AA, AC, DT, TLI); 752 L.visit(F); 753 dbgs() << L.MessagesStr.str(); 754 return false; 755 } 756 757 //===----------------------------------------------------------------------===// 758 // Implement the public interfaces to this file... 759 //===----------------------------------------------------------------------===// 760 761 FunctionPass *llvm::createLintLegacyPassPass() { return new LintLegacyPass(); } 762 763 /// lintFunction - Check a function for errors, printing messages on stderr. 764 /// 765 void llvm::lintFunction(const Function &f) { 766 Function &F = const_cast<Function &>(f); 767 assert(!F.isDeclaration() && "Cannot lint external functions"); 768 769 legacy::FunctionPassManager FPM(F.getParent()); 770 auto *V = new LintLegacyPass(); 771 FPM.add(V); 772 FPM.run(F); 773 } 774 775 /// lintModule - Check a module for errors, printing messages on stderr. 776 /// 777 void llvm::lintModule(const Module &M) { 778 legacy::PassManager PM; 779 auto *V = new LintLegacyPass(); 780 PM.add(V); 781 PM.run(const_cast<Module &>(M)); 782 } 783