1 //===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements extra semantic analysis beyond what is enforced 11 // by the C type system. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Sema/Sema.h" 16 #include "clang/Sema/SemaInternal.h" 17 #include "clang/Sema/ScopeInfo.h" 18 #include "clang/Analysis/Analyses/FormatString.h" 19 #include "clang/AST/ASTContext.h" 20 #include "clang/AST/CharUnits.h" 21 #include "clang/AST/DeclCXX.h" 22 #include "clang/AST/DeclObjC.h" 23 #include "clang/AST/ExprCXX.h" 24 #include "clang/AST/ExprObjC.h" 25 #include "clang/AST/DeclObjC.h" 26 #include "clang/AST/StmtCXX.h" 27 #include "clang/AST/StmtObjC.h" 28 #include "clang/Lex/Preprocessor.h" 29 #include "llvm/ADT/BitVector.h" 30 #include "llvm/ADT/STLExtras.h" 31 #include "llvm/Support/raw_ostream.h" 32 #include "clang/Basic/TargetBuiltins.h" 33 #include "clang/Basic/TargetInfo.h" 34 #include "clang/Basic/ConvertUTF.h" 35 #include <limits> 36 using namespace clang; 37 using namespace sema; 38 39 SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, 40 unsigned ByteNo) const { 41 return SL->getLocationOfByte(ByteNo, PP.getSourceManager(), 42 PP.getLangOptions(), PP.getTargetInfo()); 43 } 44 45 46 /// CheckablePrintfAttr - does a function call have a "printf" attribute 47 /// and arguments that merit checking? 
48 bool Sema::CheckablePrintfAttr(const FormatAttr *Format, CallExpr *TheCall) { 49 if (Format->getType() == "printf") return true; 50 if (Format->getType() == "printf0") { 51 // printf0 allows null "format" string; if so don't check format/args 52 unsigned format_idx = Format->getFormatIdx() - 1; 53 // Does the index refer to the implicit object argument? 54 if (isa<CXXMemberCallExpr>(TheCall)) { 55 if (format_idx == 0) 56 return false; 57 --format_idx; 58 } 59 if (format_idx < TheCall->getNumArgs()) { 60 Expr *Format = TheCall->getArg(format_idx)->IgnoreParenCasts(); 61 if (!Format->isNullPointerConstant(Context, 62 Expr::NPC_ValueDependentIsNull)) 63 return true; 64 } 65 } 66 return false; 67 } 68 69 /// Checks that a call expression's argument count is the desired number. 70 /// This is useful when doing custom type-checking. Returns true on error. 71 static bool checkArgCount(Sema &S, CallExpr *call, unsigned desiredArgCount) { 72 unsigned argCount = call->getNumArgs(); 73 if (argCount == desiredArgCount) return false; 74 75 if (argCount < desiredArgCount) 76 return S.Diag(call->getLocEnd(), diag::err_typecheck_call_too_few_args) 77 << 0 /*function call*/ << desiredArgCount << argCount 78 << call->getSourceRange(); 79 80 // Highlight all the excess arguments. 81 SourceRange range(call->getArg(desiredArgCount)->getLocStart(), 82 call->getArg(argCount - 1)->getLocEnd()); 83 84 return S.Diag(range.getBegin(), diag::err_typecheck_call_too_many_args) 85 << 0 /*function call*/ << desiredArgCount << argCount 86 << call->getArg(1)->getSourceRange(); 87 } 88 89 ExprResult 90 Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 91 ExprResult TheCallResult(Owned(TheCall)); 92 93 // Find out if any arguments are required to be integer constant expressions. 
94 unsigned ICEArguments = 0; 95 ASTContext::GetBuiltinTypeError Error; 96 Context.GetBuiltinType(BuiltinID, Error, &ICEArguments); 97 if (Error != ASTContext::GE_None) 98 ICEArguments = 0; // Don't diagnose previously diagnosed errors. 99 100 // If any arguments are required to be ICE's, check and diagnose. 101 for (unsigned ArgNo = 0; ICEArguments != 0; ++ArgNo) { 102 // Skip arguments not required to be ICE's. 103 if ((ICEArguments & (1 << ArgNo)) == 0) continue; 104 105 llvm::APSInt Result; 106 if (SemaBuiltinConstantArg(TheCall, ArgNo, Result)) 107 return true; 108 ICEArguments &= ~(1 << ArgNo); 109 } 110 111 switch (BuiltinID) { 112 case Builtin::BI__builtin___CFStringMakeConstantString: 113 assert(TheCall->getNumArgs() == 1 && 114 "Wrong # arguments to builtin CFStringMakeConstantString"); 115 if (CheckObjCString(TheCall->getArg(0))) 116 return ExprError(); 117 break; 118 case Builtin::BI__builtin_stdarg_start: 119 case Builtin::BI__builtin_va_start: 120 if (SemaBuiltinVAStart(TheCall)) 121 return ExprError(); 122 break; 123 case Builtin::BI__builtin_isgreater: 124 case Builtin::BI__builtin_isgreaterequal: 125 case Builtin::BI__builtin_isless: 126 case Builtin::BI__builtin_islessequal: 127 case Builtin::BI__builtin_islessgreater: 128 case Builtin::BI__builtin_isunordered: 129 if (SemaBuiltinUnorderedCompare(TheCall)) 130 return ExprError(); 131 break; 132 case Builtin::BI__builtin_fpclassify: 133 if (SemaBuiltinFPClassification(TheCall, 6)) 134 return ExprError(); 135 break; 136 case Builtin::BI__builtin_isfinite: 137 case Builtin::BI__builtin_isinf: 138 case Builtin::BI__builtin_isinf_sign: 139 case Builtin::BI__builtin_isnan: 140 case Builtin::BI__builtin_isnormal: 141 if (SemaBuiltinFPClassification(TheCall, 1)) 142 return ExprError(); 143 break; 144 case Builtin::BI__builtin_shufflevector: 145 return SemaBuiltinShuffleVector(TheCall); 146 // TheCall will be freed by the smart pointer here, but that's fine, since 147 // SemaBuiltinShuffleVector guts it, 
but then doesn't release it. 148 case Builtin::BI__builtin_prefetch: 149 if (SemaBuiltinPrefetch(TheCall)) 150 return ExprError(); 151 break; 152 case Builtin::BI__builtin_object_size: 153 if (SemaBuiltinObjectSize(TheCall)) 154 return ExprError(); 155 break; 156 case Builtin::BI__builtin_longjmp: 157 if (SemaBuiltinLongjmp(TheCall)) 158 return ExprError(); 159 break; 160 161 case Builtin::BI__builtin_classify_type: 162 if (checkArgCount(*this, TheCall, 1)) return true; 163 TheCall->setType(Context.IntTy); 164 break; 165 case Builtin::BI__builtin_constant_p: 166 if (checkArgCount(*this, TheCall, 1)) return true; 167 TheCall->setType(Context.IntTy); 168 break; 169 case Builtin::BI__sync_fetch_and_add: 170 case Builtin::BI__sync_fetch_and_sub: 171 case Builtin::BI__sync_fetch_and_or: 172 case Builtin::BI__sync_fetch_and_and: 173 case Builtin::BI__sync_fetch_and_xor: 174 case Builtin::BI__sync_add_and_fetch: 175 case Builtin::BI__sync_sub_and_fetch: 176 case Builtin::BI__sync_and_and_fetch: 177 case Builtin::BI__sync_or_and_fetch: 178 case Builtin::BI__sync_xor_and_fetch: 179 case Builtin::BI__sync_val_compare_and_swap: 180 case Builtin::BI__sync_bool_compare_and_swap: 181 case Builtin::BI__sync_lock_test_and_set: 182 case Builtin::BI__sync_lock_release: 183 case Builtin::BI__sync_swap: 184 return SemaBuiltinAtomicOverloaded(move(TheCallResult)); 185 } 186 187 // Since the target specific builtins for each arch overlap, only check those 188 // of the arch we are compiling for. 189 if (BuiltinID >= Builtin::FirstTSBuiltin) { 190 switch (Context.Target.getTriple().getArch()) { 191 case llvm::Triple::arm: 192 case llvm::Triple::thumb: 193 if (CheckARMBuiltinFunctionCall(BuiltinID, TheCall)) 194 return ExprError(); 195 break; 196 default: 197 break; 198 } 199 } 200 201 return move(TheCallResult); 202 } 203 204 // Get the valid immediate range for the specified NEON type code. 
205 static unsigned RFT(unsigned t, bool shift = false) { 206 bool quad = t & 0x10; 207 208 switch (t & 0x7) { 209 case 0: // i8 210 return shift ? 7 : (8 << (int)quad) - 1; 211 case 1: // i16 212 return shift ? 15 : (4 << (int)quad) - 1; 213 case 2: // i32 214 return shift ? 31 : (2 << (int)quad) - 1; 215 case 3: // i64 216 return shift ? 63 : (1 << (int)quad) - 1; 217 case 4: // f32 218 assert(!shift && "cannot shift float types!"); 219 return (2 << (int)quad) - 1; 220 case 5: // poly8 221 return shift ? 7 : (8 << (int)quad) - 1; 222 case 6: // poly16 223 return shift ? 15 : (4 << (int)quad) - 1; 224 case 7: // float16 225 assert(!shift && "cannot shift float types!"); 226 return (4 << (int)quad) - 1; 227 } 228 return 0; 229 } 230 231 bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 232 llvm::APSInt Result; 233 234 unsigned mask = 0; 235 unsigned TV = 0; 236 switch (BuiltinID) { 237 #define GET_NEON_OVERLOAD_CHECK 238 #include "clang/Basic/arm_neon.inc" 239 #undef GET_NEON_OVERLOAD_CHECK 240 } 241 242 // For NEON intrinsics which are overloaded on vector element type, validate 243 // the immediate which specifies which variant to emit. 244 if (mask) { 245 unsigned ArgNo = TheCall->getNumArgs()-1; 246 if (SemaBuiltinConstantArg(TheCall, ArgNo, Result)) 247 return true; 248 249 TV = Result.getLimitedValue(32); 250 if ((TV > 31) || (mask & (1 << TV)) == 0) 251 return Diag(TheCall->getLocStart(), diag::err_invalid_neon_type_code) 252 << TheCall->getArg(ArgNo)->getSourceRange(); 253 } 254 255 // For NEON intrinsics which take an immediate value as part of the 256 // instruction, range check them here. 
257 unsigned i = 0, l = 0, u = 0; 258 switch (BuiltinID) { 259 default: return false; 260 case ARM::BI__builtin_arm_ssat: i = 1; l = 1; u = 31; break; 261 case ARM::BI__builtin_arm_usat: i = 1; u = 31; break; 262 case ARM::BI__builtin_arm_vcvtr_f: 263 case ARM::BI__builtin_arm_vcvtr_d: i = 1; u = 1; break; 264 #define GET_NEON_IMMEDIATE_CHECK 265 #include "clang/Basic/arm_neon.inc" 266 #undef GET_NEON_IMMEDIATE_CHECK 267 }; 268 269 // Check that the immediate argument is actually a constant. 270 if (SemaBuiltinConstantArg(TheCall, i, Result)) 271 return true; 272 273 // Range check against the upper/lower values for this isntruction. 274 unsigned Val = Result.getZExtValue(); 275 if (Val < l || Val > (u + l)) 276 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 277 << l << u+l << TheCall->getArg(i)->getSourceRange(); 278 279 // FIXME: VFP Intrinsics should error if VFP not present. 280 return false; 281 } 282 283 /// CheckFunctionCall - Check a direct function call for various correctness 284 /// and safety properties not strictly enforced by the C type system. 285 bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 286 // Get the IdentifierInfo* for the called function. 287 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 288 289 // None of the checks below are needed for functions that don't have 290 // simple names (e.g., C++ conversion functions). 291 if (!FnInfo) 292 return false; 293 294 // FIXME: This mechanism should be abstracted to be less fragile and 295 // more efficient. For example, just map function ids to custom 296 // handlers. 297 298 // Printf and scanf checking. 
299 for (specific_attr_iterator<FormatAttr> 300 i = FDecl->specific_attr_begin<FormatAttr>(), 301 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) { 302 303 const FormatAttr *Format = *i; 304 const bool b = Format->getType() == "scanf"; 305 if (b || CheckablePrintfAttr(Format, TheCall)) { 306 bool HasVAListArg = Format->getFirstArg() == 0; 307 CheckPrintfScanfArguments(TheCall, HasVAListArg, 308 Format->getFormatIdx() - 1, 309 HasVAListArg ? 0 : Format->getFirstArg() - 1, 310 !b); 311 } 312 } 313 314 for (specific_attr_iterator<NonNullAttr> 315 i = FDecl->specific_attr_begin<NonNullAttr>(), 316 e = FDecl->specific_attr_end<NonNullAttr>(); i != e; ++i) { 317 CheckNonNullArguments(*i, TheCall->getArgs(), 318 TheCall->getCallee()->getLocStart()); 319 } 320 321 return false; 322 } 323 324 bool Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall) { 325 // Printf checking. 326 const FormatAttr *Format = NDecl->getAttr<FormatAttr>(); 327 if (!Format) 328 return false; 329 330 const VarDecl *V = dyn_cast<VarDecl>(NDecl); 331 if (!V) 332 return false; 333 334 QualType Ty = V->getType(); 335 if (!Ty->isBlockPointerType()) 336 return false; 337 338 const bool b = Format->getType() == "scanf"; 339 if (!b && !CheckablePrintfAttr(Format, TheCall)) 340 return false; 341 342 bool HasVAListArg = Format->getFirstArg() == 0; 343 CheckPrintfScanfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 344 HasVAListArg ? 0 : Format->getFirstArg() - 1, !b); 345 346 return false; 347 } 348 349 /// SemaBuiltinAtomicOverloaded - We have a call to a function like 350 /// __sync_fetch_and_add, which is an overloaded function based on the pointer 351 /// type of its first argument. The main ActOnCallExpr routines have already 352 /// promoted the types of arguments because all of these calls are prototyped as 353 /// void(...). 
354 /// 355 /// This function goes through and does final semantic checking for these 356 /// builtins, 357 ExprResult 358 Sema::SemaBuiltinAtomicOverloaded(ExprResult TheCallResult) { 359 CallExpr *TheCall = (CallExpr *)TheCallResult.get(); 360 DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); 361 FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); 362 363 // Ensure that we have at least one argument to do type inference from. 364 if (TheCall->getNumArgs() < 1) { 365 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args_at_least) 366 << 0 << 1 << TheCall->getNumArgs() 367 << TheCall->getCallee()->getSourceRange(); 368 return ExprError(); 369 } 370 371 // Inspect the first argument of the atomic builtin. This should always be 372 // a pointer type, whose element is an integral scalar or pointer type. 373 // Because it is a pointer type, we don't have to worry about any implicit 374 // casts here. 375 // FIXME: We don't allow floating point scalars as input. 376 Expr *FirstArg = TheCall->getArg(0); 377 if (!FirstArg->getType()->isPointerType()) { 378 Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer) 379 << FirstArg->getType() << FirstArg->getSourceRange(); 380 return ExprError(); 381 } 382 383 QualType ValType = 384 FirstArg->getType()->getAs<PointerType>()->getPointeeType(); 385 if (!ValType->isIntegerType() && !ValType->isAnyPointerType() && 386 !ValType->isBlockPointerType()) { 387 Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer_intptr) 388 << FirstArg->getType() << FirstArg->getSourceRange(); 389 return ExprError(); 390 } 391 392 // The majority of builtins return a value, but a few have special return 393 // types, so allow them to override appropriately below. 394 QualType ResultType = ValType; 395 396 // We need to figure out which concrete builtin this maps onto. For example, 397 // __sync_fetch_and_add with a 2 byte object turns into 398 // __sync_fetch_and_add_2. 
399 #define BUILTIN_ROW(x) \ 400 { Builtin::BI##x##_1, Builtin::BI##x##_2, Builtin::BI##x##_4, \ 401 Builtin::BI##x##_8, Builtin::BI##x##_16 } 402 403 static const unsigned BuiltinIndices[][5] = { 404 BUILTIN_ROW(__sync_fetch_and_add), 405 BUILTIN_ROW(__sync_fetch_and_sub), 406 BUILTIN_ROW(__sync_fetch_and_or), 407 BUILTIN_ROW(__sync_fetch_and_and), 408 BUILTIN_ROW(__sync_fetch_and_xor), 409 410 BUILTIN_ROW(__sync_add_and_fetch), 411 BUILTIN_ROW(__sync_sub_and_fetch), 412 BUILTIN_ROW(__sync_and_and_fetch), 413 BUILTIN_ROW(__sync_or_and_fetch), 414 BUILTIN_ROW(__sync_xor_and_fetch), 415 416 BUILTIN_ROW(__sync_val_compare_and_swap), 417 BUILTIN_ROW(__sync_bool_compare_and_swap), 418 BUILTIN_ROW(__sync_lock_test_and_set), 419 BUILTIN_ROW(__sync_lock_release), 420 BUILTIN_ROW(__sync_swap) 421 }; 422 #undef BUILTIN_ROW 423 424 // Determine the index of the size. 425 unsigned SizeIndex; 426 switch (Context.getTypeSizeInChars(ValType).getQuantity()) { 427 case 1: SizeIndex = 0; break; 428 case 2: SizeIndex = 1; break; 429 case 4: SizeIndex = 2; break; 430 case 8: SizeIndex = 3; break; 431 case 16: SizeIndex = 4; break; 432 default: 433 Diag(DRE->getLocStart(), diag::err_atomic_builtin_pointer_size) 434 << FirstArg->getType() << FirstArg->getSourceRange(); 435 return ExprError(); 436 } 437 438 // Each of these builtins has one pointer argument, followed by some number of 439 // values (0, 1 or 2) followed by a potentially empty varags list of stuff 440 // that we ignore. Find out which row of BuiltinIndices to read from as well 441 // as the number of fixed args. 
442 unsigned BuiltinID = FDecl->getBuiltinID(); 443 unsigned BuiltinIndex, NumFixed = 1; 444 switch (BuiltinID) { 445 default: assert(0 && "Unknown overloaded atomic builtin!"); 446 case Builtin::BI__sync_fetch_and_add: BuiltinIndex = 0; break; 447 case Builtin::BI__sync_fetch_and_sub: BuiltinIndex = 1; break; 448 case Builtin::BI__sync_fetch_and_or: BuiltinIndex = 2; break; 449 case Builtin::BI__sync_fetch_and_and: BuiltinIndex = 3; break; 450 case Builtin::BI__sync_fetch_and_xor: BuiltinIndex = 4; break; 451 452 case Builtin::BI__sync_add_and_fetch: BuiltinIndex = 5; break; 453 case Builtin::BI__sync_sub_and_fetch: BuiltinIndex = 6; break; 454 case Builtin::BI__sync_and_and_fetch: BuiltinIndex = 7; break; 455 case Builtin::BI__sync_or_and_fetch: BuiltinIndex = 8; break; 456 case Builtin::BI__sync_xor_and_fetch: BuiltinIndex = 9; break; 457 458 case Builtin::BI__sync_val_compare_and_swap: 459 BuiltinIndex = 10; 460 NumFixed = 2; 461 break; 462 case Builtin::BI__sync_bool_compare_and_swap: 463 BuiltinIndex = 11; 464 NumFixed = 2; 465 ResultType = Context.BoolTy; 466 break; 467 case Builtin::BI__sync_lock_test_and_set: BuiltinIndex = 12; break; 468 case Builtin::BI__sync_lock_release: 469 BuiltinIndex = 13; 470 NumFixed = 0; 471 ResultType = Context.VoidTy; 472 break; 473 case Builtin::BI__sync_swap: BuiltinIndex = 14; break; 474 } 475 476 // Now that we know how many fixed arguments we expect, first check that we 477 // have at least that many. 478 if (TheCall->getNumArgs() < 1+NumFixed) { 479 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args_at_least) 480 << 0 << 1+NumFixed << TheCall->getNumArgs() 481 << TheCall->getCallee()->getSourceRange(); 482 return ExprError(); 483 } 484 485 // Get the decl for the concrete builtin from this, we can tell what the 486 // concrete integer type we should convert to is. 
487 unsigned NewBuiltinID = BuiltinIndices[BuiltinIndex][SizeIndex]; 488 const char *NewBuiltinName = Context.BuiltinInfo.GetName(NewBuiltinID); 489 IdentifierInfo *NewBuiltinII = PP.getIdentifierInfo(NewBuiltinName); 490 FunctionDecl *NewBuiltinDecl = 491 cast<FunctionDecl>(LazilyCreateBuiltin(NewBuiltinII, NewBuiltinID, 492 TUScope, false, DRE->getLocStart())); 493 494 // The first argument --- the pointer --- has a fixed type; we 495 // deduce the types of the rest of the arguments accordingly. Walk 496 // the remaining arguments, converting them to the deduced value type. 497 for (unsigned i = 0; i != NumFixed; ++i) { 498 ExprResult Arg = TheCall->getArg(i+1); 499 500 // If the argument is an implicit cast, then there was a promotion due to 501 // "...", just remove it now. 502 if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg.get())) { 503 Arg = ICE->getSubExpr(); 504 ICE->setSubExpr(0); 505 TheCall->setArg(i+1, Arg.get()); 506 } 507 508 // GCC does an implicit conversion to the pointer or integer ValType. This 509 // can fail in some cases (1i -> int**), check for this error case now. 510 CastKind Kind = CK_Invalid; 511 ExprValueKind VK = VK_RValue; 512 CXXCastPath BasePath; 513 Arg = CheckCastTypes(Arg.get()->getSourceRange(), ValType, Arg.take(), Kind, VK, BasePath); 514 if (Arg.isInvalid()) 515 return ExprError(); 516 517 // Okay, we have something that *can* be converted to the right type. Check 518 // to see if there is a potentially weird extension going on here. This can 519 // happen when you do an atomic operation on something like an char* and 520 // pass in 42. The 42 gets converted to char. This is even more strange 521 // for things like 45.123 -> char, etc. 522 // FIXME: Do this check. 523 Arg = ImpCastExprToType(Arg.take(), ValType, Kind, VK, &BasePath); 524 TheCall->setArg(i+1, Arg.get()); 525 } 526 527 // Switch the DeclRefExpr to refer to the new decl. 
528 DRE->setDecl(NewBuiltinDecl); 529 DRE->setType(NewBuiltinDecl->getType()); 530 531 // Set the callee in the CallExpr. 532 // FIXME: This leaks the original parens and implicit casts. 533 ExprResult PromotedCall = UsualUnaryConversions(DRE); 534 if (PromotedCall.isInvalid()) 535 return ExprError(); 536 TheCall->setCallee(PromotedCall.take()); 537 538 // Change the result type of the call to match the original value type. This 539 // is arbitrary, but the codegen for these builtins ins design to handle it 540 // gracefully. 541 TheCall->setType(ResultType); 542 543 return move(TheCallResult); 544 } 545 546 547 /// CheckObjCString - Checks that the argument to the builtin 548 /// CFString constructor is correct 549 /// Note: It might also make sense to do the UTF-16 conversion here (would 550 /// simplify the backend). 551 bool Sema::CheckObjCString(Expr *Arg) { 552 Arg = Arg->IgnoreParenCasts(); 553 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 554 555 if (!Literal || Literal->isWide()) { 556 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 557 << Arg->getSourceRange(); 558 return true; 559 } 560 561 if (Literal->containsNonAsciiOrNull()) { 562 llvm::StringRef String = Literal->getString(); 563 unsigned NumBytes = String.size(); 564 llvm::SmallVector<UTF16, 128> ToBuf(NumBytes); 565 const UTF8 *FromPtr = (UTF8 *)String.data(); 566 UTF16 *ToPtr = &ToBuf[0]; 567 568 ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, 569 &ToPtr, ToPtr + NumBytes, 570 strictConversion); 571 // Check for conversion failure. 572 if (Result != conversionOK) 573 Diag(Arg->getLocStart(), 574 diag::warn_cfstring_truncated) << Arg->getSourceRange(); 575 } 576 return false; 577 } 578 579 /// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 580 /// Emit an error and return true on failure, return false on success. 
581 bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 582 Expr *Fn = TheCall->getCallee(); 583 if (TheCall->getNumArgs() > 2) { 584 Diag(TheCall->getArg(2)->getLocStart(), 585 diag::err_typecheck_call_too_many_args) 586 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 587 << Fn->getSourceRange() 588 << SourceRange(TheCall->getArg(2)->getLocStart(), 589 (*(TheCall->arg_end()-1))->getLocEnd()); 590 return true; 591 } 592 593 if (TheCall->getNumArgs() < 2) { 594 return Diag(TheCall->getLocEnd(), 595 diag::err_typecheck_call_too_few_args_at_least) 596 << 0 /*function call*/ << 2 << TheCall->getNumArgs(); 597 } 598 599 // Determine whether the current function is variadic or not. 600 BlockScopeInfo *CurBlock = getCurBlock(); 601 bool isVariadic; 602 if (CurBlock) 603 isVariadic = CurBlock->TheDecl->isVariadic(); 604 else if (FunctionDecl *FD = getCurFunctionDecl()) 605 isVariadic = FD->isVariadic(); 606 else 607 isVariadic = getCurMethodDecl()->isVariadic(); 608 609 if (!isVariadic) { 610 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 611 return true; 612 } 613 614 // Verify that the second argument to the builtin is the last argument of the 615 // current function or method. 616 bool SecondArgIsLastNamedArgument = false; 617 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 618 619 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 620 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 621 // FIXME: This isn't correct for methods (results in bogus warning). 622 // Get the last formal in the current function. 
623 const ParmVarDecl *LastArg; 624 if (CurBlock) 625 LastArg = *(CurBlock->TheDecl->param_end()-1); 626 else if (FunctionDecl *FD = getCurFunctionDecl()) 627 LastArg = *(FD->param_end()-1); 628 else 629 LastArg = *(getCurMethodDecl()->param_end()-1); 630 SecondArgIsLastNamedArgument = PV == LastArg; 631 } 632 } 633 634 if (!SecondArgIsLastNamedArgument) 635 Diag(TheCall->getArg(1)->getLocStart(), 636 diag::warn_second_parameter_of_va_start_not_last_named_argument); 637 return false; 638 } 639 640 /// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 641 /// friends. This is declared to take (...), so we have to check everything. 642 bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 643 if (TheCall->getNumArgs() < 2) 644 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 645 << 0 << 2 << TheCall->getNumArgs()/*function call*/; 646 if (TheCall->getNumArgs() > 2) 647 return Diag(TheCall->getArg(2)->getLocStart(), 648 diag::err_typecheck_call_too_many_args) 649 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 650 << SourceRange(TheCall->getArg(2)->getLocStart(), 651 (*(TheCall->arg_end()-1))->getLocEnd()); 652 653 ExprResult OrigArg0 = TheCall->getArg(0); 654 ExprResult OrigArg1 = TheCall->getArg(1); 655 656 // Do standard promotions between the two arguments, returning their common 657 // type. 658 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 659 if (OrigArg0.isInvalid() || OrigArg1.isInvalid()) 660 return true; 661 662 // Make sure any conversions are pushed back into the call; this is 663 // type safe since unordered compare builtins are declared as "_Bool 664 // foo(...)". 665 TheCall->setArg(0, OrigArg0.get()); 666 TheCall->setArg(1, OrigArg1.get()); 667 668 if (OrigArg0.get()->isTypeDependent() || OrigArg1.get()->isTypeDependent()) 669 return false; 670 671 // If the common type isn't a real floating type, then the arguments were 672 // invalid for this operation. 
673 if (!Res->isRealFloatingType()) 674 return Diag(OrigArg0.get()->getLocStart(), 675 diag::err_typecheck_call_invalid_ordered_compare) 676 << OrigArg0.get()->getType() << OrigArg1.get()->getType() 677 << SourceRange(OrigArg0.get()->getLocStart(), OrigArg1.get()->getLocEnd()); 678 679 return false; 680 } 681 682 /// SemaBuiltinSemaBuiltinFPClassification - Handle functions like 683 /// __builtin_isnan and friends. This is declared to take (...), so we have 684 /// to check everything. We expect the last argument to be a floating point 685 /// value. 686 bool Sema::SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs) { 687 if (TheCall->getNumArgs() < NumArgs) 688 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 689 << 0 << NumArgs << TheCall->getNumArgs()/*function call*/; 690 if (TheCall->getNumArgs() > NumArgs) 691 return Diag(TheCall->getArg(NumArgs)->getLocStart(), 692 diag::err_typecheck_call_too_many_args) 693 << 0 /*function call*/ << NumArgs << TheCall->getNumArgs() 694 << SourceRange(TheCall->getArg(NumArgs)->getLocStart(), 695 (*(TheCall->arg_end()-1))->getLocEnd()); 696 697 Expr *OrigArg = TheCall->getArg(NumArgs-1); 698 699 if (OrigArg->isTypeDependent()) 700 return false; 701 702 // This operation requires a non-_Complex floating-point number. 703 if (!OrigArg->getType()->isRealFloatingType()) 704 return Diag(OrigArg->getLocStart(), 705 diag::err_typecheck_call_invalid_unary_fp) 706 << OrigArg->getType() << OrigArg->getSourceRange(); 707 708 // If this is an implicit conversion from float -> double, remove it. 
709 if (ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(OrigArg)) { 710 Expr *CastArg = Cast->getSubExpr(); 711 if (CastArg->getType()->isSpecificBuiltinType(BuiltinType::Float)) { 712 assert(Cast->getType()->isSpecificBuiltinType(BuiltinType::Double) && 713 "promotion from float to double is the only expected cast here"); 714 Cast->setSubExpr(0); 715 TheCall->setArg(NumArgs-1, CastArg); 716 OrigArg = CastArg; 717 } 718 } 719 720 return false; 721 } 722 723 /// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 724 // This is declared to take (...), so we have to check everything. 725 ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 726 if (TheCall->getNumArgs() < 2) 727 return ExprError(Diag(TheCall->getLocEnd(), 728 diag::err_typecheck_call_too_few_args_at_least) 729 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 730 << TheCall->getSourceRange()); 731 732 // Determine which of the following types of shufflevector we're checking: 733 // 1) unary, vector mask: (lhs, mask) 734 // 2) binary, vector mask: (lhs, rhs, mask) 735 // 3) binary, scalar mask: (lhs, rhs, index, ..., index) 736 QualType resType = TheCall->getArg(0)->getType(); 737 unsigned numElements = 0; 738 739 if (!TheCall->getArg(0)->isTypeDependent() && 740 !TheCall->getArg(1)->isTypeDependent()) { 741 QualType LHSType = TheCall->getArg(0)->getType(); 742 QualType RHSType = TheCall->getArg(1)->getType(); 743 744 if (!LHSType->isVectorType() || !RHSType->isVectorType()) { 745 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 746 << SourceRange(TheCall->getArg(0)->getLocStart(), 747 TheCall->getArg(1)->getLocEnd()); 748 return ExprError(); 749 } 750 751 numElements = LHSType->getAs<VectorType>()->getNumElements(); 752 unsigned numResElements = TheCall->getNumArgs() - 2; 753 754 // Check to see if we have a call with 2 vector arguments, the unary shuffle 755 // with mask. 
If so, verify that RHS is an integer vector type with the 756 // same number of elts as lhs. 757 if (TheCall->getNumArgs() == 2) { 758 if (!RHSType->hasIntegerRepresentation() || 759 RHSType->getAs<VectorType>()->getNumElements() != numElements) 760 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 761 << SourceRange(TheCall->getArg(1)->getLocStart(), 762 TheCall->getArg(1)->getLocEnd()); 763 numResElements = numElements; 764 } 765 else if (!Context.hasSameUnqualifiedType(LHSType, RHSType)) { 766 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 767 << SourceRange(TheCall->getArg(0)->getLocStart(), 768 TheCall->getArg(1)->getLocEnd()); 769 return ExprError(); 770 } else if (numElements != numResElements) { 771 QualType eltType = LHSType->getAs<VectorType>()->getElementType(); 772 resType = Context.getVectorType(eltType, numResElements, 773 VectorType::GenericVector); 774 } 775 } 776 777 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 778 if (TheCall->getArg(i)->isTypeDependent() || 779 TheCall->getArg(i)->isValueDependent()) 780 continue; 781 782 llvm::APSInt Result(32); 783 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 784 return ExprError(Diag(TheCall->getLocStart(), 785 diag::err_shufflevector_nonconstant_argument) 786 << TheCall->getArg(i)->getSourceRange()); 787 788 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 789 return ExprError(Diag(TheCall->getLocStart(), 790 diag::err_shufflevector_argument_too_large) 791 << TheCall->getArg(i)->getSourceRange()); 792 } 793 794 llvm::SmallVector<Expr*, 32> exprs; 795 796 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 797 exprs.push_back(TheCall->getArg(i)); 798 TheCall->setArg(i, 0); 799 } 800 801 return Owned(new (Context) ShuffleVectorExpr(Context, exprs.begin(), 802 exprs.size(), resType, 803 TheCall->getCallee()->getLocStart(), 804 TheCall->getRParenLoc())); 805 } 806 807 /// SemaBuiltinPrefetch - 
Handle __builtin_prefetch. 808 // This is declared to take (const void*, ...) and can take two 809 // optional constant int args. 810 bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 811 unsigned NumArgs = TheCall->getNumArgs(); 812 813 if (NumArgs > 3) 814 return Diag(TheCall->getLocEnd(), 815 diag::err_typecheck_call_too_many_args_at_most) 816 << 0 /*function call*/ << 3 << NumArgs 817 << TheCall->getSourceRange(); 818 819 // Argument 0 is checked for us and the remaining arguments must be 820 // constant integers. 821 for (unsigned i = 1; i != NumArgs; ++i) { 822 Expr *Arg = TheCall->getArg(i); 823 824 llvm::APSInt Result; 825 if (SemaBuiltinConstantArg(TheCall, i, Result)) 826 return true; 827 828 // FIXME: gcc issues a warning and rewrites these to 0. These 829 // seems especially odd for the third argument since the default 830 // is 3. 831 if (i == 1) { 832 if (Result.getLimitedValue() > 1) 833 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 834 << "0" << "1" << Arg->getSourceRange(); 835 } else { 836 if (Result.getLimitedValue() > 3) 837 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 838 << "0" << "3" << Arg->getSourceRange(); 839 } 840 } 841 842 return false; 843 } 844 845 /// SemaBuiltinConstantArg - Handle a check if argument ArgNum of CallExpr 846 /// TheCall is a constant expression. 
847 bool Sema::SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, 848 llvm::APSInt &Result) { 849 Expr *Arg = TheCall->getArg(ArgNum); 850 DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); 851 FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); 852 853 if (Arg->isTypeDependent() || Arg->isValueDependent()) return false; 854 855 if (!Arg->isIntegerConstantExpr(Result, Context)) 856 return Diag(TheCall->getLocStart(), diag::err_constant_integer_arg_type) 857 << FDecl->getDeclName() << Arg->getSourceRange(); 858 859 return false; 860 } 861 862 /// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 863 /// int type). This simply type checks that type is one of the defined 864 /// constants (0-3). 865 // For compatability check 0-3, llvm only handles 0 and 2. 866 bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 867 llvm::APSInt Result; 868 869 // Check constant-ness first. 870 if (SemaBuiltinConstantArg(TheCall, 1, Result)) 871 return true; 872 873 Expr *Arg = TheCall->getArg(1); 874 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 875 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 876 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 877 } 878 879 return false; 880 } 881 882 /// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val). 883 /// This checks that val is a constant 1. 884 bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { 885 Expr *Arg = TheCall->getArg(1); 886 llvm::APSInt Result; 887 888 // TODO: This is less than ideal. Overload this to take a value. 889 if (SemaBuiltinConstantArg(TheCall, 1, Result)) 890 return true; 891 892 if (Result != 1) 893 return Diag(TheCall->getLocStart(), diag::err_builtin_longjmp_invalid_val) 894 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 895 896 return false; 897 } 898 899 // Handle i > 1 ? "x" : "y", recursively. 
900 bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall, 901 bool HasVAListArg, 902 unsigned format_idx, unsigned firstDataArg, 903 bool isPrintf) { 904 tryAgain: 905 if (E->isTypeDependent() || E->isValueDependent()) 906 return false; 907 908 E = E->IgnoreParens(); 909 910 switch (E->getStmtClass()) { 911 case Stmt::BinaryConditionalOperatorClass: 912 case Stmt::ConditionalOperatorClass: { 913 const AbstractConditionalOperator *C = cast<AbstractConditionalOperator>(E); 914 return SemaCheckStringLiteral(C->getTrueExpr(), TheCall, HasVAListArg, 915 format_idx, firstDataArg, isPrintf) 916 && SemaCheckStringLiteral(C->getFalseExpr(), TheCall, HasVAListArg, 917 format_idx, firstDataArg, isPrintf); 918 } 919 920 case Stmt::IntegerLiteralClass: 921 // Technically -Wformat-nonliteral does not warn about this case. 922 // The behavior of printf and friends in this case is implementation 923 // dependent. Ideally if the format string cannot be null then 924 // it should have a 'nonnull' attribute in the function prototype. 925 return true; 926 927 case Stmt::ImplicitCastExprClass: { 928 E = cast<ImplicitCastExpr>(E)->getSubExpr(); 929 goto tryAgain; 930 } 931 932 case Stmt::OpaqueValueExprClass: 933 if (const Expr *src = cast<OpaqueValueExpr>(E)->getSourceExpr()) { 934 E = src; 935 goto tryAgain; 936 } 937 return false; 938 939 case Stmt::PredefinedExprClass: 940 // While __func__, etc., are technically not string literals, they 941 // cannot contain format specifiers and thus are not a security 942 // liability. 943 return true; 944 945 case Stmt::DeclRefExprClass: { 946 const DeclRefExpr *DR = cast<DeclRefExpr>(E); 947 948 // As an exception, do not flag errors for variables binding to 949 // const string literals. 
950 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { 951 bool isConstant = false; 952 QualType T = DR->getType(); 953 954 if (const ArrayType *AT = Context.getAsArrayType(T)) { 955 isConstant = AT->getElementType().isConstant(Context); 956 } else if (const PointerType *PT = T->getAs<PointerType>()) { 957 isConstant = T.isConstant(Context) && 958 PT->getPointeeType().isConstant(Context); 959 } 960 961 if (isConstant) { 962 if (const Expr *Init = VD->getAnyInitializer()) 963 return SemaCheckStringLiteral(Init, TheCall, 964 HasVAListArg, format_idx, firstDataArg, 965 isPrintf); 966 } 967 968 // For vprintf* functions (i.e., HasVAListArg==true), we add a 969 // special check to see if the format string is a function parameter 970 // of the function calling the printf function. If the function 971 // has an attribute indicating it is a printf-like function, then we 972 // should suppress warnings concerning non-literals being used in a call 973 // to a vprintf function. For example: 974 // 975 // void 976 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...){ 977 // va_list ap; 978 // va_start(ap, fmt); 979 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 980 // ... 981 // 982 // 983 // FIXME: We don't have full attribute support yet, so just check to see 984 // if the argument is a DeclRefExpr that references a parameter. We'll 985 // add proper support for checking the attribute later. 
986 if (HasVAListArg) 987 if (isa<ParmVarDecl>(VD)) 988 return true; 989 } 990 991 return false; 992 } 993 994 case Stmt::CallExprClass: { 995 const CallExpr *CE = cast<CallExpr>(E); 996 if (const ImplicitCastExpr *ICE 997 = dyn_cast<ImplicitCastExpr>(CE->getCallee())) { 998 if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ICE->getSubExpr())) { 999 if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DRE->getDecl())) { 1000 if (const FormatArgAttr *FA = FD->getAttr<FormatArgAttr>()) { 1001 unsigned ArgIndex = FA->getFormatIdx(); 1002 const Expr *Arg = CE->getArg(ArgIndex - 1); 1003 1004 return SemaCheckStringLiteral(Arg, TheCall, HasVAListArg, 1005 format_idx, firstDataArg, isPrintf); 1006 } 1007 } 1008 } 1009 } 1010 1011 return false; 1012 } 1013 case Stmt::ObjCStringLiteralClass: 1014 case Stmt::StringLiteralClass: { 1015 const StringLiteral *StrE = NULL; 1016 1017 if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E)) 1018 StrE = ObjCFExpr->getString(); 1019 else 1020 StrE = cast<StringLiteral>(E); 1021 1022 if (StrE) { 1023 CheckFormatString(StrE, E, TheCall, HasVAListArg, format_idx, 1024 firstDataArg, isPrintf); 1025 return true; 1026 } 1027 1028 return false; 1029 } 1030 1031 default: 1032 return false; 1033 } 1034 } 1035 1036 void 1037 Sema::CheckNonNullArguments(const NonNullAttr *NonNull, 1038 const Expr * const *ExprArgs, 1039 SourceLocation CallSiteLoc) { 1040 for (NonNullAttr::args_iterator i = NonNull->args_begin(), 1041 e = NonNull->args_end(); 1042 i != e; ++i) { 1043 const Expr *ArgExpr = ExprArgs[*i]; 1044 if (ArgExpr->isNullPointerConstant(Context, 1045 Expr::NPC_ValueDependentIsNotNull)) 1046 Diag(CallSiteLoc, diag::warn_null_arg) << ArgExpr->getSourceRange(); 1047 } 1048 } 1049 1050 /// CheckPrintfScanfArguments - Check calls to printf and scanf (and similar 1051 /// functions) for correct use of format strings. 
1052 void 1053 Sema::CheckPrintfScanfArguments(const CallExpr *TheCall, bool HasVAListArg, 1054 unsigned format_idx, unsigned firstDataArg, 1055 bool isPrintf) { 1056 1057 const Expr *Fn = TheCall->getCallee(); 1058 1059 // The way the format attribute works in GCC, the implicit this argument 1060 // of member functions is counted. However, it doesn't appear in our own 1061 // lists, so decrement format_idx in that case. 1062 if (isa<CXXMemberCallExpr>(TheCall)) { 1063 const CXXMethodDecl *method_decl = 1064 dyn_cast<CXXMethodDecl>(TheCall->getCalleeDecl()); 1065 if (method_decl && method_decl->isInstance()) { 1066 // Catch a format attribute mistakenly referring to the object argument. 1067 if (format_idx == 0) 1068 return; 1069 --format_idx; 1070 if(firstDataArg != 0) 1071 --firstDataArg; 1072 } 1073 } 1074 1075 // CHECK: printf/scanf-like function is called with no format string. 1076 if (format_idx >= TheCall->getNumArgs()) { 1077 Diag(TheCall->getRParenLoc(), diag::warn_missing_format_string) 1078 << Fn->getSourceRange(); 1079 return; 1080 } 1081 1082 const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 1083 1084 // CHECK: format string is not a string literal. 1085 // 1086 // Dynamically generated format strings are difficult to 1087 // automatically vet at compile time. Requiring that format strings 1088 // are string literals: (1) permits the checking of format strings by 1089 // the compiler and thereby (2) can practically remove the source of 1090 // many format string exploits. 1091 1092 // Format string can be either ObjC string (e.g. @"%d") or 1093 // C string (e.g. "%d") 1094 // ObjC string uses the same format specifiers as C string, so we can use 1095 // the same format string checking logic for both ObjC and C strings. 1096 if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx, 1097 firstDataArg, isPrintf)) 1098 return; // Literal format string found, check done! 
1099 1100 // If there are no arguments specified, warn with -Wformat-security, otherwise 1101 // warn only with -Wformat-nonliteral. 1102 if (TheCall->getNumArgs() == format_idx+1) 1103 Diag(TheCall->getArg(format_idx)->getLocStart(), 1104 diag::warn_format_nonliteral_noargs) 1105 << OrigFormatExpr->getSourceRange(); 1106 else 1107 Diag(TheCall->getArg(format_idx)->getLocStart(), 1108 diag::warn_format_nonliteral) 1109 << OrigFormatExpr->getSourceRange(); 1110 } 1111 1112 namespace { 1113 class CheckFormatHandler : public analyze_format_string::FormatStringHandler { 1114 protected: 1115 Sema &S; 1116 const StringLiteral *FExpr; 1117 const Expr *OrigFormatExpr; 1118 const unsigned FirstDataArg; 1119 const unsigned NumDataArgs; 1120 const bool IsObjCLiteral; 1121 const char *Beg; // Start of format string. 1122 const bool HasVAListArg; 1123 const CallExpr *TheCall; 1124 unsigned FormatIdx; 1125 llvm::BitVector CoveredArgs; 1126 bool usesPositionalArgs; 1127 bool atFirstArg; 1128 public: 1129 CheckFormatHandler(Sema &s, const StringLiteral *fexpr, 1130 const Expr *origFormatExpr, unsigned firstDataArg, 1131 unsigned numDataArgs, bool isObjCLiteral, 1132 const char *beg, bool hasVAListArg, 1133 const CallExpr *theCall, unsigned formatIdx) 1134 : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr), 1135 FirstDataArg(firstDataArg), 1136 NumDataArgs(numDataArgs), 1137 IsObjCLiteral(isObjCLiteral), Beg(beg), 1138 HasVAListArg(hasVAListArg), 1139 TheCall(theCall), FormatIdx(formatIdx), 1140 usesPositionalArgs(false), atFirstArg(true) { 1141 CoveredArgs.resize(numDataArgs); 1142 CoveredArgs.reset(); 1143 } 1144 1145 void DoneProcessing(); 1146 1147 void HandleIncompleteSpecifier(const char *startSpecifier, 1148 unsigned specifierLen); 1149 1150 virtual void HandleInvalidPosition(const char *startSpecifier, 1151 unsigned specifierLen, 1152 analyze_format_string::PositionContext p); 1153 1154 virtual void HandleZeroPosition(const char *startPos, unsigned posLen); 1155 1156 void 
HandleNullChar(const char *nullCharacter); 1157 1158 protected: 1159 bool HandleInvalidConversionSpecifier(unsigned argIndex, SourceLocation Loc, 1160 const char *startSpec, 1161 unsigned specifierLen, 1162 const char *csStart, unsigned csLen); 1163 1164 SourceRange getFormatStringRange(); 1165 CharSourceRange getSpecifierRange(const char *startSpecifier, 1166 unsigned specifierLen); 1167 SourceLocation getLocationOfByte(const char *x); 1168 1169 const Expr *getDataArg(unsigned i) const; 1170 1171 bool CheckNumArgs(const analyze_format_string::FormatSpecifier &FS, 1172 const analyze_format_string::ConversionSpecifier &CS, 1173 const char *startSpecifier, unsigned specifierLen, 1174 unsigned argIndex); 1175 }; 1176 } 1177 1178 SourceRange CheckFormatHandler::getFormatStringRange() { 1179 return OrigFormatExpr->getSourceRange(); 1180 } 1181 1182 CharSourceRange CheckFormatHandler:: 1183 getSpecifierRange(const char *startSpecifier, unsigned specifierLen) { 1184 SourceLocation Start = getLocationOfByte(startSpecifier); 1185 SourceLocation End = getLocationOfByte(startSpecifier + specifierLen - 1); 1186 1187 // Advance the end SourceLocation by one due to half-open ranges. 
1188 End = End.getFileLocWithOffset(1); 1189 1190 return CharSourceRange::getCharRange(Start, End); 1191 } 1192 1193 SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) { 1194 return S.getLocationOfStringLiteralByte(FExpr, x - Beg); 1195 } 1196 1197 void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier, 1198 unsigned specifierLen){ 1199 SourceLocation Loc = getLocationOfByte(startSpecifier); 1200 S.Diag(Loc, diag::warn_printf_incomplete_specifier) 1201 << getSpecifierRange(startSpecifier, specifierLen); 1202 } 1203 1204 void 1205 CheckFormatHandler::HandleInvalidPosition(const char *startPos, unsigned posLen, 1206 analyze_format_string::PositionContext p) { 1207 SourceLocation Loc = getLocationOfByte(startPos); 1208 S.Diag(Loc, diag::warn_format_invalid_positional_specifier) 1209 << (unsigned) p << getSpecifierRange(startPos, posLen); 1210 } 1211 1212 void CheckFormatHandler::HandleZeroPosition(const char *startPos, 1213 unsigned posLen) { 1214 SourceLocation Loc = getLocationOfByte(startPos); 1215 S.Diag(Loc, diag::warn_format_zero_positional_specifier) 1216 << getSpecifierRange(startPos, posLen); 1217 } 1218 1219 void CheckFormatHandler::HandleNullChar(const char *nullCharacter) { 1220 if (!IsObjCLiteral) { 1221 // The presence of a null character is likely an error. 1222 S.Diag(getLocationOfByte(nullCharacter), 1223 diag::warn_printf_format_string_contains_null_char) 1224 << getFormatStringRange(); 1225 } 1226 } 1227 1228 const Expr *CheckFormatHandler::getDataArg(unsigned i) const { 1229 return TheCall->getArg(FirstDataArg + i); 1230 } 1231 1232 void CheckFormatHandler::DoneProcessing() { 1233 // Does the number of data arguments exceed the number of 1234 // format conversions in the format string? 1235 if (!HasVAListArg) { 1236 // Find any arguments that weren't covered. 
1237 CoveredArgs.flip(); 1238 signed notCoveredArg = CoveredArgs.find_first(); 1239 if (notCoveredArg >= 0) { 1240 assert((unsigned)notCoveredArg < NumDataArgs); 1241 S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(), 1242 diag::warn_printf_data_arg_not_used) 1243 << getFormatStringRange(); 1244 } 1245 } 1246 } 1247 1248 bool 1249 CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex, 1250 SourceLocation Loc, 1251 const char *startSpec, 1252 unsigned specifierLen, 1253 const char *csStart, 1254 unsigned csLen) { 1255 1256 bool keepGoing = true; 1257 if (argIndex < NumDataArgs) { 1258 // Consider the argument coverered, even though the specifier doesn't 1259 // make sense. 1260 CoveredArgs.set(argIndex); 1261 } 1262 else { 1263 // If argIndex exceeds the number of data arguments we 1264 // don't issue a warning because that is just a cascade of warnings (and 1265 // they may have intended '%%' anyway). We don't want to continue processing 1266 // the format string after this point, however, as we will like just get 1267 // gibberish when trying to match arguments. 
1268 keepGoing = false; 1269 } 1270 1271 S.Diag(Loc, diag::warn_format_invalid_conversion) 1272 << llvm::StringRef(csStart, csLen) 1273 << getSpecifierRange(startSpec, specifierLen); 1274 1275 return keepGoing; 1276 } 1277 1278 bool 1279 CheckFormatHandler::CheckNumArgs( 1280 const analyze_format_string::FormatSpecifier &FS, 1281 const analyze_format_string::ConversionSpecifier &CS, 1282 const char *startSpecifier, unsigned specifierLen, unsigned argIndex) { 1283 1284 if (argIndex >= NumDataArgs) { 1285 if (FS.usesPositionalArg()) { 1286 S.Diag(getLocationOfByte(CS.getStart()), 1287 diag::warn_printf_positional_arg_exceeds_data_args) 1288 << (argIndex+1) << NumDataArgs 1289 << getSpecifierRange(startSpecifier, specifierLen); 1290 } 1291 else { 1292 S.Diag(getLocationOfByte(CS.getStart()), 1293 diag::warn_printf_insufficient_data_args) 1294 << getSpecifierRange(startSpecifier, specifierLen); 1295 } 1296 1297 return false; 1298 } 1299 return true; 1300 } 1301 1302 //===--- CHECK: Printf format string checking ------------------------------===// 1303 1304 namespace { 1305 class CheckPrintfHandler : public CheckFormatHandler { 1306 public: 1307 CheckPrintfHandler(Sema &s, const StringLiteral *fexpr, 1308 const Expr *origFormatExpr, unsigned firstDataArg, 1309 unsigned numDataArgs, bool isObjCLiteral, 1310 const char *beg, bool hasVAListArg, 1311 const CallExpr *theCall, unsigned formatIdx) 1312 : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg, 1313 numDataArgs, isObjCLiteral, beg, hasVAListArg, 1314 theCall, formatIdx) {} 1315 1316 1317 bool HandleInvalidPrintfConversionSpecifier( 1318 const analyze_printf::PrintfSpecifier &FS, 1319 const char *startSpecifier, 1320 unsigned specifierLen); 1321 1322 bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 1323 const char *startSpecifier, 1324 unsigned specifierLen); 1325 1326 bool HandleAmount(const analyze_format_string::OptionalAmount &Amt, unsigned k, 1327 const char *startSpecifier, unsigned 
specifierLen); 1328 void HandleInvalidAmount(const analyze_printf::PrintfSpecifier &FS, 1329 const analyze_printf::OptionalAmount &Amt, 1330 unsigned type, 1331 const char *startSpecifier, unsigned specifierLen); 1332 void HandleFlag(const analyze_printf::PrintfSpecifier &FS, 1333 const analyze_printf::OptionalFlag &flag, 1334 const char *startSpecifier, unsigned specifierLen); 1335 void HandleIgnoredFlag(const analyze_printf::PrintfSpecifier &FS, 1336 const analyze_printf::OptionalFlag &ignoredFlag, 1337 const analyze_printf::OptionalFlag &flag, 1338 const char *startSpecifier, unsigned specifierLen); 1339 }; 1340 } 1341 1342 bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier( 1343 const analyze_printf::PrintfSpecifier &FS, 1344 const char *startSpecifier, 1345 unsigned specifierLen) { 1346 const analyze_printf::PrintfConversionSpecifier &CS = 1347 FS.getConversionSpecifier(); 1348 1349 return HandleInvalidConversionSpecifier(FS.getArgIndex(), 1350 getLocationOfByte(CS.getStart()), 1351 startSpecifier, specifierLen, 1352 CS.getStart(), CS.getLength()); 1353 } 1354 1355 bool CheckPrintfHandler::HandleAmount( 1356 const analyze_format_string::OptionalAmount &Amt, 1357 unsigned k, const char *startSpecifier, 1358 unsigned specifierLen) { 1359 1360 if (Amt.hasDataArgument()) { 1361 if (!HasVAListArg) { 1362 unsigned argIndex = Amt.getArgIndex(); 1363 if (argIndex >= NumDataArgs) { 1364 S.Diag(getLocationOfByte(Amt.getStart()), 1365 diag::warn_printf_asterisk_missing_arg) 1366 << k << getSpecifierRange(startSpecifier, specifierLen); 1367 // Don't do any more checking. We will just emit 1368 // spurious errors. 1369 return false; 1370 } 1371 1372 // Type check the data argument. It should be an 'int'. 1373 // Although not in conformance with C99, we also allow the argument to be 1374 // an 'unsigned int' as that is a reasonably safe case. GCC also 1375 // doesn't emit a warning for that case. 
1376 CoveredArgs.set(argIndex); 1377 const Expr *Arg = getDataArg(argIndex); 1378 QualType T = Arg->getType(); 1379 1380 const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context); 1381 assert(ATR.isValid()); 1382 1383 if (!ATR.matchesType(S.Context, T)) { 1384 S.Diag(getLocationOfByte(Amt.getStart()), 1385 diag::warn_printf_asterisk_wrong_type) 1386 << k 1387 << ATR.getRepresentativeType(S.Context) << T 1388 << getSpecifierRange(startSpecifier, specifierLen) 1389 << Arg->getSourceRange(); 1390 // Don't do any more checking. We will just emit 1391 // spurious errors. 1392 return false; 1393 } 1394 } 1395 } 1396 return true; 1397 } 1398 1399 void CheckPrintfHandler::HandleInvalidAmount( 1400 const analyze_printf::PrintfSpecifier &FS, 1401 const analyze_printf::OptionalAmount &Amt, 1402 unsigned type, 1403 const char *startSpecifier, 1404 unsigned specifierLen) { 1405 const analyze_printf::PrintfConversionSpecifier &CS = 1406 FS.getConversionSpecifier(); 1407 switch (Amt.getHowSpecified()) { 1408 case analyze_printf::OptionalAmount::Constant: 1409 S.Diag(getLocationOfByte(Amt.getStart()), 1410 diag::warn_printf_nonsensical_optional_amount) 1411 << type 1412 << CS.toString() 1413 << getSpecifierRange(startSpecifier, specifierLen) 1414 << FixItHint::CreateRemoval(getSpecifierRange(Amt.getStart(), 1415 Amt.getConstantLength())); 1416 break; 1417 1418 default: 1419 S.Diag(getLocationOfByte(Amt.getStart()), 1420 diag::warn_printf_nonsensical_optional_amount) 1421 << type 1422 << CS.toString() 1423 << getSpecifierRange(startSpecifier, specifierLen); 1424 break; 1425 } 1426 } 1427 1428 void CheckPrintfHandler::HandleFlag(const analyze_printf::PrintfSpecifier &FS, 1429 const analyze_printf::OptionalFlag &flag, 1430 const char *startSpecifier, 1431 unsigned specifierLen) { 1432 // Warn about pointless flag with a fixit removal. 
1433 const analyze_printf::PrintfConversionSpecifier &CS = 1434 FS.getConversionSpecifier(); 1435 S.Diag(getLocationOfByte(flag.getPosition()), 1436 diag::warn_printf_nonsensical_flag) 1437 << flag.toString() << CS.toString() 1438 << getSpecifierRange(startSpecifier, specifierLen) 1439 << FixItHint::CreateRemoval(getSpecifierRange(flag.getPosition(), 1)); 1440 } 1441 1442 void CheckPrintfHandler::HandleIgnoredFlag( 1443 const analyze_printf::PrintfSpecifier &FS, 1444 const analyze_printf::OptionalFlag &ignoredFlag, 1445 const analyze_printf::OptionalFlag &flag, 1446 const char *startSpecifier, 1447 unsigned specifierLen) { 1448 // Warn about ignored flag with a fixit removal. 1449 S.Diag(getLocationOfByte(ignoredFlag.getPosition()), 1450 diag::warn_printf_ignored_flag) 1451 << ignoredFlag.toString() << flag.toString() 1452 << getSpecifierRange(startSpecifier, specifierLen) 1453 << FixItHint::CreateRemoval(getSpecifierRange( 1454 ignoredFlag.getPosition(), 1)); 1455 } 1456 1457 bool 1458 CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier 1459 &FS, 1460 const char *startSpecifier, 1461 unsigned specifierLen) { 1462 1463 using namespace analyze_format_string; 1464 using namespace analyze_printf; 1465 const PrintfConversionSpecifier &CS = FS.getConversionSpecifier(); 1466 1467 if (FS.consumesDataArgument()) { 1468 if (atFirstArg) { 1469 atFirstArg = false; 1470 usesPositionalArgs = FS.usesPositionalArg(); 1471 } 1472 else if (usesPositionalArgs != FS.usesPositionalArg()) { 1473 // Cannot mix-and-match positional and non-positional arguments. 1474 S.Diag(getLocationOfByte(CS.getStart()), 1475 diag::warn_format_mix_positional_nonpositional_args) 1476 << getSpecifierRange(startSpecifier, specifierLen); 1477 return false; 1478 } 1479 } 1480 1481 // First check if the field width, precision, and conversion specifier 1482 // have matching data arguments. 
1483 if (!HandleAmount(FS.getFieldWidth(), /* field width */ 0, 1484 startSpecifier, specifierLen)) { 1485 return false; 1486 } 1487 1488 if (!HandleAmount(FS.getPrecision(), /* precision */ 1, 1489 startSpecifier, specifierLen)) { 1490 return false; 1491 } 1492 1493 if (!CS.consumesDataArgument()) { 1494 // FIXME: Technically specifying a precision or field width here 1495 // makes no sense. Worth issuing a warning at some point. 1496 return true; 1497 } 1498 1499 // Consume the argument. 1500 unsigned argIndex = FS.getArgIndex(); 1501 if (argIndex < NumDataArgs) { 1502 // The check to see if the argIndex is valid will come later. 1503 // We set the bit here because we may exit early from this 1504 // function if we encounter some other error. 1505 CoveredArgs.set(argIndex); 1506 } 1507 1508 // Check for using an Objective-C specific conversion specifier 1509 // in a non-ObjC literal. 1510 if (!IsObjCLiteral && CS.isObjCArg()) { 1511 return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier, 1512 specifierLen); 1513 } 1514 1515 // Check for invalid use of field width 1516 if (!FS.hasValidFieldWidth()) { 1517 HandleInvalidAmount(FS, FS.getFieldWidth(), /* field width */ 0, 1518 startSpecifier, specifierLen); 1519 } 1520 1521 // Check for invalid use of precision 1522 if (!FS.hasValidPrecision()) { 1523 HandleInvalidAmount(FS, FS.getPrecision(), /* precision */ 1, 1524 startSpecifier, specifierLen); 1525 } 1526 1527 // Check each flag does not conflict with any other component. 
1528 if (!FS.hasValidThousandsGroupingPrefix()) 1529 HandleFlag(FS, FS.hasThousandsGrouping(), startSpecifier, specifierLen); 1530 if (!FS.hasValidLeadingZeros()) 1531 HandleFlag(FS, FS.hasLeadingZeros(), startSpecifier, specifierLen); 1532 if (!FS.hasValidPlusPrefix()) 1533 HandleFlag(FS, FS.hasPlusPrefix(), startSpecifier, specifierLen); 1534 if (!FS.hasValidSpacePrefix()) 1535 HandleFlag(FS, FS.hasSpacePrefix(), startSpecifier, specifierLen); 1536 if (!FS.hasValidAlternativeForm()) 1537 HandleFlag(FS, FS.hasAlternativeForm(), startSpecifier, specifierLen); 1538 if (!FS.hasValidLeftJustified()) 1539 HandleFlag(FS, FS.isLeftJustified(), startSpecifier, specifierLen); 1540 1541 // Check that flags are not ignored by another flag 1542 if (FS.hasSpacePrefix() && FS.hasPlusPrefix()) // ' ' ignored by '+' 1543 HandleIgnoredFlag(FS, FS.hasSpacePrefix(), FS.hasPlusPrefix(), 1544 startSpecifier, specifierLen); 1545 if (FS.hasLeadingZeros() && FS.isLeftJustified()) // '0' ignored by '-' 1546 HandleIgnoredFlag(FS, FS.hasLeadingZeros(), FS.isLeftJustified(), 1547 startSpecifier, specifierLen); 1548 1549 // Check the length modifier is valid with the given conversion specifier. 1550 const LengthModifier &LM = FS.getLengthModifier(); 1551 if (!FS.hasValidLengthModifier()) 1552 S.Diag(getLocationOfByte(LM.getStart()), 1553 diag::warn_format_nonsensical_length) 1554 << LM.toString() << CS.toString() 1555 << getSpecifierRange(startSpecifier, specifierLen) 1556 << FixItHint::CreateRemoval(getSpecifierRange(LM.getStart(), 1557 LM.getLength())); 1558 1559 // Are we using '%n'? 1560 if (CS.getKind() == ConversionSpecifier::nArg) { 1561 // Issue a warning about this being a possible security issue. 1562 S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_write_back) 1563 << getSpecifierRange(startSpecifier, specifierLen); 1564 // Continue checking the other format specifiers. 1565 return true; 1566 } 1567 1568 // The remaining checks depend on the data arguments. 
1569 if (HasVAListArg) 1570 return true; 1571 1572 if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex)) 1573 return false; 1574 1575 // Now type check the data expression that matches the 1576 // format specifier. 1577 const Expr *Ex = getDataArg(argIndex); 1578 const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context); 1579 if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) { 1580 // Check if we didn't match because of an implicit cast from a 'char' 1581 // or 'short' to an 'int'. This is done because printf is a varargs 1582 // function. 1583 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Ex)) 1584 if (ICE->getType() == S.Context.IntTy) { 1585 // All further checking is done on the subexpression. 1586 Ex = ICE->getSubExpr(); 1587 if (ATR.matchesType(S.Context, Ex->getType())) 1588 return true; 1589 } 1590 1591 // We may be able to offer a FixItHint if it is a supported type. 1592 PrintfSpecifier fixedFS = FS; 1593 bool success = fixedFS.fixType(Ex->getType()); 1594 1595 if (success) { 1596 // Get the fix string from the fixed format specifier 1597 llvm::SmallString<128> buf; 1598 llvm::raw_svector_ostream os(buf); 1599 fixedFS.toString(os); 1600 1601 // FIXME: getRepresentativeType() perhaps should return a string 1602 // instead of a QualType to better handle when the representative 1603 // type is 'wint_t' (which is defined in the system headers). 
1604 S.Diag(getLocationOfByte(CS.getStart()), 1605 diag::warn_printf_conversion_argument_type_mismatch) 1606 << ATR.getRepresentativeType(S.Context) << Ex->getType() 1607 << getSpecifierRange(startSpecifier, specifierLen) 1608 << Ex->getSourceRange() 1609 << FixItHint::CreateReplacement( 1610 getSpecifierRange(startSpecifier, specifierLen), 1611 os.str()); 1612 } 1613 else { 1614 S.Diag(getLocationOfByte(CS.getStart()), 1615 diag::warn_printf_conversion_argument_type_mismatch) 1616 << ATR.getRepresentativeType(S.Context) << Ex->getType() 1617 << getSpecifierRange(startSpecifier, specifierLen) 1618 << Ex->getSourceRange(); 1619 } 1620 } 1621 1622 return true; 1623 } 1624 1625 //===--- CHECK: Scanf format string checking ------------------------------===// 1626 1627 namespace { 1628 class CheckScanfHandler : public CheckFormatHandler { 1629 public: 1630 CheckScanfHandler(Sema &s, const StringLiteral *fexpr, 1631 const Expr *origFormatExpr, unsigned firstDataArg, 1632 unsigned numDataArgs, bool isObjCLiteral, 1633 const char *beg, bool hasVAListArg, 1634 const CallExpr *theCall, unsigned formatIdx) 1635 : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg, 1636 numDataArgs, isObjCLiteral, beg, hasVAListArg, 1637 theCall, formatIdx) {} 1638 1639 bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 1640 const char *startSpecifier, 1641 unsigned specifierLen); 1642 1643 bool HandleInvalidScanfConversionSpecifier( 1644 const analyze_scanf::ScanfSpecifier &FS, 1645 const char *startSpecifier, 1646 unsigned specifierLen); 1647 1648 void HandleIncompleteScanList(const char *start, const char *end); 1649 }; 1650 } 1651 1652 void CheckScanfHandler::HandleIncompleteScanList(const char *start, 1653 const char *end) { 1654 S.Diag(getLocationOfByte(end), diag::warn_scanf_scanlist_incomplete) 1655 << getSpecifierRange(start, end - start); 1656 } 1657 1658 bool CheckScanfHandler::HandleInvalidScanfConversionSpecifier( 1659 const analyze_scanf::ScanfSpecifier 
&FS, 1660 const char *startSpecifier, 1661 unsigned specifierLen) { 1662 1663 const analyze_scanf::ScanfConversionSpecifier &CS = 1664 FS.getConversionSpecifier(); 1665 1666 return HandleInvalidConversionSpecifier(FS.getArgIndex(), 1667 getLocationOfByte(CS.getStart()), 1668 startSpecifier, specifierLen, 1669 CS.getStart(), CS.getLength()); 1670 } 1671 1672 bool CheckScanfHandler::HandleScanfSpecifier( 1673 const analyze_scanf::ScanfSpecifier &FS, 1674 const char *startSpecifier, 1675 unsigned specifierLen) { 1676 1677 using namespace analyze_scanf; 1678 using namespace analyze_format_string; 1679 1680 const ScanfConversionSpecifier &CS = FS.getConversionSpecifier(); 1681 1682 // Handle case where '%' and '*' don't consume an argument. These shouldn't 1683 // be used to decide if we are using positional arguments consistently. 1684 if (FS.consumesDataArgument()) { 1685 if (atFirstArg) { 1686 atFirstArg = false; 1687 usesPositionalArgs = FS.usesPositionalArg(); 1688 } 1689 else if (usesPositionalArgs != FS.usesPositionalArg()) { 1690 // Cannot mix-and-match positional and non-positional arguments. 1691 S.Diag(getLocationOfByte(CS.getStart()), 1692 diag::warn_format_mix_positional_nonpositional_args) 1693 << getSpecifierRange(startSpecifier, specifierLen); 1694 return false; 1695 } 1696 } 1697 1698 // Check if the field with is non-zero. 1699 const OptionalAmount &Amt = FS.getFieldWidth(); 1700 if (Amt.getHowSpecified() == OptionalAmount::Constant) { 1701 if (Amt.getConstantAmount() == 0) { 1702 const CharSourceRange &R = getSpecifierRange(Amt.getStart(), 1703 Amt.getConstantLength()); 1704 S.Diag(getLocationOfByte(Amt.getStart()), 1705 diag::warn_scanf_nonzero_width) 1706 << R << FixItHint::CreateRemoval(R); 1707 } 1708 } 1709 1710 if (!FS.consumesDataArgument()) { 1711 // FIXME: Technically specifying a precision or field width here 1712 // makes no sense. Worth issuing a warning at some point. 1713 return true; 1714 } 1715 1716 // Consume the argument. 
1717 unsigned argIndex = FS.getArgIndex(); 1718 if (argIndex < NumDataArgs) { 1719 // The check to see if the argIndex is valid will come later. 1720 // We set the bit here because we may exit early from this 1721 // function if we encounter some other error. 1722 CoveredArgs.set(argIndex); 1723 } 1724 1725 // Check the length modifier is valid with the given conversion specifier. 1726 const LengthModifier &LM = FS.getLengthModifier(); 1727 if (!FS.hasValidLengthModifier()) { 1728 S.Diag(getLocationOfByte(LM.getStart()), 1729 diag::warn_format_nonsensical_length) 1730 << LM.toString() << CS.toString() 1731 << getSpecifierRange(startSpecifier, specifierLen) 1732 << FixItHint::CreateRemoval(getSpecifierRange(LM.getStart(), 1733 LM.getLength())); 1734 } 1735 1736 // The remaining checks depend on the data arguments. 1737 if (HasVAListArg) 1738 return true; 1739 1740 if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex)) 1741 return false; 1742 1743 // FIXME: Check that the argument type matches the format specifier. 1744 1745 return true; 1746 } 1747 1748 void Sema::CheckFormatString(const StringLiteral *FExpr, 1749 const Expr *OrigFormatExpr, 1750 const CallExpr *TheCall, bool HasVAListArg, 1751 unsigned format_idx, unsigned firstDataArg, 1752 bool isPrintf) { 1753 1754 // CHECK: is the format string a wide literal? 1755 if (FExpr->isWide()) { 1756 Diag(FExpr->getLocStart(), 1757 diag::warn_format_string_is_wide_literal) 1758 << OrigFormatExpr->getSourceRange(); 1759 return; 1760 } 1761 1762 // Str - The format string. NOTE: this is NOT null-terminated! 1763 llvm::StringRef StrRef = FExpr->getString(); 1764 const char *Str = StrRef.data(); 1765 unsigned StrLen = StrRef.size(); 1766 1767 // CHECK: empty format string? 
if (StrLen == 0) {
    Diag(FExpr->getLocStart(), diag::warn_empty_format_string)
    << OrigFormatExpr->getSourceRange();
    return;
  }

  // Run the matching handler over the literal.  The number of data arguments
  // passed to the handler is everything from firstDataArg onwards.
  // DoneProcessing() is only called when the parser returns false.
  if (isPrintf) {
    CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
                         TheCall->getNumArgs() - firstDataArg,
                         isa<ObjCStringLiteral>(OrigFormatExpr), Str,
                         HasVAListArg, TheCall, format_idx);

    if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen))
      H.DoneProcessing();
  }
  else {
    CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
                        TheCall->getNumArgs() - firstDataArg,
                        isa<ObjCStringLiteral>(OrigFormatExpr), Str,
                        HasVAListArg, TheCall, format_idx);

    if (!analyze_format_string::ParseScanfString(H, Str, Str + StrLen))
      H.DoneProcessing();
  }
}

//===--- CHECK: Return Address of Stack Variable --------------------------===//

// Forward declarations: the two functions are mutually recursive.
static Expr *EvalVal(Expr *E, llvm::SmallVectorImpl<DeclRefExpr *> &refVars);
static Expr *EvalAddr(Expr* E, llvm::SmallVectorImpl<DeclRefExpr *> &refVars);

/// CheckReturnStackAddr - Check if a return statement returns the address
///   of a stack variable.
///
/// \param RetValExp the returned expression
/// \param lhsType the type the value is returned as; pointer and block
///        pointer types are analysed with EvalAddr, reference types with
///        EvalVal, and any other type is not checked
/// \param ReturnLoc not referenced in this function body
void
Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
                           SourceLocation ReturnLoc) {

  Expr *stackE = 0;
  llvm::SmallVector<DeclRefExpr *, 8> refVars;

  // Perform checking for returned stack addresses, local blocks,
  // label addresses or references to temporaries.
  if (lhsType->isPointerType() || lhsType->isBlockPointerType()) {
    stackE = EvalAddr(RetValExp, refVars);
  } else if (lhsType->isReferenceType()) {
    stackE = EvalVal(RetValExp, refVars);
  }

  if (stackE == 0)
    return; // Nothing suspicious was found.

  SourceLocation diagLoc;
  SourceRange diagRange;
  if (refVars.empty()) {
    diagLoc = stackE->getLocStart();
    diagRange = stackE->getSourceRange();
  } else {
    // We followed through a reference variable. 'stackE' contains the
    // problematic expression but we will warn at the first reference
    // variable in the trail (refVars[0]). We will later display the "trail"
    // of reference variables using notes.
    diagLoc = refVars[0]->getLocStart();
    diagRange = refVars[0]->getSourceRange();
  }

  // Pick the diagnostic from the kind of expression that was found.
  if (DeclRefExpr *DR = dyn_cast<DeclRefExpr>(stackE)) { //address of local var.
    Diag(diagLoc, lhsType->isReferenceType() ? diag::warn_ret_stack_ref
                                             : diag::warn_ret_stack_addr)
     << DR->getDecl()->getDeclName() << diagRange;
  } else if (isa<BlockExpr>(stackE)) { // local block.
    Diag(diagLoc, diag::err_ret_local_block) << diagRange;
  } else if (isa<AddrLabelExpr>(stackE)) { // address of label.
    Diag(diagLoc, diag::warn_ret_addr_label) << diagRange;
  } else { // local temporary.
    Diag(diagLoc, lhsType->isReferenceType() ? diag::warn_ret_local_temp_ref
                                             : diag::warn_ret_local_temp_addr)
     << diagRange;
  }

  // Display the "trail" of reference variables that we followed until we
  // found the problematic expression using notes.
  for (unsigned i = 0, e = refVars.size(); i != e; ++i) {
    VarDecl *VD = cast<VarDecl>(refVars[i]->getDecl());
    // If this var binds to another reference var, show the range of the next
    // var, otherwise the var binds to the problematic expression, in which case
    // show the range of the expression.
    SourceRange range = (i < e-1) ? refVars[i+1]->getSourceRange()
                                  : stackE->getSourceRange();
    Diag(VD->getLocation(), diag::note_ref_var_local_bind)
      << VD->getDeclName() << range;
  }
}

/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
///  check if the expression in a return statement evaluates to an address
///  to a location on the stack, a local block, an address of a label, or a
///  reference to local temporary. The recursion is used to traverse the
///  AST of the return expression, with recursion backtracking when we
///  encounter a subexpression that (1) clearly does not lead to one of the
///  above problematic expressions (2) is something we cannot determine leads to
///  a problematic expression based on such local checking.
///
///  Both EvalAddr and EvalVal follow through reference variables to evaluate
///  the expression that they point to. Such variables are added to the
///  'refVars' vector so that we know what the reference variable "trail" was.
///
///  EvalAddr processes expressions that are pointers that are used as
///  references (and not L-values).  EvalVal handles all other values.
///  At the base case of the recursion is a check for the above problematic
///  expressions.
///
///  This implementation handles:
///
///   * pointer-to-pointer casts
///   * implicit conversions from array references to pointers
///   * taking the address of fields
///   * arbitrary interplay between "&" and "*" operators
///   * pointer arithmetic from an address of a stack variable
///   * taking the address of an array element where the array is on the stack
///
///  Returns the problematic expression, or NULL when none was found.
static Expr *EvalAddr(Expr *E, llvm::SmallVectorImpl<DeclRefExpr *> &refVars) {
  // Type-dependent expressions are not analysed.
  if (E->isTypeDependent())
      return NULL;

  // We should only be called for evaluating pointer expressions.
assert((E->getType()->isAnyPointerType() ||
          E->getType()->isBlockPointerType() ||
          E->getType()->isObjCQualifiedIdType()) &&
         "EvalAddr only works on pointers");

  E = E->IgnoreParens();

  // Our "symbolic interpreter" is just a dispatch off the currently
  // viewed AST node.  We then recursively traverse the AST by calling
  // EvalAddr and EvalVal appropriately.
  switch (E->getStmtClass()) {
  case Stmt::DeclRefExprClass: {
    DeclRefExpr *DR = cast<DeclRefExpr>(E);

    if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
      // If this is a reference variable, follow through to the expression that
      // it points to.
      if (V->hasLocalStorage() &&
          V->getType()->isReferenceType() && V->hasInit()) {
        // Add the reference variable to the "trail".
        refVars.push_back(DR);
        return EvalAddr(V->getInit(), refVars);
      }

    return NULL;
  }

  case Stmt::UnaryOperatorClass: {
    // The only unary operator that makes sense to handle here
    // is AddrOf.  All others don't make sense as pointers.
    UnaryOperator *U = cast<UnaryOperator>(E);

    if (U->getOpcode() == UO_AddrOf)
      return EvalVal(U->getSubExpr(), refVars);
    else
      return NULL;
  }

  case Stmt::BinaryOperatorClass: {
    // Handle pointer arithmetic.  All other binary operators are not valid
    // in this context.
    BinaryOperator *B = cast<BinaryOperator>(E);
    BinaryOperatorKind op = B->getOpcode();

    if (op != BO_Add && op != BO_Sub)
      return NULL;

    Expr *Base = B->getLHS();

    // Determine which argument is the real pointer base.  It could be
    // the RHS argument instead of the LHS.
    if (!Base->getType()->isPointerType()) Base = B->getRHS();

    assert (Base->getType()->isPointerType());
    return EvalAddr(Base, refVars);
  }

  // For conditional operators we need to see if either the LHS or RHS
  // evaluates to a non-NULL Expr*.  The LHS is tried first; if it yields
  // nothing, the result for the RHS is returned.
  case Stmt::ConditionalOperatorClass: {
    ConditionalOperator *C = cast<ConditionalOperator>(E);

    // Handle the GNU extension for missing LHS.
    if (Expr *lhsExpr = C->getLHS()) {
    // In C++, we can have a throw-expression, which has 'void' type.
      if (!lhsExpr->getType()->isVoidType())
        if (Expr* LHS = EvalAddr(lhsExpr, refVars))
          return LHS;
    }

    // In C++, we can have a throw-expression, which has 'void' type.
    if (C->getRHS()->getType()->isVoidType())
      return NULL;

    return EvalAddr(C->getRHS(), refVars);
  }

  case Stmt::BlockExprClass:
    // Only blocks with captures are reported.
    if (cast<BlockExpr>(E)->getBlockDecl()->hasCaptures())
      return E; // local block.
    return NULL;

  case Stmt::AddrLabelExprClass:
    return E; // address of label.

  // For casts, we need to handle conversions from arrays to
  // pointer values, and pointer-to-pointer conversions.
  case Stmt::ImplicitCastExprClass:
  case Stmt::CStyleCastExprClass:
  case Stmt::CXXFunctionalCastExprClass: {
    Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
    QualType T = SubExpr->getType();

    if (SubExpr->getType()->isPointerType() ||
        SubExpr->getType()->isBlockPointerType() ||
        SubExpr->getType()->isObjCQualifiedIdType())
      return EvalAddr(SubExpr, refVars);
    else if (T->isArrayType())
      return EvalVal(SubExpr, refVars);
    else
      return 0;
  }

  // C++ casts.  For dynamic casts, static casts, and const casts, we
  // are always converting from a pointer-to-pointer, so we just blow
  // through the cast.  In the case the dynamic cast doesn't fail (and
  // return NULL), we take the conservative route and report cases
  // where we return the address of a stack variable.
  // NOTE(review): the original comment broke off here mid-sentence
  // ("For Reinterpre"); reinterpret casts take the same path as the others.
  // FIXME: The comment above is wrong; we're not always converting
  // from pointer to pointer. I'm guessing that this code should also
  // handle references to objects.
  case Stmt::CXXStaticCastExprClass:
  case Stmt::CXXDynamicCastExprClass:
  case Stmt::CXXConstCastExprClass:
  case Stmt::CXXReinterpretCastExprClass: {
      Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr();
      if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
        return EvalAddr(S, refVars);
      else
        return NULL;
  }

  // Everything else: we simply don't reason about them.
  default:
    return NULL;
  }
}


///  EvalVal - This function complements EvalAddr in the mutual recursion.
///   See the comments for EvalAddr for more details.
///
///   Returns the problematic expression, or NULL when none was found.
///   Reference variables that were followed are appended to 'refVars'.
static Expr *EvalVal(Expr *E, llvm::SmallVectorImpl<DeclRefExpr *> &refVars) {
// The loop lets the lvalue implicit-cast case strip the cast and re-dispatch
// (via 'continue') without recursing; every other case returns.
do {
  // We should only be called for evaluating non-pointer expressions, or
  // expressions with a pointer type that are not used as references but instead
  // are l-values (e.g., DeclRefExpr with a pointer type).

  // Our "symbolic interpreter" is just a dispatch off the currently
  // viewed AST node.  We then recursively traverse the AST by calling
  // EvalAddr and EvalVal appropriately.

  E = E->IgnoreParens();
  switch (E->getStmtClass()) {
  case Stmt::ImplicitCastExprClass: {
    ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E);
    if (IE->getValueKind() == VK_LValue) {
      E = IE->getSubExpr();
      continue;
    }
    return NULL;
  }

  case Stmt::DeclRefExprClass: {
    // When we hit a DeclRefExpr we are looking at code that refers to a
    // variable's name. If it's not a reference variable we check if it has
    // local storage within the function, and if so, return the expression.
    DeclRefExpr *DR = cast<DeclRefExpr>(E);

    if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
      if (V->hasLocalStorage()) {
        if (!V->getType()->isReferenceType())
          return DR;

        // Reference variable, follow through to the expression that
        // it points to.
        if (V->hasInit()) {
          // Add the reference variable to the "trail".
          refVars.push_back(DR);
          return EvalVal(V->getInit(), refVars);
        }
      }

    return NULL;
  }

  case Stmt::UnaryOperatorClass: {
    // The only unary operator that makes sense to handle here
    // is Deref.  All others don't resolve to a "name."  This includes
    // handling all sorts of rvalues passed to a unary operator.
    UnaryOperator *U = cast<UnaryOperator>(E);

    if (U->getOpcode() == UO_Deref)
      return EvalAddr(U->getSubExpr(), refVars);

    return NULL;
  }

  case Stmt::ArraySubscriptExprClass: {
    // Array subscripts are potential references to data on the stack.  We
    // evaluate the base of the subscript as an address to find out whether
    // it refers to something with local storage.
    return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase(), refVars);
  }

  case Stmt::ConditionalOperatorClass: {
    // For conditional operators we need to see if either the LHS or RHS are
    // non-NULL Expr's.  If one is non-NULL, we return it.
    ConditionalOperator *C = cast<ConditionalOperator>(E);

    // Handle the GNU extension for missing LHS.
    if (Expr *lhsExpr = C->getLHS())
      if (Expr *LHS = EvalVal(lhsExpr, refVars))
        return LHS;

    return EvalVal(C->getRHS(), refVars);
  }

  // Accesses to members are potential references to data on the stack.
  case Stmt::MemberExprClass: {
    MemberExpr *M = cast<MemberExpr>(E);

    // Check for indirect access.  We only want direct field accesses.
    if (M->isArrow())
      return NULL;

    // Check whether the member type is itself a reference, in which case
    // we're not going to refer to the member, but to what the member refers to.
    if (M->getMemberDecl()->getType()->isReferenceType())
      return NULL;

    return EvalVal(M->getBase(), refVars);
  }

  default:
    // Check that we don't return or take the address of a reference to a
    // temporary. This is only useful in C++.
    if (!E->isTypeDependent() && E->isRValue())
      return E;

    // Everything else: we simply don't reason about them.
    return NULL;
  }
} while (true);
}

//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//

/// Check for comparisons of floating point operands using != and ==.
/// Issue a warning if these are no self-comparisons, as they are not likely
/// to do what the programmer intended.
///
/// \param loc where the diagnostic is reported
/// \param lex the left operand
/// \param rex the right operand
void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
  bool EmitWarning = true;

  Expr* LeftExprSansParen = lex->IgnoreParenImpCasts();
  Expr* RightExprSansParen = rex->IgnoreParenImpCasts();

  // Special case: check for x == x (which is OK).
  // Do not emit warnings for such cases.
  if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
    if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
      if (DRL->getDecl() == DRR->getDecl())
        EmitWarning = false;


  // Special case: check for comparisons against literals that can be exactly
  //  represented by APFloat.  In such cases, do not emit a warning.  This
  //  is a heuristic: often comparison against such literals are used to
  //  detect if a value in a variable has not changed.  This clearly can
  //  lead to false negatives.
if (EmitWarning) {
    // Note: the right operand is only examined when the left operand is not
    // a floating literal at all.
    if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
      if (FLL->isExact())
        EmitWarning = false;
    } else
      if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
        if (FLR->isExact())
          EmitWarning = false;
    }
  }

  // Suppress the warning when either operand is a call to a builtin.
  if (EmitWarning)
    if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
      if (CL->isBuiltinCall(Context))
        EmitWarning = false;

  if (EmitWarning)
    if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
      if (CR->isBuiltinCall(Context))
        EmitWarning = false;

  // Emit the diagnostic.
  if (EmitWarning)
    Diag(loc, diag::warn_floatingpoint_eq)
      << lex->getSourceRange() << rex->getSourceRange();
}

//===--- CHECK: Integer mixed-sign comparisons (-Wsign-compare) --------===//
//===--- CHECK: Lossy implicit conversions (-Wconversion) --------------===//

namespace {

/// Structure recording the 'active' range of an integer-valued
/// expression.
struct IntRange {
  /// The number of bits active in the int.
  unsigned Width;

  /// True if the int is known not to have negative values.
  bool NonNegative;

  IntRange(unsigned Width, bool NonNegative)
    : Width(Width), NonNegative(NonNegative)
  {}

  /// Returns the range of the bool type: one bit, non-negative.
  static IntRange forBoolType() {
    return IntRange(1, true);
  }

  /// Returns the range of an opaque value of the given integral type.
  /// The type is canonicalized before being handed on.
  static IntRange forValueOfType(ASTContext &C, QualType T) {
    return forValueOfCanonicalType(C,
                          T->getCanonicalTypeInternal().getTypePtr());
  }

  /// Returns the range of an opaque value of a canonical integral type.
  /// Vector and complex types are looked through to their element type.
  static IntRange forValueOfCanonicalType(ASTContext &C, const Type *T) {
    assert(T->isCanonicalUnqualified());

    if (const VectorType *VT = dyn_cast<VectorType>(T))
      T = VT->getElementType().getTypePtr();
    if (const ComplexType *CT = dyn_cast<ComplexType>(T))
      T = CT->getElementType().getTypePtr();

    // For enum types, use the known bit width of the enumerators.
    if (const EnumType *ET = dyn_cast<EnumType>(T)) {
      EnumDecl *Enum = ET->getDecl();
      // Without a definition, fall back to the full width of the type and
      // assume it may be negative.
      if (!Enum->isDefinition())
        return IntRange(C.getIntWidth(QualType(T, 0)), false);

      unsigned NumPositive = Enum->getNumPositiveBits();
      unsigned NumNegative = Enum->getNumNegativeBits();

      return IntRange(std::max(NumPositive, NumNegative), NumNegative == 0);
    }

    const BuiltinType *BT = cast<BuiltinType>(T);
    assert(BT->isInteger());

    return IntRange(C.getIntWidth(QualType(T, 0)), BT->isUnsignedInteger());
  }

  /// Returns the "target" range of a canonical integral type, i.e.
  /// the range of values expressible in the type.
  ///
  /// This matches forValueOfCanonicalType except that enums have the
  /// full range of their type, not the range of their enumerators.
  static IntRange forTargetOfCanonicalType(ASTContext &C, const Type *T) {
    assert(T->isCanonicalUnqualified());

    if (const VectorType *VT = dyn_cast<VectorType>(T))
      T = VT->getElementType().getTypePtr();
    if (const ComplexType *CT = dyn_cast<ComplexType>(T))
      T = CT->getElementType().getTypePtr();
    if (const EnumType *ET = dyn_cast<EnumType>(T))
      T = ET->getDecl()->getIntegerType().getTypePtr();

    const BuiltinType *BT = cast<BuiltinType>(T);
    assert(BT->isInteger());

    return IntRange(C.getIntWidth(QualType(T, 0)), BT->isUnsignedInteger());
  }

  /// Returns the supremum of two ranges: i.e. their conservative merge.
  /// The result is as wide as the wider input and is non-negative only if
  /// both inputs are.
  static IntRange join(IntRange L, IntRange R) {
    return IntRange(std::max(L.Width, R.Width),
                    L.NonNegative && R.NonNegative);
  }

  /// Returns the infimum of two ranges: i.e. their aggressive merge.
  /// The result is as wide as the narrower input and is non-negative if
  /// either input is.
  static IntRange meet(IntRange L, IntRange R) {
    return IntRange(std::min(L.Width, R.Width),
                    L.NonNegative || R.NonNegative);
  }
};

/// Computes the range of a constant integer value.
///
/// A negative signed value yields a possibly-negative range of
/// getMinSignedBits() bits.  Otherwise the range is non-negative and
/// getActiveBits() wide, measured after truncating the value to MaxWidth
/// bits when it is wider.
///
/// Note that \p value is taken by non-const reference and is modified in
/// place by that truncation.  \p C is not used by this overload.
IntRange GetValueRange(ASTContext &C, llvm::APSInt &value, unsigned MaxWidth) {
  if (value.isSigned() && value.isNegative())
    return IntRange(value.getMinSignedBits(), false);

  if (value.getBitWidth() > MaxWidth)
    value = value.trunc(MaxWidth);

  // isNonNegative() just checks the sign bit without considering
  // signedness.
  return IntRange(value.getActiveBits(), true);
}

/// Computes the range of an evaluated constant.
///
/// Integers use the APSInt overload; vectors join the ranges of all their
/// elements; complex integers join the real and imaginary parts.  Anything
/// else is asserted to be an lvalue and assumed to use all MaxWidth bits.
///
/// \param Ty only consulted for signedness in the lvalue case
IntRange GetValueRange(ASTContext &C, APValue &result, QualType Ty,
                       unsigned MaxWidth) {
  if (result.isInt())
    return GetValueRange(C, result.getInt(), MaxWidth);

  if (result.isVector()) {
    // NOTE(review): element 0 is read unconditionally, so this presumably
    // relies on vectors never being empty - confirm.
    IntRange R = GetValueRange(C, result.getVectorElt(0), Ty, MaxWidth);
    for (unsigned i = 1, e = result.getVectorLength(); i != e; ++i) {
      IntRange El = GetValueRange(C, result.getVectorElt(i), Ty, MaxWidth);
      R = IntRange::join(R, El);
    }
    return R;
  }

  if (result.isComplexInt()) {
    IntRange R = GetValueRange(C, result.getComplexIntReal(), MaxWidth);
    IntRange I = GetValueRange(C, result.getComplexIntImag(), MaxWidth);
    return IntRange::join(R, I);
  }

  // This can happen with lossless casts to intptr_t of "based" lvalues.
  // Assume it might use arbitrary bits.
  // FIXME: The only reason we need to pass the type in here is to get
  // the sign right on this one case.  It would be nice if APValue
  // preserved this.
2306 assert(result.isLValue()); 2307 return IntRange(MaxWidth, Ty->isUnsignedIntegerType()); 2308 } 2309 2310 /// Pseudo-evaluate the given integer expression, estimating the 2311 /// range of values it might take. 2312 /// 2313 /// \param MaxWidth - the width to which the value will be truncated 2314 IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) { 2315 E = E->IgnoreParens(); 2316 2317 // Try a full evaluation first. 2318 Expr::EvalResult result; 2319 if (E->Evaluate(result, C)) 2320 return GetValueRange(C, result.Val, E->getType(), MaxWidth); 2321 2322 // I think we only want to look through implicit casts here; if the 2323 // user has an explicit widening cast, we should treat the value as 2324 // being of the new, wider type. 2325 if (ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(E)) { 2326 if (CE->getCastKind() == CK_NoOp) 2327 return GetExprRange(C, CE->getSubExpr(), MaxWidth); 2328 2329 IntRange OutputTypeRange = IntRange::forValueOfType(C, CE->getType()); 2330 2331 bool isIntegerCast = (CE->getCastKind() == CK_IntegralCast); 2332 2333 // Assume that non-integer casts can span the full range of the type. 2334 if (!isIntegerCast) 2335 return OutputTypeRange; 2336 2337 IntRange SubRange 2338 = GetExprRange(C, CE->getSubExpr(), 2339 std::min(MaxWidth, OutputTypeRange.Width)); 2340 2341 // Bail out if the subexpr's range is as wide as the cast type. 2342 if (SubRange.Width >= OutputTypeRange.Width) 2343 return OutputTypeRange; 2344 2345 // Otherwise, we take the smaller width, and we're non-negative if 2346 // either the output type or the subexpr is. 2347 return IntRange(SubRange.Width, 2348 SubRange.NonNegative || OutputTypeRange.NonNegative); 2349 } 2350 2351 if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) { 2352 // If we can fold the condition, just take that operand. 2353 bool CondResult; 2354 if (CO->getCond()->EvaluateAsBooleanCondition(CondResult, C)) 2355 return GetExprRange(C, CondResult ? 
CO->getTrueExpr() 2356 : CO->getFalseExpr(), 2357 MaxWidth); 2358 2359 // Otherwise, conservatively merge. 2360 IntRange L = GetExprRange(C, CO->getTrueExpr(), MaxWidth); 2361 IntRange R = GetExprRange(C, CO->getFalseExpr(), MaxWidth); 2362 return IntRange::join(L, R); 2363 } 2364 2365 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) { 2366 switch (BO->getOpcode()) { 2367 2368 // Boolean-valued operations are single-bit and positive. 2369 case BO_LAnd: 2370 case BO_LOr: 2371 case BO_LT: 2372 case BO_GT: 2373 case BO_LE: 2374 case BO_GE: 2375 case BO_EQ: 2376 case BO_NE: 2377 return IntRange::forBoolType(); 2378 2379 // The type of these compound assignments is the type of the LHS, 2380 // so the RHS is not necessarily an integer. 2381 case BO_MulAssign: 2382 case BO_DivAssign: 2383 case BO_RemAssign: 2384 case BO_AddAssign: 2385 case BO_SubAssign: 2386 return IntRange::forValueOfType(C, E->getType()); 2387 2388 // Operations with opaque sources are black-listed. 2389 case BO_PtrMemD: 2390 case BO_PtrMemI: 2391 return IntRange::forValueOfType(C, E->getType()); 2392 2393 // Bitwise-and uses the *infinum* of the two source ranges. 2394 case BO_And: 2395 case BO_AndAssign: 2396 return IntRange::meet(GetExprRange(C, BO->getLHS(), MaxWidth), 2397 GetExprRange(C, BO->getRHS(), MaxWidth)); 2398 2399 // Left shift gets black-listed based on a judgement call. 2400 case BO_Shl: 2401 // ...except that we want to treat '1 << (blah)' as logically 2402 // positive. It's an important idiom. 2403 if (IntegerLiteral *I 2404 = dyn_cast<IntegerLiteral>(BO->getLHS()->IgnoreParenCasts())) { 2405 if (I->getValue() == 1) { 2406 IntRange R = IntRange::forValueOfType(C, E->getType()); 2407 return IntRange(R.Width, /*NonNegative*/ true); 2408 } 2409 } 2410 // fallthrough 2411 2412 case BO_ShlAssign: 2413 return IntRange::forValueOfType(C, E->getType()); 2414 2415 // Right shift by a constant can narrow its left argument. 
2416 case BO_Shr: 2417 case BO_ShrAssign: { 2418 IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth); 2419 2420 // If the shift amount is a positive constant, drop the width by 2421 // that much. 2422 llvm::APSInt shift; 2423 if (BO->getRHS()->isIntegerConstantExpr(shift, C) && 2424 shift.isNonNegative()) { 2425 unsigned zext = shift.getZExtValue(); 2426 if (zext >= L.Width) 2427 L.Width = (L.NonNegative ? 0 : 1); 2428 else 2429 L.Width -= zext; 2430 } 2431 2432 return L; 2433 } 2434 2435 // Comma acts as its right operand. 2436 case BO_Comma: 2437 return GetExprRange(C, BO->getRHS(), MaxWidth); 2438 2439 // Black-list pointer subtractions. 2440 case BO_Sub: 2441 if (BO->getLHS()->getType()->isPointerType()) 2442 return IntRange::forValueOfType(C, E->getType()); 2443 // fallthrough 2444 2445 default: 2446 break; 2447 } 2448 2449 // Treat every other operator as if it were closed on the 2450 // narrowest type that encompasses both operands. 2451 IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth); 2452 IntRange R = GetExprRange(C, BO->getRHS(), MaxWidth); 2453 return IntRange::join(L, R); 2454 } 2455 2456 if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) { 2457 switch (UO->getOpcode()) { 2458 // Boolean-valued operations are white-listed. 2459 case UO_LNot: 2460 return IntRange::forBoolType(); 2461 2462 // Operations with opaque sources are black-listed. 
2463 case UO_Deref: 2464 case UO_AddrOf: // should be impossible 2465 return IntRange::forValueOfType(C, E->getType()); 2466 2467 default: 2468 return GetExprRange(C, UO->getSubExpr(), MaxWidth); 2469 } 2470 } 2471 2472 if (dyn_cast<OffsetOfExpr>(E)) { 2473 IntRange::forValueOfType(C, E->getType()); 2474 } 2475 2476 FieldDecl *BitField = E->getBitField(); 2477 if (BitField) { 2478 llvm::APSInt BitWidthAP = BitField->getBitWidth()->EvaluateAsInt(C); 2479 unsigned BitWidth = BitWidthAP.getZExtValue(); 2480 2481 return IntRange(BitWidth, BitField->getType()->isUnsignedIntegerType()); 2482 } 2483 2484 return IntRange::forValueOfType(C, E->getType()); 2485 } 2486 2487 IntRange GetExprRange(ASTContext &C, Expr *E) { 2488 return GetExprRange(C, E, C.getIntWidth(E->getType())); 2489 } 2490 2491 /// Checks whether the given value, which currently has the given 2492 /// source semantics, has the same value when coerced through the 2493 /// target semantics. 2494 bool IsSameFloatAfterCast(const llvm::APFloat &value, 2495 const llvm::fltSemantics &Src, 2496 const llvm::fltSemantics &Tgt) { 2497 llvm::APFloat truncated = value; 2498 2499 bool ignored; 2500 truncated.convert(Src, llvm::APFloat::rmNearestTiesToEven, &ignored); 2501 truncated.convert(Tgt, llvm::APFloat::rmNearestTiesToEven, &ignored); 2502 2503 return truncated.bitwiseIsEqual(value); 2504 } 2505 2506 /// Checks whether the given value, which currently has the given 2507 /// source semantics, has the same value when coerced through the 2508 /// target semantics. 2509 /// 2510 /// The value might be a vector of floats (or a complex number). 
2511 bool IsSameFloatAfterCast(const APValue &value, 2512 const llvm::fltSemantics &Src, 2513 const llvm::fltSemantics &Tgt) { 2514 if (value.isFloat()) 2515 return IsSameFloatAfterCast(value.getFloat(), Src, Tgt); 2516 2517 if (value.isVector()) { 2518 for (unsigned i = 0, e = value.getVectorLength(); i != e; ++i) 2519 if (!IsSameFloatAfterCast(value.getVectorElt(i), Src, Tgt)) 2520 return false; 2521 return true; 2522 } 2523 2524 assert(value.isComplexFloat()); 2525 return (IsSameFloatAfterCast(value.getComplexFloatReal(), Src, Tgt) && 2526 IsSameFloatAfterCast(value.getComplexFloatImag(), Src, Tgt)); 2527 } 2528 2529 void AnalyzeImplicitConversions(Sema &S, Expr *E, SourceLocation CC); 2530 2531 static bool IsZero(Sema &S, Expr *E) { 2532 // Suppress cases where we are comparing against an enum constant. 2533 if (const DeclRefExpr *DR = 2534 dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) 2535 if (isa<EnumConstantDecl>(DR->getDecl())) 2536 return false; 2537 2538 // Suppress cases where the '0' value is expanded from a macro. 2539 if (E->getLocStart().isMacroID()) 2540 return false; 2541 2542 llvm::APSInt Value; 2543 return E->isIntegerConstantExpr(Value, S.Context) && Value == 0; 2544 } 2545 2546 static bool HasEnumType(Expr *E) { 2547 // Strip off implicit integral promotions. 
2548 while (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E)) { 2549 if (ICE->getCastKind() != CK_IntegralCast && 2550 ICE->getCastKind() != CK_NoOp) 2551 break; 2552 E = ICE->getSubExpr(); 2553 } 2554 2555 return E->getType()->isEnumeralType(); 2556 } 2557 2558 void CheckTrivialUnsignedComparison(Sema &S, BinaryOperator *E) { 2559 BinaryOperatorKind op = E->getOpcode(); 2560 if (E->isValueDependent()) 2561 return; 2562 2563 if (op == BO_LT && IsZero(S, E->getRHS())) { 2564 S.Diag(E->getOperatorLoc(), diag::warn_lunsigned_always_true_comparison) 2565 << "< 0" << "false" << HasEnumType(E->getLHS()) 2566 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2567 } else if (op == BO_GE && IsZero(S, E->getRHS())) { 2568 S.Diag(E->getOperatorLoc(), diag::warn_lunsigned_always_true_comparison) 2569 << ">= 0" << "true" << HasEnumType(E->getLHS()) 2570 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2571 } else if (op == BO_GT && IsZero(S, E->getLHS())) { 2572 S.Diag(E->getOperatorLoc(), diag::warn_runsigned_always_true_comparison) 2573 << "0 >" << "false" << HasEnumType(E->getRHS()) 2574 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2575 } else if (op == BO_LE && IsZero(S, E->getLHS())) { 2576 S.Diag(E->getOperatorLoc(), diag::warn_runsigned_always_true_comparison) 2577 << "0 <=" << "true" << HasEnumType(E->getRHS()) 2578 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2579 } 2580 } 2581 2582 /// Analyze the operands of the given comparison. Implements the 2583 /// fallback case from AnalyzeComparison. 2584 void AnalyzeImpConvsInComparison(Sema &S, BinaryOperator *E) { 2585 AnalyzeImplicitConversions(S, E->getLHS(), E->getOperatorLoc()); 2586 AnalyzeImplicitConversions(S, E->getRHS(), E->getOperatorLoc()); 2587 } 2588 2589 /// \brief Implements -Wsign-compare. 
///
/// Warns about comparisons performed in an unsigned type where one operand
/// is a signed expression that may be negative.  Operands are also run
/// through the implicit-conversion analysis, and comparisons that are
/// trivially true or false are checked where the sign-compare warning
/// does not apply.
///
/// \param S the semantic analysis object used for diagnostics
/// \param E the comparison operator being analysed
void AnalyzeComparison(Sema &S, BinaryOperator *E) {
  // The type the comparison is being performed in.
  QualType T = E->getLHS()->getType();
  assert(S.Context.hasSameUnqualifiedType(T, E->getRHS()->getType())
         && "comparison with mismatched types");

  // We don't do anything special if this isn't an unsigned integral
  // comparison:  we're only interested in integral comparisons, and
  // signed comparisons only happen in cases we don't care to warn about.
  //
  // We also don't care about value-dependent expressions or expressions
  // whose result is a constant.
  if (!T->hasUnsignedIntegerRepresentation()
      || E->isValueDependent() || E->isIntegerConstantExpr(S.Context))
    return AnalyzeImpConvsInComparison(S, E);

  // The operands as written, with parentheses and implicit casts removed.
  Expr *lex = E->getLHS()->IgnoreParenImpCasts();
  Expr *rex = E->getRHS()->IgnoreParenImpCasts();

  // Check to see if one of the (unmodified) operands is of different
  // signedness.
  Expr *signedOperand, *unsignedOperand;
  if (lex->getType()->hasSignedIntegerRepresentation()) {
    assert(!rex->getType()->hasSignedIntegerRepresentation() &&
           "unsigned comparison between two signed integer expressions?");
    signedOperand = lex;
    unsignedOperand = rex;
  } else if (rex->getType()->hasSignedIntegerRepresentation()) {
    signedOperand = rex;
    unsignedOperand = lex;
  } else {
    // Neither operand is signed: only the trivial-comparison check applies.
    CheckTrivialUnsignedComparison(S, E);
    return AnalyzeImpConvsInComparison(S, E);
  }

  // Otherwise, calculate the effective range of the signed operand.
  IntRange signedRange = GetExprRange(S.Context, signedOperand);

  // Go ahead and analyze implicit conversions in the operands.  Note
  // that we skip the implicit conversions on both sides.
  AnalyzeImplicitConversions(S, lex, E->getOperatorLoc());
  AnalyzeImplicitConversions(S, rex, E->getOperatorLoc());

  // If the signed range is non-negative, -Wsign-compare won't fire,
  // but we should still check for comparisons which are always true
  // or false.
  if (signedRange.NonNegative)
    return CheckTrivialUnsignedComparison(S, E);

  // For (in)equality comparisons, if the unsigned operand is a
  // constant which cannot collide with an overflowed signed operand,
  // then reinterpreting the signed operand as unsigned will not
  // change the result of the comparison.
  if (E->isEqualityOp()) {
    unsigned comparisonWidth = S.Context.getIntWidth(T);
    IntRange unsignedRange = GetExprRange(S.Context, unsignedOperand);

    // We should never be unable to prove that the unsigned operand is
    // non-negative.
    assert(unsignedRange.NonNegative && "unsigned range includes negative?");

    if (unsignedRange.Width < comparisonWidth)
      return;
  }

  S.Diag(E->getOperatorLoc(), diag::warn_mixed_sign_comparison)
    << lex->getType() << rex->getType()
    << lex->getSourceRange() << rex->getSourceRange();
}

/// Analyzes an attempt to assign the given value to a bitfield.
///
/// Warns when \p Init evaluates to an integer constant that does not
/// survive truncation to the field's width.
///
/// \param S the semantic analysis object used for diagnostics
/// \param Bitfield the bit-field being assigned to
/// \param Init the value being assigned
/// \param InitLoc where the diagnostic is reported
///
/// Returns true if there was something fishy about the attempt.
bool AnalyzeBitFieldAssignment(Sema &S, FieldDecl *Bitfield, Expr *Init,
                               SourceLocation InitLoc) {
  assert(Bitfield->isBitField());
  if (Bitfield->isInvalidDecl())
    return false;

  // White-list bool bitfields.
  if (Bitfield->getType()->isBooleanType())
    return false;

  // Ignore value- or type-dependent expressions.
  if (Bitfield->getBitWidth()->isValueDependent() ||
      Bitfield->getBitWidth()->isTypeDependent() ||
      Init->isValueDependent() ||
      Init->isTypeDependent())
    return false;

  Expr *OriginalInit = Init->IgnoreParenImpCasts();

  // Both the field width and the initializer must fold to integer constants;
  // otherwise there is nothing to check.
  llvm::APSInt Width(32);
  Expr::EvalResult InitValue;
  if (!Bitfield->getBitWidth()->isIntegerConstantExpr(Width, S.Context) ||
      !OriginalInit->Evaluate(InitValue, S.Context) ||
      !InitValue.Val.isInt())
    return false;

  const llvm::APSInt &Value = InitValue.Val.getInt();
  unsigned OriginalWidth = Value.getBitWidth();
  unsigned FieldWidth = Width.getZExtValue();

  // A field at least as wide as the value cannot lose bits.
  if (OriginalWidth <= FieldWidth)
    return false;

  llvm::APSInt TruncatedValue = Value.trunc(FieldWidth);

  // It's fairly common to write values into signed bitfields
  // that, if sign-extended, would end up becoming a different
  // value.  We don't want to warn about that.
  if (Value.isSigned() && Value.isNegative())
    TruncatedValue = TruncatedValue.sext(OriginalWidth);
  else
    TruncatedValue = TruncatedValue.zext(OriginalWidth);

  // The value round-trips through the field unchanged: nothing to report.
  if (Value == TruncatedValue)
    return false;

  std::string PrettyValue = Value.toString(10);
  std::string PrettyTrunc = TruncatedValue.toString(10);

  S.Diag(InitLoc, diag::warn_impcast_bitfield_precision_constant)
    << PrettyValue << PrettyTrunc << OriginalInit->getType()
    << Init->getSourceRange();

  return true;
}

/// Analyze the given simple or compound assignment for warning-worthy
/// operations.
void AnalyzeAssignment(Sema &S, BinaryOperator *E) {
  // Just recurse on the LHS.
  AnalyzeImplicitConversions(S, E->getLHS(), E->getOperatorLoc());

  // We want to recurse on the RHS as normal unless we're assigning to
  // a bitfield.
2732 if (FieldDecl *Bitfield = E->getLHS()->getBitField()) { 2733 if (AnalyzeBitFieldAssignment(S, Bitfield, E->getRHS(), 2734 E->getOperatorLoc())) { 2735 // Recurse, ignoring any implicit conversions on the RHS. 2736 return AnalyzeImplicitConversions(S, E->getRHS()->IgnoreParenImpCasts(), 2737 E->getOperatorLoc()); 2738 } 2739 } 2740 2741 AnalyzeImplicitConversions(S, E->getRHS(), E->getOperatorLoc()); 2742 } 2743 2744 /// Diagnose an implicit cast; purely a helper for CheckImplicitConversion. 2745 void DiagnoseImpCast(Sema &S, Expr *E, QualType SourceType, QualType T, 2746 SourceLocation CContext, unsigned diag) { 2747 S.Diag(E->getExprLoc(), diag) 2748 << SourceType << T << E->getSourceRange() << SourceRange(CContext); 2749 } 2750 2751 /// Diagnose an implicit cast; purely a helper for CheckImplicitConversion. 2752 void DiagnoseImpCast(Sema &S, Expr *E, QualType T, SourceLocation CContext, 2753 unsigned diag) { 2754 DiagnoseImpCast(S, E, E->getType(), T, CContext, diag); 2755 } 2756 2757 /// Diagnose an implicit cast from a literal expression. Also attemps to supply 2758 /// fixit hints when the cast wouldn't lose information to simply write the 2759 /// expression with the expected type. 2760 void DiagnoseFloatingLiteralImpCast(Sema &S, FloatingLiteral *FL, QualType T, 2761 SourceLocation CContext) { 2762 // Emit the primary warning first, then try to emit a fixit hint note if 2763 // reasonable. 2764 S.Diag(FL->getExprLoc(), diag::warn_impcast_literal_float_to_integer) 2765 << FL->getType() << T << FL->getSourceRange() << SourceRange(CContext); 2766 2767 const llvm::APFloat &Value = FL->getValue(); 2768 2769 // Don't attempt to fix PPC double double literals. 2770 if (&Value.getSemantics() == &llvm::APFloat::PPCDoubleDouble) 2771 return; 2772 2773 // Try to convert this exactly to an 64-bit integer. FIXME: It would be 2774 // nice to support arbitrarily large integers here. 
2775 bool isExact = false; 2776 uint64_t IntegerPart; 2777 if (Value.convertToInteger(&IntegerPart, 64, /*isSigned=*/true, 2778 llvm::APFloat::rmTowardZero, &isExact) 2779 != llvm::APFloat::opOK || !isExact) 2780 return; 2781 2782 llvm::APInt IntegerValue(64, IntegerPart, /*isSigned=*/true); 2783 2784 std::string LiteralValue = IntegerValue.toString(10, /*isSigned=*/true); 2785 S.Diag(FL->getExprLoc(), diag::note_fix_integral_float_as_integer) 2786 << FixItHint::CreateReplacement(FL->getSourceRange(), LiteralValue); 2787 } 2788 2789 std::string PrettyPrintInRange(const llvm::APSInt &Value, IntRange Range) { 2790 if (!Range.Width) return "0"; 2791 2792 llvm::APSInt ValueInRange = Value; 2793 ValueInRange.setIsSigned(!Range.NonNegative); 2794 ValueInRange = ValueInRange.trunc(Range.Width); 2795 return ValueInRange.toString(10); 2796 } 2797 2798 static bool isFromSystemMacro(Sema &S, SourceLocation loc) { 2799 SourceManager &smgr = S.Context.getSourceManager(); 2800 return loc.isMacroID() && smgr.isInSystemHeader(smgr.getSpellingLoc(loc)); 2801 } 2802 2803 void CheckImplicitConversion(Sema &S, Expr *E, QualType T, 2804 SourceLocation CC, bool *ICContext = 0) { 2805 if (E->isTypeDependent() || E->isValueDependent()) return; 2806 2807 const Type *Source = S.Context.getCanonicalType(E->getType()).getTypePtr(); 2808 const Type *Target = S.Context.getCanonicalType(T).getTypePtr(); 2809 if (Source == Target) return; 2810 if (Target->isDependentType()) return; 2811 2812 // If the conversion context location is invalid don't complain. 2813 // We also don't want to emit a warning if the issue occurs from the 2814 // instantiation of a system macro. The problem is that 'getSpellingLoc()' 2815 // is slow, so we delay this check as long as possible. Once we detect 2816 // we are in that scenario, we just return. 2817 if (CC.isInvalid()) 2818 return; 2819 2820 // Never diagnose implicit casts to bool. 
2821 if (Target->isSpecificBuiltinType(BuiltinType::Bool)) 2822 return; 2823 2824 // Strip vector types. 2825 if (isa<VectorType>(Source)) { 2826 if (!isa<VectorType>(Target)) { 2827 if (isFromSystemMacro(S, CC)) 2828 return; 2829 return DiagnoseImpCast(S, E, T, CC, diag::warn_impcast_vector_scalar); 2830 } 2831 2832 Source = cast<VectorType>(Source)->getElementType().getTypePtr(); 2833 Target = cast<VectorType>(Target)->getElementType().getTypePtr(); 2834 } 2835 2836 // Strip complex types. 2837 if (isa<ComplexType>(Source)) { 2838 if (!isa<ComplexType>(Target)) { 2839 if (isFromSystemMacro(S, CC)) 2840 return; 2841 2842 return DiagnoseImpCast(S, E, T, CC, diag::warn_impcast_complex_scalar); 2843 } 2844 2845 Source = cast<ComplexType>(Source)->getElementType().getTypePtr(); 2846 Target = cast<ComplexType>(Target)->getElementType().getTypePtr(); 2847 } 2848 2849 const BuiltinType *SourceBT = dyn_cast<BuiltinType>(Source); 2850 const BuiltinType *TargetBT = dyn_cast<BuiltinType>(Target); 2851 2852 // If the source is floating point... 2853 if (SourceBT && SourceBT->isFloatingPoint()) { 2854 // ...and the target is floating point... 2855 if (TargetBT && TargetBT->isFloatingPoint()) { 2856 // ...then warn if we're dropping FP rank. 2857 2858 // Builtin FP kinds are ordered by increasing FP rank. 2859 if (SourceBT->getKind() > TargetBT->getKind()) { 2860 // Don't warn about float constants that are precisely 2861 // representable in the target type. 2862 Expr::EvalResult result; 2863 if (E->Evaluate(result, S.Context)) { 2864 // Value might be a float, a float vector, or a float complex. 
2865 if (IsSameFloatAfterCast(result.Val, 2866 S.Context.getFloatTypeSemantics(QualType(TargetBT, 0)), 2867 S.Context.getFloatTypeSemantics(QualType(SourceBT, 0)))) 2868 return; 2869 } 2870 2871 if (isFromSystemMacro(S, CC)) 2872 return; 2873 2874 DiagnoseImpCast(S, E, T, CC, diag::warn_impcast_float_precision); 2875 } 2876 return; 2877 } 2878 2879 // If the target is integral, always warn. 2880 if ((TargetBT && TargetBT->isInteger())) { 2881 if (isFromSystemMacro(S, CC)) 2882 return; 2883 2884 Expr *InnerE = E->IgnoreParenImpCasts(); 2885 if (FloatingLiteral *FL = dyn_cast<FloatingLiteral>(InnerE)) { 2886 DiagnoseFloatingLiteralImpCast(S, FL, T, CC); 2887 } else { 2888 DiagnoseImpCast(S, E, T, CC, diag::warn_impcast_float_integer); 2889 } 2890 } 2891 2892 return; 2893 } 2894 2895 if (!Source->isIntegerType() || !Target->isIntegerType()) 2896 return; 2897 2898 IntRange SourceRange = GetExprRange(S.Context, E); 2899 IntRange TargetRange = IntRange::forTargetOfCanonicalType(S.Context, Target); 2900 2901 if (SourceRange.Width > TargetRange.Width) { 2902 // If the source is a constant, use a default-on diagnostic. 2903 // TODO: this should happen for bitfield stores, too. 2904 llvm::APSInt Value(32); 2905 if (E->isIntegerConstantExpr(Value, S.Context)) { 2906 if (isFromSystemMacro(S, CC)) 2907 return; 2908 2909 std::string PrettySourceValue = Value.toString(10); 2910 std::string PrettyTargetValue = PrettyPrintInRange(Value, TargetRange); 2911 2912 S.Diag(E->getExprLoc(), diag::warn_impcast_integer_precision_constant) 2913 << PrettySourceValue << PrettyTargetValue 2914 << E->getType() << T << E->getSourceRange() << clang::SourceRange(CC); 2915 return; 2916 } 2917 2918 // People want to build with -Wshorten-64-to-32 and not -Wconversion 2919 // and by god we'll let them. 
2920 2921 if (isFromSystemMacro(S, CC)) 2922 return; 2923 2924 if (SourceRange.Width == 64 && TargetRange.Width == 32) 2925 return DiagnoseImpCast(S, E, T, CC, diag::warn_impcast_integer_64_32); 2926 return DiagnoseImpCast(S, E, T, CC, diag::warn_impcast_integer_precision); 2927 } 2928 2929 if ((TargetRange.NonNegative && !SourceRange.NonNegative) || 2930 (!TargetRange.NonNegative && SourceRange.NonNegative && 2931 SourceRange.Width == TargetRange.Width)) { 2932 2933 if (isFromSystemMacro(S, CC)) 2934 return; 2935 2936 unsigned DiagID = diag::warn_impcast_integer_sign; 2937 2938 // Traditionally, gcc has warned about this under -Wsign-compare. 2939 // We also want to warn about it in -Wconversion. 2940 // So if -Wconversion is off, use a completely identical diagnostic 2941 // in the sign-compare group. 2942 // The conditional-checking code will 2943 if (ICContext) { 2944 DiagID = diag::warn_impcast_integer_sign_conditional; 2945 *ICContext = true; 2946 } 2947 2948 return DiagnoseImpCast(S, E, T, CC, DiagID); 2949 } 2950 2951 // Diagnose conversions between different enumeration types. 2952 // In C, we pretend that the type of an EnumConstantDecl is its enumeration 2953 // type, to give us better diagnostics. 
2954 QualType SourceType = E->getType(); 2955 if (!S.getLangOptions().CPlusPlus) { 2956 if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) 2957 if (EnumConstantDecl *ECD = dyn_cast<EnumConstantDecl>(DRE->getDecl())) { 2958 EnumDecl *Enum = cast<EnumDecl>(ECD->getDeclContext()); 2959 SourceType = S.Context.getTypeDeclType(Enum); 2960 Source = S.Context.getCanonicalType(SourceType).getTypePtr(); 2961 } 2962 } 2963 2964 if (const EnumType *SourceEnum = Source->getAs<EnumType>()) 2965 if (const EnumType *TargetEnum = Target->getAs<EnumType>()) 2966 if ((SourceEnum->getDecl()->getIdentifier() || 2967 SourceEnum->getDecl()->getTypedefForAnonDecl()) && 2968 (TargetEnum->getDecl()->getIdentifier() || 2969 TargetEnum->getDecl()->getTypedefForAnonDecl()) && 2970 SourceEnum != TargetEnum) { 2971 if (isFromSystemMacro(S, CC)) 2972 return; 2973 2974 return DiagnoseImpCast(S, E, SourceType, T, CC, 2975 diag::warn_impcast_different_enum_types); 2976 } 2977 2978 return; 2979 } 2980 2981 void CheckConditionalOperator(Sema &S, ConditionalOperator *E, QualType T); 2982 2983 void CheckConditionalOperand(Sema &S, Expr *E, QualType T, 2984 SourceLocation CC, bool &ICContext) { 2985 E = E->IgnoreParenImpCasts(); 2986 2987 if (isa<ConditionalOperator>(E)) 2988 return CheckConditionalOperator(S, cast<ConditionalOperator>(E), T); 2989 2990 AnalyzeImplicitConversions(S, E, CC); 2991 if (E->getType() != T) 2992 return CheckImplicitConversion(S, E, T, CC, &ICContext); 2993 return; 2994 } 2995 2996 void CheckConditionalOperator(Sema &S, ConditionalOperator *E, QualType T) { 2997 SourceLocation CC = E->getQuestionLoc(); 2998 2999 AnalyzeImplicitConversions(S, E->getCond(), CC); 3000 3001 bool Suspicious = false; 3002 CheckConditionalOperand(S, E->getTrueExpr(), T, CC, Suspicious); 3003 CheckConditionalOperand(S, E->getFalseExpr(), T, CC, Suspicious); 3004 3005 // If -Wconversion would have warned about either of the candidates 3006 // for a signedness conversion to the context type... 
3007 if (!Suspicious) return; 3008 3009 // ...but it's currently ignored... 3010 if (S.Diags.getDiagnosticLevel(diag::warn_impcast_integer_sign_conditional, 3011 CC)) 3012 return; 3013 3014 // ...and -Wsign-compare isn't... 3015 if (!S.Diags.getDiagnosticLevel(diag::warn_mixed_sign_conditional, CC)) 3016 return; 3017 3018 // ...then check whether it would have warned about either of the 3019 // candidates for a signedness conversion to the condition type. 3020 if (E->getType() != T) { 3021 Suspicious = false; 3022 CheckImplicitConversion(S, E->getTrueExpr()->IgnoreParenImpCasts(), 3023 E->getType(), CC, &Suspicious); 3024 if (!Suspicious) 3025 CheckImplicitConversion(S, E->getFalseExpr()->IgnoreParenImpCasts(), 3026 E->getType(), CC, &Suspicious); 3027 if (!Suspicious) 3028 return; 3029 } 3030 3031 // If so, emit a diagnostic under -Wsign-compare. 3032 Expr *lex = E->getTrueExpr()->IgnoreParenImpCasts(); 3033 Expr *rex = E->getFalseExpr()->IgnoreParenImpCasts(); 3034 S.Diag(E->getQuestionLoc(), diag::warn_mixed_sign_conditional) 3035 << lex->getType() << rex->getType() 3036 << lex->getSourceRange() << rex->getSourceRange(); 3037 } 3038 3039 /// AnalyzeImplicitConversions - Find and report any interesting 3040 /// implicit conversions in the given expression. There are a couple 3041 /// of competing diagnostics here, -Wconversion and -Wsign-compare. 3042 void AnalyzeImplicitConversions(Sema &S, Expr *OrigE, SourceLocation CC) { 3043 QualType T = OrigE->getType(); 3044 Expr *E = OrigE->IgnoreParenImpCasts(); 3045 3046 // For conditional operators, we analyze the arguments as if they 3047 // were being fed directly into the output. 3048 if (isa<ConditionalOperator>(E)) { 3049 ConditionalOperator *CO = cast<ConditionalOperator>(E); 3050 CheckConditionalOperator(S, CO, T); 3051 return; 3052 } 3053 3054 // Go ahead and check any implicit conversions we might have skipped. 
3055 // The non-canonical typecheck is just an optimization; 3056 // CheckImplicitConversion will filter out dead implicit conversions. 3057 if (E->getType() != T) 3058 CheckImplicitConversion(S, E, T, CC); 3059 3060 // Now continue drilling into this expression. 3061 3062 // Skip past explicit casts. 3063 if (isa<ExplicitCastExpr>(E)) { 3064 E = cast<ExplicitCastExpr>(E)->getSubExpr()->IgnoreParenImpCasts(); 3065 return AnalyzeImplicitConversions(S, E, CC); 3066 } 3067 3068 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) { 3069 // Do a somewhat different check with comparison operators. 3070 if (BO->isComparisonOp()) 3071 return AnalyzeComparison(S, BO); 3072 3073 // And with assignments and compound assignments. 3074 if (BO->isAssignmentOp()) 3075 return AnalyzeAssignment(S, BO); 3076 } 3077 3078 // These break the otherwise-useful invariant below. Fortunately, 3079 // we don't really need to recurse into them, because any internal 3080 // expressions should have been analyzed already when they were 3081 // built into statements. 3082 if (isa<StmtExpr>(E)) return; 3083 3084 // Don't descend into unevaluated contexts. 3085 if (isa<UnaryExprOrTypeTraitExpr>(E)) return; 3086 3087 // Now just recurse over the expression's children. 3088 CC = E->getExprLoc(); 3089 for (Stmt::child_range I = E->children(); I; ++I) 3090 AnalyzeImplicitConversions(S, cast<Expr>(*I), CC); 3091 } 3092 3093 } // end anonymous namespace 3094 3095 /// Diagnoses "dangerous" implicit conversions within the given 3096 /// expression (which is a full expression). Implements -Wconversion 3097 /// and -Wsign-compare. 3098 /// 3099 /// \param CC the "context" location of the implicit conversion, i.e. 3100 /// the most location of the syntactic entity requiring the implicit 3101 /// conversion 3102 void Sema::CheckImplicitConversions(Expr *E, SourceLocation CC) { 3103 // Don't diagnose in unevaluated contexts. 
3104 if (ExprEvalContexts.back().Context == Sema::Unevaluated) 3105 return; 3106 3107 // Don't diagnose for value- or type-dependent expressions. 3108 if (E->isTypeDependent() || E->isValueDependent()) 3109 return; 3110 3111 // This is not the right CC for (e.g.) a variable initialization. 3112 AnalyzeImplicitConversions(*this, E, CC); 3113 } 3114 3115 void Sema::CheckBitFieldInitialization(SourceLocation InitLoc, 3116 FieldDecl *BitField, 3117 Expr *Init) { 3118 (void) AnalyzeBitFieldAssignment(*this, BitField, Init, InitLoc); 3119 } 3120 3121 /// CheckParmsForFunctionDef - Check that the parameters of the given 3122 /// function are appropriate for the definition of a function. This 3123 /// takes care of any checks that cannot be performed on the 3124 /// declaration itself, e.g., that the types of each of the function 3125 /// parameters are complete. 3126 bool Sema::CheckParmsForFunctionDef(ParmVarDecl **P, ParmVarDecl **PEnd, 3127 bool CheckParameterNames) { 3128 bool HasInvalidParm = false; 3129 for (; P != PEnd; ++P) { 3130 ParmVarDecl *Param = *P; 3131 3132 // C99 6.7.5.3p4: the parameters in a parameter type list in a 3133 // function declarator that is part of a function definition of 3134 // that function shall not have incomplete type. 3135 // 3136 // This is also C++ [dcl.fct]p6. 3137 if (!Param->isInvalidDecl() && 3138 RequireCompleteType(Param->getLocation(), Param->getType(), 3139 diag::err_typecheck_decl_incomplete_type)) { 3140 Param->setInvalidDecl(); 3141 HasInvalidParm = true; 3142 } 3143 3144 // C99 6.9.1p5: If the declarator includes a parameter type list, the 3145 // declaration of each parameter shall include an identifier. 
3146 if (CheckParameterNames && 3147 Param->getIdentifier() == 0 && 3148 !Param->isImplicit() && 3149 !getLangOptions().CPlusPlus) 3150 Diag(Param->getLocation(), diag::err_parameter_name_omitted); 3151 3152 // C99 6.7.5.3p12: 3153 // If the function declarator is not part of a definition of that 3154 // function, parameters may have incomplete type and may use the [*] 3155 // notation in their sequences of declarator specifiers to specify 3156 // variable length array types. 3157 QualType PType = Param->getOriginalType(); 3158 if (const ArrayType *AT = Context.getAsArrayType(PType)) { 3159 if (AT->getSizeModifier() == ArrayType::Star) { 3160 // FIXME: This diagnosic should point the the '[*]' if source-location 3161 // information is added for it. 3162 Diag(Param->getLocation(), diag::err_array_star_in_function_definition); 3163 } 3164 } 3165 } 3166 3167 return HasInvalidParm; 3168 } 3169 3170 /// CheckCastAlign - Implements -Wcast-align, which warns when a 3171 /// pointer cast increases the alignment requirements. 3172 void Sema::CheckCastAlign(Expr *Op, QualType T, SourceRange TRange) { 3173 // This is actually a lot of work to potentially be doing on every 3174 // cast; don't do it if we're ignoring -Wcast_align (as is the default). 3175 if (getDiagnostics().getDiagnosticLevel(diag::warn_cast_align, 3176 TRange.getBegin()) 3177 == Diagnostic::Ignored) 3178 return; 3179 3180 // Ignore dependent types. 3181 if (T->isDependentType() || Op->getType()->isDependentType()) 3182 return; 3183 3184 // Require that the destination be a pointer type. 3185 const PointerType *DestPtr = T->getAs<PointerType>(); 3186 if (!DestPtr) return; 3187 3188 // If the destination has alignment 1, we're done. 3189 QualType DestPointee = DestPtr->getPointeeType(); 3190 if (DestPointee->isIncompleteType()) return; 3191 CharUnits DestAlign = Context.getTypeAlignInChars(DestPointee); 3192 if (DestAlign.isOne()) return; 3193 3194 // Require that the source be a pointer type. 
3195 const PointerType *SrcPtr = Op->getType()->getAs<PointerType>(); 3196 if (!SrcPtr) return; 3197 QualType SrcPointee = SrcPtr->getPointeeType(); 3198 3199 // Whitelist casts from cv void*. We already implicitly 3200 // whitelisted casts to cv void*, since they have alignment 1. 3201 // Also whitelist casts involving incomplete types, which implicitly 3202 // includes 'void'. 3203 if (SrcPointee->isIncompleteType()) return; 3204 3205 CharUnits SrcAlign = Context.getTypeAlignInChars(SrcPointee); 3206 if (SrcAlign >= DestAlign) return; 3207 3208 Diag(TRange.getBegin(), diag::warn_cast_align) 3209 << Op->getType() << T 3210 << static_cast<unsigned>(SrcAlign.getQuantity()) 3211 << static_cast<unsigned>(DestAlign.getQuantity()) 3212 << TRange << Op->getSourceRange(); 3213 } 3214 3215 static void CheckArrayAccess_Check(Sema &S, 3216 const clang::ArraySubscriptExpr *E) { 3217 const Expr *BaseExpr = E->getBase()->IgnoreParenImpCasts(); 3218 const ConstantArrayType *ArrayTy = 3219 S.Context.getAsConstantArrayType(BaseExpr->getType()); 3220 if (!ArrayTy) 3221 return; 3222 3223 const Expr *IndexExpr = E->getIdx(); 3224 if (IndexExpr->isValueDependent()) 3225 return; 3226 llvm::APSInt index; 3227 if (!IndexExpr->isIntegerConstantExpr(index, S.Context)) 3228 return; 3229 3230 if (index.isUnsigned() || !index.isNegative()) { 3231 llvm::APInt size = ArrayTy->getSize(); 3232 if (!size.isStrictlyPositive()) 3233 return; 3234 if (size.getBitWidth() > index.getBitWidth()) 3235 index = index.sext(size.getBitWidth()); 3236 else if (size.getBitWidth() < index.getBitWidth()) 3237 size = size.sext(index.getBitWidth()); 3238 3239 if (index.slt(size)) 3240 return; 3241 3242 S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr, 3243 S.PDiag(diag::warn_array_index_exceeds_bounds) 3244 << index.toString(10, true) 3245 << size.toString(10, true) 3246 << IndexExpr->getSourceRange()); 3247 } else { 3248 S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr, 3249 
S.PDiag(diag::warn_array_index_precedes_bounds) 3250 << index.toString(10, true) 3251 << IndexExpr->getSourceRange()); 3252 } 3253 3254 const NamedDecl *ND = NULL; 3255 if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(BaseExpr)) 3256 ND = dyn_cast<NamedDecl>(DRE->getDecl()); 3257 if (const MemberExpr *ME = dyn_cast<MemberExpr>(BaseExpr)) 3258 ND = dyn_cast<NamedDecl>(ME->getMemberDecl()); 3259 if (ND) 3260 S.DiagRuntimeBehavior(ND->getLocStart(), BaseExpr, 3261 S.PDiag(diag::note_array_index_out_of_bounds) 3262 << ND->getDeclName()); 3263 } 3264 3265 void Sema::CheckArrayAccess(const Expr *expr) { 3266 while (true) { 3267 expr = expr->IgnoreParens(); 3268 switch (expr->getStmtClass()) { 3269 case Stmt::ArraySubscriptExprClass: 3270 CheckArrayAccess_Check(*this, cast<ArraySubscriptExpr>(expr)); 3271 return; 3272 case Stmt::ConditionalOperatorClass: { 3273 const ConditionalOperator *cond = cast<ConditionalOperator>(expr); 3274 if (const Expr *lhs = cond->getLHS()) 3275 CheckArrayAccess(lhs); 3276 if (const Expr *rhs = cond->getRHS()) 3277 CheckArrayAccess(rhs); 3278 return; 3279 } 3280 default: 3281 return; 3282 } 3283 } 3284 } 3285