1 //= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Handling of format string in scanf and friends. The structure of format 11 // strings for fscanf() are described in C99 7.19.6.2. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/AST/FormatString.h" 16 #include "FormatStringParsing.h" 17 #include "clang/Basic/TargetInfo.h" 18 19 using clang::analyze_format_string::ArgType; 20 using clang::analyze_format_string::FormatStringHandler; 21 using clang::analyze_format_string::LengthModifier; 22 using clang::analyze_format_string::OptionalAmount; 23 using clang::analyze_format_string::ConversionSpecifier; 24 using clang::analyze_scanf::ScanfConversionSpecifier; 25 using clang::analyze_scanf::ScanfSpecifier; 26 using clang::UpdateOnReturn; 27 using namespace clang; 28 29 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 30 ScanfSpecifierResult; 31 32 static bool ParseScanList(FormatStringHandler &H, 33 ScanfConversionSpecifier &CS, 34 const char *&Beg, const char *E) { 35 const char *I = Beg; 36 const char *start = I - 1; 37 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 38 39 // No more characters? 40 if (I == E) { 41 H.HandleIncompleteScanList(start, I); 42 return true; 43 } 44 45 // Special case: ']' is the first character. 46 if (*I == ']') { 47 if (++I == E) { 48 H.HandleIncompleteScanList(start, I - 1); 49 return true; 50 } 51 } 52 53 // Special case: "^]" are the first characters. 54 if (I + 1 != E && I[0] == '^' && I[1] == ']') { 55 I += 2; 56 if (I == E) { 57 H.HandleIncompleteScanList(start, I - 1); 58 return true; 59 } 60 } 61 62 // Look for a ']' character which denotes the end of the scan list. 63 while (*I != ']') { 64 if (++I == E) { 65 H.HandleIncompleteScanList(start, I - 1); 66 return true; 67 } 68 } 69 70 CS.setEndScanList(I); 71 return false; 72 } 73 74 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 75 // We can possibly refactor. 76 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 77 const char *&Beg, 78 const char *E, 79 unsigned &argIndex, 80 const LangOptions &LO, 81 const TargetInfo &Target) { 82 using namespace clang::analyze_format_string; 83 using namespace clang::analyze_scanf; 84 const char *I = Beg; 85 const char *Start = nullptr; 86 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 87 88 // Look for a '%' character that indicates the start of a format specifier. 89 for ( ; I != E ; ++I) { 90 char c = *I; 91 if (c == '\0') { 92 // Detect spurious null characters, which are likely errors. 93 H.HandleNullChar(I); 94 return true; 95 } 96 if (c == '%') { 97 Start = I++; // Record the start of the format specifier. 98 break; 99 } 100 } 101 102 // No format specifier found? 103 if (!Start) 104 return false; 105 106 if (I == E) { 107 // No more characters left? 108 H.HandleIncompleteSpecifier(Start, E - Start); 109 return true; 110 } 111 112 ScanfSpecifier FS; 113 if (ParseArgPosition(H, FS, Start, I, E)) 114 return true; 115 116 if (I == E) { 117 // No more characters left? 118 H.HandleIncompleteSpecifier(Start, E - Start); 119 return true; 120 } 121 122 // Look for '*' flag if it is present. 123 if (*I == '*') { 124 FS.setSuppressAssignment(I); 125 if (++I == E) { 126 H.HandleIncompleteSpecifier(Start, E - Start); 127 return true; 128 } 129 } 130 131 // Look for the field width (if any). Unlike printf, this is either 132 // a fixed integer or isn't present. 133 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 134 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 135 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 136 FS.setFieldWidth(Amt); 137 138 if (I == E) { 139 // No more characters left? 140 H.HandleIncompleteSpecifier(Start, E - Start); 141 return true; 142 } 143 } 144 145 // Look for the length modifier. 146 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) { 147 // No more characters left? 148 H.HandleIncompleteSpecifier(Start, E - Start); 149 return true; 150 } 151 152 // Detect spurious null characters, which are likely errors. 153 if (*I == '\0') { 154 H.HandleNullChar(I); 155 return true; 156 } 157 158 // Finally, look for the conversion specifier. 159 const char *conversionPosition = I++; 160 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 161 switch (*conversionPosition) { 162 default: 163 break; 164 case '%': k = ConversionSpecifier::PercentArg; break; 165 case 'A': k = ConversionSpecifier::AArg; break; 166 case 'E': k = ConversionSpecifier::EArg; break; 167 case 'F': k = ConversionSpecifier::FArg; break; 168 case 'G': k = ConversionSpecifier::GArg; break; 169 case 'X': k = ConversionSpecifier::XArg; break; 170 case 'a': k = ConversionSpecifier::aArg; break; 171 case 'd': k = ConversionSpecifier::dArg; break; 172 case 'e': k = ConversionSpecifier::eArg; break; 173 case 'f': k = ConversionSpecifier::fArg; break; 174 case 'g': k = ConversionSpecifier::gArg; break; 175 case 'i': k = ConversionSpecifier::iArg; break; 176 case 'n': k = ConversionSpecifier::nArg; break; 177 case 'c': k = ConversionSpecifier::cArg; break; 178 case 'C': k = ConversionSpecifier::CArg; break; 179 case 'S': k = ConversionSpecifier::SArg; break; 180 case '[': k = ConversionSpecifier::ScanListArg; break; 181 case 'u': k = ConversionSpecifier::uArg; break; 182 case 'x': k = ConversionSpecifier::xArg; break; 183 case 'o': k = ConversionSpecifier::oArg; break; 184 case 's': k = ConversionSpecifier::sArg; break; 185 case 'p': k = ConversionSpecifier::pArg; break; 186 // Apple extensions 187 // Apple-specific 188 case 'D': 189 if (Target.getTriple().isOSDarwin()) 190 k = ConversionSpecifier::DArg; 191 break; 192 case 'O': 193 if (Target.getTriple().isOSDarwin()) 194 k = ConversionSpecifier::OArg; 195 break; 196 case 'U': 197 if (Target.getTriple().isOSDarwin()) 198 k = ConversionSpecifier::UArg; 199 break; 200 } 201 ScanfConversionSpecifier CS(conversionPosition, k); 202 if (k == ScanfConversionSpecifier::ScanListArg) { 203 if (ParseScanList(H, CS, I, E)) 204 return true; 205 } 206 FS.setConversionSpecifier(CS); 207 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 208 && !FS.usesPositionalArg()) 209 FS.setArgIndex(argIndex++); 210 211 // FIXME: '%' and '*' doesn't make sense. Issue a warning. 212 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 213 214 if (k == ScanfConversionSpecifier::InvalidSpecifier) { 215 unsigned Len = I - Beg; 216 if (ParseUTF8InvalidSpecifier(Beg, E, Len)) { 217 CS.setEndScanList(Beg + Len); 218 FS.setConversionSpecifier(CS); 219 } 220 // Assume the conversion takes one argument. 221 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len); 222 } 223 return ScanfSpecifierResult(Start, FS); 224 } 225 226 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { 227 const ScanfConversionSpecifier &CS = getConversionSpecifier(); 228 229 if (!CS.consumesDataArgument()) 230 return ArgType::Invalid(); 231 232 switch(CS.getKind()) { 233 // Signed int. 234 case ConversionSpecifier::dArg: 235 case ConversionSpecifier::DArg: 236 case ConversionSpecifier::iArg: 237 switch (LM.getKind()) { 238 case LengthModifier::None: 239 return ArgType::PtrTo(Ctx.IntTy); 240 case LengthModifier::AsChar: 241 return ArgType::PtrTo(ArgType::AnyCharTy); 242 case LengthModifier::AsShort: 243 return ArgType::PtrTo(Ctx.ShortTy); 244 case LengthModifier::AsLong: 245 return ArgType::PtrTo(Ctx.LongTy); 246 case LengthModifier::AsLongLong: 247 case LengthModifier::AsQuad: 248 return ArgType::PtrTo(Ctx.LongLongTy); 249 case LengthModifier::AsInt64: 250 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 251 case LengthModifier::AsIntMax: 252 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 253 case LengthModifier::AsSizeT: 254 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); 255 case LengthModifier::AsPtrDiff: 256 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 257 case LengthModifier::AsLongDouble: 258 // GNU extension. 259 return ArgType::PtrTo(Ctx.LongLongTy); 260 case LengthModifier::AsAllocate: 261 case LengthModifier::AsMAllocate: 262 case LengthModifier::AsInt32: 263 case LengthModifier::AsInt3264: 264 case LengthModifier::AsWide: 265 return ArgType::Invalid(); 266 } 267 llvm_unreachable("Unsupported LenghtModifier Type"); 268 269 // Unsigned int. 270 case ConversionSpecifier::oArg: 271 case ConversionSpecifier::OArg: 272 case ConversionSpecifier::uArg: 273 case ConversionSpecifier::UArg: 274 case ConversionSpecifier::xArg: 275 case ConversionSpecifier::XArg: 276 switch (LM.getKind()) { 277 case LengthModifier::None: 278 return ArgType::PtrTo(Ctx.UnsignedIntTy); 279 case LengthModifier::AsChar: 280 return ArgType::PtrTo(Ctx.UnsignedCharTy); 281 case LengthModifier::AsShort: 282 return ArgType::PtrTo(Ctx.UnsignedShortTy); 283 case LengthModifier::AsLong: 284 return ArgType::PtrTo(Ctx.UnsignedLongTy); 285 case LengthModifier::AsLongLong: 286 case LengthModifier::AsQuad: 287 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 288 case LengthModifier::AsInt64: 289 return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")); 290 case LengthModifier::AsIntMax: 291 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t")); 292 case LengthModifier::AsSizeT: 293 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t")); 294 case LengthModifier::AsPtrDiff: 295 return ArgType::PtrTo( 296 ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t")); 297 case LengthModifier::AsLongDouble: 298 // GNU extension. 299 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 300 case LengthModifier::AsAllocate: 301 case LengthModifier::AsMAllocate: 302 case LengthModifier::AsInt32: 303 case LengthModifier::AsInt3264: 304 case LengthModifier::AsWide: 305 return ArgType::Invalid(); 306 } 307 llvm_unreachable("Unsupported LenghtModifier Type"); 308 309 // Float. 310 case ConversionSpecifier::aArg: 311 case ConversionSpecifier::AArg: 312 case ConversionSpecifier::eArg: 313 case ConversionSpecifier::EArg: 314 case ConversionSpecifier::fArg: 315 case ConversionSpecifier::FArg: 316 case ConversionSpecifier::gArg: 317 case ConversionSpecifier::GArg: 318 switch (LM.getKind()) { 319 case LengthModifier::None: 320 return ArgType::PtrTo(Ctx.FloatTy); 321 case LengthModifier::AsLong: 322 return ArgType::PtrTo(Ctx.DoubleTy); 323 case LengthModifier::AsLongDouble: 324 return ArgType::PtrTo(Ctx.LongDoubleTy); 325 default: 326 return ArgType::Invalid(); 327 } 328 329 // Char, string and scanlist. 330 case ConversionSpecifier::cArg: 331 case ConversionSpecifier::sArg: 332 case ConversionSpecifier::ScanListArg: 333 switch (LM.getKind()) { 334 case LengthModifier::None: 335 return ArgType::PtrTo(ArgType::AnyCharTy); 336 case LengthModifier::AsLong: 337 case LengthModifier::AsWide: 338 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 339 case LengthModifier::AsAllocate: 340 case LengthModifier::AsMAllocate: 341 return ArgType::PtrTo(ArgType::CStrTy); 342 case LengthModifier::AsShort: 343 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) 344 return ArgType::PtrTo(ArgType::AnyCharTy); 345 LLVM_FALLTHROUGH; 346 default: 347 return ArgType::Invalid(); 348 } 349 case ConversionSpecifier::CArg: 350 case ConversionSpecifier::SArg: 351 // FIXME: Mac OS X specific? 352 switch (LM.getKind()) { 353 case LengthModifier::None: 354 case LengthModifier::AsWide: 355 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 356 case LengthModifier::AsAllocate: 357 case LengthModifier::AsMAllocate: 358 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); 359 case LengthModifier::AsShort: 360 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) 361 return ArgType::PtrTo(ArgType::AnyCharTy); 362 LLVM_FALLTHROUGH; 363 default: 364 return ArgType::Invalid(); 365 } 366 367 // Pointer. 368 case ConversionSpecifier::pArg: 369 return ArgType::PtrTo(ArgType::CPointerTy); 370 371 // Write-back. 372 case ConversionSpecifier::nArg: 373 switch (LM.getKind()) { 374 case LengthModifier::None: 375 return ArgType::PtrTo(Ctx.IntTy); 376 case LengthModifier::AsChar: 377 return ArgType::PtrTo(Ctx.SignedCharTy); 378 case LengthModifier::AsShort: 379 return ArgType::PtrTo(Ctx.ShortTy); 380 case LengthModifier::AsLong: 381 return ArgType::PtrTo(Ctx.LongTy); 382 case LengthModifier::AsLongLong: 383 case LengthModifier::AsQuad: 384 return ArgType::PtrTo(Ctx.LongLongTy); 385 case LengthModifier::AsInt64: 386 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 387 case LengthModifier::AsIntMax: 388 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 389 case LengthModifier::AsSizeT: 390 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); 391 case LengthModifier::AsPtrDiff: 392 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 393 case LengthModifier::AsLongDouble: 394 return ArgType(); // FIXME: Is this a known extension? 395 case LengthModifier::AsAllocate: 396 case LengthModifier::AsMAllocate: 397 case LengthModifier::AsInt32: 398 case LengthModifier::AsInt3264: 399 case LengthModifier::AsWide: 400 return ArgType::Invalid(); 401 } 402 403 default: 404 break; 405 } 406 407 return ArgType(); 408 } 409 410 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT, 411 const LangOptions &LangOpt, 412 ASTContext &Ctx) { 413 414 // %n is different from other conversion specifiers; don't try to fix it. 415 if (CS.getKind() == ConversionSpecifier::nArg) 416 return false; 417 418 if (!QT->isPointerType()) 419 return false; 420 421 QualType PT = QT->getPointeeType(); 422 423 // If it's an enum, get its underlying type. 424 if (const EnumType *ETy = PT->getAs<EnumType>()) { 425 // Don't try to fix incomplete enums. 426 if (!ETy->getDecl()->isComplete()) 427 return false; 428 PT = ETy->getDecl()->getIntegerType(); 429 } 430 431 const BuiltinType *BT = PT->getAs<BuiltinType>(); 432 if (!BT) 433 return false; 434 435 // Pointer to a character. 436 if (PT->isAnyCharacterType()) { 437 CS.setKind(ConversionSpecifier::sArg); 438 if (PT->isWideCharType()) 439 LM.setKind(LengthModifier::AsWideChar); 440 else 441 LM.setKind(LengthModifier::None); 442 443 // If we know the target array length, we can use it as a field width. 444 if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) { 445 if (CAT->getSizeModifier() == ArrayType::Normal) 446 FieldWidth = OptionalAmount(OptionalAmount::Constant, 447 CAT->getSize().getZExtValue() - 1, 448 "", 0, false); 449 450 } 451 return true; 452 } 453 454 // Figure out the length modifier. 455 switch (BT->getKind()) { 456 // no modifier 457 case BuiltinType::UInt: 458 case BuiltinType::Int: 459 case BuiltinType::Float: 460 LM.setKind(LengthModifier::None); 461 break; 462 463 // hh 464 case BuiltinType::Char_U: 465 case BuiltinType::UChar: 466 case BuiltinType::Char_S: 467 case BuiltinType::SChar: 468 LM.setKind(LengthModifier::AsChar); 469 break; 470 471 // h 472 case BuiltinType::Short: 473 case BuiltinType::UShort: 474 LM.setKind(LengthModifier::AsShort); 475 break; 476 477 // l 478 case BuiltinType::Long: 479 case BuiltinType::ULong: 480 case BuiltinType::Double: 481 LM.setKind(LengthModifier::AsLong); 482 break; 483 484 // ll 485 case BuiltinType::LongLong: 486 case BuiltinType::ULongLong: 487 LM.setKind(LengthModifier::AsLongLong); 488 break; 489 490 // L 491 case BuiltinType::LongDouble: 492 LM.setKind(LengthModifier::AsLongDouble); 493 break; 494 495 // Don't know. 496 default: 497 return false; 498 } 499 500 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 501 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11)) 502 namedTypeToLengthModifier(PT, LM); 503 504 // If fixing the length modifier was enough, we are done. 505 if (hasValidLengthModifier(Ctx.getTargetInfo())) { 506 const analyze_scanf::ArgType &AT = getArgType(Ctx); 507 if (AT.isValid() && AT.matchesType(Ctx, QT)) 508 return true; 509 } 510 511 // Figure out the conversion specifier. 512 if (PT->isRealFloatingType()) 513 CS.setKind(ConversionSpecifier::fArg); 514 else if (PT->isSignedIntegerType()) 515 CS.setKind(ConversionSpecifier::dArg); 516 else if (PT->isUnsignedIntegerType()) 517 CS.setKind(ConversionSpecifier::uArg); 518 else 519 llvm_unreachable("Unexpected type"); 520 521 return true; 522 } 523 524 void ScanfSpecifier::toString(raw_ostream &os) const { 525 os << "%"; 526 527 if (usesPositionalArg()) 528 os << getPositionalArgIndex() << "$"; 529 if (SuppressAssignment) 530 os << "*"; 531 532 FieldWidth.toString(os); 533 os << LM.toString(); 534 os << CS.toString(); 535 } 536 537 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 538 const char *I, 539 const char *E, 540 const LangOptions &LO, 541 const TargetInfo &Target) { 542 543 unsigned argIndex = 0; 544 545 // Keep looking for a format specifier until we have exhausted the string. 546 while (I != E) { 547 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 548 LO, Target); 549 // Did a fail-stop error of any kind occur when parsing the specifier? 550 // If so, don't do any more processing. 551 if (FSR.shouldStop()) 552 return true; 553 // Did we exhaust the string or encounter an error that 554 // we can recover from? 555 if (!FSR.hasValue()) 556 continue; 557 // We have a format specifier. Pass it to the callback. 558 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 559 I - FSR.getStart())) { 560 return true; 561 } 562 } 563 assert(I == E && "Format string not exhausted"); 564 return false; 565 } 566