1 //===--- YAMLParser.cpp - Simple YAML parser ------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a YAML parser. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Support/YAMLParser.h" 15 16 #include "llvm/ADT/ilist.h" 17 #include "llvm/ADT/ilist_node.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/MemoryBuffer.h" 23 #include "llvm/Support/raw_ostream.h" 24 #include "llvm/Support/SourceMgr.h" 25 26 using namespace llvm; 27 using namespace yaml; 28 29 enum UnicodeEncodingForm { 30 UEF_UTF32_LE, //< UTF-32 Little Endian 31 UEF_UTF32_BE, //< UTF-32 Big Endian 32 UEF_UTF16_LE, //< UTF-16 Little Endian 33 UEF_UTF16_BE, //< UTF-16 Big Endian 34 UEF_UTF8, //< UTF-8 or ascii. 35 UEF_Unknown //< Not a valid Unicode encoding. 36 }; 37 38 /// EncodingInfo - Holds the encoding type and length of the byte order mark if 39 /// it exists. Length is in {0, 2, 3, 4}. 40 typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo; 41 42 /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode 43 /// encoding form of \a Input. 44 /// 45 /// @param Input A string of length 0 or more. 46 /// @returns An EncodingInfo indicating the Unicode encoding form of the input 47 /// and how long the byte order mark is if one exists. 48 static EncodingInfo getUnicodeEncoding(StringRef Input) { 49 if (Input.size() == 0) 50 return std::make_pair(UEF_Unknown, 0); 51 52 switch (uint8_t(Input[0])) { 53 case 0x00: 54 if (Input.size() >= 4) { 55 if ( Input[1] == 0 56 && uint8_t(Input[2]) == 0xFE 57 && uint8_t(Input[3]) == 0xFF) 58 return std::make_pair(UEF_UTF32_BE, 4); 59 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) 60 return std::make_pair(UEF_UTF32_BE, 0); 61 } 62 63 if (Input.size() >= 2 && Input[1] != 0) 64 return std::make_pair(UEF_UTF16_BE, 0); 65 return std::make_pair(UEF_Unknown, 0); 66 case 0xFF: 67 if ( Input.size() >= 4 68 && uint8_t(Input[1]) == 0xFE 69 && Input[2] == 0 70 && Input[3] == 0) 71 return std::make_pair(UEF_UTF32_LE, 4); 72 73 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) 74 return std::make_pair(UEF_UTF16_LE, 2); 75 return std::make_pair(UEF_Unknown, 0); 76 case 0xFE: 77 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) 78 return std::make_pair(UEF_UTF16_BE, 2); 79 return std::make_pair(UEF_Unknown, 0); 80 case 0xEF: 81 if ( Input.size() >= 3 82 && uint8_t(Input[1]) == 0xBB 83 && uint8_t(Input[2]) == 0xBF) 84 return std::make_pair(UEF_UTF8, 3); 85 return std::make_pair(UEF_Unknown, 0); 86 } 87 88 // It could still be utf-32 or utf-16. 89 if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) 90 return std::make_pair(UEF_UTF32_LE, 0); 91 92 if (Input.size() >= 2 && Input[1] == 0) 93 return std::make_pair(UEF_UTF16_LE, 0); 94 95 return std::make_pair(UEF_UTF8, 0); 96 } 97 98 namespace llvm { 99 namespace yaml { 100 /// Token - A single YAML token. 101 struct Token : ilist_node<Token> { 102 enum TokenKind { 103 TK_Error, // Uninitialized token. 104 TK_StreamStart, 105 TK_StreamEnd, 106 TK_VersionDirective, 107 TK_TagDirective, 108 TK_DocumentStart, 109 TK_DocumentEnd, 110 TK_BlockEntry, 111 TK_BlockEnd, 112 TK_BlockSequenceStart, 113 TK_BlockMappingStart, 114 TK_FlowEntry, 115 TK_FlowSequenceStart, 116 TK_FlowSequenceEnd, 117 TK_FlowMappingStart, 118 TK_FlowMappingEnd, 119 TK_Key, 120 TK_Value, 121 TK_Scalar, 122 TK_Alias, 123 TK_Anchor, 124 TK_Tag 125 } Kind; 126 127 /// A string of length 0 or more whose begin() points to the logical location 128 /// of the token in the input. 129 StringRef Range; 130 131 Token() : Kind(TK_Error) {} 132 }; 133 } 134 } 135 136 namespace llvm { 137 template<> 138 struct ilist_sentinel_traits<Token> { 139 Token *createSentinel() const { 140 return &Sentinel; 141 } 142 static void destroySentinel(Token*) {} 143 144 Token *provideInitialHead() const { return createSentinel(); } 145 Token *ensureHead(Token*) const { return createSentinel(); } 146 static void noteHead(Token*, Token*) {} 147 148 private: 149 mutable Token Sentinel; 150 }; 151 152 template<> 153 struct ilist_node_traits<Token> { 154 Token *createNode(const Token &V) { 155 return new (Alloc.Allocate<Token>()) Token(V); 156 } 157 static void deleteNode(Token *V) {} 158 159 void addNodeToList(Token *) {} 160 void removeNodeFromList(Token *) {} 161 void transferNodesFromList(ilist_node_traits & /*SrcTraits*/, 162 ilist_iterator<Token> /*first*/, 163 ilist_iterator<Token> /*last*/) {} 164 165 BumpPtrAllocator Alloc; 166 }; 167 } 168 169 typedef ilist<Token> TokenQueueT; 170 171 namespace { 172 /// @brief This struct is used to track simple keys. 173 /// 174 /// Simple keys are handled by creating an entry in SimpleKeys for each Token 175 /// which could legally be the start of a simple key. When peekNext is called, 176 /// if the Token To be returned is referenced by a SimpleKey, we continue 177 /// tokenizing until that potential simple key has either been found to not be 178 /// a simple key (we moved on to the next line or went further than 1024 chars). 179 /// Or when we run into a Value, and then insert a Key token (and possibly 180 /// others) before the SimpleKey's Tok. 181 struct SimpleKey { 182 TokenQueueT::iterator Tok; 183 unsigned Column; 184 unsigned Line; 185 unsigned FlowLevel; 186 bool IsRequired; 187 188 bool operator ==(const SimpleKey &Other) { 189 return Tok == Other.Tok; 190 } 191 }; 192 } 193 194 /// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit 195 /// subsequence and the subsequence's length in code units (uint8_t). 196 /// A length of 0 represents an error. 197 typedef std::pair<uint32_t, unsigned> UTF8Decoded; 198 199 static UTF8Decoded decodeUTF8(StringRef Range) { 200 StringRef::iterator Position= Range.begin(); 201 StringRef::iterator End = Range.end(); 202 // 1 byte: [0x00, 0x7f] 203 // Bit pattern: 0xxxxxxx 204 if ((*Position & 0x80) == 0) { 205 return std::make_pair(*Position, 1); 206 } 207 // 2 bytes: [0x80, 0x7ff] 208 // Bit pattern: 110xxxxx 10xxxxxx 209 if (Position + 1 != End && 210 ((*Position & 0xE0) == 0xC0) && 211 ((*(Position + 1) & 0xC0) == 0x80)) { 212 uint32_t codepoint = ((*Position & 0x1F) << 6) | 213 (*(Position + 1) & 0x3F); 214 if (codepoint >= 0x80) 215 return std::make_pair(codepoint, 2); 216 } 217 // 3 bytes: [0x8000, 0xffff] 218 // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx 219 if (Position + 2 != End && 220 ((*Position & 0xF0) == 0xE0) && 221 ((*(Position + 1) & 0xC0) == 0x80) && 222 ((*(Position + 2) & 0xC0) == 0x80)) { 223 uint32_t codepoint = ((*Position & 0x0F) << 12) | 224 ((*(Position + 1) & 0x3F) << 6) | 225 (*(Position + 2) & 0x3F); 226 // Codepoints between 0xD800 and 0xDFFF are invalid, as 227 // they are high / low surrogate halves used by UTF-16. 228 if (codepoint >= 0x800 && 229 (codepoint < 0xD800 || codepoint > 0xDFFF)) 230 return std::make_pair(codepoint, 3); 231 } 232 // 4 bytes: [0x10000, 0x10FFFF] 233 // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 234 if (Position + 3 != End && 235 ((*Position & 0xF8) == 0xF0) && 236 ((*(Position + 1) & 0xC0) == 0x80) && 237 ((*(Position + 2) & 0xC0) == 0x80) && 238 ((*(Position + 3) & 0xC0) == 0x80)) { 239 uint32_t codepoint = ((*Position & 0x07) << 18) | 240 ((*(Position + 1) & 0x3F) << 12) | 241 ((*(Position + 2) & 0x3F) << 6) | 242 (*(Position + 3) & 0x3F); 243 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) 244 return std::make_pair(codepoint, 4); 245 } 246 return std::make_pair(0, 0); 247 } 248 249 namespace llvm { 250 namespace yaml { 251 /// @brief Scans YAML tokens from a MemoryBuffer. 252 class Scanner { 253 public: 254 Scanner(const StringRef Input, SourceMgr &SM); 255 256 /// @brief Parse the next token and return it without popping it. 257 Token &peekNext(); 258 259 /// @brief Parse the next token and pop it from the queue. 260 Token getNext(); 261 262 void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, 263 ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { 264 SM.PrintMessage(Loc, Kind, Message, Ranges); 265 } 266 267 void setError(const Twine &Message, StringRef::iterator Position) { 268 if (Current >= End) 269 Current = End - 1; 270 271 // Don't print out more errors after the first one we encounter. The rest 272 // are just the result of the first, and have no meaning. 273 if (!Failed) 274 printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message); 275 Failed = true; 276 } 277 278 void setError(const Twine &Message) { 279 setError(Message, Current); 280 } 281 282 /// @brief Returns true if an error occurred while parsing. 283 bool failed() { 284 return Failed; 285 } 286 287 private: 288 StringRef currentInput() { 289 return StringRef(Current, End - Current); 290 } 291 292 /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting 293 /// at \a Position. 294 /// 295 /// If the UTF-8 code units starting at Position do not form a well-formed 296 /// code unit subsequence, then the Unicode scalar value is 0, and the length 297 /// is 0. 298 UTF8Decoded decodeUTF8(StringRef::iterator Position) { 299 return ::decodeUTF8(StringRef(Position, End - Position)); 300 } 301 302 // The following functions are based on the gramar rules in the YAML spec. The 303 // style of the function names it meant to closely match how they are written 304 // in the spec. The number within the [] is the number of the grammar rule in 305 // the spec. 306 // 307 // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. 308 // 309 // c- 310 // A production starting and ending with a special character. 311 // b- 312 // A production matching a single line break. 313 // nb- 314 // A production starting and ending with a non-break character. 315 // s- 316 // A production starting and ending with a white space character. 317 // ns- 318 // A production starting and ending with a non-space character. 319 // l- 320 // A production matching complete line(s). 321 322 /// @brief Skip a single nb-char[27] starting at Position. 323 /// 324 /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] 325 /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] 326 /// 327 /// @returns The code unit after the nb-char, or Position if it's not an 328 /// nb-char. 329 StringRef::iterator skip_nb_char(StringRef::iterator Position); 330 331 /// @brief Skip a single b-break[28] starting at Position. 332 /// 333 /// A b-break is 0xD 0xA | 0xD | 0xA 334 /// 335 /// @returns The code unit after the b-break, or Position if it's not a 336 /// b-break. 337 StringRef::iterator skip_b_break(StringRef::iterator Position); 338 339 /// @brief Skip a single s-white[33] starting at Position. 340 /// 341 /// A s-white is 0x20 | 0x9 342 /// 343 /// @returns The code unit after the s-white, or Position if it's not a 344 /// s-white. 345 StringRef::iterator skip_s_white(StringRef::iterator Position); 346 347 /// @brief Skip a single ns-char[34] starting at Position. 348 /// 349 /// A ns-char is nb-char - s-white 350 /// 351 /// @returns The code unit after the ns-char, or Position if it's not a 352 /// ns-char. 353 StringRef::iterator skip_ns_char(StringRef::iterator Position); 354 355 typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator); 356 /// @brief Skip minimal well-formed code unit subsequences until Func 357 /// returns its input. 358 /// 359 /// @returns The code unit after the last minimal well-formed code unit 360 /// subsequence that Func accepted. 361 StringRef::iterator skip_while( SkipWhileFunc Func 362 , StringRef::iterator Position); 363 364 /// @brief Scan ns-uri-char[39]s starting at Cur. 365 /// 366 /// This updates Cur and Column while scanning. 367 /// 368 /// @returns A StringRef starting at Cur which covers the longest contiguous 369 /// sequence of ns-uri-char. 370 StringRef scan_ns_uri_char(); 371 372 /// @brief Scan ns-plain-one-line[133] starting at \a Cur. 373 StringRef scan_ns_plain_one_line(); 374 375 /// @brief Consume a minimal well-formed code unit subsequence starting at 376 /// \a Cur. Return false if it is not the same Unicode scalar value as 377 /// \a Expected. This updates \a Column. 378 bool consume(uint32_t Expected); 379 380 /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column. 381 void skip(uint32_t Distance); 382 383 /// @brief Return true if the minimal well-formed code unit subsequence at 384 /// Pos is whitespace or a new line 385 bool isBlankOrBreak(StringRef::iterator Position); 386 387 /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey. 388 void saveSimpleKeyCandidate( TokenQueueT::iterator Tok 389 , unsigned AtColumn 390 , bool IsRequired); 391 392 /// @brief Remove simple keys that can no longer be valid simple keys. 393 /// 394 /// Invalid simple keys are not on the current line or are further than 1024 395 /// columns back. 396 void removeStaleSimpleKeyCandidates(); 397 398 /// @brief Remove all simple keys on FlowLevel \a Level. 399 void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); 400 401 /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd 402 /// tokens if needed. 403 bool unrollIndent(int ToColumn); 404 405 /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint 406 /// if needed. 407 bool rollIndent( int ToColumn 408 , Token::TokenKind Kind 409 , TokenQueueT::iterator InsertPoint); 410 411 /// @brief Skip whitespace and comments until the start of the next token. 412 void scanToNextToken(); 413 414 /// @brief Must be the first token generated. 415 bool scanStreamStart(); 416 417 /// @brief Generate tokens needed to close out the stream. 418 bool scanStreamEnd(); 419 420 /// @brief Scan a %BLAH directive. 421 bool scanDirective(); 422 423 /// @brief Scan a ... or ---. 424 bool scanDocumentIndicator(bool IsStart); 425 426 /// @brief Scan a [ or { and generate the proper flow collection start token. 427 bool scanFlowCollectionStart(bool IsSequence); 428 429 /// @brief Scan a ] or } and generate the proper flow collection end token. 430 bool scanFlowCollectionEnd(bool IsSequence); 431 432 /// @brief Scan the , that separates entries in a flow collection. 433 bool scanFlowEntry(); 434 435 /// @brief Scan the - that starts block sequence entries. 436 bool scanBlockEntry(); 437 438 /// @brief Scan an explicit ? indicating a key. 439 bool scanKey(); 440 441 /// @brief Scan an explicit : indicating a value. 442 bool scanValue(); 443 444 /// @brief Scan a quoted scalar. 445 bool scanFlowScalar(bool IsDoubleQuoted); 446 447 /// @brief Scan an unquoted scalar. 448 bool scanPlainScalar(); 449 450 /// @brief Scan an Alias or Anchor starting with * or &. 451 bool scanAliasOrAnchor(bool IsAlias); 452 453 /// @brief Scan a block scalar starting with | or >. 454 bool scanBlockScalar(bool IsLiteral); 455 456 /// @brief Scan a tag of the form !stuff. 457 bool scanTag(); 458 459 /// @brief Dispatch to the next scanning function based on \a *Cur. 460 bool fetchMoreTokens(); 461 462 /// @brief The SourceMgr used for diagnostics and buffer management. 463 SourceMgr &SM; 464 465 /// @brief The original input. 466 MemoryBuffer *InputBuffer; 467 468 /// @brief The current position of the scanner. 469 StringRef::iterator Current; 470 471 /// @brief The end of the input (one past the last character). 472 StringRef::iterator End; 473 474 /// @brief Current YAML indentation level in spaces. 475 int Indent; 476 477 /// @brief Current column number in Unicode code points. 478 unsigned Column; 479 480 /// @brief Current line number. 481 unsigned Line; 482 483 /// @brief How deep we are in flow style containers. 0 Means at block level. 484 unsigned FlowLevel; 485 486 /// @brief Are we at the start of the stream? 487 bool IsStartOfStream; 488 489 /// @brief Can the next token be the start of a simple key? 490 bool IsSimpleKeyAllowed; 491 492 /// @brief Is the next token required to start a simple key? 493 bool IsSimpleKeyRequired; 494 495 /// @brief True if an error has occurred. 496 bool Failed; 497 498 /// @brief Queue of tokens. This is required to queue up tokens while looking 499 /// for the end of a simple key. And for cases where a single character 500 /// can produce multiple tokens (e.g. BlockEnd). 501 TokenQueueT TokenQueue; 502 503 /// @brief Indentation levels. 504 SmallVector<int, 4> Indents; 505 506 /// @brief Potential simple keys. 507 SmallVector<SimpleKey, 4> SimpleKeys; 508 }; 509 510 } // end namespace yaml 511 } // end namespace llvm 512 513 /// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. 514 static void encodeUTF8( uint32_t UnicodeScalarValue 515 , SmallVectorImpl<char> &Result) { 516 if (UnicodeScalarValue <= 0x7F) { 517 Result.push_back(UnicodeScalarValue & 0x7F); 518 } else if (UnicodeScalarValue <= 0x7FF) { 519 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); 520 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); 521 Result.push_back(FirstByte); 522 Result.push_back(SecondByte); 523 } else if (UnicodeScalarValue <= 0xFFFF) { 524 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); 525 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); 526 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); 527 Result.push_back(FirstByte); 528 Result.push_back(SecondByte); 529 Result.push_back(ThirdByte); 530 } else if (UnicodeScalarValue <= 0x10FFFF) { 531 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); 532 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); 533 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); 534 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); 535 Result.push_back(FirstByte); 536 Result.push_back(SecondByte); 537 Result.push_back(ThirdByte); 538 Result.push_back(FourthByte); 539 } 540 } 541 542 bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { 543 SourceMgr SM; 544 Scanner scanner(Input, SM); 545 while (true) { 546 Token T = scanner.getNext(); 547 switch (T.Kind) { 548 case Token::TK_StreamStart: 549 OS << "Stream-Start: "; 550 break; 551 case Token::TK_StreamEnd: 552 OS << "Stream-End: "; 553 break; 554 case Token::TK_VersionDirective: 555 OS << "Version-Directive: "; 556 break; 557 case Token::TK_TagDirective: 558 OS << "Tag-Directive: "; 559 break; 560 case Token::TK_DocumentStart: 561 OS << "Document-Start: "; 562 break; 563 case Token::TK_DocumentEnd: 564 OS << "Document-End: "; 565 break; 566 case Token::TK_BlockEntry: 567 OS << "Block-Entry: "; 568 break; 569 case Token::TK_BlockEnd: 570 OS << "Block-End: "; 571 break; 572 case Token::TK_BlockSequenceStart: 573 OS << "Block-Sequence-Start: "; 574 break; 575 case Token::TK_BlockMappingStart: 576 OS << "Block-Mapping-Start: "; 577 break; 578 case Token::TK_FlowEntry: 579 OS << "Flow-Entry: "; 580 break; 581 case Token::TK_FlowSequenceStart: 582 OS << "Flow-Sequence-Start: "; 583 break; 584 case Token::TK_FlowSequenceEnd: 585 OS << "Flow-Sequence-End: "; 586 break; 587 case Token::TK_FlowMappingStart: 588 OS << "Flow-Mapping-Start: "; 589 break; 590 case Token::TK_FlowMappingEnd: 591 OS << "Flow-Mapping-End: "; 592 break; 593 case Token::TK_Key: 594 OS << "Key: "; 595 break; 596 case Token::TK_Value: 597 OS << "Value: "; 598 break; 599 case Token::TK_Scalar: 600 OS << "Scalar: "; 601 break; 602 case Token::TK_Alias: 603 OS << "Alias: "; 604 break; 605 case Token::TK_Anchor: 606 OS << "Anchor: "; 607 break; 608 case Token::TK_Tag: 609 OS << "Tag: "; 610 break; 611 case Token::TK_Error: 612 break; 613 } 614 OS << T.Range << "\n"; 615 if (T.Kind == Token::TK_StreamEnd) 616 break; 617 else if (T.Kind == Token::TK_Error) 618 return false; 619 } 620 return true; 621 } 622 623 bool yaml::scanTokens(StringRef Input) { 624 llvm::SourceMgr SM; 625 llvm::yaml::Scanner scanner(Input, SM); 626 for (;;) { 627 llvm::yaml::Token T = scanner.getNext(); 628 if (T.Kind == Token::TK_StreamEnd) 629 break; 630 else if (T.Kind == Token::TK_Error) 631 return false; 632 } 633 return true; 634 } 635 636 std::string yaml::escape(StringRef Input) { 637 std::string EscapedInput; 638 for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { 639 if (*i == '\\') 640 EscapedInput += "\\\\"; 641 else if (*i == '"') 642 EscapedInput += "\\\""; 643 else if (*i == 0) 644 EscapedInput += "\\0"; 645 else if (*i == 0x07) 646 EscapedInput += "\\a"; 647 else if (*i == 0x08) 648 EscapedInput += "\\b"; 649 else if (*i == 0x09) 650 EscapedInput += "\\t"; 651 else if (*i == 0x0A) 652 EscapedInput += "\\n"; 653 else if (*i == 0x0B) 654 EscapedInput += "\\v"; 655 else if (*i == 0x0C) 656 EscapedInput += "\\f"; 657 else if (*i == 0x0D) 658 EscapedInput += "\\r"; 659 else if (*i == 0x1B) 660 EscapedInput += "\\e"; 661 else if ((unsigned char)*i < 0x20) { // Control characters not handled above. 662 std::string HexStr = utohexstr(*i); 663 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; 664 } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. 665 UTF8Decoded UnicodeScalarValue 666 = decodeUTF8(StringRef(i, Input.end() - i)); 667 if (UnicodeScalarValue.second == 0) { 668 // Found invalid char. 669 SmallString<4> Val; 670 encodeUTF8(0xFFFD, Val); 671 EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end()); 672 // FIXME: Error reporting. 673 return EscapedInput; 674 } 675 if (UnicodeScalarValue.first == 0x85) 676 EscapedInput += "\\N"; 677 else if (UnicodeScalarValue.first == 0xA0) 678 EscapedInput += "\\_"; 679 else if (UnicodeScalarValue.first == 0x2028) 680 EscapedInput += "\\L"; 681 else if (UnicodeScalarValue.first == 0x2029) 682 EscapedInput += "\\P"; 683 else { 684 std::string HexStr = utohexstr(UnicodeScalarValue.first); 685 if (HexStr.size() <= 2) 686 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; 687 else if (HexStr.size() <= 4) 688 EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; 689 else if (HexStr.size() <= 8) 690 EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; 691 } 692 i += UnicodeScalarValue.second - 1; 693 } else 694 EscapedInput.push_back(*i); 695 } 696 return EscapedInput; 697 } 698 699 Scanner::Scanner(StringRef Input, SourceMgr &sm) 700 : SM(sm) 701 , Indent(-1) 702 , Column(0) 703 , Line(0) 704 , FlowLevel(0) 705 , IsStartOfStream(true) 706 , IsSimpleKeyAllowed(true) 707 , IsSimpleKeyRequired(false) 708 , Failed(false) { 709 InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML"); 710 SM.AddNewSourceBuffer(InputBuffer, SMLoc()); 711 Current = InputBuffer->getBufferStart(); 712 End = InputBuffer->getBufferEnd(); 713 } 714 715 Token &Scanner::peekNext() { 716 // If the current token is a possible simple key, keep parsing until we 717 // can confirm. 718 bool NeedMore = false; 719 while (true) { 720 if (TokenQueue.empty() || NeedMore) { 721 if (!fetchMoreTokens()) { 722 TokenQueue.clear(); 723 TokenQueue.push_back(Token()); 724 return TokenQueue.front(); 725 } 726 } 727 assert(!TokenQueue.empty() && 728 "fetchMoreTokens lied about getting tokens!"); 729 730 removeStaleSimpleKeyCandidates(); 731 SimpleKey SK; 732 SK.Tok = TokenQueue.front(); 733 if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK) 734 == SimpleKeys.end()) 735 break; 736 else 737 NeedMore = true; 738 } 739 return TokenQueue.front(); 740 } 741 742 Token Scanner::getNext() { 743 Token Ret = peekNext(); 744 // TokenQueue can be empty if there was an error getting the next token. 745 if (!TokenQueue.empty()) 746 TokenQueue.pop_front(); 747 748 // There cannot be any referenced Token's if the TokenQueue is empty. So do a 749 // quick deallocation of them all. 750 if (TokenQueue.empty()) { 751 TokenQueue.Alloc.Reset(); 752 } 753 754 return Ret; 755 } 756 757 StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { 758 if (Position == End) 759 return Position; 760 // Check 7 bit c-printable - b-char. 761 if ( *Position == 0x09 762 || (*Position >= 0x20 && *Position <= 0x7E)) 763 return Position + 1; 764 765 // Check for valid UTF-8. 766 if (uint8_t(*Position) & 0x80) { 767 UTF8Decoded u8d = decodeUTF8(Position); 768 if ( u8d.second != 0 769 && u8d.first != 0xFEFF 770 && ( u8d.first == 0x85 771 || ( u8d.first >= 0xA0 772 && u8d.first <= 0xD7FF) 773 || ( u8d.first >= 0xE000 774 && u8d.first <= 0xFFFD) 775 || ( u8d.first >= 0x10000 776 && u8d.first <= 0x10FFFF))) 777 return Position + u8d.second; 778 } 779 return Position; 780 } 781 782 StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { 783 if (Position == End) 784 return Position; 785 if (*Position == 0x0D) { 786 if (Position + 1 != End && *(Position + 1) == 0x0A) 787 return Position + 2; 788 return Position + 1; 789 } 790 791 if (*Position == 0x0A) 792 return Position + 1; 793 return Position; 794 } 795 796 797 StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { 798 if (Position == End) 799 return Position; 800 if (*Position == ' ' || *Position == '\t') 801 return Position + 1; 802 return Position; 803 } 804 805 StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { 806 if (Position == End) 807 return Position; 808 if (*Position == ' ' || *Position == '\t') 809 return Position; 810 return skip_nb_char(Position); 811 } 812 813 StringRef::iterator Scanner::skip_while( SkipWhileFunc Func 814 , StringRef::iterator Position) { 815 while (true) { 816 StringRef::iterator i = (this->*Func)(Position); 817 if (i == Position) 818 break; 819 Position = i; 820 } 821 return Position; 822 } 823 824 static bool is_ns_hex_digit(const char C) { 825 return (C >= '0' && C <= '9') 826 || (C >= 'a' && C <= 'z') 827 || (C >= 'A' && C <= 'Z'); 828 } 829 830 static bool is_ns_word_char(const char C) { 831 return C == '-' 832 || (C >= 'a' && C <= 'z') 833 || (C >= 'A' && C <= 'Z'); 834 } 835 836 StringRef Scanner::scan_ns_uri_char() { 837 StringRef::iterator Start = Current; 838 while (true) { 839 if (Current == End) 840 break; 841 if (( *Current == '%' 842 && Current + 2 < End 843 && is_ns_hex_digit(*(Current + 1)) 844 && is_ns_hex_digit(*(Current + 2))) 845 || is_ns_word_char(*Current) 846 || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") 847 != StringRef::npos) { 848 ++Current; 849 ++Column; 850 } else 851 break; 852 } 853 return StringRef(Start, Current - Start); 854 } 855 856 StringRef Scanner::scan_ns_plain_one_line() { 857 StringRef::iterator start = Current; 858 // The first character must already be verified. 859 ++Current; 860 while (true) { 861 if (Current == End) { 862 break; 863 } else if (*Current == ':') { 864 // Check if the next character is a ns-char. 865 if (Current + 1 == End) 866 break; 867 StringRef::iterator i = skip_ns_char(Current + 1); 868 if (Current + 1 != i) { 869 Current = i; 870 Column += 2; // Consume both the ':' and ns-char. 871 } else 872 break; 873 } else if (*Current == '#') { 874 // Check if the previous character was a ns-char. 875 // The & 0x80 check is to check for the trailing byte of a utf-8 876 if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) { 877 ++Current; 878 ++Column; 879 } else 880 break; 881 } else { 882 StringRef::iterator i = skip_nb_char(Current); 883 if (i == Current) 884 break; 885 Current = i; 886 ++Column; 887 } 888 } 889 return StringRef(start, Current - start); 890 } 891 892 bool Scanner::consume(uint32_t Expected) { 893 if (Expected >= 0x80) 894 report_fatal_error("Not dealing with this yet"); 895 if (Current == End) 896 return false; 897 if (uint8_t(*Current) >= 0x80) 898 report_fatal_error("Not dealing with this yet"); 899 if (uint8_t(*Current) == Expected) { 900 ++Current; 901 ++Column; 902 return true; 903 } 904 return false; 905 } 906 907 void Scanner::skip(uint32_t Distance) { 908 Current += Distance; 909 Column += Distance; 910 } 911 912 bool Scanner::isBlankOrBreak(StringRef::iterator Position) { 913 if (Position == End) 914 return false; 915 if ( *Position == ' ' || *Position == '\t' 916 || *Position == '\r' || *Position == '\n') 917 return true; 918 return false; 919 } 920 921 void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok 922 , unsigned AtColumn 923 , bool IsRequired) { 924 if (IsSimpleKeyAllowed) { 925 SimpleKey SK; 926 SK.Tok = Tok; 927 SK.Line = Line; 928 SK.Column = AtColumn; 929 SK.IsRequired = IsRequired; 930 SK.FlowLevel = FlowLevel; 931 SimpleKeys.push_back(SK); 932 } 933 } 934 935 void Scanner::removeStaleSimpleKeyCandidates() { 936 for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); 937 i != SimpleKeys.end();) { 938 if (i->Line != Line || i->Column + 1024 < Column) { 939 if (i->IsRequired) 940 setError( "Could not find expected : for simple key" 941 , i->Tok->Range.begin()); 942 i = SimpleKeys.erase(i); 943 } else 944 ++i; 945 } 946 } 947 948 void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { 949 if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level) 950 SimpleKeys.pop_back(); 951 } 952 953 bool Scanner::unrollIndent(int ToColumn) { 954 Token T; 955 // Indentation is ignored in flow. 956 if (FlowLevel != 0) 957 return true; 958 959 while (Indent > ToColumn) { 960 T.Kind = Token::TK_BlockEnd; 961 T.Range = StringRef(Current, 1); 962 TokenQueue.push_back(T); 963 Indent = Indents.pop_back_val(); 964 } 965 966 return true; 967 } 968 969 bool Scanner::rollIndent( int ToColumn 970 , Token::TokenKind Kind 971 , TokenQueueT::iterator InsertPoint) { 972 if (FlowLevel) 973 return true; 974 if (Indent < ToColumn) { 975 Indents.push_back(Indent); 976 Indent = ToColumn; 977 978 Token T; 979 T.Kind = Kind; 980 T.Range = StringRef(Current, 0); 981 TokenQueue.insert(InsertPoint, T); 982 } 983 return true; 984 } 985 986 void Scanner::scanToNextToken() { 987 while (true) { 988 while (*Current == ' ' || *Current == '\t') { 989 skip(1); 990 } 991 992 // Skip comment. 993 if (*Current == '#') { 994 while (true) { 995 // This may skip more than one byte, thus Column is only incremented 996 // for code points. 997 StringRef::iterator i = skip_nb_char(Current); 998 if (i == Current) 999 break; 1000 Current = i; 1001 ++Column; 1002 } 1003 } 1004 1005 // Skip EOL. 1006 StringRef::iterator i = skip_b_break(Current); 1007 if (i == Current) 1008 break; 1009 Current = i; 1010 ++Line; 1011 Column = 0; 1012 // New lines may start a simple key. 1013 if (!FlowLevel) 1014 IsSimpleKeyAllowed = true; 1015 } 1016 } 1017 1018 bool Scanner::scanStreamStart() { 1019 IsStartOfStream = false; 1020 1021 EncodingInfo EI = getUnicodeEncoding(currentInput()); 1022 1023 Token T; 1024 T.Kind = Token::TK_StreamStart; 1025 T.Range = StringRef(Current, EI.second); 1026 TokenQueue.push_back(T); 1027 Current += EI.second; 1028 return true; 1029 } 1030 1031 bool Scanner::scanStreamEnd() { 1032 // Force an ending new line if one isn't present. 1033 if (Column != 0) { 1034 Column = 0; 1035 ++Line; 1036 } 1037 1038 unrollIndent(-1); 1039 SimpleKeys.clear(); 1040 IsSimpleKeyAllowed = false; 1041 1042 Token T; 1043 T.Kind = Token::TK_StreamEnd; 1044 T.Range = StringRef(Current, 0); 1045 TokenQueue.push_back(T); 1046 return true; 1047 } 1048 1049 bool Scanner::scanDirective() { 1050 // Reset the indentation level. 1051 unrollIndent(-1); 1052 SimpleKeys.clear(); 1053 IsSimpleKeyAllowed = false; 1054 1055 StringRef::iterator Start = Current; 1056 consume('%'); 1057 StringRef::iterator NameStart = Current; 1058 Current = skip_while(&Scanner::skip_ns_char, Current); 1059 StringRef Name(NameStart, Current - NameStart); 1060 Current = skip_while(&Scanner::skip_s_white, Current); 1061 1062 if (Name == "YAML") { 1063 Current = skip_while(&Scanner::skip_ns_char, Current); 1064 Token T; 1065 T.Kind = Token::TK_VersionDirective; 1066 T.Range = StringRef(Start, Current - Start); 1067 TokenQueue.push_back(T); 1068 return true; 1069 } 1070 return false; 1071 } 1072 1073 bool Scanner::scanDocumentIndicator(bool IsStart) { 1074 unrollIndent(-1); 1075 SimpleKeys.clear(); 1076 IsSimpleKeyAllowed = false; 1077 1078 Token T; 1079 T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; 1080 T.Range = StringRef(Current, 3); 1081 skip(3); 1082 TokenQueue.push_back(T); 1083 return true; 1084 } 1085 1086 bool Scanner::scanFlowCollectionStart(bool IsSequence) { 1087 Token T; 1088 T.Kind = IsSequence ? Token::TK_FlowSequenceStart 1089 : Token::TK_FlowMappingStart; 1090 T.Range = StringRef(Current, 1); 1091 skip(1); 1092 TokenQueue.push_back(T); 1093 1094 // [ and { may begin a simple key. 1095 saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false); 1096 1097 // And may also be followed by a simple key. 1098 IsSimpleKeyAllowed = true; 1099 ++FlowLevel; 1100 return true; 1101 } 1102 1103 bool Scanner::scanFlowCollectionEnd(bool IsSequence) { 1104 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 1105 IsSimpleKeyAllowed = false; 1106 Token T; 1107 T.Kind = IsSequence ? Token::TK_FlowSequenceEnd 1108 : Token::TK_FlowMappingEnd; 1109 T.Range = StringRef(Current, 1); 1110 skip(1); 1111 TokenQueue.push_back(T); 1112 if (FlowLevel) 1113 --FlowLevel; 1114 return true; 1115 } 1116 1117 bool Scanner::scanFlowEntry() { 1118 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 1119 IsSimpleKeyAllowed = true; 1120 Token T; 1121 T.Kind = Token::TK_FlowEntry; 1122 T.Range = StringRef(Current, 1); 1123 skip(1); 1124 TokenQueue.push_back(T); 1125 return true; 1126 } 1127 1128 bool Scanner::scanBlockEntry() { 1129 rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); 1130 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 1131 IsSimpleKeyAllowed = true; 1132 Token T; 1133 T.Kind = Token::TK_BlockEntry; 1134 T.Range = StringRef(Current, 1); 1135 skip(1); 1136 TokenQueue.push_back(T); 1137 return true; 1138 } 1139 1140 bool Scanner::scanKey() { 1141 if (!FlowLevel) 1142 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); 1143 1144 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 1145 IsSimpleKeyAllowed = !FlowLevel; 1146 1147 Token T; 1148 T.Kind = Token::TK_Key; 1149 T.Range = StringRef(Current, 1); 1150 skip(1); 1151 TokenQueue.push_back(T); 1152 return true; 1153 } 1154 1155 bool Scanner::scanValue() { 1156 // If the previous token could have been a simple key, insert the key token 1157 // into the token queue. 1158 if (!SimpleKeys.empty()) { 1159 SimpleKey SK = SimpleKeys.pop_back_val(); 1160 Token T; 1161 T.Kind = Token::TK_Key; 1162 T.Range = SK.Tok->Range; 1163 TokenQueueT::iterator i, e; 1164 for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { 1165 if (i == SK.Tok) 1166 break; 1167 } 1168 assert(i != e && "SimpleKey not in token queue!"); 1169 i = TokenQueue.insert(i, T); 1170 1171 // We may also need to add a Block-Mapping-Start token. 1172 rollIndent(SK.Column, Token::TK_BlockMappingStart, i); 1173 1174 IsSimpleKeyAllowed = false; 1175 } else { 1176 if (!FlowLevel) 1177 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); 1178 IsSimpleKeyAllowed = !FlowLevel; 1179 } 1180 1181 Token T; 1182 T.Kind = Token::TK_Value; 1183 T.Range = StringRef(Current, 1); 1184 skip(1); 1185 TokenQueue.push_back(T); 1186 return true; 1187 } 1188 1189 // Forbidding inlining improves performance by roughly 20%. 1190 // FIXME: Remove once llvm optimizes this to the faster version without hints. 1191 LLVM_ATTRIBUTE_NOINLINE static bool 1192 wasEscaped(StringRef::iterator First, StringRef::iterator Position); 1193 1194 // Returns whether a character at 'Position' was escaped with a leading '\'. 1195 // 'First' specifies the position of the first character in the string. 1196 static bool wasEscaped(StringRef::iterator First, 1197 StringRef::iterator Position) { 1198 assert(Position - 1 >= First); 1199 StringRef::iterator I = Position - 1; 1200 // We calculate the number of consecutive '\'s before the current position 1201 // by iterating backwards through our string. 1202 while (I >= First && *I == '\\') --I; 1203 // (Position - 1 - I) now contains the number of '\'s before the current 1204 // position. If it is odd, the character at 'Position' was escaped. 1205 return (Position - 1 - I) % 2 == 1; 1206 } 1207 1208 bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { 1209 StringRef::iterator Start = Current; 1210 unsigned ColStart = Column; 1211 if (IsDoubleQuoted) { 1212 do { 1213 ++Current; 1214 while (Current != End && *Current != '"') 1215 ++Current; 1216 // Repeat until the previous character was not a '\' or was an escaped 1217 // backslash. 1218 } while ( Current != End 1219 && *(Current - 1) == '\\' 1220 && wasEscaped(Start + 1, Current)); 1221 } else { 1222 skip(1); 1223 while (true) { 1224 // Skip a ' followed by another '. 1225 if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { 1226 skip(2); 1227 continue; 1228 } else if (*Current == '\'') 1229 break; 1230 StringRef::iterator i = skip_nb_char(Current); 1231 if (i == Current) { 1232 i = skip_b_break(Current); 1233 if (i == Current) 1234 break; 1235 Current = i; 1236 Column = 0; 1237 ++Line; 1238 } else { 1239 if (i == End) 1240 break; 1241 Current = i; 1242 ++Column; 1243 } 1244 } 1245 } 1246 skip(1); // Skip ending quote. 1247 Token T; 1248 T.Kind = Token::TK_Scalar; 1249 T.Range = StringRef(Start, Current - Start); 1250 TokenQueue.push_back(T); 1251 1252 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); 1253 1254 IsSimpleKeyAllowed = false; 1255 1256 return true; 1257 } 1258 1259 bool Scanner::scanPlainScalar() { 1260 StringRef::iterator Start = Current; 1261 unsigned ColStart = Column; 1262 unsigned LeadingBlanks = 0; 1263 assert(Indent >= -1 && "Indent must be >= -1 !"); 1264 unsigned indent = static_cast<unsigned>(Indent + 1); 1265 while (true) { 1266 if (*Current == '#') 1267 break; 1268 1269 while (!isBlankOrBreak(Current)) { 1270 if ( FlowLevel && *Current == ':' 1271 && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) { 1272 setError("Found unexpected ':' while scanning a plain scalar", Current); 1273 return false; 1274 } 1275 1276 // Check for the end of the plain scalar. 1277 if ( (*Current == ':' && isBlankOrBreak(Current + 1)) 1278 || ( FlowLevel 1279 && (StringRef(Current, 1).find_first_of(",:?[]{}") 1280 != StringRef::npos))) 1281 break; 1282 1283 StringRef::iterator i = skip_nb_char(Current); 1284 if (i == Current) 1285 break; 1286 Current = i; 1287 ++Column; 1288 } 1289 1290 // Are we at the end? 1291 if (!isBlankOrBreak(Current)) 1292 break; 1293 1294 // Eat blanks. 1295 StringRef::iterator Tmp = Current; 1296 while (isBlankOrBreak(Tmp)) { 1297 StringRef::iterator i = skip_s_white(Tmp); 1298 if (i != Tmp) { 1299 if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { 1300 setError("Found invalid tab character in indentation", Tmp); 1301 return false; 1302 } 1303 Tmp = i; 1304 ++Column; 1305 } else { 1306 i = skip_b_break(Tmp); 1307 if (!LeadingBlanks) 1308 LeadingBlanks = 1; 1309 Tmp = i; 1310 Column = 0; 1311 ++Line; 1312 } 1313 } 1314 1315 if (!FlowLevel && Column < indent) 1316 break; 1317 1318 Current = Tmp; 1319 } 1320 if (Start == Current) { 1321 setError("Got empty plain scalar", Start); 1322 return false; 1323 } 1324 Token T; 1325 T.Kind = Token::TK_Scalar; 1326 T.Range = StringRef(Start, Current - Start); 1327 TokenQueue.push_back(T); 1328 1329 // Plain scalars can be simple keys. 1330 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); 1331 1332 IsSimpleKeyAllowed = false; 1333 1334 return true; 1335 } 1336 1337 bool Scanner::scanAliasOrAnchor(bool IsAlias) { 1338 StringRef::iterator Start = Current; 1339 unsigned ColStart = Column; 1340 skip(1); 1341 while(true) { 1342 if ( *Current == '[' || *Current == ']' 1343 || *Current == '{' || *Current == '}' 1344 || *Current == ',' 1345 || *Current == ':') 1346 break; 1347 StringRef::iterator i = skip_ns_char(Current); 1348 if (i == Current) 1349 break; 1350 Current = i; 1351 ++Column; 1352 } 1353 1354 if (Start == Current) { 1355 setError("Got empty alias or anchor", Start); 1356 return false; 1357 } 1358 1359 Token T; 1360 T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; 1361 T.Range = StringRef(Start, Current - Start); 1362 TokenQueue.push_back(T); 1363 1364 // Alias and anchors can be simple keys. 1365 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); 1366 1367 IsSimpleKeyAllowed = false; 1368 1369 return true; 1370 } 1371 1372 bool Scanner::scanBlockScalar(bool IsLiteral) { 1373 StringRef::iterator Start = Current; 1374 skip(1); // Eat | or > 1375 while(true) { 1376 StringRef::iterator i = skip_nb_char(Current); 1377 if (i == Current) { 1378 if (Column == 0) 1379 break; 1380 i = skip_b_break(Current); 1381 if (i != Current) { 1382 // We got a line break. 1383 Column = 0; 1384 ++Line; 1385 Current = i; 1386 continue; 1387 } else { 1388 // There was an error, which should already have been printed out. 1389 return false; 1390 } 1391 } 1392 Current = i; 1393 ++Column; 1394 } 1395 1396 if (Start == Current) { 1397 setError("Got empty block scalar", Start); 1398 return false; 1399 } 1400 1401 Token T; 1402 T.Kind = Token::TK_Scalar; 1403 T.Range = StringRef(Start, Current - Start); 1404 TokenQueue.push_back(T); 1405 return true; 1406 } 1407 1408 bool Scanner::scanTag() { 1409 StringRef::iterator Start = Current; 1410 unsigned ColStart = Column; 1411 skip(1); // Eat !. 1412 if (Current == End || isBlankOrBreak(Current)); // An empty tag. 1413 else if (*Current == '<') { 1414 skip(1); 1415 scan_ns_uri_char(); 1416 if (!consume('>')) 1417 return false; 1418 } else { 1419 // FIXME: Actually parse the c-ns-shorthand-tag rule. 1420 Current = skip_while(&Scanner::skip_ns_char, Current); 1421 } 1422 1423 Token T; 1424 T.Kind = Token::TK_Tag; 1425 T.Range = StringRef(Start, Current - Start); 1426 TokenQueue.push_back(T); 1427 1428 // Tags can be simple keys. 1429 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); 1430 1431 IsSimpleKeyAllowed = false; 1432 1433 return true; 1434 } 1435 1436 bool Scanner::fetchMoreTokens() { 1437 if (IsStartOfStream) 1438 return scanStreamStart(); 1439 1440 scanToNextToken(); 1441 1442 if (Current == End) 1443 return scanStreamEnd(); 1444 1445 removeStaleSimpleKeyCandidates(); 1446 1447 unrollIndent(Column); 1448 1449 if (Column == 0 && *Current == '%') 1450 return scanDirective(); 1451 1452 if (Column == 0 && Current + 4 <= End 1453 && *Current == '-' 1454 && *(Current + 1) == '-' 1455 && *(Current + 2) == '-' 1456 && (Current + 3 == End || isBlankOrBreak(Current + 3))) 1457 return scanDocumentIndicator(true); 1458 1459 if (Column == 0 && Current + 4 <= End 1460 && *Current == '.' 1461 && *(Current + 1) == '.' 1462 && *(Current + 2) == '.' 1463 && (Current + 3 == End || isBlankOrBreak(Current + 3))) 1464 return scanDocumentIndicator(false); 1465 1466 if (*Current == '[') 1467 return scanFlowCollectionStart(true); 1468 1469 if (*Current == '{') 1470 return scanFlowCollectionStart(false); 1471 1472 if (*Current == ']') 1473 return scanFlowCollectionEnd(true); 1474 1475 if (*Current == '}') 1476 return scanFlowCollectionEnd(false); 1477 1478 if (*Current == ',') 1479 return scanFlowEntry(); 1480 1481 if (*Current == '-' && isBlankOrBreak(Current + 1)) 1482 return scanBlockEntry(); 1483 1484 if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) 1485 return scanKey(); 1486 1487 if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) 1488 return scanValue(); 1489 1490 if (*Current == '*') 1491 return scanAliasOrAnchor(true); 1492 1493 if (*Current == '&') 1494 return scanAliasOrAnchor(false); 1495 1496 if (*Current == '!') 1497 return scanTag(); 1498 1499 if (*Current == '|' && !FlowLevel) 1500 return scanBlockScalar(true); 1501 1502 if (*Current == '>' && !FlowLevel) 1503 return scanBlockScalar(false); 1504 1505 if (*Current == '\'') 1506 return scanFlowScalar(false); 1507 1508 if (*Current == '"') 1509 return scanFlowScalar(true); 1510 1511 // Get a plain scalar. 1512 StringRef FirstChar(Current, 1); 1513 if (!(isBlankOrBreak(Current) 1514 || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) 1515 || (*Current == '-' && !isBlankOrBreak(Current + 1)) 1516 || (!FlowLevel && (*Current == '?' || *Current == ':') 1517 && isBlankOrBreak(Current + 1)) 1518 || (!FlowLevel && *Current == ':' 1519 && Current + 2 < End 1520 && *(Current + 1) == ':' 1521 && !isBlankOrBreak(Current + 2))) 1522 return scanPlainScalar(); 1523 1524 setError("Unrecognized character while tokenizing."); 1525 return false; 1526 } 1527 1528 Stream::Stream(StringRef Input, SourceMgr &SM) 1529 : scanner(new Scanner(Input, SM)) 1530 , CurrentDoc(0) {} 1531 1532 Stream::~Stream() {} 1533 1534 bool Stream::failed() { return scanner->failed(); } 1535 1536 void Stream::printError(Node *N, const Twine &Msg) { 1537 SmallVector<SMRange, 1> Ranges; 1538 Ranges.push_back(N->getSourceRange()); 1539 scanner->printError( N->getSourceRange().Start 1540 , SourceMgr::DK_Error 1541 , Msg 1542 , Ranges); 1543 } 1544 1545 void Stream::handleYAMLDirective(const Token &t) { 1546 // TODO: Ensure version is 1.x. 1547 } 1548 1549 document_iterator Stream::begin() { 1550 if (CurrentDoc) 1551 report_fatal_error("Can only iterate over the stream once"); 1552 1553 // Skip Stream-Start. 1554 scanner->getNext(); 1555 1556 CurrentDoc.reset(new Document(*this)); 1557 return document_iterator(CurrentDoc); 1558 } 1559 1560 document_iterator Stream::end() { 1561 return document_iterator(); 1562 } 1563 1564 void Stream::skip() { 1565 for (document_iterator i = begin(), e = end(); i != e; ++i) 1566 i->skip(); 1567 } 1568 1569 Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A) 1570 : Doc(D) 1571 , TypeID(Type) 1572 , Anchor(A) { 1573 SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); 1574 SourceRange = SMRange(Start, Start); 1575 } 1576 1577 Token &Node::peekNext() { 1578 return Doc->peekNext(); 1579 } 1580 1581 Token Node::getNext() { 1582 return Doc->getNext(); 1583 } 1584 1585 Node *Node::parseBlockNode() { 1586 return Doc->parseBlockNode(); 1587 } 1588 1589 BumpPtrAllocator &Node::getAllocator() { 1590 return Doc->NodeAllocator; 1591 } 1592 1593 void Node::setError(const Twine &Msg, Token &Tok) const { 1594 Doc->setError(Msg, Tok); 1595 } 1596 1597 bool Node::failed() const { 1598 return Doc->failed(); 1599 } 1600 1601 1602 1603 StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { 1604 // TODO: Handle newlines properly. We need to remove leading whitespace. 1605 if (Value[0] == '"') { // Double quoted. 1606 // Pull off the leading and trailing "s. 1607 StringRef UnquotedValue = Value.substr(1, Value.size() - 2); 1608 // Search for characters that would require unescaping the value. 1609 StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); 1610 if (i != StringRef::npos) 1611 return unescapeDoubleQuoted(UnquotedValue, i, Storage); 1612 return UnquotedValue; 1613 } else if (Value[0] == '\'') { // Single quoted. 1614 // Pull off the leading and trailing 's. 1615 StringRef UnquotedValue = Value.substr(1, Value.size() - 2); 1616 StringRef::size_type i = UnquotedValue.find('\''); 1617 if (i != StringRef::npos) { 1618 // We're going to need Storage. 1619 Storage.clear(); 1620 Storage.reserve(UnquotedValue.size()); 1621 for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { 1622 StringRef Valid(UnquotedValue.begin(), i); 1623 Storage.insert(Storage.end(), Valid.begin(), Valid.end()); 1624 Storage.push_back('\''); 1625 UnquotedValue = UnquotedValue.substr(i + 2); 1626 } 1627 Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); 1628 return StringRef(Storage.begin(), Storage.size()); 1629 } 1630 return UnquotedValue; 1631 } 1632 // Plain or block. 1633 return Value.rtrim(" "); 1634 } 1635 1636 StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue 1637 , StringRef::size_type i 1638 , SmallVectorImpl<char> &Storage) 1639 const { 1640 // Use Storage to build proper value. 1641 Storage.clear(); 1642 Storage.reserve(UnquotedValue.size()); 1643 for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) { 1644 // Insert all previous chars into Storage. 1645 StringRef Valid(UnquotedValue.begin(), i); 1646 Storage.insert(Storage.end(), Valid.begin(), Valid.end()); 1647 // Chop off inserted chars. 1648 UnquotedValue = UnquotedValue.substr(i); 1649 1650 assert(!UnquotedValue.empty() && "Can't be empty!"); 1651 1652 // Parse escape or line break. 1653 switch (UnquotedValue[0]) { 1654 case '\r': 1655 case '\n': 1656 Storage.push_back('\n'); 1657 if ( UnquotedValue.size() > 1 1658 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) 1659 UnquotedValue = UnquotedValue.substr(1); 1660 UnquotedValue = UnquotedValue.substr(1); 1661 break; 1662 default: 1663 if (UnquotedValue.size() == 1) 1664 // TODO: Report error. 1665 break; 1666 UnquotedValue = UnquotedValue.substr(1); 1667 switch (UnquotedValue[0]) { 1668 default: { 1669 Token T; 1670 T.Range = StringRef(UnquotedValue.begin(), 1); 1671 setError("Unrecognized escape code!", T); 1672 return ""; 1673 } 1674 case '\r': 1675 case '\n': 1676 // Remove the new line. 1677 if ( UnquotedValue.size() > 1 1678 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) 1679 UnquotedValue = UnquotedValue.substr(1); 1680 // If this was just a single byte newline, it will get skipped 1681 // below. 1682 break; 1683 case '0': 1684 Storage.push_back(0x00); 1685 break; 1686 case 'a': 1687 Storage.push_back(0x07); 1688 break; 1689 case 'b': 1690 Storage.push_back(0x08); 1691 break; 1692 case 't': 1693 case 0x09: 1694 Storage.push_back(0x09); 1695 break; 1696 case 'n': 1697 Storage.push_back(0x0A); 1698 break; 1699 case 'v': 1700 Storage.push_back(0x0B); 1701 break; 1702 case 'f': 1703 Storage.push_back(0x0C); 1704 break; 1705 case 'r': 1706 Storage.push_back(0x0D); 1707 break; 1708 case 'e': 1709 Storage.push_back(0x1B); 1710 break; 1711 case ' ': 1712 Storage.push_back(0x20); 1713 break; 1714 case '"': 1715 Storage.push_back(0x22); 1716 break; 1717 case '/': 1718 Storage.push_back(0x2F); 1719 break; 1720 case '\\': 1721 Storage.push_back(0x5C); 1722 break; 1723 case 'N': 1724 encodeUTF8(0x85, Storage); 1725 break; 1726 case '_': 1727 encodeUTF8(0xA0, Storage); 1728 break; 1729 case 'L': 1730 encodeUTF8(0x2028, Storage); 1731 break; 1732 case 'P': 1733 encodeUTF8(0x2029, Storage); 1734 break; 1735 case 'x': { 1736 if (UnquotedValue.size() < 3) 1737 // TODO: Report error. 1738 break; 1739 unsigned int UnicodeScalarValue; 1740 if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) 1741 // TODO: Report error. 1742 UnicodeScalarValue = 0xFFFD; 1743 encodeUTF8(UnicodeScalarValue, Storage); 1744 UnquotedValue = UnquotedValue.substr(2); 1745 break; 1746 } 1747 case 'u': { 1748 if (UnquotedValue.size() < 5) 1749 // TODO: Report error. 1750 break; 1751 unsigned int UnicodeScalarValue; 1752 if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) 1753 // TODO: Report error. 1754 UnicodeScalarValue = 0xFFFD; 1755 encodeUTF8(UnicodeScalarValue, Storage); 1756 UnquotedValue = UnquotedValue.substr(4); 1757 break; 1758 } 1759 case 'U': { 1760 if (UnquotedValue.size() < 9) 1761 // TODO: Report error. 1762 break; 1763 unsigned int UnicodeScalarValue; 1764 if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) 1765 // TODO: Report error. 1766 UnicodeScalarValue = 0xFFFD; 1767 encodeUTF8(UnicodeScalarValue, Storage); 1768 UnquotedValue = UnquotedValue.substr(8); 1769 break; 1770 } 1771 } 1772 UnquotedValue = UnquotedValue.substr(1); 1773 } 1774 } 1775 Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); 1776 return StringRef(Storage.begin(), Storage.size()); 1777 } 1778 1779 Node *KeyValueNode::getKey() { 1780 if (Key) 1781 return Key; 1782 // Handle implicit null keys. 1783 { 1784 Token &t = peekNext(); 1785 if ( t.Kind == Token::TK_BlockEnd 1786 || t.Kind == Token::TK_Value 1787 || t.Kind == Token::TK_Error) { 1788 return Key = new (getAllocator()) NullNode(Doc); 1789 } 1790 if (t.Kind == Token::TK_Key) 1791 getNext(); // skip TK_Key. 1792 } 1793 1794 // Handle explicit null keys. 1795 Token &t = peekNext(); 1796 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { 1797 return Key = new (getAllocator()) NullNode(Doc); 1798 } 1799 1800 // We've got a normal key. 1801 return Key = parseBlockNode(); 1802 } 1803 1804 Node *KeyValueNode::getValue() { 1805 if (Value) 1806 return Value; 1807 getKey()->skip(); 1808 if (failed()) 1809 return Value = new (getAllocator()) NullNode(Doc); 1810 1811 // Handle implicit null values. 1812 { 1813 Token &t = peekNext(); 1814 if ( t.Kind == Token::TK_BlockEnd 1815 || t.Kind == Token::TK_FlowMappingEnd 1816 || t.Kind == Token::TK_Key 1817 || t.Kind == Token::TK_FlowEntry 1818 || t.Kind == Token::TK_Error) { 1819 return Value = new (getAllocator()) NullNode(Doc); 1820 } 1821 1822 if (t.Kind != Token::TK_Value) { 1823 setError("Unexpected token in Key Value.", t); 1824 return Value = new (getAllocator()) NullNode(Doc); 1825 } 1826 getNext(); // skip TK_Value. 1827 } 1828 1829 // Handle explicit null values. 1830 Token &t = peekNext(); 1831 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { 1832 return Value = new (getAllocator()) NullNode(Doc); 1833 } 1834 1835 // We got a normal value. 1836 return Value = parseBlockNode(); 1837 } 1838 1839 void MappingNode::increment() { 1840 if (failed()) { 1841 IsAtEnd = true; 1842 CurrentEntry = 0; 1843 return; 1844 } 1845 if (CurrentEntry) { 1846 CurrentEntry->skip(); 1847 if (Type == MT_Inline) { 1848 IsAtEnd = true; 1849 CurrentEntry = 0; 1850 return; 1851 } 1852 } 1853 Token T = peekNext(); 1854 if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { 1855 // KeyValueNode eats the TK_Key. That way it can detect null keys. 1856 CurrentEntry = new (getAllocator()) KeyValueNode(Doc); 1857 } else if (Type == MT_Block) { 1858 switch (T.Kind) { 1859 case Token::TK_BlockEnd: 1860 getNext(); 1861 IsAtEnd = true; 1862 CurrentEntry = 0; 1863 break; 1864 default: 1865 setError("Unexpected token. Expected Key or Block End", T); 1866 case Token::TK_Error: 1867 IsAtEnd = true; 1868 CurrentEntry = 0; 1869 } 1870 } else { 1871 switch (T.Kind) { 1872 case Token::TK_FlowEntry: 1873 // Eat the flow entry and recurse. 1874 getNext(); 1875 return increment(); 1876 case Token::TK_FlowMappingEnd: 1877 getNext(); 1878 case Token::TK_Error: 1879 // Set this to end iterator. 1880 IsAtEnd = true; 1881 CurrentEntry = 0; 1882 break; 1883 default: 1884 setError( "Unexpected token. Expected Key, Flow Entry, or Flow " 1885 "Mapping End." 1886 , T); 1887 IsAtEnd = true; 1888 CurrentEntry = 0; 1889 } 1890 } 1891 } 1892 1893 void SequenceNode::increment() { 1894 if (failed()) { 1895 IsAtEnd = true; 1896 CurrentEntry = 0; 1897 return; 1898 } 1899 if (CurrentEntry) 1900 CurrentEntry->skip(); 1901 Token T = peekNext(); 1902 if (SeqType == ST_Block) { 1903 switch (T.Kind) { 1904 case Token::TK_BlockEntry: 1905 getNext(); 1906 CurrentEntry = parseBlockNode(); 1907 if (CurrentEntry == 0) { // An error occurred. 1908 IsAtEnd = true; 1909 CurrentEntry = 0; 1910 } 1911 break; 1912 case Token::TK_BlockEnd: 1913 getNext(); 1914 IsAtEnd = true; 1915 CurrentEntry = 0; 1916 break; 1917 default: 1918 setError( "Unexpected token. Expected Block Entry or Block End." 1919 , T); 1920 case Token::TK_Error: 1921 IsAtEnd = true; 1922 CurrentEntry = 0; 1923 } 1924 } else if (SeqType == ST_Indentless) { 1925 switch (T.Kind) { 1926 case Token::TK_BlockEntry: 1927 getNext(); 1928 CurrentEntry = parseBlockNode(); 1929 if (CurrentEntry == 0) { // An error occurred. 1930 IsAtEnd = true; 1931 CurrentEntry = 0; 1932 } 1933 break; 1934 default: 1935 case Token::TK_Error: 1936 IsAtEnd = true; 1937 CurrentEntry = 0; 1938 } 1939 } else if (SeqType == ST_Flow) { 1940 switch (T.Kind) { 1941 case Token::TK_FlowEntry: 1942 // Eat the flow entry and recurse. 1943 getNext(); 1944 WasPreviousTokenFlowEntry = true; 1945 return increment(); 1946 case Token::TK_FlowSequenceEnd: 1947 getNext(); 1948 case Token::TK_Error: 1949 // Set this to end iterator. 1950 IsAtEnd = true; 1951 CurrentEntry = 0; 1952 break; 1953 case Token::TK_StreamEnd: 1954 case Token::TK_DocumentEnd: 1955 case Token::TK_DocumentStart: 1956 setError("Could not find closing ]!", T); 1957 // Set this to end iterator. 1958 IsAtEnd = true; 1959 CurrentEntry = 0; 1960 break; 1961 default: 1962 if (!WasPreviousTokenFlowEntry) { 1963 setError("Expected , between entries!", T); 1964 IsAtEnd = true; 1965 CurrentEntry = 0; 1966 break; 1967 } 1968 // Otherwise it must be a flow entry. 1969 CurrentEntry = parseBlockNode(); 1970 if (!CurrentEntry) { 1971 IsAtEnd = true; 1972 } 1973 WasPreviousTokenFlowEntry = false; 1974 break; 1975 } 1976 } 1977 } 1978 1979 Document::Document(Stream &S) : stream(S), Root(0) { 1980 if (parseDirectives()) 1981 expectToken(Token::TK_DocumentStart); 1982 Token &T = peekNext(); 1983 if (T.Kind == Token::TK_DocumentStart) 1984 getNext(); 1985 } 1986 1987 bool Document::skip() { 1988 if (stream.scanner->failed()) 1989 return false; 1990 if (!Root) 1991 getRoot(); 1992 Root->skip(); 1993 Token &T = peekNext(); 1994 if (T.Kind == Token::TK_StreamEnd) 1995 return false; 1996 if (T.Kind == Token::TK_DocumentEnd) { 1997 getNext(); 1998 return skip(); 1999 } 2000 return true; 2001 } 2002 2003 Token &Document::peekNext() { 2004 return stream.scanner->peekNext(); 2005 } 2006 2007 Token Document::getNext() { 2008 return stream.scanner->getNext(); 2009 } 2010 2011 void Document::setError(const Twine &Message, Token &Location) const { 2012 stream.scanner->setError(Message, Location.Range.begin()); 2013 } 2014 2015 bool Document::failed() const { 2016 return stream.scanner->failed(); 2017 } 2018 2019 Node *Document::parseBlockNode() { 2020 Token T = peekNext(); 2021 // Handle properties. 2022 Token AnchorInfo; 2023 parse_property: 2024 switch (T.Kind) { 2025 case Token::TK_Alias: 2026 getNext(); 2027 return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); 2028 case Token::TK_Anchor: 2029 if (AnchorInfo.Kind == Token::TK_Anchor) { 2030 setError("Already encountered an anchor for this node!", T); 2031 return 0; 2032 } 2033 AnchorInfo = getNext(); // Consume TK_Anchor. 2034 T = peekNext(); 2035 goto parse_property; 2036 case Token::TK_Tag: 2037 getNext(); // Skip TK_Tag. 2038 T = peekNext(); 2039 goto parse_property; 2040 default: 2041 break; 2042 } 2043 2044 switch (T.Kind) { 2045 case Token::TK_BlockEntry: 2046 // We got an unindented BlockEntry sequence. This is not terminated with 2047 // a BlockEnd. 2048 // Don't eat the TK_BlockEntry, SequenceNode needs it. 2049 return new (NodeAllocator) SequenceNode( stream.CurrentDoc 2050 , AnchorInfo.Range.substr(1) 2051 , SequenceNode::ST_Indentless); 2052 case Token::TK_BlockSequenceStart: 2053 getNext(); 2054 return new (NodeAllocator) 2055 SequenceNode( stream.CurrentDoc 2056 , AnchorInfo.Range.substr(1) 2057 , SequenceNode::ST_Block); 2058 case Token::TK_BlockMappingStart: 2059 getNext(); 2060 return new (NodeAllocator) 2061 MappingNode( stream.CurrentDoc 2062 , AnchorInfo.Range.substr(1) 2063 , MappingNode::MT_Block); 2064 case Token::TK_FlowSequenceStart: 2065 getNext(); 2066 return new (NodeAllocator) 2067 SequenceNode( stream.CurrentDoc 2068 , AnchorInfo.Range.substr(1) 2069 , SequenceNode::ST_Flow); 2070 case Token::TK_FlowMappingStart: 2071 getNext(); 2072 return new (NodeAllocator) 2073 MappingNode( stream.CurrentDoc 2074 , AnchorInfo.Range.substr(1) 2075 , MappingNode::MT_Flow); 2076 case Token::TK_Scalar: 2077 getNext(); 2078 return new (NodeAllocator) 2079 ScalarNode( stream.CurrentDoc 2080 , AnchorInfo.Range.substr(1) 2081 , T.Range); 2082 case Token::TK_Key: 2083 // Don't eat the TK_Key, KeyValueNode expects it. 2084 return new (NodeAllocator) 2085 MappingNode( stream.CurrentDoc 2086 , AnchorInfo.Range.substr(1) 2087 , MappingNode::MT_Inline); 2088 case Token::TK_DocumentStart: 2089 case Token::TK_DocumentEnd: 2090 case Token::TK_StreamEnd: 2091 default: 2092 // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not 2093 // !!null null. 2094 return new (NodeAllocator) NullNode(stream.CurrentDoc); 2095 case Token::TK_Error: 2096 return 0; 2097 } 2098 llvm_unreachable("Control flow shouldn't reach here."); 2099 return 0; 2100 } 2101 2102 bool Document::parseDirectives() { 2103 bool isDirective = false; 2104 while (true) { 2105 Token T = peekNext(); 2106 if (T.Kind == Token::TK_TagDirective) { 2107 handleTagDirective(getNext()); 2108 isDirective = true; 2109 } else if (T.Kind == Token::TK_VersionDirective) { 2110 stream.handleYAMLDirective(getNext()); 2111 isDirective = true; 2112 } else 2113 break; 2114 } 2115 return isDirective; 2116 } 2117 2118 bool Document::expectToken(int TK) { 2119 Token T = getNext(); 2120 if (T.Kind != TK) { 2121 setError("Unexpected token", T); 2122 return false; 2123 } 2124 return true; 2125 } 2126