1 //===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/AST/CommentLexer.h" 10 #include "clang/AST/CommentCommandTraits.h" 11 #include "clang/Basic/CommentOptions.h" 12 #include "clang/Basic/Diagnostic.h" 13 #include "clang/Basic/DiagnosticOptions.h" 14 #include "clang/Basic/FileManager.h" 15 #include "clang/Basic/SourceManager.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "gtest/gtest.h" 18 #include <vector> 19 20 using namespace llvm; 21 using namespace clang; 22 23 namespace clang { 24 namespace comments { 25 26 namespace { 27 class CommentLexerTest : public ::testing::Test { 28 protected: 29 CommentLexerTest() 30 : FileMgr(FileMgrOpts), 31 DiagID(new DiagnosticIDs()), 32 Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), 33 SourceMgr(Diags, FileMgr), 34 Traits(Allocator, CommentOptions()) { 35 } 36 37 FileSystemOptions FileMgrOpts; 38 FileManager FileMgr; 39 IntrusiveRefCntPtr<DiagnosticIDs> DiagID; 40 DiagnosticsEngine Diags; 41 SourceManager SourceMgr; 42 llvm::BumpPtrAllocator Allocator; 43 CommandTraits Traits; 44 45 void lexString(const char *Source, std::vector<Token> &Toks); 46 47 StringRef getCommandName(const Token &Tok) { 48 return Traits.getCommandInfo(Tok.getCommandID())->Name; 49 } 50 51 StringRef getVerbatimBlockName(const Token &Tok) { 52 return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name; 53 } 54 55 StringRef getVerbatimLineName(const Token &Tok) { 56 return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name; 57 } 58 }; 59 60 void CommentLexerTest::lexString(const char *Source, 61 std::vector<Token> &Toks) { 62 std::unique_ptr<MemoryBuffer> Buf = 
MemoryBuffer::getMemBuffer(Source); 63 FileID File = SourceMgr.createFileID(std::move(Buf)); 64 SourceLocation Begin = SourceMgr.getLocForStartOfFile(File); 65 66 Lexer L(Allocator, Diags, Traits, Begin, Source, Source + strlen(Source)); 67 68 while (1) { 69 Token Tok; 70 L.lex(Tok); 71 if (Tok.is(tok::eof)) 72 break; 73 Toks.push_back(Tok); 74 } 75 } 76 77 } // unnamed namespace 78 79 // Empty source range should be handled. 80 TEST_F(CommentLexerTest, Basic1) { 81 const char *Source = ""; 82 std::vector<Token> Toks; 83 84 lexString(Source, Toks); 85 86 ASSERT_EQ(0U, Toks.size()); 87 } 88 89 // Empty comments should be handled. 90 TEST_F(CommentLexerTest, Basic2) { 91 const char *Sources[] = { 92 "//", "///", "//!", "///<", "//!<" 93 }; 94 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 95 std::vector<Token> Toks; 96 97 lexString(Sources[i], Toks); 98 99 ASSERT_EQ(1U, Toks.size()); 100 101 ASSERT_EQ(tok::newline, Toks[0].getKind()); 102 } 103 } 104 105 // Empty comments should be handled. 106 TEST_F(CommentLexerTest, Basic3) { 107 const char *Sources[] = { 108 "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/" 109 }; 110 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 111 std::vector<Token> Toks; 112 113 lexString(Sources[i], Toks); 114 115 ASSERT_EQ(2U, Toks.size()); 116 117 ASSERT_EQ(tok::newline, Toks[0].getKind()); 118 ASSERT_EQ(tok::newline, Toks[1].getKind()); 119 } 120 } 121 122 // Single comment with plain text. 123 TEST_F(CommentLexerTest, Basic4) { 124 const char *Sources[] = { 125 "// Meow", "/// Meow", "//! Meow", 126 "// Meow\n", "// Meow\r\n", "//! 
Meow\r", 127 }; 128 129 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 130 std::vector<Token> Toks; 131 132 lexString(Sources[i], Toks); 133 134 ASSERT_EQ(2U, Toks.size()); 135 136 ASSERT_EQ(tok::text, Toks[0].getKind()); 137 ASSERT_EQ(StringRef(" Meow"), Toks[0].getText()); 138 139 ASSERT_EQ(tok::newline, Toks[1].getKind()); 140 } 141 } 142 143 // Single comment with plain text. 144 TEST_F(CommentLexerTest, Basic5) { 145 const char *Sources[] = { 146 "/* Meow*/", "/** Meow*/", "/*! Meow*/" 147 }; 148 149 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 150 std::vector<Token> Toks; 151 152 lexString(Sources[i], Toks); 153 154 ASSERT_EQ(3U, Toks.size()); 155 156 ASSERT_EQ(tok::text, Toks[0].getKind()); 157 ASSERT_EQ(StringRef(" Meow"), Toks[0].getText()); 158 159 ASSERT_EQ(tok::newline, Toks[1].getKind()); 160 ASSERT_EQ(tok::newline, Toks[2].getKind()); 161 } 162 } 163 164 // Test newline escaping. 165 TEST_F(CommentLexerTest, Basic6) { 166 const char *Sources[] = { 167 "// Aaa\\\n" " Bbb\\ \n" " Ccc?" "?/\n", 168 "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n", 169 "// Aaa\\\r" " Bbb\\ \r" " Ccc?" "?/\r" 170 }; 171 172 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 173 std::vector<Token> Toks; 174 175 lexString(Sources[i], Toks); 176 177 ASSERT_EQ(10U, Toks.size()); 178 179 ASSERT_EQ(tok::text, Toks[0].getKind()); 180 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText()); 181 ASSERT_EQ(tok::text, Toks[1].getKind()); 182 ASSERT_EQ(StringRef("\\"), Toks[1].getText()); 183 ASSERT_EQ(tok::newline, Toks[2].getKind()); 184 185 ASSERT_EQ(tok::text, Toks[3].getKind()); 186 ASSERT_EQ(StringRef(" Bbb"), Toks[3].getText()); 187 ASSERT_EQ(tok::text, Toks[4].getKind()); 188 ASSERT_EQ(StringRef("\\"), Toks[4].getText()); 189 ASSERT_EQ(tok::text, Toks[5].getKind()); 190 ASSERT_EQ(StringRef(" "), Toks[5].getText()); 191 ASSERT_EQ(tok::newline, Toks[6].getKind()); 192 193 ASSERT_EQ(tok::text, Toks[7].getKind()); 194 ASSERT_EQ(StringRef(" Ccc?" 
"?/"), Toks[7].getText()); 195 ASSERT_EQ(tok::newline, Toks[8].getKind()); 196 197 ASSERT_EQ(tok::newline, Toks[9].getKind()); 198 } 199 } 200 201 // Check that we skip C-style aligned stars correctly. 202 TEST_F(CommentLexerTest, Basic7) { 203 const char *Source = 204 "/* Aaa\n" 205 " * Bbb\r\n" 206 "\t* Ccc\n" 207 " ! Ddd\n" 208 " * Eee\n" 209 " ** Fff\n" 210 " */"; 211 std::vector<Token> Toks; 212 213 lexString(Source, Toks); 214 215 ASSERT_EQ(15U, Toks.size()); 216 217 ASSERT_EQ(tok::text, Toks[0].getKind()); 218 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText()); 219 ASSERT_EQ(tok::newline, Toks[1].getKind()); 220 221 ASSERT_EQ(tok::text, Toks[2].getKind()); 222 ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText()); 223 ASSERT_EQ(tok::newline, Toks[3].getKind()); 224 225 ASSERT_EQ(tok::text, Toks[4].getKind()); 226 ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText()); 227 ASSERT_EQ(tok::newline, Toks[5].getKind()); 228 229 ASSERT_EQ(tok::text, Toks[6].getKind()); 230 ASSERT_EQ(StringRef(" ! Ddd"), Toks[6].getText()); 231 ASSERT_EQ(tok::newline, Toks[7].getKind()); 232 233 ASSERT_EQ(tok::text, Toks[8].getKind()); 234 ASSERT_EQ(StringRef(" Eee"), Toks[8].getText()); 235 ASSERT_EQ(tok::newline, Toks[9].getKind()); 236 237 ASSERT_EQ(tok::text, Toks[10].getKind()); 238 ASSERT_EQ(StringRef("* Fff"), Toks[10].getText()); 239 ASSERT_EQ(tok::newline, Toks[11].getKind()); 240 241 ASSERT_EQ(tok::text, Toks[12].getKind()); 242 ASSERT_EQ(StringRef(" "), Toks[12].getText()); 243 244 ASSERT_EQ(tok::newline, Toks[13].getKind()); 245 ASSERT_EQ(tok::newline, Toks[14].getKind()); 246 } 247 248 // A command marker followed by comment end. 
249 TEST_F(CommentLexerTest, DoxygenCommand1) { 250 const char *Sources[] = { "//@", "///@", "//!@" }; 251 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 252 std::vector<Token> Toks; 253 254 lexString(Sources[i], Toks); 255 256 ASSERT_EQ(2U, Toks.size()); 257 258 ASSERT_EQ(tok::text, Toks[0].getKind()); 259 ASSERT_EQ(StringRef("@"), Toks[0].getText()); 260 261 ASSERT_EQ(tok::newline, Toks[1].getKind()); 262 } 263 } 264 265 // A command marker followed by comment end. 266 TEST_F(CommentLexerTest, DoxygenCommand2) { 267 const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"}; 268 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 269 std::vector<Token> Toks; 270 271 lexString(Sources[i], Toks); 272 273 ASSERT_EQ(3U, Toks.size()); 274 275 ASSERT_EQ(tok::text, Toks[0].getKind()); 276 ASSERT_EQ(StringRef("@"), Toks[0].getText()); 277 278 ASSERT_EQ(tok::newline, Toks[1].getKind()); 279 ASSERT_EQ(tok::newline, Toks[2].getKind()); 280 } 281 } 282 283 // A command marker followed by comment end. 284 TEST_F(CommentLexerTest, DoxygenCommand3) { 285 const char *Sources[] = { "/*\\*/", "/**\\*/" }; 286 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 287 std::vector<Token> Toks; 288 289 lexString(Sources[i], Toks); 290 291 ASSERT_EQ(3U, Toks.size()); 292 293 ASSERT_EQ(tok::text, Toks[0].getKind()); 294 ASSERT_EQ(StringRef("\\"), Toks[0].getText()); 295 296 ASSERT_EQ(tok::newline, Toks[1].getKind()); 297 ASSERT_EQ(tok::newline, Toks[2].getKind()); 298 } 299 } 300 301 // Doxygen escape sequences. 302 TEST_F(CommentLexerTest, DoxygenCommand4) { 303 const char *Sources[] = { 304 "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::", 305 "/// @\\ @@ @& @$ @# @< @> @% @\" @. 
@::" 306 }; 307 const char *Text[] = { 308 " ", 309 "\\", " ", "@", " ", "&", " ", "$", " ", "#", " ", 310 "<", " ", ">", " ", "%", " ", "\"", " ", ".", " ", 311 "::", "" 312 }; 313 314 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 315 std::vector<Token> Toks; 316 317 lexString(Sources[i], Toks); 318 319 ASSERT_EQ(array_lengthof(Text), Toks.size()); 320 321 for (size_t j = 0, e = Toks.size(); j != e; j++) { 322 if(Toks[j].is(tok::text)) { 323 ASSERT_EQ(StringRef(Text[j]), Toks[j].getText()) 324 << "index " << i; 325 } 326 } 327 } 328 } 329 330 // A command marker followed by a non-letter that is not a part of an escape 331 // sequence. 332 TEST_F(CommentLexerTest, DoxygenCommand5) { 333 const char *Source = "/// \\^ \\0"; 334 std::vector<Token> Toks; 335 336 lexString(Source, Toks); 337 338 ASSERT_EQ(6U, Toks.size()); 339 340 ASSERT_EQ(tok::text, Toks[0].getKind()); 341 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 342 343 ASSERT_EQ(tok::text, Toks[1].getKind()); 344 ASSERT_EQ(StringRef("\\"), Toks[1].getText()); 345 346 ASSERT_EQ(tok::text, Toks[2].getKind()); 347 ASSERT_EQ(StringRef("^ "), Toks[2].getText()); 348 349 ASSERT_EQ(tok::text, Toks[3].getKind()); 350 ASSERT_EQ(StringRef("\\"), Toks[3].getText()); 351 352 ASSERT_EQ(tok::text, Toks[4].getKind()); 353 ASSERT_EQ(StringRef("0"), Toks[4].getText()); 354 355 ASSERT_EQ(tok::newline, Toks[5].getKind()); 356 } 357 358 TEST_F(CommentLexerTest, DoxygenCommand6) { 359 const char *Source = "/// \\brief Aaa."; 360 std::vector<Token> Toks; 361 362 lexString(Source, Toks); 363 364 ASSERT_EQ(4U, Toks.size()); 365 366 ASSERT_EQ(tok::text, Toks[0].getKind()); 367 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 368 369 ASSERT_EQ(tok::backslash_command, Toks[1].getKind()); 370 ASSERT_EQ(StringRef("brief"), getCommandName(Toks[1])); 371 372 ASSERT_EQ(tok::text, Toks[2].getKind()); 373 ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText()); 374 375 ASSERT_EQ(tok::newline, Toks[3].getKind()); 376 } 377 378 
TEST_F(CommentLexerTest, DoxygenCommand7) {
  // Back-to-back and whitespace-separated backslash commands each become
  // their own token.
  std::vector<Token> Toks;
  const char *Source = "/// \\em\\em \\em\t\\em\n";

  lexString(Source, Toks);

  ASSERT_EQ(8U, Toks.size());

  ASSERT_EQ(tok::text,       Toks[0].getKind());
  ASSERT_EQ(StringRef(" "),  Toks[0].getText());

  ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));

  ASSERT_EQ(tok::backslash_command, Toks[2].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));

  ASSERT_EQ(tok::text,       Toks[3].getKind());
  ASSERT_EQ(StringRef(" "),  Toks[3].getText());

  ASSERT_EQ(tok::backslash_command, Toks[4].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));

  ASSERT_EQ(tok::text,       Toks[5].getKind());
  ASSERT_EQ(StringRef("\t"), Toks[5].getText());

  ASSERT_EQ(tok::backslash_command, Toks[6].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));

  ASSERT_EQ(tok::newline,    Toks[7].getKind());
}

// Same layout as DoxygenCommand7, but with the '@' command marker.
TEST_F(CommentLexerTest, DoxygenCommand8) {
  std::vector<Token> Toks;
  const char *Source = "/// @em@em @em\t@em\n";

  lexString(Source, Toks);

  ASSERT_EQ(8U, Toks.size());

  ASSERT_EQ(tok::text,       Toks[0].getKind());
  ASSERT_EQ(StringRef(" "),  Toks[0].getText());

  ASSERT_EQ(tok::at_command, Toks[1].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));

  ASSERT_EQ(tok::at_command, Toks[2].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));

  ASSERT_EQ(tok::text,       Toks[3].getKind());
  ASSERT_EQ(StringRef(" "),  Toks[3].getText());

  ASSERT_EQ(tok::at_command, Toks[4].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));

  ASSERT_EQ(tok::text,       Toks[5].getKind());
  ASSERT_EQ(StringRef("\t"), Toks[5].getText());

  ASSERT_EQ(tok::at_command, Toks[6].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));

  ASSERT_EQ(tok::newline,    Toks[7].getKind());
}

// Unregistered command names are reported as unknown_command tokens that
// carry the spelled name.
TEST_F(CommentLexerTest, DoxygenCommand9) {
  std::vector<Token> Toks;
  const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n";

  lexString(Source, Toks);

  ASSERT_EQ(8U, Toks.size());

  ASSERT_EQ(tok::text,        Toks[0].getKind());
  ASSERT_EQ(StringRef(" "),   Toks[0].getText());

  ASSERT_EQ(tok::unknown_command, Toks[1].getKind());
  ASSERT_EQ(StringRef("aaa"), Toks[1].getUnknownCommandName());

  ASSERT_EQ(tok::unknown_command, Toks[2].getKind());
  ASSERT_EQ(StringRef("bbb"), Toks[2].getUnknownCommandName());

  ASSERT_EQ(tok::text,        Toks[3].getKind());
  ASSERT_EQ(StringRef(" "),   Toks[3].getText());

  ASSERT_EQ(tok::unknown_command, Toks[4].getKind());
  ASSERT_EQ(StringRef("ccc"), Toks[4].getUnknownCommandName());

  ASSERT_EQ(tok::text,        Toks[5].getKind());
  ASSERT_EQ(StringRef("\t"),  Toks[5].getText());

  ASSERT_EQ(tok::unknown_command, Toks[6].getKind());
  ASSERT_EQ(StringRef("ddd"), Toks[6].getUnknownCommandName());

  ASSERT_EQ(tok::newline,     Toks[7].getKind());
}

// A command immediately followed by the end of the line.
TEST_F(CommentLexerTest, DoxygenCommand10) {
  std::vector<Token> Toks;
  const char *Source = "// \\c\n";

  lexString(Source, Toks);

  ASSERT_EQ(3U, Toks.size());

  ASSERT_EQ(tok::text,      Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
  ASSERT_EQ(StringRef("c"), getCommandName(Toks[1]));

  ASSERT_EQ(tok::newline,   Toks[2].getKind());
}

// A block command registered at run time is recognized with either marker.
TEST_F(CommentLexerTest, RegisterCustomBlockCommand) {
  const char *Source =
    "/// \\NewBlockCommand Aaa.\n"
    "/// @NewBlockCommand Aaa.\n";

  // Registration has to happen before the source is lexed.
  Traits.registerBlockCommand(StringRef("NewBlockCommand"));

  std::vector<Token> Toks;
  lexString(Source, Toks);

  ASSERT_EQ(8U, Toks.size());

  ASSERT_EQ(tok::text,          Toks[0].getKind());
  ASSERT_EQ(StringRef(" "),     Toks[0].getText());

  ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
  ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[1]));

  ASSERT_EQ(tok::text,          Toks[2].getKind());
  ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());

  ASSERT_EQ(tok::newline,       Toks[3].getKind());

  ASSERT_EQ(tok::text,          Toks[4].getKind());
  ASSERT_EQ(StringRef(" "),     Toks[4].getText());

  ASSERT_EQ(tok::at_command,    Toks[5].getKind());
  ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[5]));

  ASSERT_EQ(tok::text,          Toks[6].getKind());
  ASSERT_EQ(StringRef(" Aaa."), Toks[6].getText());

  ASSERT_EQ(tok::newline,       Toks[7].getKind());
}

// Several block commands registered at run time are all recognized.
TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) {
  const char *Source =
    "/// \\Foo\n"
    "/// \\Bar Baz\n"
    "/// \\Blech quux=corge\n";

  // Registration has to happen before the source is lexed.
  Traits.registerBlockCommand(StringRef("Foo"));
  Traits.registerBlockCommand(StringRef("Bar"));
  Traits.registerBlockCommand(StringRef("Blech"));

  std::vector<Token> Toks;
  lexString(Source, Toks);

  ASSERT_EQ(11U, Toks.size());

  ASSERT_EQ(tok::text,      Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
  ASSERT_EQ(StringRef("Foo"), getCommandName(Toks[1]));

  ASSERT_EQ(tok::newline,   Toks[2].getKind());

  ASSERT_EQ(tok::text,      Toks[3].getKind());
  ASSERT_EQ(StringRef(" "), Toks[3].getText());

  ASSERT_EQ(tok::backslash_command, Toks[4].getKind());
  ASSERT_EQ(StringRef("Bar"), getCommandName(Toks[4]));

  ASSERT_EQ(tok::text,         Toks[5].getKind());
  ASSERT_EQ(StringRef(" Baz"), Toks[5].getText());

  ASSERT_EQ(tok::newline,   Toks[6].getKind());

  ASSERT_EQ(tok::text,      Toks[7].getKind());
  ASSERT_EQ(StringRef(" "), Toks[7].getText());

  ASSERT_EQ(tok::backslash_command, Toks[8].getKind());
  ASSERT_EQ(StringRef("Blech"), getCommandName(Toks[8]));

  ASSERT_EQ(tok::text,                Toks[9].getKind());
  ASSERT_EQ(StringRef(" quux=corge"), Toks[9].getText());

  ASSERT_EQ(tok::newline,   Toks[10].getKind());
}

// Empty verbatim block.
TEST_F(CommentLexerTest, VerbatimBlock1) {
  const char *Sources[] = {
    "/// \\verbatim\\endverbatim\n//",
    "/** \\verbatim\\endverbatim*/"
  };

  for (const char *Source : Sources) {
    std::vector<Token> Toks;

    lexString(Source, Toks);

    ASSERT_EQ(5U, Toks.size());

    ASSERT_EQ(tok::text,                 Toks[0].getKind());
    ASSERT_EQ(StringRef(" "),            Toks[0].getText());

    ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
    ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));

    ASSERT_EQ(tok::verbatim_block_end,   Toks[2].getKind());
    ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[2]));

    ASSERT_EQ(tok::newline,              Toks[3].getKind());
    ASSERT_EQ(tok::newline,              Toks[4].getKind());
  }
}

// Empty verbatim block without an end command.
TEST_F(CommentLexerTest, VerbatimBlock2) {
  std::vector<Token> Toks;
  const char *Source = "/// \\verbatim";

  lexString(Source, Toks);

  ASSERT_EQ(3U, Toks.size());

  ASSERT_EQ(tok::text,                 Toks[0].getKind());
  ASSERT_EQ(StringRef(" "),            Toks[0].getText());

  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));

  ASSERT_EQ(tok::newline,              Toks[2].getKind());
}

// Empty verbatim block without an end command.
622 TEST_F(CommentLexerTest, VerbatimBlock3) { 623 const char *Source = "/** \\verbatim*/"; 624 625 std::vector<Token> Toks; 626 627 lexString(Source, Toks); 628 629 ASSERT_EQ(4U, Toks.size()); 630 631 ASSERT_EQ(tok::text, Toks[0].getKind()); 632 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 633 634 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 635 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 636 637 ASSERT_EQ(tok::newline, Toks[2].getKind()); 638 ASSERT_EQ(tok::newline, Toks[3].getKind()); 639 } 640 641 // Single-line verbatim block. 642 TEST_F(CommentLexerTest, VerbatimBlock4) { 643 const char *Sources[] = { 644 "/// Meow \\verbatim aaa \\endverbatim\n//", 645 "/** Meow \\verbatim aaa \\endverbatim*/" 646 }; 647 648 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 649 std::vector<Token> Toks; 650 651 lexString(Sources[i], Toks); 652 653 ASSERT_EQ(6U, Toks.size()); 654 655 ASSERT_EQ(tok::text, Toks[0].getKind()); 656 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText()); 657 658 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 659 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 660 661 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 662 ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText()); 663 664 ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind()); 665 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[3])); 666 667 ASSERT_EQ(tok::newline, Toks[4].getKind()); 668 ASSERT_EQ(tok::newline, Toks[5].getKind()); 669 } 670 } 671 672 // Single-line verbatim block without an end command. 
673 TEST_F(CommentLexerTest, VerbatimBlock5) { 674 const char *Sources[] = { 675 "/// Meow \\verbatim aaa \n//", 676 "/** Meow \\verbatim aaa */" 677 }; 678 679 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 680 std::vector<Token> Toks; 681 682 lexString(Sources[i], Toks); 683 684 ASSERT_EQ(5U, Toks.size()); 685 686 ASSERT_EQ(tok::text, Toks[0].getKind()); 687 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText()); 688 689 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 690 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 691 692 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 693 ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText()); 694 695 ASSERT_EQ(tok::newline, Toks[3].getKind()); 696 ASSERT_EQ(tok::newline, Toks[4].getKind()); 697 } 698 } 699 700 TEST_F(CommentLexerTest, VerbatimBlock6) { 701 const char *Source = 702 "// \\verbatim\n" 703 "// Aaa\n" 704 "//\n" 705 "// Bbb\n" 706 "// \\endverbatim\n"; 707 708 std::vector<Token> Toks; 709 710 lexString(Source, Toks); 711 712 ASSERT_EQ(10U, Toks.size()); 713 714 ASSERT_EQ(tok::text, Toks[0].getKind()); 715 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 716 717 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 718 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 719 720 ASSERT_EQ(tok::newline, Toks[2].getKind()); 721 722 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); 723 ASSERT_EQ(StringRef(" Aaa"), Toks[3].getVerbatimBlockText()); 724 725 ASSERT_EQ(tok::newline, Toks[4].getKind()); 726 727 ASSERT_EQ(tok::newline, Toks[5].getKind()); 728 729 ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind()); 730 ASSERT_EQ(StringRef(" Bbb"), Toks[6].getVerbatimBlockText()); 731 732 ASSERT_EQ(tok::newline, Toks[7].getKind()); 733 734 ASSERT_EQ(tok::verbatim_block_end, Toks[8].getKind()); 735 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[8])); 736 737 ASSERT_EQ(tok::newline, Toks[9].getKind()); 738 } 739 740 
TEST_F(CommentLexerTest, VerbatimBlock7) {
  // Verbatim block inside a C-style comment with aligned stars: the line
  // holding only " *" yields an empty verbatim line rather than a newline.
  const char *Source =
    "/* \\verbatim\n"
    " * Aaa\n"
    " *\n"
    " * Bbb\n"
    " * \\endverbatim\n"
    " */";

  std::vector<Token> Toks;
  lexString(Source, Toks);

  ASSERT_EQ(10U, Toks.size());

  ASSERT_EQ(tok::text,                 Toks[0].getKind());
  ASSERT_EQ(StringRef(" "),            Toks[0].getText());

  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));

  ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
  ASSERT_EQ(StringRef(" Aaa"),         Toks[2].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
  ASSERT_EQ(StringRef(""),             Toks[3].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_line,  Toks[4].getKind());
  ASSERT_EQ(StringRef(" Bbb"),         Toks[4].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_end,   Toks[5].getKind());
  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[5]));

  ASSERT_EQ(tok::newline,              Toks[6].getKind());

  ASSERT_EQ(tok::text,                 Toks[7].getKind());
  ASSERT_EQ(StringRef(" "),            Toks[7].getText());

  ASSERT_EQ(tok::newline,              Toks[8].getKind());
  ASSERT_EQ(tok::newline,              Toks[9].getKind());
}

// Complex test for verbatim blocks.
783 TEST_F(CommentLexerTest, VerbatimBlock8) { 784 const char *Source = 785 "/* Meow \\verbatim aaa\\$\\@\n" 786 "bbb \\endverbati\r" 787 "ccc\r\n" 788 "ddd \\endverbatim Blah \\verbatim eee\n" 789 "\\endverbatim BlahBlah*/"; 790 std::vector<Token> Toks; 791 792 lexString(Source, Toks); 793 794 ASSERT_EQ(14U, Toks.size()); 795 796 ASSERT_EQ(tok::text, Toks[0].getKind()); 797 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText()); 798 799 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 800 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 801 802 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 803 ASSERT_EQ(StringRef(" aaa\\$\\@"), Toks[2].getVerbatimBlockText()); 804 805 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); 806 ASSERT_EQ(StringRef("bbb \\endverbati"), Toks[3].getVerbatimBlockText()); 807 808 ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind()); 809 ASSERT_EQ(StringRef("ccc"), Toks[4].getVerbatimBlockText()); 810 811 ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind()); 812 ASSERT_EQ(StringRef("ddd "), Toks[5].getVerbatimBlockText()); 813 814 ASSERT_EQ(tok::verbatim_block_end, Toks[6].getKind()); 815 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[6])); 816 817 ASSERT_EQ(tok::text, Toks[7].getKind()); 818 ASSERT_EQ(StringRef(" Blah "), Toks[7].getText()); 819 820 ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind()); 821 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[8])); 822 823 ASSERT_EQ(tok::verbatim_block_line, Toks[9].getKind()); 824 ASSERT_EQ(StringRef(" eee"), Toks[9].getVerbatimBlockText()); 825 826 ASSERT_EQ(tok::verbatim_block_end, Toks[10].getKind()); 827 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[10])); 828 829 ASSERT_EQ(tok::text, Toks[11].getKind()); 830 ASSERT_EQ(StringRef(" BlahBlah"), Toks[11].getText()); 831 832 ASSERT_EQ(tok::newline, Toks[12].getKind()); 833 ASSERT_EQ(tok::newline, Toks[13].getKind()); 834 } 835 836 // LaTeX verbatim blocks. 
837 TEST_F(CommentLexerTest, VerbatimBlock9) { 838 const char *Source = 839 "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f} \\f( Ddd \\f)"; 840 std::vector<Token> Toks; 841 842 lexString(Source, Toks); 843 844 ASSERT_EQ(17U, Toks.size()); 845 846 ASSERT_EQ(tok::text, Toks[0].getKind()); 847 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 848 849 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 850 ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Toks[1])); 851 852 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 853 ASSERT_EQ(StringRef(" Aaa "), Toks[2].getVerbatimBlockText()); 854 855 ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind()); 856 ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Toks[3])); 857 858 ASSERT_EQ(tok::text, Toks[4].getKind()); 859 ASSERT_EQ(StringRef(" "), Toks[4].getText()); 860 861 ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind()); 862 ASSERT_EQ(StringRef("f["), getVerbatimBlockName(Toks[5])); 863 864 ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind()); 865 ASSERT_EQ(StringRef(" Bbb "), Toks[6].getVerbatimBlockText()); 866 867 ASSERT_EQ(tok::verbatim_block_end, Toks[7].getKind()); 868 ASSERT_EQ(StringRef("f]"), getVerbatimBlockName(Toks[7])); 869 870 ASSERT_EQ(tok::text, Toks[8].getKind()); 871 ASSERT_EQ(StringRef(" "), Toks[8].getText()); 872 873 ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind()); 874 ASSERT_EQ(StringRef("f{"), getVerbatimBlockName(Toks[9])); 875 876 ASSERT_EQ(tok::verbatim_block_line, Toks[10].getKind()); 877 ASSERT_EQ(StringRef(" Ccc "), Toks[10].getVerbatimBlockText()); 878 879 ASSERT_EQ(tok::verbatim_block_end, Toks[11].getKind()); 880 ASSERT_EQ(StringRef("f}"), getVerbatimBlockName(Toks[11])); 881 882 ASSERT_EQ(tok::text, Toks[12].getKind()); 883 ASSERT_EQ(StringRef(" "), Toks[12].getText()); 884 885 ASSERT_EQ(tok::verbatim_block_begin, Toks[13].getKind()); 886 ASSERT_EQ(StringRef("f("), getVerbatimBlockName(Toks[13])); 887 888 ASSERT_EQ(tok::verbatim_block_line, Toks[14].getKind()); 889 
ASSERT_EQ(StringRef(" Ddd "), Toks[14].getVerbatimBlockText()); 890 891 ASSERT_EQ(tok::verbatim_block_end, Toks[15].getKind()); 892 ASSERT_EQ(StringRef("f)"), getVerbatimBlockName(Toks[15])); 893 894 ASSERT_EQ(tok::newline, Toks[16].getKind()); 895 } 896 897 // Empty verbatim line. 898 TEST_F(CommentLexerTest, VerbatimLine1) { 899 const char *Sources[] = { 900 "/// \\fn\n//", 901 "/** \\fn*/" 902 }; 903 904 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 905 std::vector<Token> Toks; 906 907 lexString(Sources[i], Toks); 908 909 ASSERT_EQ(4U, Toks.size()); 910 911 ASSERT_EQ(tok::text, Toks[0].getKind()); 912 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 913 914 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); 915 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1])); 916 917 ASSERT_EQ(tok::newline, Toks[2].getKind()); 918 ASSERT_EQ(tok::newline, Toks[3].getKind()); 919 } 920 } 921 922 // Verbatim line with Doxygen escape sequences, which should not be expanded. 923 TEST_F(CommentLexerTest, VerbatimLine2) { 924 const char *Sources[] = { 925 "/// \\fn void *foo(const char *zzz = \"\\$\");\n//", 926 "/** \\fn void *foo(const char *zzz = \"\\$\");*/" 927 }; 928 929 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 930 std::vector<Token> Toks; 931 932 lexString(Sources[i], Toks); 933 934 ASSERT_EQ(5U, Toks.size()); 935 936 ASSERT_EQ(tok::text, Toks[0].getKind()); 937 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 938 939 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); 940 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1])); 941 942 ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind()); 943 ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"), 944 Toks[2].getVerbatimLineText()); 945 946 ASSERT_EQ(tok::newline, Toks[3].getKind()); 947 ASSERT_EQ(tok::newline, Toks[4].getKind()); 948 } 949 } 950 951 // Verbatim line should not eat anything from next source line. 
952 TEST_F(CommentLexerTest, VerbatimLine3) { 953 const char *Source = 954 "/** \\fn void *foo(const char *zzz = \"\\$\");\n" 955 " * Meow\n" 956 " */"; 957 958 std::vector<Token> Toks; 959 960 lexString(Source, Toks); 961 962 ASSERT_EQ(9U, Toks.size()); 963 964 ASSERT_EQ(tok::text, Toks[0].getKind()); 965 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 966 967 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); 968 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1])); 969 970 ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind()); 971 ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"), 972 Toks[2].getVerbatimLineText()); 973 ASSERT_EQ(tok::newline, Toks[3].getKind()); 974 975 ASSERT_EQ(tok::text, Toks[4].getKind()); 976 ASSERT_EQ(StringRef(" Meow"), Toks[4].getText()); 977 ASSERT_EQ(tok::newline, Toks[5].getKind()); 978 979 ASSERT_EQ(tok::text, Toks[6].getKind()); 980 ASSERT_EQ(StringRef(" "), Toks[6].getText()); 981 982 ASSERT_EQ(tok::newline, Toks[7].getKind()); 983 ASSERT_EQ(tok::newline, Toks[8].getKind()); 984 } 985 986 TEST_F(CommentLexerTest, HTML1) { 987 const char *Source = 988 "// <"; 989 990 std::vector<Token> Toks; 991 992 lexString(Source, Toks); 993 994 ASSERT_EQ(3U, Toks.size()); 995 996 ASSERT_EQ(tok::text, Toks[0].getKind()); 997 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 998 999 ASSERT_EQ(tok::text, Toks[1].getKind()); 1000 ASSERT_EQ(StringRef("<"), Toks[1].getText()); 1001 1002 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1003 } 1004 1005 TEST_F(CommentLexerTest, HTML2) { 1006 const char *Source = 1007 "// a<2"; 1008 1009 std::vector<Token> Toks; 1010 1011 lexString(Source, Toks); 1012 1013 ASSERT_EQ(4U, Toks.size()); 1014 1015 ASSERT_EQ(tok::text, Toks[0].getKind()); 1016 ASSERT_EQ(StringRef(" a"), Toks[0].getText()); 1017 1018 ASSERT_EQ(tok::text, Toks[1].getKind()); 1019 ASSERT_EQ(StringRef("<"), Toks[1].getText()); 1020 1021 ASSERT_EQ(tok::text, Toks[2].getKind()); 1022 ASSERT_EQ(StringRef("2"), Toks[2].getText()); 1023 1024 
ASSERT_EQ(tok::newline, Toks[3].getKind()); 1025 } 1026 1027 TEST_F(CommentLexerTest, HTML3) { 1028 const char *Source = 1029 "// < img"; 1030 1031 std::vector<Token> Toks; 1032 1033 lexString(Source, Toks); 1034 1035 ASSERT_EQ(4U, Toks.size()); 1036 1037 ASSERT_EQ(tok::text, Toks[0].getKind()); 1038 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1039 1040 ASSERT_EQ(tok::text, Toks[1].getKind()); 1041 ASSERT_EQ(StringRef("<"), Toks[1].getText()); 1042 1043 ASSERT_EQ(tok::text, Toks[2].getKind()); 1044 ASSERT_EQ(StringRef(" img"), Toks[2].getText()); 1045 1046 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1047 } 1048 1049 TEST_F(CommentLexerTest, HTML4) { 1050 const char *Sources[] = { 1051 "// <img", 1052 "// <img " 1053 }; 1054 1055 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1056 std::vector<Token> Toks; 1057 1058 lexString(Sources[i], Toks); 1059 1060 ASSERT_EQ(3U, Toks.size()); 1061 1062 ASSERT_EQ(tok::text, Toks[0].getKind()); 1063 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1064 1065 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1066 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1067 1068 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1069 } 1070 } 1071 1072 TEST_F(CommentLexerTest, HTML5) { 1073 const char *Source = 1074 "// <img 42"; 1075 1076 std::vector<Token> Toks; 1077 1078 lexString(Source, Toks); 1079 1080 ASSERT_EQ(4U, Toks.size()); 1081 1082 ASSERT_EQ(tok::text, Toks[0].getKind()); 1083 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1084 1085 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1086 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1087 1088 ASSERT_EQ(tok::text, Toks[2].getKind()); 1089 ASSERT_EQ(StringRef("42"), Toks[2].getText()); 1090 1091 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1092 } 1093 1094 TEST_F(CommentLexerTest, HTML6) { 1095 const char *Source = "// <img> Meow"; 1096 1097 std::vector<Token> Toks; 1098 1099 lexString(Source, Toks); 1100 1101 ASSERT_EQ(5U, Toks.size()); 1102 
1103 ASSERT_EQ(tok::text, Toks[0].getKind()); 1104 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1105 1106 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1107 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1108 1109 ASSERT_EQ(tok::html_greater, Toks[2].getKind()); 1110 1111 ASSERT_EQ(tok::text, Toks[3].getKind()); 1112 ASSERT_EQ(StringRef(" Meow"), Toks[3].getText()); 1113 1114 ASSERT_EQ(tok::newline, Toks[4].getKind()); 1115 } 1116 1117 TEST_F(CommentLexerTest, HTML7) { 1118 const char *Source = "// <img="; 1119 1120 std::vector<Token> Toks; 1121 1122 lexString(Source, Toks); 1123 1124 ASSERT_EQ(4U, Toks.size()); 1125 1126 ASSERT_EQ(tok::text, Toks[0].getKind()); 1127 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1128 1129 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1130 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1131 1132 ASSERT_EQ(tok::text, Toks[2].getKind()); 1133 ASSERT_EQ(StringRef("="), Toks[2].getText()); 1134 1135 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1136 } 1137 1138 TEST_F(CommentLexerTest, HTML8) { 1139 const char *Source = "// <img src=> Meow"; 1140 1141 std::vector<Token> Toks; 1142 1143 lexString(Source, Toks); 1144 1145 ASSERT_EQ(7U, Toks.size()); 1146 1147 ASSERT_EQ(tok::text, Toks[0].getKind()); 1148 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1149 1150 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1151 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1152 1153 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1154 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1155 1156 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1157 1158 ASSERT_EQ(tok::html_greater, Toks[4].getKind()); 1159 1160 ASSERT_EQ(tok::text, Toks[5].getKind()); 1161 ASSERT_EQ(StringRef(" Meow"), Toks[5].getText()); 1162 1163 ASSERT_EQ(tok::newline, Toks[6].getKind()); 1164 } 1165 1166 TEST_F(CommentLexerTest, HTML9) { 1167 const char *Sources[] = { 1168 "// <img src", 1169 "// <img src " 1170 }; 1171 1172 for 
(size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1173 std::vector<Token> Toks; 1174 1175 lexString(Sources[i], Toks); 1176 1177 ASSERT_EQ(4U, Toks.size()); 1178 1179 ASSERT_EQ(tok::text, Toks[0].getKind()); 1180 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1181 1182 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1183 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1184 1185 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1186 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1187 1188 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1189 } 1190 } 1191 1192 TEST_F(CommentLexerTest, HTML10) { 1193 const char *Sources[] = { 1194 "// <img src=", 1195 "// <img src =" 1196 }; 1197 1198 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1199 std::vector<Token> Toks; 1200 1201 lexString(Sources[i], Toks); 1202 1203 ASSERT_EQ(5U, Toks.size()); 1204 1205 ASSERT_EQ(tok::text, Toks[0].getKind()); 1206 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1207 1208 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1209 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1210 1211 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1212 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1213 1214 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1215 1216 ASSERT_EQ(tok::newline, Toks[4].getKind()); 1217 } 1218 } 1219 1220 TEST_F(CommentLexerTest, HTML11) { 1221 const char *Sources[] = { 1222 "// <img src=\"", 1223 "// <img src = \"", 1224 "// <img src=\'", 1225 "// <img src = \'" 1226 }; 1227 1228 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1229 std::vector<Token> Toks; 1230 1231 lexString(Sources[i], Toks); 1232 1233 ASSERT_EQ(6U, Toks.size()); 1234 1235 ASSERT_EQ(tok::text, Toks[0].getKind()); 1236 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1237 1238 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1239 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1240 1241 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 
1242 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1243 1244 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1245 1246 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); 1247 ASSERT_EQ(StringRef(""), Toks[4].getHTMLQuotedString()); 1248 1249 ASSERT_EQ(tok::newline, Toks[5].getKind()); 1250 } 1251 } 1252 1253 TEST_F(CommentLexerTest, HTML12) { 1254 const char *Source = "// <img src=@"; 1255 1256 std::vector<Token> Toks; 1257 1258 lexString(Source, Toks); 1259 1260 ASSERT_EQ(6U, Toks.size()); 1261 1262 ASSERT_EQ(tok::text, Toks[0].getKind()); 1263 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1264 1265 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1266 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1267 1268 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1269 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1270 1271 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1272 1273 ASSERT_EQ(tok::text, Toks[4].getKind()); 1274 ASSERT_EQ(StringRef("@"), Toks[4].getText()); 1275 1276 ASSERT_EQ(tok::newline, Toks[5].getKind()); 1277 } 1278 1279 TEST_F(CommentLexerTest, HTML13) { 1280 const char *Sources[] = { 1281 "// <img src=\"val\\\"\\'val", 1282 "// <img src=\"val\\\"\\'val\"", 1283 "// <img src=\'val\\\"\\'val", 1284 "// <img src=\'val\\\"\\'val\'" 1285 }; 1286 1287 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1288 std::vector<Token> Toks; 1289 1290 lexString(Sources[i], Toks); 1291 1292 ASSERT_EQ(6U, Toks.size()); 1293 1294 ASSERT_EQ(tok::text, Toks[0].getKind()); 1295 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1296 1297 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1298 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1299 1300 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1301 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1302 1303 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1304 1305 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); 1306 ASSERT_EQ(StringRef("val\\\"\\'val"), 
Toks[4].getHTMLQuotedString()); 1307 1308 ASSERT_EQ(tok::newline, Toks[5].getKind()); 1309 } 1310 } 1311 1312 TEST_F(CommentLexerTest, HTML14) { 1313 const char *Sources[] = { 1314 "// <img src=\"val\\\"\\'val\">", 1315 "// <img src=\'val\\\"\\'val\'>" 1316 }; 1317 1318 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1319 std::vector<Token> Toks; 1320 1321 lexString(Sources[i], Toks); 1322 1323 ASSERT_EQ(7U, Toks.size()); 1324 1325 ASSERT_EQ(tok::text, Toks[0].getKind()); 1326 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1327 1328 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1329 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1330 1331 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1332 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1333 1334 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1335 1336 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); 1337 ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString()); 1338 1339 ASSERT_EQ(tok::html_greater, Toks[5].getKind()); 1340 1341 ASSERT_EQ(tok::newline, Toks[6].getKind()); 1342 } 1343 } 1344 1345 TEST_F(CommentLexerTest, HTML15) { 1346 const char *Sources[] = { 1347 "// <img/>", 1348 "// <img />" 1349 }; 1350 1351 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1352 std::vector<Token> Toks; 1353 1354 lexString(Sources[i], Toks); 1355 1356 ASSERT_EQ(4U, Toks.size()); 1357 1358 ASSERT_EQ(tok::text, Toks[0].getKind()); 1359 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1360 1361 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1362 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1363 1364 ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind()); 1365 1366 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1367 } 1368 } 1369 1370 TEST_F(CommentLexerTest, HTML16) { 1371 const char *Sources[] = { 1372 "// <img/ Aaa", 1373 "// <img / Aaa" 1374 }; 1375 1376 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1377 std::vector<Token> Toks; 
1378 1379 lexString(Sources[i], Toks); 1380 1381 ASSERT_EQ(5U, Toks.size()); 1382 1383 ASSERT_EQ(tok::text, Toks[0].getKind()); 1384 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1385 1386 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1387 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1388 1389 ASSERT_EQ(tok::text, Toks[2].getKind()); 1390 ASSERT_EQ(StringRef("/"), Toks[2].getText()); 1391 1392 ASSERT_EQ(tok::text, Toks[3].getKind()); 1393 ASSERT_EQ(StringRef(" Aaa"), Toks[3].getText()); 1394 1395 ASSERT_EQ(tok::newline, Toks[4].getKind()); 1396 } 1397 } 1398 1399 TEST_F(CommentLexerTest, HTML17) { 1400 const char *Source = "// </"; 1401 1402 std::vector<Token> Toks; 1403 1404 lexString(Source, Toks); 1405 1406 ASSERT_EQ(3U, Toks.size()); 1407 1408 ASSERT_EQ(tok::text, Toks[0].getKind()); 1409 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1410 1411 ASSERT_EQ(tok::text, Toks[1].getKind()); 1412 ASSERT_EQ(StringRef("</"), Toks[1].getText()); 1413 1414 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1415 } 1416 1417 TEST_F(CommentLexerTest, HTML18) { 1418 const char *Source = "// </@"; 1419 1420 std::vector<Token> Toks; 1421 1422 lexString(Source, Toks); 1423 1424 ASSERT_EQ(4U, Toks.size()); 1425 1426 ASSERT_EQ(tok::text, Toks[0].getKind()); 1427 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1428 1429 ASSERT_EQ(tok::text, Toks[1].getKind()); 1430 ASSERT_EQ(StringRef("</"), Toks[1].getText()); 1431 1432 ASSERT_EQ(tok::text, Toks[2].getKind()); 1433 ASSERT_EQ(StringRef("@"), Toks[2].getText()); 1434 1435 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1436 } 1437 1438 TEST_F(CommentLexerTest, HTML19) { 1439 const char *Source = "// </img"; 1440 1441 std::vector<Token> Toks; 1442 1443 lexString(Source, Toks); 1444 1445 ASSERT_EQ(3U, Toks.size()); 1446 1447 ASSERT_EQ(tok::text, Toks[0].getKind()); 1448 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1449 1450 ASSERT_EQ(tok::html_end_tag, Toks[1].getKind()); 1451 ASSERT_EQ(StringRef("img"), 
Toks[1].getHTMLTagEndName()); 1452 1453 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1454 } 1455 1456 TEST_F(CommentLexerTest, NotAKnownHTMLTag1) { 1457 const char *Source = "// <tag>"; 1458 1459 std::vector<Token> Toks; 1460 1461 lexString(Source, Toks); 1462 1463 ASSERT_EQ(4U, Toks.size()); 1464 1465 ASSERT_EQ(tok::text, Toks[0].getKind()); 1466 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1467 1468 ASSERT_EQ(tok::text, Toks[1].getKind()); 1469 ASSERT_EQ(StringRef("<tag"), Toks[1].getText()); 1470 1471 ASSERT_EQ(tok::text, Toks[2].getKind()); 1472 ASSERT_EQ(StringRef(">"), Toks[2].getText()); 1473 1474 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1475 } 1476 1477 TEST_F(CommentLexerTest, NotAKnownHTMLTag2) { 1478 const char *Source = "// </tag>"; 1479 1480 std::vector<Token> Toks; 1481 1482 lexString(Source, Toks); 1483 1484 ASSERT_EQ(4U, Toks.size()); 1485 1486 ASSERT_EQ(tok::text, Toks[0].getKind()); 1487 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1488 1489 ASSERT_EQ(tok::text, Toks[1].getKind()); 1490 ASSERT_EQ(StringRef("</tag"), Toks[1].getText()); 1491 1492 ASSERT_EQ(tok::text, Toks[2].getKind()); 1493 ASSERT_EQ(StringRef(">"), Toks[2].getText()); 1494 1495 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1496 } 1497 1498 TEST_F(CommentLexerTest, HTMLCharacterReferences1) { 1499 const char *Source = "// &"; 1500 1501 std::vector<Token> Toks; 1502 1503 lexString(Source, Toks); 1504 1505 ASSERT_EQ(3U, Toks.size()); 1506 1507 ASSERT_EQ(tok::text, Toks[0].getKind()); 1508 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1509 1510 ASSERT_EQ(tok::text, Toks[1].getKind()); 1511 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1512 1513 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1514 } 1515 1516 TEST_F(CommentLexerTest, HTMLCharacterReferences2) { 1517 const char *Source = "// &!"; 1518 1519 std::vector<Token> Toks; 1520 1521 lexString(Source, Toks); 1522 1523 ASSERT_EQ(4U, Toks.size()); 1524 1525 ASSERT_EQ(tok::text, Toks[0].getKind()); 1526 ASSERT_EQ(StringRef(" "), 
Toks[0].getText()); 1527 1528 ASSERT_EQ(tok::text, Toks[1].getKind()); 1529 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1530 1531 ASSERT_EQ(tok::text, Toks[2].getKind()); 1532 ASSERT_EQ(StringRef("!"), Toks[2].getText()); 1533 1534 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1535 } 1536 1537 TEST_F(CommentLexerTest, HTMLCharacterReferences3) { 1538 const char *Source = "// &"; 1539 1540 std::vector<Token> Toks; 1541 1542 lexString(Source, Toks); 1543 1544 ASSERT_EQ(3U, Toks.size()); 1545 1546 ASSERT_EQ(tok::text, Toks[0].getKind()); 1547 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1548 1549 ASSERT_EQ(tok::text, Toks[1].getKind()); 1550 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1551 1552 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1553 } 1554 1555 TEST_F(CommentLexerTest, HTMLCharacterReferences4) { 1556 const char *Source = "// &!"; 1557 1558 std::vector<Token> Toks; 1559 1560 lexString(Source, Toks); 1561 1562 ASSERT_EQ(4U, Toks.size()); 1563 1564 ASSERT_EQ(tok::text, Toks[0].getKind()); 1565 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1566 1567 ASSERT_EQ(tok::text, Toks[1].getKind()); 1568 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1569 1570 ASSERT_EQ(tok::text, Toks[2].getKind()); 1571 ASSERT_EQ(StringRef("!"), Toks[2].getText()); 1572 1573 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1574 } 1575 1576 TEST_F(CommentLexerTest, HTMLCharacterReferences5) { 1577 const char *Source = "// &#"; 1578 1579 std::vector<Token> Toks; 1580 1581 lexString(Source, Toks); 1582 1583 ASSERT_EQ(3U, Toks.size()); 1584 1585 ASSERT_EQ(tok::text, Toks[0].getKind()); 1586 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1587 1588 ASSERT_EQ(tok::text, Toks[1].getKind()); 1589 ASSERT_EQ(StringRef("&#"), Toks[1].getText()); 1590 1591 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1592 } 1593 1594 TEST_F(CommentLexerTest, HTMLCharacterReferences6) { 1595 const char *Source = "// &#a"; 1596 1597 std::vector<Token> Toks; 1598 1599 lexString(Source, Toks); 1600 1601 ASSERT_EQ(4U, 
Toks.size()); 1602 1603 ASSERT_EQ(tok::text, Toks[0].getKind()); 1604 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1605 1606 ASSERT_EQ(tok::text, Toks[1].getKind()); 1607 ASSERT_EQ(StringRef("&#"), Toks[1].getText()); 1608 1609 ASSERT_EQ(tok::text, Toks[2].getKind()); 1610 ASSERT_EQ(StringRef("a"), Toks[2].getText()); 1611 1612 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1613 } 1614 1615 TEST_F(CommentLexerTest, HTMLCharacterReferences7) { 1616 const char *Source = "// *"; 1617 1618 std::vector<Token> Toks; 1619 1620 lexString(Source, Toks); 1621 1622 ASSERT_EQ(3U, Toks.size()); 1623 1624 ASSERT_EQ(tok::text, Toks[0].getKind()); 1625 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1626 1627 ASSERT_EQ(tok::text, Toks[1].getKind()); 1628 ASSERT_EQ(StringRef("*"), Toks[1].getText()); 1629 1630 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1631 } 1632 1633 TEST_F(CommentLexerTest, HTMLCharacterReferences8) { 1634 const char *Source = "// *a"; 1635 1636 std::vector<Token> Toks; 1637 1638 lexString(Source, Toks); 1639 1640 ASSERT_EQ(4U, Toks.size()); 1641 1642 ASSERT_EQ(tok::text, Toks[0].getKind()); 1643 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1644 1645 ASSERT_EQ(tok::text, Toks[1].getKind()); 1646 ASSERT_EQ(StringRef("*"), Toks[1].getText()); 1647 1648 ASSERT_EQ(tok::text, Toks[2].getKind()); 1649 ASSERT_EQ(StringRef("a"), Toks[2].getText()); 1650 1651 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1652 } 1653 1654 TEST_F(CommentLexerTest, HTMLCharacterReferences9) { 1655 const char *Source = "// &#x"; 1656 1657 std::vector<Token> Toks; 1658 1659 lexString(Source, Toks); 1660 1661 ASSERT_EQ(3U, Toks.size()); 1662 1663 ASSERT_EQ(tok::text, Toks[0].getKind()); 1664 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1665 1666 ASSERT_EQ(tok::text, Toks[1].getKind()); 1667 ASSERT_EQ(StringRef("&#x"), Toks[1].getText()); 1668 1669 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1670 } 1671 1672 TEST_F(CommentLexerTest, HTMLCharacterReferences10) { 1673 const char *Source = "// &#xz"; 
1674 1675 std::vector<Token> Toks; 1676 1677 lexString(Source, Toks); 1678 1679 ASSERT_EQ(4U, Toks.size()); 1680 1681 ASSERT_EQ(tok::text, Toks[0].getKind()); 1682 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1683 1684 ASSERT_EQ(tok::text, Toks[1].getKind()); 1685 ASSERT_EQ(StringRef("&#x"), Toks[1].getText()); 1686 1687 ASSERT_EQ(tok::text, Toks[2].getKind()); 1688 ASSERT_EQ(StringRef("z"), Toks[2].getText()); 1689 1690 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1691 } 1692 1693 TEST_F(CommentLexerTest, HTMLCharacterReferences11) { 1694 const char *Source = "// «"; 1695 1696 std::vector<Token> Toks; 1697 1698 lexString(Source, Toks); 1699 1700 ASSERT_EQ(3U, Toks.size()); 1701 1702 ASSERT_EQ(tok::text, Toks[0].getKind()); 1703 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1704 1705 ASSERT_EQ(tok::text, Toks[1].getKind()); 1706 ASSERT_EQ(StringRef("«"), Toks[1].getText()); 1707 1708 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1709 } 1710 1711 TEST_F(CommentLexerTest, HTMLCharacterReferences12) { 1712 const char *Source = "// «z"; 1713 1714 std::vector<Token> Toks; 1715 1716 lexString(Source, Toks); 1717 1718 ASSERT_EQ(4U, Toks.size()); 1719 1720 ASSERT_EQ(tok::text, Toks[0].getKind()); 1721 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1722 1723 ASSERT_EQ(tok::text, Toks[1].getKind()); 1724 ASSERT_EQ(StringRef("«"), Toks[1].getText()); 1725 1726 ASSERT_EQ(tok::text, Toks[2].getKind()); 1727 ASSERT_EQ(StringRef("z"), Toks[2].getText()); 1728 1729 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1730 } 1731 1732 TEST_F(CommentLexerTest, HTMLCharacterReferences13) { 1733 const char *Source = "// &"; 1734 1735 std::vector<Token> Toks; 1736 1737 lexString(Source, Toks); 1738 1739 ASSERT_EQ(3U, Toks.size()); 1740 1741 ASSERT_EQ(tok::text, Toks[0].getKind()); 1742 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1743 1744 ASSERT_EQ(tok::text, Toks[1].getKind()); 1745 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1746 1747 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1748 } 1749 
1750 TEST_F(CommentLexerTest, HTMLCharacterReferences14) { 1751 const char *Source = "// &<"; 1752 1753 std::vector<Token> Toks; 1754 1755 lexString(Source, Toks); 1756 1757 ASSERT_EQ(4U, Toks.size()); 1758 1759 ASSERT_EQ(tok::text, Toks[0].getKind()); 1760 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1761 1762 ASSERT_EQ(tok::text, Toks[1].getKind()); 1763 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1764 1765 ASSERT_EQ(tok::text, Toks[2].getKind()); 1766 ASSERT_EQ(StringRef("<"), Toks[2].getText()); 1767 1768 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1769 } 1770 1771 TEST_F(CommentLexerTest, HTMLCharacterReferences15) { 1772 const char *Source = "// & meow"; 1773 1774 std::vector<Token> Toks; 1775 1776 lexString(Source, Toks); 1777 1778 ASSERT_EQ(4U, Toks.size()); 1779 1780 ASSERT_EQ(tok::text, Toks[0].getKind()); 1781 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1782 1783 ASSERT_EQ(tok::text, Toks[1].getKind()); 1784 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1785 1786 ASSERT_EQ(tok::text, Toks[2].getKind()); 1787 ASSERT_EQ(StringRef(" meow"), Toks[2].getText()); 1788 1789 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1790 } 1791 1792 TEST_F(CommentLexerTest, HTMLCharacterReferences16) { 1793 const char *Sources[] = { 1794 "// =", 1795 "// =", 1796 "// =", 1797 "// =" 1798 }; 1799 1800 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1801 std::vector<Token> Toks; 1802 1803 lexString(Sources[i], Toks); 1804 1805 ASSERT_EQ(3U, Toks.size()); 1806 1807 ASSERT_EQ(tok::text, Toks[0].getKind()); 1808 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1809 1810 ASSERT_EQ(tok::text, Toks[1].getKind()); 1811 ASSERT_EQ(StringRef("="), Toks[1].getText()); 1812 1813 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1814 } 1815 } 1816 1817 TEST_F(CommentLexerTest, MultipleComments) { 1818 const char *Source = 1819 "// Aaa\n" 1820 "/// Bbb\n" 1821 "/* Ccc\n" 1822 " * Ddd*/\n" 1823 "/** Eee*/"; 1824 1825 std::vector<Token> Toks; 1826 1827 lexString(Source, Toks); 1828 
1829 ASSERT_EQ(12U, Toks.size()); 1830 1831 ASSERT_EQ(tok::text, Toks[0].getKind()); 1832 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText()); 1833 ASSERT_EQ(tok::newline, Toks[1].getKind()); 1834 1835 ASSERT_EQ(tok::text, Toks[2].getKind()); 1836 ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText()); 1837 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1838 1839 ASSERT_EQ(tok::text, Toks[4].getKind()); 1840 ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText()); 1841 ASSERT_EQ(tok::newline, Toks[5].getKind()); 1842 1843 ASSERT_EQ(tok::text, Toks[6].getKind()); 1844 ASSERT_EQ(StringRef(" Ddd"), Toks[6].getText()); 1845 ASSERT_EQ(tok::newline, Toks[7].getKind()); 1846 ASSERT_EQ(tok::newline, Toks[8].getKind()); 1847 1848 ASSERT_EQ(tok::text, Toks[9].getKind()); 1849 ASSERT_EQ(StringRef(" Eee"), Toks[9].getText()); 1850 1851 ASSERT_EQ(tok::newline, Toks[10].getKind()); 1852 ASSERT_EQ(tok::newline, Toks[11].getKind()); 1853 } 1854 1855 } // end namespace comments 1856 } // end namespace clang 1857 1858