1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 /// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15 /// where it can be used to format real code. 16 /// 17 //===----------------------------------------------------------------------===// 18 19 #include "UnwrappedLineParser.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 namespace clang { 23 namespace format { 24 25 class ScopedMacroState : public FormatTokenSource { 26 public: 27 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 28 FormatToken &ResetToken) 29 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 30 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) { 31 TokenSource = this; 32 Line.Level = 0; 33 Line.InPPDirective = true; 34 } 35 36 ~ScopedMacroState() { 37 TokenSource = PreviousTokenSource; 38 ResetToken = Token; 39 Line.InPPDirective = false; 40 Line.Level = PreviousLineLevel; 41 } 42 43 virtual FormatToken getNextToken() { 44 // The \c UnwrappedLineParser guards against this by never calling 45 // \c getNextToken() after it has encountered the first eof token. 46 assert(!eof()); 47 Token = PreviousTokenSource->getNextToken(); 48 if (eof()) 49 return createEOF(); 50 return Token; 51 } 52 53 private: 54 bool eof() { 55 return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline; 56 } 57 58 FormatToken createEOF() { 59 FormatToken FormatTok; 60 FormatTok.Tok.startToken(); 61 FormatTok.Tok.setKind(tok::eof); 62 return FormatTok; 63 } 64 65 UnwrappedLine &Line; 66 FormatTokenSource *&TokenSource; 67 FormatToken &ResetToken; 68 unsigned PreviousLineLevel; 69 FormatTokenSource *PreviousTokenSource; 70 71 FormatToken Token; 72 }; 73 74 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 75 FormatTokenSource &Tokens, 76 UnwrappedLineConsumer &Callback) 77 : RootTokenInitialized(false), Style(Style), Tokens(&Tokens), 78 Callback(Callback) { 79 } 80 81 bool UnwrappedLineParser::parse() { 82 readToken(); 83 return parseFile(); 84 } 85 86 bool UnwrappedLineParser::parseFile() { 87 bool Error = parseLevel(/*HasOpeningBrace=*/false); 88 // Make sure to format the remaining tokens. 89 addUnwrappedLine(); 90 return Error; 91 } 92 93 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 94 bool Error = false; 95 do { 96 switch (FormatTok.Tok.getKind()) { 97 case tok::comment: 98 nextToken(); 99 addUnwrappedLine(); 100 break; 101 case tok::l_brace: 102 Error |= parseBlock(); 103 addUnwrappedLine(); 104 break; 105 case tok::r_brace: 106 if (HasOpeningBrace) { 107 return false; 108 } else { 109 // Stray '}' is an error. 110 Error = true; 111 nextToken(); 112 addUnwrappedLine(); 113 } 114 break; 115 default: 116 parseStructuralElement(); 117 break; 118 } 119 } while (!eof()); 120 return Error; 121 } 122 123 bool UnwrappedLineParser::parseBlock(unsigned AddLevels) { 124 assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); 125 nextToken(); 126 127 addUnwrappedLine(); 128 129 Line.Level += AddLevels; 130 parseLevel(/*HasOpeningBrace=*/true); 131 Line.Level -= AddLevels; 132 133 if (!FormatTok.Tok.is(tok::r_brace)) 134 return true; 135 136 nextToken(); // Munch the closing brace. 137 return false; 138 } 139 140 void UnwrappedLineParser::parsePPDirective() { 141 assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); 142 ScopedMacroState MacroState(Line, Tokens, FormatTok); 143 nextToken(); 144 145 if (FormatTok.Tok.getIdentifierInfo() == NULL) { 146 addUnwrappedLine(); 147 return; 148 } 149 150 switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 151 case tok::pp_define: 152 parsePPDefine(); 153 break; 154 default: 155 parsePPUnknown(); 156 break; 157 } 158 } 159 160 void UnwrappedLineParser::parsePPDefine() { 161 nextToken(); 162 163 if (FormatTok.Tok.getKind() != tok::identifier) { 164 parsePPUnknown(); 165 return; 166 } 167 nextToken(); 168 if (FormatTok.Tok.getKind() == tok::l_paren) { 169 parseParens(); 170 } 171 addUnwrappedLine(); 172 Line.Level = 1; 173 174 // Errors during a preprocessor directive can only affect the layout of the 175 // preprocessor directive, and thus we ignore them. An alternative approach 176 // would be to use the same approach we use on the file level (no 177 // re-indentation if there was a structural error) within the macro 178 // definition. 179 parseFile(); 180 } 181 182 void UnwrappedLineParser::parsePPUnknown() { 183 do { 184 nextToken(); 185 } while (!eof()); 186 addUnwrappedLine(); 187 } 188 189 void UnwrappedLineParser::parseComments() { 190 // Consume leading line comments, e.g. for branches without compounds. 191 while (FormatTok.Tok.is(tok::comment)) { 192 nextToken(); 193 addUnwrappedLine(); 194 } 195 } 196 197 void UnwrappedLineParser::parseStructuralElement() { 198 parseComments(); 199 200 int TokenNumber = 0; 201 switch (FormatTok.Tok.getKind()) { 202 case tok::at: 203 nextToken(); 204 switch (FormatTok.Tok.getObjCKeywordID()) { 205 case tok::objc_public: 206 case tok::objc_protected: 207 case tok::objc_package: 208 case tok::objc_private: 209 return parseAccessSpecifier(); 210 default: 211 break; 212 } 213 break; 214 case tok::kw_namespace: 215 parseNamespace(); 216 return; 217 case tok::kw_inline: 218 nextToken(); 219 TokenNumber++; 220 if (FormatTok.Tok.is(tok::kw_namespace)) { 221 parseNamespace(); 222 return; 223 } 224 break; 225 case tok::kw_public: 226 case tok::kw_protected: 227 case tok::kw_private: 228 parseAccessSpecifier(); 229 return; 230 case tok::kw_if: 231 parseIfThenElse(); 232 return; 233 case tok::kw_for: 234 case tok::kw_while: 235 parseForOrWhileLoop(); 236 return; 237 case tok::kw_do: 238 parseDoWhile(); 239 return; 240 case tok::kw_switch: 241 parseSwitch(); 242 return; 243 case tok::kw_default: 244 nextToken(); 245 parseLabel(); 246 return; 247 case tok::kw_case: 248 parseCaseLabel(); 249 return; 250 default: 251 break; 252 } 253 do { 254 ++TokenNumber; 255 switch (FormatTok.Tok.getKind()) { 256 case tok::kw_enum: 257 parseEnum(); 258 return; 259 case tok::kw_struct: // fallthrough 260 case tok::kw_class: 261 parseStructOrClass(); 262 return; 263 case tok::semi: 264 nextToken(); 265 addUnwrappedLine(); 266 return; 267 case tok::l_paren: 268 parseParens(); 269 break; 270 case tok::l_brace: 271 parseBlock(); 272 addUnwrappedLine(); 273 return; 274 case tok::identifier: 275 nextToken(); 276 if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) { 277 parseLabel(); 278 return; 279 } 280 break; 281 case tok::equal: 282 nextToken(); 283 // Skip initializers as they will be formatted by a later step. 284 if (FormatTok.Tok.is(tok::l_brace)) 285 nextToken(); 286 break; 287 default: 288 nextToken(); 289 break; 290 } 291 } while (!eof()); 292 } 293 294 void UnwrappedLineParser::parseParens() { 295 assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); 296 nextToken(); 297 do { 298 switch (FormatTok.Tok.getKind()) { 299 case tok::l_paren: 300 parseParens(); 301 break; 302 case tok::r_paren: 303 nextToken(); 304 return; 305 default: 306 nextToken(); 307 break; 308 } 309 } while (!eof()); 310 } 311 312 void UnwrappedLineParser::parseIfThenElse() { 313 assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); 314 nextToken(); 315 parseParens(); 316 bool NeedsUnwrappedLine = false; 317 if (FormatTok.Tok.is(tok::l_brace)) { 318 parseBlock(); 319 NeedsUnwrappedLine = true; 320 } else { 321 addUnwrappedLine(); 322 ++Line.Level; 323 parseStructuralElement(); 324 --Line.Level; 325 } 326 if (FormatTok.Tok.is(tok::kw_else)) { 327 nextToken(); 328 if (FormatTok.Tok.is(tok::l_brace)) { 329 parseBlock(); 330 addUnwrappedLine(); 331 } else if (FormatTok.Tok.is(tok::kw_if)) { 332 parseIfThenElse(); 333 } else { 334 addUnwrappedLine(); 335 ++Line.Level; 336 parseStructuralElement(); 337 --Line.Level; 338 } 339 } else if (NeedsUnwrappedLine) { 340 addUnwrappedLine(); 341 } 342 } 343 344 void UnwrappedLineParser::parseNamespace() { 345 assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected"); 346 nextToken(); 347 if (FormatTok.Tok.is(tok::identifier)) 348 nextToken(); 349 if (FormatTok.Tok.is(tok::l_brace)) { 350 parseBlock(0); 351 addUnwrappedLine(); 352 } 353 // FIXME: Add error handling. 354 } 355 356 void UnwrappedLineParser::parseForOrWhileLoop() { 357 assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) && 358 "'for' or 'while' expected"); 359 nextToken(); 360 parseParens(); 361 if (FormatTok.Tok.is(tok::l_brace)) { 362 parseBlock(); 363 addUnwrappedLine(); 364 } else { 365 addUnwrappedLine(); 366 ++Line.Level; 367 parseStructuralElement(); 368 --Line.Level; 369 } 370 } 371 372 void UnwrappedLineParser::parseDoWhile() { 373 assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); 374 nextToken(); 375 if (FormatTok.Tok.is(tok::l_brace)) { 376 parseBlock(); 377 } else { 378 addUnwrappedLine(); 379 ++Line.Level; 380 parseStructuralElement(); 381 --Line.Level; 382 } 383 384 // FIXME: Add error handling. 385 if (!FormatTok.Tok.is(tok::kw_while)) { 386 addUnwrappedLine(); 387 return; 388 } 389 390 nextToken(); 391 parseStructuralElement(); 392 } 393 394 void UnwrappedLineParser::parseLabel() { 395 // FIXME: remove all asserts. 396 assert(FormatTok.Tok.is(tok::colon) && "':' expected"); 397 nextToken(); 398 unsigned OldLineLevel = Line.Level; 399 if (Line.Level > 0) 400 --Line.Level; 401 if (FormatTok.Tok.is(tok::l_brace)) { 402 parseBlock(); 403 } 404 addUnwrappedLine(); 405 Line.Level = OldLineLevel; 406 } 407 408 void UnwrappedLineParser::parseCaseLabel() { 409 assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); 410 // FIXME: fix handling of complex expressions here. 411 do { 412 nextToken(); 413 } while (!eof() && !FormatTok.Tok.is(tok::colon)); 414 parseLabel(); 415 } 416 417 void UnwrappedLineParser::parseSwitch() { 418 assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected"); 419 nextToken(); 420 parseParens(); 421 if (FormatTok.Tok.is(tok::l_brace)) { 422 parseBlock(Style.IndentCaseLabels ? 2 : 1); 423 addUnwrappedLine(); 424 } else { 425 addUnwrappedLine(); 426 Line.Level += (Style.IndentCaseLabels ? 2 : 1); 427 parseStructuralElement(); 428 Line.Level -= (Style.IndentCaseLabels ? 2 : 1); 429 } 430 } 431 432 void UnwrappedLineParser::parseAccessSpecifier() { 433 nextToken(); 434 // Otherwise, we don't know what it is, and we'd better keep the next token. 435 if (FormatTok.Tok.is(tok::colon)) 436 nextToken(); 437 addUnwrappedLine(); 438 } 439 440 void UnwrappedLineParser::parseEnum() { 441 bool HasContents = false; 442 do { 443 switch (FormatTok.Tok.getKind()) { 444 case tok::l_brace: 445 nextToken(); 446 addUnwrappedLine(); 447 ++Line.Level; 448 parseComments(); 449 break; 450 case tok::l_paren: 451 parseParens(); 452 break; 453 case tok::comma: 454 nextToken(); 455 addUnwrappedLine(); 456 parseComments(); 457 break; 458 case tok::r_brace: 459 if (HasContents) 460 addUnwrappedLine(); 461 --Line.Level; 462 nextToken(); 463 break; 464 case tok::semi: 465 nextToken(); 466 addUnwrappedLine(); 467 return; 468 default: 469 HasContents = true; 470 nextToken(); 471 break; 472 } 473 } while (!eof()); 474 } 475 476 void UnwrappedLineParser::parseStructOrClass() { 477 nextToken(); 478 do { 479 switch (FormatTok.Tok.getKind()) { 480 case tok::l_brace: 481 // FIXME: Think about how to resolve the error handling here. 482 parseBlock(); 483 parseStructuralElement(); 484 return; 485 case tok::semi: 486 nextToken(); 487 addUnwrappedLine(); 488 return; 489 default: 490 nextToken(); 491 break; 492 } 493 } while (!eof()); 494 } 495 496 void UnwrappedLineParser::addUnwrappedLine() { 497 if (!RootTokenInitialized) 498 return; 499 // Consume trailing comments. 500 while (!eof() && FormatTok.NewlinesBefore == 0 && 501 FormatTok.Tok.is(tok::comment)) { 502 nextToken(); 503 } 504 Callback.consumeUnwrappedLine(Line); 505 RootTokenInitialized = false; 506 } 507 508 bool UnwrappedLineParser::eof() const { 509 return FormatTok.Tok.is(tok::eof); 510 } 511 512 void UnwrappedLineParser::nextToken() { 513 if (eof()) 514 return; 515 if (RootTokenInitialized) { 516 LastInCurrentLine->Children.push_back(FormatTok); 517 LastInCurrentLine = &LastInCurrentLine->Children.back(); 518 } else { 519 Line.RootToken = FormatTok; 520 RootTokenInitialized = true; 521 LastInCurrentLine = &Line.RootToken; 522 } 523 readToken(); 524 } 525 526 void UnwrappedLineParser::readToken() { 527 FormatTok = Tokens->getNextToken(); 528 while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) && 529 ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) || 530 FormatTok.IsFirst)) { 531 // FIXME: This is incorrect - the correct way is to create a 532 // data structure that will construct the parts around the preprocessor 533 // directive as a structured \c UnwrappedLine. 534 addUnwrappedLine(); 535 parsePPDirective(); 536 } 537 } 538 539 } // end namespace format 540 } // end namespace clang 541