1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 /// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15 /// where it can be used to format real code. 16 /// 17 //===----------------------------------------------------------------------===// 18 19 #include "UnwrappedLineParser.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 namespace clang { 23 namespace format { 24 25 class ScopedMacroState : public FormatTokenSource { 26 public: 27 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 28 FormatToken &ResetToken) 29 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 30 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) { 31 TokenSource = this; 32 Line.Level = 0; 33 Line.InPPDirective = true; 34 } 35 36 ~ScopedMacroState() { 37 TokenSource = PreviousTokenSource; 38 ResetToken = Token; 39 Line.InPPDirective = false; 40 Line.Level = PreviousLineLevel; 41 } 42 43 virtual FormatToken getNextToken() { 44 // The \c UnwrappedLineParser guards against this by never calling 45 // \c getNextToken() after it has encountered the first eof token. 46 assert(!eof()); 47 Token = PreviousTokenSource->getNextToken(); 48 if (eof()) 49 return createEOF(); 50 return Token; 51 } 52 53 private: 54 bool eof() { 55 return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline; 56 } 57 58 FormatToken createEOF() { 59 FormatToken FormatTok; 60 FormatTok.Tok.startToken(); 61 FormatTok.Tok.setKind(tok::eof); 62 return FormatTok; 63 } 64 65 UnwrappedLine &Line; 66 FormatTokenSource *&TokenSource; 67 FormatToken &ResetToken; 68 unsigned PreviousLineLevel; 69 FormatTokenSource *PreviousTokenSource; 70 71 FormatToken Token; 72 }; 73 74 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 75 FormatTokenSource &Tokens, 76 UnwrappedLineConsumer &Callback) 77 : Style(Style), Tokens(&Tokens), Callback(Callback) { 78 } 79 80 bool UnwrappedLineParser::parse() { 81 readToken(); 82 return parseFile(); 83 } 84 85 bool UnwrappedLineParser::parseFile() { 86 bool Error = parseLevel(/*HasOpeningBrace=*/false); 87 // Make sure to format the remaining tokens. 88 addUnwrappedLine(); 89 return Error; 90 } 91 92 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 93 bool Error = false; 94 do { 95 switch (FormatTok.Tok.getKind()) { 96 case tok::comment: 97 nextToken(); 98 addUnwrappedLine(); 99 break; 100 case tok::l_brace: 101 Error |= parseBlock(); 102 addUnwrappedLine(); 103 break; 104 case tok::r_brace: 105 if (HasOpeningBrace) { 106 return false; 107 } else { 108 // Stray '}' is an error. 109 Error = true; 110 nextToken(); 111 addUnwrappedLine(); 112 } 113 break; 114 default: 115 parseStructuralElement(); 116 break; 117 } 118 } while (!eof()); 119 return Error; 120 } 121 122 bool UnwrappedLineParser::parseBlock(unsigned AddLevels) { 123 assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); 124 nextToken(); 125 126 addUnwrappedLine(); 127 128 Line.Level += AddLevels; 129 parseLevel(/*HasOpeningBrace=*/true); 130 Line.Level -= AddLevels; 131 132 // FIXME: Add error handling. 133 if (!FormatTok.Tok.is(tok::r_brace)) 134 return true; 135 136 nextToken(); 137 if (FormatTok.Tok.is(tok::semi)) 138 nextToken(); 139 return false; 140 } 141 142 void UnwrappedLineParser::parsePPDirective() { 143 assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); 144 ScopedMacroState MacroState(Line, Tokens, FormatTok); 145 nextToken(); 146 147 if (FormatTok.Tok.getIdentifierInfo() == NULL) { 148 addUnwrappedLine(); 149 return; 150 } 151 152 switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 153 case tok::pp_define: 154 parsePPDefine(); 155 break; 156 default: 157 parsePPUnknown(); 158 break; 159 } 160 } 161 162 void UnwrappedLineParser::parsePPDefine() { 163 nextToken(); 164 165 if (FormatTok.Tok.getKind() != tok::identifier) { 166 parsePPUnknown(); 167 return; 168 } 169 nextToken(); 170 if (FormatTok.Tok.getKind() == tok::l_paren) { 171 parseParens(); 172 } 173 addUnwrappedLine(); 174 Line.Level = 1; 175 176 // Errors during a preprocessor directive can only affect the layout of the 177 // preprocessor directive, and thus we ignore them. An alternative approach 178 // would be to use the same approach we use on the file level (no 179 // re-indentation if there was a structural error) within the macro 180 // definition. 181 parseFile(); 182 } 183 184 void UnwrappedLineParser::parsePPUnknown() { 185 do { 186 nextToken(); 187 } while (!eof()); 188 addUnwrappedLine(); 189 } 190 191 void UnwrappedLineParser::parseComments() { 192 // Consume leading line comments, e.g. for branches without compounds. 193 while (FormatTok.Tok.is(tok::comment)) { 194 nextToken(); 195 addUnwrappedLine(); 196 } 197 } 198 199 void UnwrappedLineParser::parseStructuralElement() { 200 parseComments(); 201 202 int TokenNumber = 0; 203 switch (FormatTok.Tok.getKind()) { 204 case tok::kw_namespace: 205 parseNamespace(); 206 return; 207 case tok::kw_inline: 208 nextToken(); 209 TokenNumber++; 210 if (FormatTok.Tok.is(tok::kw_namespace)) { 211 parseNamespace(); 212 return; 213 } 214 break; 215 case tok::kw_public: 216 case tok::kw_protected: 217 case tok::kw_private: 218 parseAccessSpecifier(); 219 return; 220 case tok::kw_if: 221 parseIfThenElse(); 222 return; 223 case tok::kw_for: 224 case tok::kw_while: 225 parseForOrWhileLoop(); 226 return; 227 case tok::kw_do: 228 parseDoWhile(); 229 return; 230 case tok::kw_switch: 231 parseSwitch(); 232 return; 233 case tok::kw_default: 234 nextToken(); 235 parseLabel(); 236 return; 237 case tok::kw_case: 238 parseCaseLabel(); 239 return; 240 default: 241 break; 242 } 243 do { 244 ++TokenNumber; 245 switch (FormatTok.Tok.getKind()) { 246 case tok::kw_enum: 247 parseEnum(); 248 return; 249 case tok::semi: 250 nextToken(); 251 addUnwrappedLine(); 252 return; 253 case tok::l_paren: 254 parseParens(); 255 break; 256 case tok::l_brace: 257 parseBlock(); 258 addUnwrappedLine(); 259 return; 260 case tok::identifier: 261 nextToken(); 262 if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) { 263 parseLabel(); 264 return; 265 } 266 break; 267 case tok::equal: 268 nextToken(); 269 // Skip initializers as they will be formatted by a later step. 270 if (FormatTok.Tok.is(tok::l_brace)) 271 nextToken(); 272 break; 273 default: 274 nextToken(); 275 break; 276 } 277 } while (!eof()); 278 } 279 280 void UnwrappedLineParser::parseParens() { 281 assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); 282 nextToken(); 283 do { 284 switch (FormatTok.Tok.getKind()) { 285 case tok::l_paren: 286 parseParens(); 287 break; 288 case tok::r_paren: 289 nextToken(); 290 return; 291 default: 292 nextToken(); 293 break; 294 } 295 } while (!eof()); 296 } 297 298 void UnwrappedLineParser::parseIfThenElse() { 299 assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); 300 nextToken(); 301 parseParens(); 302 bool NeedsUnwrappedLine = false; 303 if (FormatTok.Tok.is(tok::l_brace)) { 304 parseBlock(); 305 NeedsUnwrappedLine = true; 306 } else { 307 addUnwrappedLine(); 308 ++Line.Level; 309 parseStructuralElement(); 310 --Line.Level; 311 } 312 if (FormatTok.Tok.is(tok::kw_else)) { 313 nextToken(); 314 if (FormatTok.Tok.is(tok::l_brace)) { 315 parseBlock(); 316 addUnwrappedLine(); 317 } else if (FormatTok.Tok.is(tok::kw_if)) { 318 parseIfThenElse(); 319 } else { 320 addUnwrappedLine(); 321 ++Line.Level; 322 parseStructuralElement(); 323 --Line.Level; 324 } 325 } else if (NeedsUnwrappedLine) { 326 addUnwrappedLine(); 327 } 328 } 329 330 void UnwrappedLineParser::parseNamespace() { 331 assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected"); 332 nextToken(); 333 if (FormatTok.Tok.is(tok::identifier)) 334 nextToken(); 335 if (FormatTok.Tok.is(tok::l_brace)) { 336 parseBlock(0); 337 addUnwrappedLine(); 338 } 339 // FIXME: Add error handling. 340 } 341 342 void UnwrappedLineParser::parseForOrWhileLoop() { 343 assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) && 344 "'for' or 'while' expected"); 345 nextToken(); 346 parseParens(); 347 if (FormatTok.Tok.is(tok::l_brace)) { 348 parseBlock(); 349 addUnwrappedLine(); 350 } else { 351 addUnwrappedLine(); 352 ++Line.Level; 353 parseStructuralElement(); 354 --Line.Level; 355 } 356 } 357 358 void UnwrappedLineParser::parseDoWhile() { 359 assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); 360 nextToken(); 361 if (FormatTok.Tok.is(tok::l_brace)) { 362 parseBlock(); 363 } else { 364 addUnwrappedLine(); 365 ++Line.Level; 366 parseStructuralElement(); 367 --Line.Level; 368 } 369 370 // FIXME: Add error handling. 371 if (!FormatTok.Tok.is(tok::kw_while)) { 372 addUnwrappedLine(); 373 return; 374 } 375 376 nextToken(); 377 parseStructuralElement(); 378 } 379 380 void UnwrappedLineParser::parseLabel() { 381 // FIXME: remove all asserts. 382 assert(FormatTok.Tok.is(tok::colon) && "':' expected"); 383 nextToken(); 384 unsigned OldLineLevel = Line.Level; 385 if (Line.Level > 0) 386 --Line.Level; 387 if (FormatTok.Tok.is(tok::l_brace)) { 388 parseBlock(); 389 } 390 addUnwrappedLine(); 391 Line.Level = OldLineLevel; 392 } 393 394 void UnwrappedLineParser::parseCaseLabel() { 395 assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); 396 // FIXME: fix handling of complex expressions here. 397 do { 398 nextToken(); 399 } while (!eof() && !FormatTok.Tok.is(tok::colon)); 400 parseLabel(); 401 } 402 403 void UnwrappedLineParser::parseSwitch() { 404 assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected"); 405 nextToken(); 406 parseParens(); 407 if (FormatTok.Tok.is(tok::l_brace)) { 408 parseBlock(Style.IndentCaseLabels ? 2 : 1); 409 addUnwrappedLine(); 410 } else { 411 addUnwrappedLine(); 412 Line.Level += (Style.IndentCaseLabels ? 2 : 1); 413 parseStructuralElement(); 414 Line.Level -= (Style.IndentCaseLabels ? 2 : 1); 415 } 416 } 417 418 void UnwrappedLineParser::parseAccessSpecifier() { 419 nextToken(); 420 // Otherwise, we don't know what it is, and we'd better keep the next token. 421 if (FormatTok.Tok.is(tok::colon)) 422 nextToken(); 423 addUnwrappedLine(); 424 } 425 426 void UnwrappedLineParser::parseEnum() { 427 bool HasContents = false; 428 do { 429 switch (FormatTok.Tok.getKind()) { 430 case tok::l_brace: 431 nextToken(); 432 addUnwrappedLine(); 433 ++Line.Level; 434 parseComments(); 435 break; 436 case tok::l_paren: 437 parseParens(); 438 break; 439 case tok::comma: 440 nextToken(); 441 addUnwrappedLine(); 442 parseComments(); 443 break; 444 case tok::r_brace: 445 if (HasContents) 446 addUnwrappedLine(); 447 --Line.Level; 448 nextToken(); 449 break; 450 case tok::semi: 451 nextToken(); 452 addUnwrappedLine(); 453 return; 454 default: 455 HasContents = true; 456 nextToken(); 457 break; 458 } 459 } while (!eof()); 460 } 461 462 void UnwrappedLineParser::addUnwrappedLine() { 463 // Consume trailing comments. 464 while (!eof() && FormatTok.NewlinesBefore == 0 && 465 FormatTok.Tok.is(tok::comment)) { 466 nextToken(); 467 } 468 Callback.consumeUnwrappedLine(Line); 469 Line.Tokens.clear(); 470 } 471 472 bool UnwrappedLineParser::eof() const { 473 return FormatTok.Tok.is(tok::eof); 474 } 475 476 void UnwrappedLineParser::nextToken() { 477 if (eof()) 478 return; 479 Line.Tokens.push_back(FormatTok); 480 readToken(); 481 } 482 483 void UnwrappedLineParser::readToken() { 484 FormatTok = Tokens->getNextToken(); 485 while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) && 486 ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) || 487 FormatTok.IsFirst)) { 488 // FIXME: This is incorrect - the correct way is to create a 489 // data structure that will construct the parts around the preprocessor 490 // directive as a structured \c UnwrappedLine. 491 addUnwrappedLine(); 492 parsePPDirective(); 493 } 494 } 495 496 } // end namespace format 497 } // end namespace clang 498