//===--- Format.cpp - Format C++ code -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements functions declared in Format.h. This will be
/// split into separate files as we go.
///
//===----------------------------------------------------------------------===//

#include "ContinuationIndenter.h"
#include "TokenAnnotator.h"
#include "UnwrappedLineFormatter.h"
#include "UnwrappedLineParser.h"
#include "WhitespaceManager.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/YAMLTraits.h"
#include <queue>
#include <string>

#define DEBUG_TYPE "format-formatter"

using clang::format::FormatStyle;

LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)

namespace llvm {
namespace yaml {
/// Registers the YAML spellings for FormatStyle::LanguageKind.
/// LK_None has no spelling here.
template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
  static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
    IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
    IO.enumCase(Value, "Java", FormatStyle::LK_Java);
    IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
    IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
  }
};

/// Registers the YAML spellings for FormatStyle::LanguageStandard. Both the
/// "CppNN" and the "C++NN" spelling map to the same enumerator.
template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
  static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
    IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
    IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
    IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
    IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
    IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
  }
};

/// Registers the YAML spellings for FormatStyle::UseTabStyle. The boolean
/// spellings "false"/"true" are accepted as aliases of Never/Always.
template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
  static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
    IO.enumCase(Value, "Never", FormatStyle::UT_Never);
    IO.enumCase(Value, "false", FormatStyle::UT_Never);
    IO.enumCase(Value, "Always", FormatStyle::UT_Always);
    IO.enumCase(Value, "true", FormatStyle::UT_Always);
    IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
  }
};

/// Registers the YAML spellings for FormatStyle::ShortFunctionStyle. The
/// boolean spellings "false"/"true" are accepted as aliases of None/All.
template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
  static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
    IO.enumCase(Value, "None", FormatStyle::SFS_None);
    IO.enumCase(Value, "false", FormatStyle::SFS_None);
    IO.enumCase(Value, "All", FormatStyle::SFS_All);
    IO.enumCase(Value, "true", FormatStyle::SFS_All);
    IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
    IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
  }
};

/// Registers the YAML spellings for FormatStyle::BinaryOperatorStyle. The
/// boolean spellings "true"/"false" are accepted as aliases of All/None.
template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
  static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
    IO.enumCase(Value, "All", FormatStyle::BOS_All);
    IO.enumCase(Value, "true", FormatStyle::BOS_All);
    IO.enumCase(Value, "None", FormatStyle::BOS_None);
    IO.enumCase(Value, "false", FormatStyle::BOS_None);
    IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment);
  }
};

/// Registers the YAML spellings for FormatStyle::BraceBreakingStyle.
template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
  static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
    IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
    IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
    IO.enumCase(Value, "Mozilla", FormatStyle::BS_Mozilla);
    IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
    IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
    IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
    IO.enumCase(Value, "WebKit", FormatStyle::BS_WebKit);
  }
};

/// Registers the YAML spellings for
/// FormatStyle::DefinitionReturnTypeBreakingStyle, including the legacy
/// boolean spellings.
template <> struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> {
  static void enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) {
    IO.enumCase(Value, "None", FormatStyle::DRTBS_None);
    IO.enumCase(Value, "All", FormatStyle::DRTBS_All);
    IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel);

    // For backward compatibility.
    IO.enumCase(Value, "false", FormatStyle::DRTBS_None);
    IO.enumCase(Value, "true", FormatStyle::DRTBS_All);
  }
};

/// Registers the YAML spellings for FormatStyle::NamespaceIndentationKind.
template <>
struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
  static void enumeration(IO &IO,
                          FormatStyle::NamespaceIndentationKind &Value) {
    IO.enumCase(Value, "None", FormatStyle::NI_None);
    IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
    IO.enumCase(Value, "All", FormatStyle::NI_All);
  }
};

/// Registers the YAML spellings for FormatStyle::PointerAlignmentStyle,
/// including the legacy boolean spellings ("true" -> Left, "false" -> Right).
template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
  static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
    IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
    IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
    IO.enumCase(Value, "Right", FormatStyle::PAS_Right);

    // For backward compatibility.
    IO.enumCase(Value, "true", FormatStyle::PAS_Left);
    IO.enumCase(Value, "false", FormatStyle::PAS_Right);
  }
};

/// Registers the YAML spellings for FormatStyle::SpaceBeforeParensOptions,
/// including the legacy boolean spellings ("false" -> Never,
/// "true" -> ControlStatements).
template <>
struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
  static void enumeration(IO &IO,
                          FormatStyle::SpaceBeforeParensOptions &Value) {
    IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
    IO.enumCase(Value, "ControlStatements",
                FormatStyle::SBPO_ControlStatements);
    IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);

    // For backward compatibility.
    IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
    IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
  }
};

/// Maps every FormatStyle option to its YAML key. The same routine is used
/// for reading and for writing a configuration.
template <> struct MappingTraits<FormatStyle> {
  static void mapping(IO &IO, FormatStyle &Style) {
    // When reading, read the language first, we need it for getPredefinedStyle.
    IO.mapOptional("Language", Style.Language);

    if (IO.outputting()) {
      // When writing: if the style is identical to one of the predefined
      // styles for its language, record the first matching name under the
      // key "# BasedOnStyle".
      // NOTE(review): the leading '#' presumably makes the entry read as a
      // YAML comment in the emitted text -- confirm.
      StringRef StylesArray[] = {"LLVM",    "Google", "Chromium",
                                 "Mozilla", "WebKit", "GNU"};
      ArrayRef<StringRef> Styles(StylesArray);
      for (size_t i = 0, e = Styles.size(); i < e; ++i) {
        StringRef StyleName(Styles[i]);
        FormatStyle PredefinedStyle;
        if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
            Style == PredefinedStyle) {
          IO.mapOptional("# BasedOnStyle", StyleName);
          break;
        }
      }
    } else {
      // When reading: an optional BasedOnStyle key replaces the whole style
      // with the named predefined style before the individual keys below are
      // applied on top of it.
      StringRef BasedOnStyle;
      IO.mapOptional("BasedOnStyle", BasedOnStyle);
      if (!BasedOnStyle.empty()) {
        // getPredefinedStyle overwrites Style (including its Language), so
        // remember the language read above and restore it afterwards.
        FormatStyle::LanguageKind OldLanguage = Style.Language;
        // The base style is looked up for the language of the FormatStyle
        // stored in the IO context, not for the language of this document.
        FormatStyle::LanguageKind Language =
            ((FormatStyle *)IO.getContext())->Language;
        if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
          IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
          return;
        }
        Style.Language = OldLanguage;
      }
    }

    // For backward compatibility.
    // These legacy keys are only read, never written. They come before the
    // current keys for the same fields, which are mapped further below.
    if (!IO.outputting()) {
      IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
      IO.mapOptional("IndentFunctionDeclarationAfterType",
                     Style.IndentWrappedFunctionNames);
      IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
      IO.mapOptional("SpaceAfterControlStatementKeyword",
                     Style.SpaceBeforeParens);
    }

    // Current option keys, in alphabetical order.
    IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
    IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
    IO.mapOptional("AlignConsecutiveAssignments",
                   Style.AlignConsecutiveAssignments);
    IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
    IO.mapOptional("AlignOperands", Style.AlignOperands);
    IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
    IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
                   Style.AllowAllParametersOfDeclarationOnNextLine);
    IO.mapOptional("AllowShortBlocksOnASingleLine",
                   Style.AllowShortBlocksOnASingleLine);
    IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
                   Style.AllowShortCaseLabelsOnASingleLine);
    IO.mapOptional("AllowShortFunctionsOnASingleLine",
                   Style.AllowShortFunctionsOnASingleLine);
    IO.mapOptional("AllowShortIfStatementsOnASingleLine",
                   Style.AllowShortIfStatementsOnASingleLine);
    IO.mapOptional("AllowShortLoopsOnASingleLine",
                   Style.AllowShortLoopsOnASingleLine);
    IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
                   Style.AlwaysBreakAfterDefinitionReturnType);
    IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
                   Style.AlwaysBreakBeforeMultilineStrings);
    IO.mapOptional("AlwaysBreakTemplateDeclarations",
                   Style.AlwaysBreakTemplateDeclarations);
    IO.mapOptional("BinPackArguments", Style.BinPackArguments);
    IO.mapOptional("BinPackParameters", Style.BinPackParameters);
    IO.mapOptional("BreakBeforeBinaryOperators",
                   Style.BreakBeforeBinaryOperators);
    IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
    IO.mapOptional("BreakBeforeTernaryOperators",
                   Style.BreakBeforeTernaryOperators);
    IO.mapOptional("BreakConstructorInitializersBeforeComma",
                   Style.BreakConstructorInitializersBeforeComma);
    IO.mapOptional("ColumnLimit", Style.ColumnLimit);
    IO.mapOptional("CommentPragmas", Style.CommentPragmas);
    IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
                   Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
    IO.mapOptional("ConstructorInitializerIndentWidth",
                   Style.ConstructorInitializerIndentWidth);
    IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
    IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
    IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
    IO.mapOptional("DisableFormat", Style.DisableFormat);
    IO.mapOptional("ExperimentalAutoDetectBinPacking",
                   Style.ExperimentalAutoDetectBinPacking);
    IO.mapOptional("ForEachMacros", Style.ForEachMacros);
    IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
    IO.mapOptional("IndentWidth", Style.IndentWidth);
    IO.mapOptional("IndentWrappedFunctionNames",
                   Style.IndentWrappedFunctionNames);
    IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
                   Style.KeepEmptyLinesAtTheStartOfBlocks);
    IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin);
    IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd);
    IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
    IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
    IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
    IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
    IO.mapOptional("ObjCSpaceBeforeProtocolList",
                   Style.ObjCSpaceBeforeProtocolList);
    IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
                   Style.PenaltyBreakBeforeFirstCallParameter);
    IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
    IO.mapOptional("PenaltyBreakFirstLessLess",
                   Style.PenaltyBreakFirstLessLess);
    IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
    IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
    IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
                   Style.PenaltyReturnTypeOnItsOwnLine);
    IO.mapOptional("PointerAlignment", Style.PointerAlignment);
    IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
    IO.mapOptional("SpaceBeforeAssignmentOperators",
                   Style.SpaceBeforeAssignmentOperators);
    IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
    IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
    IO.mapOptional("SpacesBeforeTrailingComments",
                   Style.SpacesBeforeTrailingComments);
    IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
    IO.mapOptional("SpacesInContainerLiterals",
                   Style.SpacesInContainerLiterals);
    IO.mapOptional("SpacesInCStyleCastParentheses",
                   Style.SpacesInCStyleCastParentheses);
    IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
    IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
    IO.mapOptional("Standard", Style.Standard);
    IO.mapOptional("TabWidth", Style.TabWidth);
    IO.mapOptional("UseTab", Style.UseTab);
  }
};

// Allows to read vector<FormatStyle> while keeping default values.
// IO.getContext() should contain a pointer to the FormatStyle structure, that
// will be used to get default values for missing keys.
// If the first element has no Language specified, it will be treated as the
// default one for the following elements.
293 template <> struct DocumentListTraits<std::vector<FormatStyle>> { 294 static size_t size(IO &IO, std::vector<FormatStyle> &Seq) { 295 return Seq.size(); 296 } 297 static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq, 298 size_t Index) { 299 if (Index >= Seq.size()) { 300 assert(Index == Seq.size()); 301 FormatStyle Template; 302 if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) { 303 Template = Seq[0]; 304 } else { 305 Template = *((const FormatStyle *)IO.getContext()); 306 Template.Language = FormatStyle::LK_None; 307 } 308 Seq.resize(Index + 1, Template); 309 } 310 return Seq[Index]; 311 } 312 }; 313 } 314 } 315 316 namespace clang { 317 namespace format { 318 319 const std::error_category &getParseCategory() { 320 static ParseErrorCategory C; 321 return C; 322 } 323 std::error_code make_error_code(ParseError e) { 324 return std::error_code(static_cast<int>(e), getParseCategory()); 325 } 326 327 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT { 328 return "clang-format.parse_error"; 329 } 330 331 std::string ParseErrorCategory::message(int EV) const { 332 switch (static_cast<ParseError>(EV)) { 333 case ParseError::Success: 334 return "Success"; 335 case ParseError::Error: 336 return "Invalid argument"; 337 case ParseError::Unsuitable: 338 return "Unsuitable"; 339 } 340 llvm_unreachable("unexpected parse error"); 341 } 342 343 FormatStyle getLLVMStyle() { 344 FormatStyle LLVMStyle; 345 LLVMStyle.Language = FormatStyle::LK_Cpp; 346 LLVMStyle.AccessModifierOffset = -2; 347 LLVMStyle.AlignEscapedNewlinesLeft = false; 348 LLVMStyle.AlignAfterOpenBracket = true; 349 LLVMStyle.AlignOperands = true; 350 LLVMStyle.AlignTrailingComments = true; 351 LLVMStyle.AlignConsecutiveAssignments = false; 352 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 353 LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; 354 LLVMStyle.AllowShortBlocksOnASingleLine = false; 355 LLVMStyle.AllowShortCaseLabelsOnASingleLine = 
false; 356 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 357 LLVMStyle.AllowShortLoopsOnASingleLine = false; 358 LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; 359 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 360 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 361 LLVMStyle.BinPackParameters = true; 362 LLVMStyle.BinPackArguments = true; 363 LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; 364 LLVMStyle.BreakBeforeTernaryOperators = true; 365 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 366 LLVMStyle.BreakConstructorInitializersBeforeComma = false; 367 LLVMStyle.ColumnLimit = 80; 368 LLVMStyle.CommentPragmas = "^ IWYU pragma:"; 369 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 370 LLVMStyle.ConstructorInitializerIndentWidth = 4; 371 LLVMStyle.ContinuationIndentWidth = 4; 372 LLVMStyle.Cpp11BracedListStyle = true; 373 LLVMStyle.DerivePointerAlignment = false; 374 LLVMStyle.ExperimentalAutoDetectBinPacking = false; 375 LLVMStyle.ForEachMacros.push_back("foreach"); 376 LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); 377 LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); 378 LLVMStyle.IndentCaseLabels = false; 379 LLVMStyle.IndentWrappedFunctionNames = false; 380 LLVMStyle.IndentWidth = 2; 381 LLVMStyle.TabWidth = 8; 382 LLVMStyle.MaxEmptyLinesToKeep = 1; 383 LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; 384 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; 385 LLVMStyle.ObjCBlockIndentWidth = 2; 386 LLVMStyle.ObjCSpaceAfterProperty = false; 387 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 388 LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; 389 LLVMStyle.SpacesBeforeTrailingComments = 1; 390 LLVMStyle.Standard = FormatStyle::LS_Cpp11; 391 LLVMStyle.UseTab = FormatStyle::UT_Never; 392 LLVMStyle.SpacesInParentheses = false; 393 LLVMStyle.SpacesInSquareBrackets = false; 394 LLVMStyle.SpaceInEmptyParentheses = false; 395 LLVMStyle.SpacesInContainerLiterals = true; 396 
LLVMStyle.SpacesInCStyleCastParentheses = false; 397 LLVMStyle.SpaceAfterCStyleCast = false; 398 LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; 399 LLVMStyle.SpaceBeforeAssignmentOperators = true; 400 LLVMStyle.SpacesInAngles = false; 401 402 LLVMStyle.PenaltyBreakComment = 300; 403 LLVMStyle.PenaltyBreakFirstLessLess = 120; 404 LLVMStyle.PenaltyBreakString = 1000; 405 LLVMStyle.PenaltyExcessCharacter = 1000000; 406 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 407 LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; 408 409 LLVMStyle.DisableFormat = false; 410 411 return LLVMStyle; 412 } 413 414 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { 415 FormatStyle GoogleStyle = getLLVMStyle(); 416 GoogleStyle.Language = Language; 417 418 GoogleStyle.AccessModifierOffset = -1; 419 GoogleStyle.AlignEscapedNewlinesLeft = true; 420 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 421 GoogleStyle.AllowShortLoopsOnASingleLine = true; 422 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 423 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 424 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 425 GoogleStyle.DerivePointerAlignment = true; 426 GoogleStyle.IndentCaseLabels = true; 427 GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; 428 GoogleStyle.ObjCSpaceAfterProperty = false; 429 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 430 GoogleStyle.PointerAlignment = FormatStyle::PAS_Left; 431 GoogleStyle.SpacesBeforeTrailingComments = 2; 432 GoogleStyle.Standard = FormatStyle::LS_Auto; 433 434 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 435 GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; 436 437 if (Language == FormatStyle::LK_Java) { 438 GoogleStyle.AlignAfterOpenBracket = false; 439 GoogleStyle.AlignOperands = false; 440 GoogleStyle.AlignTrailingComments = false; 441 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; 442 GoogleStyle.AllowShortIfStatementsOnASingleLine = 
false; 443 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; 444 GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; 445 GoogleStyle.ColumnLimit = 100; 446 GoogleStyle.SpaceAfterCStyleCast = true; 447 GoogleStyle.SpacesBeforeTrailingComments = 1; 448 } else if (Language == FormatStyle::LK_JavaScript) { 449 GoogleStyle.BreakBeforeTernaryOperators = false; 450 GoogleStyle.MaxEmptyLinesToKeep = 3; 451 GoogleStyle.SpacesInContainerLiterals = false; 452 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 453 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; 454 } else if (Language == FormatStyle::LK_Proto) { 455 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; 456 GoogleStyle.SpacesInContainerLiterals = false; 457 } 458 459 return GoogleStyle; 460 } 461 462 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { 463 FormatStyle ChromiumStyle = getGoogleStyle(Language); 464 if (Language == FormatStyle::LK_Java) { 465 ChromiumStyle.AllowShortIfStatementsOnASingleLine = true; 466 ChromiumStyle.IndentWidth = 4; 467 ChromiumStyle.ContinuationIndentWidth = 8; 468 } else { 469 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 470 ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 471 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 472 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 473 ChromiumStyle.BinPackParameters = false; 474 ChromiumStyle.DerivePointerAlignment = false; 475 } 476 ChromiumStyle.MacroBlockBegin = "^IPC_BEGIN_MESSAGE_MAP$"; 477 ChromiumStyle.MacroBlockBegin = "^IPC_END_MESSAGE_MAP$"; 478 return ChromiumStyle; 479 } 480 481 FormatStyle getMozillaStyle() { 482 FormatStyle MozillaStyle = getLLVMStyle(); 483 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 484 MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 485 MozillaStyle.AlwaysBreakAfterDefinitionReturnType = 486 
FormatStyle::DRTBS_TopLevel; 487 MozillaStyle.AlwaysBreakTemplateDeclarations = true; 488 MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla; 489 MozillaStyle.BreakConstructorInitializersBeforeComma = true; 490 MozillaStyle.ConstructorInitializerIndentWidth = 2; 491 MozillaStyle.ContinuationIndentWidth = 2; 492 MozillaStyle.Cpp11BracedListStyle = false; 493 MozillaStyle.IndentCaseLabels = true; 494 MozillaStyle.ObjCSpaceAfterProperty = true; 495 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 496 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 497 MozillaStyle.PointerAlignment = FormatStyle::PAS_Left; 498 return MozillaStyle; 499 } 500 501 FormatStyle getWebKitStyle() { 502 FormatStyle Style = getLLVMStyle(); 503 Style.AccessModifierOffset = -4; 504 Style.AlignAfterOpenBracket = false; 505 Style.AlignOperands = false; 506 Style.AlignTrailingComments = false; 507 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 508 Style.BreakBeforeBraces = FormatStyle::BS_WebKit; 509 Style.BreakConstructorInitializersBeforeComma = true; 510 Style.Cpp11BracedListStyle = false; 511 Style.ColumnLimit = 0; 512 Style.IndentWidth = 4; 513 Style.NamespaceIndentation = FormatStyle::NI_Inner; 514 Style.ObjCBlockIndentWidth = 4; 515 Style.ObjCSpaceAfterProperty = true; 516 Style.PointerAlignment = FormatStyle::PAS_Left; 517 Style.Standard = FormatStyle::LS_Cpp03; 518 return Style; 519 } 520 521 FormatStyle getGNUStyle() { 522 FormatStyle Style = getLLVMStyle(); 523 Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All; 524 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 525 Style.BreakBeforeBraces = FormatStyle::BS_GNU; 526 Style.BreakBeforeTernaryOperators = true; 527 Style.Cpp11BracedListStyle = false; 528 Style.ColumnLimit = 79; 529 Style.SpaceBeforeParens = FormatStyle::SBPO_Always; 530 Style.Standard = FormatStyle::LS_Cpp03; 531 return Style; 532 } 533 534 FormatStyle getNoStyle() { 535 FormatStyle NoStyle = getLLVMStyle(); 536 
NoStyle.DisableFormat = true; 537 return NoStyle; 538 } 539 540 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, 541 FormatStyle *Style) { 542 if (Name.equals_lower("llvm")) { 543 *Style = getLLVMStyle(); 544 } else if (Name.equals_lower("chromium")) { 545 *Style = getChromiumStyle(Language); 546 } else if (Name.equals_lower("mozilla")) { 547 *Style = getMozillaStyle(); 548 } else if (Name.equals_lower("google")) { 549 *Style = getGoogleStyle(Language); 550 } else if (Name.equals_lower("webkit")) { 551 *Style = getWebKitStyle(); 552 } else if (Name.equals_lower("gnu")) { 553 *Style = getGNUStyle(); 554 } else if (Name.equals_lower("none")) { 555 *Style = getNoStyle(); 556 } else { 557 return false; 558 } 559 560 Style->Language = Language; 561 return true; 562 } 563 564 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 565 assert(Style); 566 FormatStyle::LanguageKind Language = Style->Language; 567 assert(Language != FormatStyle::LK_None); 568 if (Text.trim().empty()) 569 return make_error_code(ParseError::Error); 570 571 std::vector<FormatStyle> Styles; 572 llvm::yaml::Input Input(Text); 573 // DocumentListTraits<vector<FormatStyle>> uses the context to get default 574 // values for the fields, keys for which are missing from the configuration. 575 // Mapping also uses the context to get the language to find the correct 576 // base style. 577 Input.setContext(Style); 578 Input >> Styles; 579 if (Input.error()) 580 return Input.error(); 581 582 for (unsigned i = 0; i < Styles.size(); ++i) { 583 // Ensures that only the first configuration can skip the Language option. 584 if (Styles[i].Language == FormatStyle::LK_None && i != 0) 585 return make_error_code(ParseError::Error); 586 // Ensure that each language is configured at most once. 
587 for (unsigned j = 0; j < i; ++j) { 588 if (Styles[i].Language == Styles[j].Language) { 589 DEBUG(llvm::dbgs() 590 << "Duplicate languages in the config file on positions " << j 591 << " and " << i << "\n"); 592 return make_error_code(ParseError::Error); 593 } 594 } 595 } 596 // Look for a suitable configuration starting from the end, so we can 597 // find the configuration for the specific language first, and the default 598 // configuration (which can only be at slot 0) after it. 599 for (int i = Styles.size() - 1; i >= 0; --i) { 600 if (Styles[i].Language == Language || 601 Styles[i].Language == FormatStyle::LK_None) { 602 *Style = Styles[i]; 603 Style->Language = Language; 604 return make_error_code(ParseError::Success); 605 } 606 } 607 return make_error_code(ParseError::Unsuitable); 608 } 609 610 std::string configurationAsText(const FormatStyle &Style) { 611 std::string Text; 612 llvm::raw_string_ostream Stream(Text); 613 llvm::yaml::Output Output(Stream); 614 // We use the same mapping method for input and output, so we need a non-const 615 // reference here. 
616 FormatStyle NonConstStyle = Style; 617 Output << NonConstStyle; 618 return Stream.str(); 619 } 620 621 namespace { 622 623 class FormatTokenLexer { 624 public: 625 FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style, 626 encoding::Encoding Encoding) 627 : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), 628 LessStashed(false), Column(0), TrailingWhitespace(0), 629 SourceMgr(SourceMgr), ID(ID), Style(Style), 630 IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), 631 Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false), 632 MacroBlockBeginRegex(Style.MacroBlockBegin), 633 MacroBlockEndRegex(Style.MacroBlockEnd) { 634 Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, 635 getFormattingLangOpts(Style))); 636 Lex->SetKeepWhitespaceMode(true); 637 638 for (const std::string &ForEachMacro : Style.ForEachMacros) 639 ForEachMacros.push_back(&IdentTable.get(ForEachMacro)); 640 std::sort(ForEachMacros.begin(), ForEachMacros.end()); 641 } 642 643 ArrayRef<FormatToken *> lex() { 644 assert(Tokens.empty()); 645 assert(FirstInLineIndex == 0); 646 do { 647 Tokens.push_back(getNextToken()); 648 tryMergePreviousTokens(); 649 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) 650 FirstInLineIndex = Tokens.size() - 1; 651 } while (Tokens.back()->Tok.isNot(tok::eof)); 652 return Tokens; 653 } 654 655 const AdditionalKeywords &getKeywords() { return Keywords; } 656 657 private: 658 void tryMergePreviousTokens() { 659 if (tryMerge_TMacro()) 660 return; 661 if (tryMergeConflictMarkers()) 662 return; 663 if (tryMergeLessLess()) 664 return; 665 666 if (Style.Language == FormatStyle::LK_JavaScript) { 667 if (tryMergeJSRegexLiteral()) 668 return; 669 if (tryMergeEscapeSequence()) 670 return; 671 if (tryMergeTemplateString()) 672 return; 673 674 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; 675 static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal, 676 
tok::equal}; 677 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater, 678 tok::greaterequal}; 679 static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater}; 680 // FIXME: Investigate what token type gives the correct operator priority. 681 if (tryMergeTokens(JSIdentity, TT_BinaryOperator)) 682 return; 683 if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator)) 684 return; 685 if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator)) 686 return; 687 if (tryMergeTokens(JSRightArrow, TT_JsFatArrow)) 688 return; 689 } 690 } 691 692 bool tryMergeLessLess() { 693 // Merge X,less,less,Y into X,lessless,Y unless X or Y is less. 694 if (Tokens.size() < 3) 695 return false; 696 697 bool FourthTokenIsLess = false; 698 if (Tokens.size() > 3) 699 FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less); 700 701 auto First = Tokens.end() - 3; 702 if (First[2]->is(tok::less) || First[1]->isNot(tok::less) || 703 First[0]->isNot(tok::less) || FourthTokenIsLess) 704 return false; 705 706 // Only merge if there currently is no whitespace between the two "<". 
707 if (First[1]->WhitespaceRange.getBegin() != 708 First[1]->WhitespaceRange.getEnd()) 709 return false; 710 711 First[0]->Tok.setKind(tok::lessless); 712 First[0]->TokenText = "<<"; 713 First[0]->ColumnWidth += 1; 714 Tokens.erase(Tokens.end() - 2); 715 return true; 716 } 717 718 bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) { 719 if (Tokens.size() < Kinds.size()) 720 return false; 721 722 SmallVectorImpl<FormatToken *>::const_iterator First = 723 Tokens.end() - Kinds.size(); 724 if (!First[0]->is(Kinds[0])) 725 return false; 726 unsigned AddLength = 0; 727 for (unsigned i = 1; i < Kinds.size(); ++i) { 728 if (!First[i]->is(Kinds[i]) || 729 First[i]->WhitespaceRange.getBegin() != 730 First[i]->WhitespaceRange.getEnd()) 731 return false; 732 AddLength += First[i]->TokenText.size(); 733 } 734 Tokens.resize(Tokens.size() - Kinds.size() + 1); 735 First[0]->TokenText = StringRef(First[0]->TokenText.data(), 736 First[0]->TokenText.size() + AddLength); 737 First[0]->ColumnWidth += AddLength; 738 First[0]->Type = NewType; 739 return true; 740 } 741 742 // Tries to merge an escape sequence, i.e. a "\\" and the following 743 // character. Use e.g. inside JavaScript regex literals. 744 bool tryMergeEscapeSequence() { 745 if (Tokens.size() < 2) 746 return false; 747 FormatToken *Previous = Tokens[Tokens.size() - 2]; 748 if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\") 749 return false; 750 ++Previous->ColumnWidth; 751 StringRef Text = Previous->TokenText; 752 Previous->TokenText = StringRef(Text.data(), Text.size() + 1); 753 resetLexer(SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 1); 754 Tokens.resize(Tokens.size() - 1); 755 Column = Previous->OriginalColumn + Previous->ColumnWidth; 756 return true; 757 } 758 759 // Try to determine whether the current token ends a JavaScript regex literal. 
// We heuristically assume that this is a regex literal if we find two
  // unescaped slashes on a line and the token before the first slash is one of
  // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
  // a division.
  //
  // On success, the tokens from the opening slash through the current token
  // are collapsed into one TT_RegexLiteral token of kind tok::string_literal
  // and true is returned. Otherwise the token list is left unchanged and
  // false is returned.
  bool tryMergeJSRegexLiteral() {
    if (Tokens.size() < 2)
      return false;

    // If this is a string literal with a slash inside, compute the slash's
    // offset and try to find the beginning of the regex literal.
    // Also look at tok::unknown, as it can be an unterminated char literal.
    size_t SlashInStringPos = StringRef::npos;
    if (Tokens.back()->isOneOf(tok::string_literal, tok::char_constant,
                               tok::unknown)) {
      // Start search from position 1 as otherwise, this is an unknown token
      // for an unterminated /*-comment which is handled elsewhere.
      SlashInStringPos = Tokens.back()->TokenText.find('/', 1);
      if (SlashInStringPos == StringRef::npos)
        return false;
    }

    // If a regex literal ends in "\//", this gets represented by an unknown
    // token "\" and a comment.
    bool MightEndWithEscapedSlash =
        Tokens.back()->is(tok::comment) &&
        Tokens.back()->TokenText.startswith("//") &&
        Tokens[Tokens.size() - 2]->TokenText == "\\";
    // Give up unless the current token can close a regex literal: an escaped
    // slash case, a slash inside a string-like token, or a plain slash that
    // is not itself preceded by a backslash.
    if (!MightEndWithEscapedSlash && SlashInStringPos == StringRef::npos &&
        (Tokens.back()->isNot(tok::slash) ||
         (Tokens[Tokens.size() - 2]->is(tok::unknown) &&
          Tokens[Tokens.size() - 2]->TokenText == "\\")))
      return false;

    // Walk backwards from the token before the current one, looking for the
    // opening slash. TokenCount is the number of tokens after the candidate
    // that would be folded into it.
    unsigned TokenCount = 0;
    for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
      ++TokenCount;
      // Prev is the closest non-comment token before the candidate, or E if
      // there is none.
      auto Prev = I + 1;
      while (Prev != E && Prev[0]->is(tok::comment))
        ++Prev;
      if (I[0]->isOneOf(tok::slash, tok::slashequal) &&
          (Prev == E ||
           ((Prev[0]->isOneOf(tok::l_paren, tok::semi, tok::l_brace,
                              tok::r_brace, tok::exclaim, tok::l_square,
                              tok::colon, tok::comma, tok::question,
                              tok::kw_return) ||
             Prev[0]->isBinaryOperator())))) {
        unsigned LastColumn = Tokens.back()->OriginalColumn;
        SourceLocation Loc = Tokens.back()->Tok.getLocation();
        if (MightEndWithEscapedSlash) {
          // This regex literal ends in '\//'. Skip past the '//' of the last
          // token and re-start lexing from there.
          resetLexer(SourceMgr.getFileOffset(Loc) + 2);
        } else if (SlashInStringPos != StringRef::npos) {
          // This regex literal ends in a string_literal with a slash inside.
          // Calculate end column and reset lexer appropriately.
          resetLexer(SourceMgr.getFileOffset(Loc) + SlashInStringPos + 1);
          LastColumn += SlashInStringPos;
        }
        // Drop the folded tokens; the opening slash becomes the last token.
        Tokens.resize(Tokens.size() - TokenCount);
        // NOTE(review): this kind is overwritten by the setKind call two
        // statements below.
        Tokens.back()->Tok.setKind(tok::unknown);
        Tokens.back()->Type = TT_RegexLiteral;
        // Treat regex literals like other string_literals.
        Tokens.back()->Tok.setKind(tok::string_literal);
        // Widen the token by the column distance from the opening slash to
        // the closing position.
        Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn;
        return true;
      }

      // There can't be a newline inside a regex literal.
      if (I[0]->NewlinesBefore > 0)
        return false;
    }
    return false;
  }

  bool tryMergeTemplateString() {
    if (Tokens.size() < 2)
      return false;

    FormatToken *EndBacktick = Tokens.back();
    // Backticks get lexed as tok::unknown tokens. If a template string contains
    // a comment start, it gets lexed as a tok::comment, or tok::unknown if
    // unterminated.
    if (!EndBacktick->isOneOf(tok::comment, tok::string_literal,
                              tok::char_constant, tok::unknown))
      return false;
    size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
    // Unknown token that's not actually a backtick, or a comment that doesn't
    // contain a backtick.
    if (CommentBacktickPos == StringRef::npos)
      return false;

    unsigned TokenCount = 0;
    bool IsMultiline = false;
    unsigned EndColumnInFirstLine =
        EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
    for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
      ++TokenCount;
      if (I[0]->IsMultiline)
        IsMultiline = true;

      // If there was a preceding template string, this must be the start of a
      // template string, not the end.
      if (I[0]->is(TT_TemplateString))
        return false;

      if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") {
        // Keep track of the rhs offset of the last token to wrap across lines -
        // it's the rhs offset of the first line of the template string, used to
        // determine its width.
        if (I[0]->IsMultiline)
          EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
        // If the token has newlines, the token before it (if it exists) is the
        // rhs end of the previous line.
873 if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) { 874 EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth; 875 IsMultiline = true; 876 } 877 continue; 878 } 879 880 Tokens.resize(Tokens.size() - TokenCount); 881 Tokens.back()->Type = TT_TemplateString; 882 const char *EndOffset = 883 EndBacktick->TokenText.data() + 1 + CommentBacktickPos; 884 if (CommentBacktickPos != 0) { 885 // If the backtick was not the first character (e.g. in a comment), 886 // re-lex after the backtick position. 887 SourceLocation Loc = EndBacktick->Tok.getLocation(); 888 resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1); 889 } 890 Tokens.back()->TokenText = 891 StringRef(Tokens.back()->TokenText.data(), 892 EndOffset - Tokens.back()->TokenText.data()); 893 894 unsigned EndOriginalColumn = EndBacktick->OriginalColumn; 895 if (EndOriginalColumn == 0) { 896 SourceLocation Loc = EndBacktick->Tok.getLocation(); 897 EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc); 898 } 899 // If the ` is further down within the token (e.g. in a comment). 900 EndOriginalColumn += CommentBacktickPos; 901 902 if (IsMultiline) { 903 // ColumnWidth is from backtick to last token in line. 904 // LastLineColumnWidth is 0 to backtick. 905 // x = `some content 906 // until here`; 907 Tokens.back()->ColumnWidth = 908 EndColumnInFirstLine - Tokens.back()->OriginalColumn; 909 // +1 for the ` itself. 910 Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1; 911 Tokens.back()->IsMultiline = true; 912 } else { 913 // Token simply spans from start to end, +1 for the ` itself. 
914 Tokens.back()->ColumnWidth = 915 EndOriginalColumn - Tokens.back()->OriginalColumn + 1; 916 } 917 return true; 918 } 919 return false; 920 } 921 922 bool tryMerge_TMacro() { 923 if (Tokens.size() < 4) 924 return false; 925 FormatToken *Last = Tokens.back(); 926 if (!Last->is(tok::r_paren)) 927 return false; 928 929 FormatToken *String = Tokens[Tokens.size() - 2]; 930 if (!String->is(tok::string_literal) || String->IsMultiline) 931 return false; 932 933 if (!Tokens[Tokens.size() - 3]->is(tok::l_paren)) 934 return false; 935 936 FormatToken *Macro = Tokens[Tokens.size() - 4]; 937 if (Macro->TokenText != "_T") 938 return false; 939 940 const char *Start = Macro->TokenText.data(); 941 const char *End = Last->TokenText.data() + Last->TokenText.size(); 942 String->TokenText = StringRef(Start, End - Start); 943 String->IsFirst = Macro->IsFirst; 944 String->LastNewlineOffset = Macro->LastNewlineOffset; 945 String->WhitespaceRange = Macro->WhitespaceRange; 946 String->OriginalColumn = Macro->OriginalColumn; 947 String->ColumnWidth = encoding::columnWidthWithTabs( 948 String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); 949 String->NewlinesBefore = Macro->NewlinesBefore; 950 String->HasUnescapedNewline = Macro->HasUnescapedNewline; 951 952 Tokens.pop_back(); 953 Tokens.pop_back(); 954 Tokens.pop_back(); 955 Tokens.back() = String; 956 return true; 957 } 958 959 bool tryMergeConflictMarkers() { 960 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof)) 961 return false; 962 963 // Conflict lines look like: 964 // <marker> <text from the vcs> 965 // For example: 966 // >>>>>>> /file/in/file/system at revision 1234 967 // 968 // We merge all tokens in a line that starts with a conflict marker 969 // into a single token with a special token type that the unwrapped line 970 // parser will use to correctly rebuild the underlying code. 971 972 FileID ID; 973 // Get the position of the first token in the line. 
974 unsigned FirstInLineOffset; 975 std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc( 976 Tokens[FirstInLineIndex]->getStartOfNonWhitespace()); 977 StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer(); 978 // Calculate the offset of the start of the current line. 979 auto LineOffset = Buffer.rfind('\n', FirstInLineOffset); 980 if (LineOffset == StringRef::npos) { 981 LineOffset = 0; 982 } else { 983 ++LineOffset; 984 } 985 986 auto FirstSpace = Buffer.find_first_of(" \n", LineOffset); 987 StringRef LineStart; 988 if (FirstSpace == StringRef::npos) { 989 LineStart = Buffer.substr(LineOffset); 990 } else { 991 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset); 992 } 993 994 TokenType Type = TT_Unknown; 995 if (LineStart == "<<<<<<<" || LineStart == ">>>>") { 996 Type = TT_ConflictStart; 997 } else if (LineStart == "|||||||" || LineStart == "=======" || 998 LineStart == "====") { 999 Type = TT_ConflictAlternative; 1000 } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") { 1001 Type = TT_ConflictEnd; 1002 } 1003 1004 if (Type != TT_Unknown) { 1005 FormatToken *Next = Tokens.back(); 1006 1007 Tokens.resize(FirstInLineIndex + 1); 1008 // We do not need to build a complete token here, as we will skip it 1009 // during parsing anyway (as we must not touch whitespace around conflict 1010 // markers). 1011 Tokens.back()->Type = Type; 1012 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype); 1013 1014 Tokens.push_back(Next); 1015 return true; 1016 } 1017 1018 return false; 1019 } 1020 1021 FormatToken *getStashedToken() { 1022 // Create a synthesized second '>' or '<' token. 
1023 Token Tok = FormatTok->Tok; 1024 StringRef TokenText = FormatTok->TokenText; 1025 1026 unsigned OriginalColumn = FormatTok->OriginalColumn; 1027 FormatTok = new (Allocator.Allocate()) FormatToken; 1028 FormatTok->Tok = Tok; 1029 SourceLocation TokLocation = 1030 FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1); 1031 FormatTok->Tok.setLocation(TokLocation); 1032 FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation); 1033 FormatTok->TokenText = TokenText; 1034 FormatTok->ColumnWidth = 1; 1035 FormatTok->OriginalColumn = OriginalColumn + 1; 1036 1037 return FormatTok; 1038 } 1039 1040 FormatToken *getNextToken() { 1041 if (GreaterStashed) { 1042 GreaterStashed = false; 1043 return getStashedToken(); 1044 } 1045 if (LessStashed) { 1046 LessStashed = false; 1047 return getStashedToken(); 1048 } 1049 1050 FormatTok = new (Allocator.Allocate()) FormatToken; 1051 readRawToken(*FormatTok); 1052 SourceLocation WhitespaceStart = 1053 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); 1054 FormatTok->IsFirst = IsFirstToken; 1055 IsFirstToken = false; 1056 1057 // Consume and record whitespace until we find a significant token. 1058 unsigned WhitespaceLength = TrailingWhitespace; 1059 while (FormatTok->Tok.is(tok::unknown)) { 1060 StringRef Text = FormatTok->TokenText; 1061 auto EscapesNewline = [&](int pos) { 1062 // A '\r' here is just part of '\r\n'. Skip it. 1063 if (pos >= 0 && Text[pos] == '\r') 1064 --pos; 1065 // See whether there is an odd number of '\' before this. 1066 unsigned count = 0; 1067 for (; pos >= 0; --pos, ++count) 1068 if (Text[pos] != '\\') 1069 break; 1070 return count & 1; 1071 }; 1072 // FIXME: This miscounts tok:unknown tokens that are not just 1073 // whitespace, e.g. a '`' character. 
1074 for (int i = 0, e = Text.size(); i != e; ++i) { 1075 switch (Text[i]) { 1076 case '\n': 1077 ++FormatTok->NewlinesBefore; 1078 FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1); 1079 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; 1080 Column = 0; 1081 break; 1082 case '\r': 1083 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; 1084 Column = 0; 1085 break; 1086 case '\f': 1087 case '\v': 1088 Column = 0; 1089 break; 1090 case ' ': 1091 ++Column; 1092 break; 1093 case '\t': 1094 Column += Style.TabWidth - Column % Style.TabWidth; 1095 break; 1096 case '\\': 1097 if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n')) 1098 FormatTok->Type = TT_ImplicitStringLiteral; 1099 break; 1100 default: 1101 FormatTok->Type = TT_ImplicitStringLiteral; 1102 break; 1103 } 1104 } 1105 1106 if (FormatTok->is(TT_ImplicitStringLiteral)) 1107 break; 1108 WhitespaceLength += FormatTok->Tok.getLength(); 1109 1110 readRawToken(*FormatTok); 1111 } 1112 1113 // In case the token starts with escaped newlines, we want to 1114 // take them into account as whitespace - this pattern is quite frequent 1115 // in macro definitions. 1116 // FIXME: Add a more explicit test. 1117 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && 1118 FormatTok->TokenText[1] == '\n') { 1119 ++FormatTok->NewlinesBefore; 1120 WhitespaceLength += 2; 1121 FormatTok->LastNewlineOffset = 2; 1122 Column = 0; 1123 FormatTok->TokenText = FormatTok->TokenText.substr(2); 1124 } 1125 1126 FormatTok->WhitespaceRange = SourceRange( 1127 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); 1128 1129 FormatTok->OriginalColumn = Column; 1130 1131 TrailingWhitespace = 0; 1132 if (FormatTok->Tok.is(tok::comment)) { 1133 // FIXME: Add the trimmed whitespace to Column. 
1134 StringRef UntrimmedText = FormatTok->TokenText; 1135 FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f"); 1136 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); 1137 } else if (FormatTok->Tok.is(tok::raw_identifier)) { 1138 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); 1139 FormatTok->Tok.setIdentifierInfo(&Info); 1140 FormatTok->Tok.setKind(Info.getTokenID()); 1141 if (Style.Language == FormatStyle::LK_Java && 1142 FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete)) { 1143 FormatTok->Tok.setKind(tok::identifier); 1144 FormatTok->Tok.setIdentifierInfo(nullptr); 1145 } 1146 } else if (FormatTok->Tok.is(tok::greatergreater)) { 1147 FormatTok->Tok.setKind(tok::greater); 1148 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1149 GreaterStashed = true; 1150 } else if (FormatTok->Tok.is(tok::lessless)) { 1151 FormatTok->Tok.setKind(tok::less); 1152 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1153 LessStashed = true; 1154 } 1155 1156 // Now FormatTok is the next non-whitespace token. 1157 1158 StringRef Text = FormatTok->TokenText; 1159 size_t FirstNewlinePos = Text.find('\n'); 1160 if (FirstNewlinePos == StringRef::npos) { 1161 // FIXME: ColumnWidth actually depends on the start column, we need to 1162 // take this into account when the token is moved. 1163 FormatTok->ColumnWidth = 1164 encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding); 1165 Column += FormatTok->ColumnWidth; 1166 } else { 1167 FormatTok->IsMultiline = true; 1168 // FIXME: ColumnWidth actually depends on the start column, we need to 1169 // take this into account when the token is moved. 1170 FormatTok->ColumnWidth = encoding::columnWidthWithTabs( 1171 Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding); 1172 1173 // The last line of the token always starts in column 0. 1174 // Thus, the length can be precomputed even in the presence of tabs. 
1175 FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs( 1176 Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, 1177 Encoding); 1178 Column = FormatTok->LastLineColumnWidth; 1179 } 1180 1181 if (Style.Language == FormatStyle::LK_Cpp) { 1182 if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() && 1183 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() == 1184 tok::pp_define) && 1185 std::find(ForEachMacros.begin(), ForEachMacros.end(), 1186 FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) { 1187 FormatTok->Type = TT_ForEachMacro; 1188 } else if (FormatTok->is(tok::identifier)) { 1189 if (MacroBlockBeginRegex.match(Text)) { 1190 FormatTok->Type = TT_MacroBlockBegin; 1191 } else if (MacroBlockEndRegex.match(Text)) { 1192 FormatTok->Type = TT_MacroBlockEnd; 1193 } 1194 } 1195 } 1196 1197 return FormatTok; 1198 } 1199 1200 FormatToken *FormatTok; 1201 bool IsFirstToken; 1202 bool GreaterStashed, LessStashed; 1203 unsigned Column; 1204 unsigned TrailingWhitespace; 1205 std::unique_ptr<Lexer> Lex; 1206 SourceManager &SourceMgr; 1207 FileID ID; 1208 FormatStyle &Style; 1209 IdentifierTable IdentTable; 1210 AdditionalKeywords Keywords; 1211 encoding::Encoding Encoding; 1212 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 1213 // Index (in 'Tokens') of the last token that starts a new line. 1214 unsigned FirstInLineIndex; 1215 SmallVector<FormatToken *, 16> Tokens; 1216 SmallVector<IdentifierInfo *, 8> ForEachMacros; 1217 1218 bool FormattingDisabled; 1219 1220 llvm::Regex MacroBlockBeginRegex; 1221 llvm::Regex MacroBlockEndRegex; 1222 1223 void readRawToken(FormatToken &Tok) { 1224 Lex->LexFromRawLexer(Tok.Tok); 1225 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), 1226 Tok.Tok.getLength()); 1227 // For formatting, treat unterminated string literals like normal string 1228 // literals. 
1229 if (Tok.is(tok::unknown)) { 1230 if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') { 1231 Tok.Tok.setKind(tok::string_literal); 1232 Tok.IsUnterminatedLiteral = true; 1233 } else if (Style.Language == FormatStyle::LK_JavaScript && 1234 Tok.TokenText == "''") { 1235 Tok.Tok.setKind(tok::char_constant); 1236 } 1237 } 1238 1239 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" || 1240 Tok.TokenText == "/* clang-format on */")) { 1241 FormattingDisabled = false; 1242 } 1243 1244 Tok.Finalized = FormattingDisabled; 1245 1246 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" || 1247 Tok.TokenText == "/* clang-format off */")) { 1248 FormattingDisabled = true; 1249 } 1250 } 1251 1252 void resetLexer(unsigned Offset) { 1253 StringRef Buffer = SourceMgr.getBufferData(ID); 1254 Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), 1255 getFormattingLangOpts(Style), Buffer.begin(), 1256 Buffer.begin() + Offset, Buffer.end())); 1257 Lex->SetKeepWhitespaceMode(true); 1258 TrailingWhitespace = 0; 1259 } 1260 }; 1261 1262 static StringRef getLanguageName(FormatStyle::LanguageKind Language) { 1263 switch (Language) { 1264 case FormatStyle::LK_Cpp: 1265 return "C++"; 1266 case FormatStyle::LK_Java: 1267 return "Java"; 1268 case FormatStyle::LK_JavaScript: 1269 return "JavaScript"; 1270 case FormatStyle::LK_Proto: 1271 return "Proto"; 1272 default: 1273 return "Unknown"; 1274 } 1275 } 1276 1277 class Formatter : public UnwrappedLineConsumer { 1278 public: 1279 Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID, 1280 ArrayRef<CharSourceRange> Ranges) 1281 : Style(Style), ID(ID), SourceMgr(SourceMgr), 1282 Whitespaces(SourceMgr, Style, 1283 inputUsesCRLF(SourceMgr.getBufferData(ID))), 1284 Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1), 1285 Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) { 1286 DEBUG(llvm::dbgs() << "File encoding: " 1287 << (Encoding == encoding::Encoding_UTF8 ? 
"UTF8" 1288 : "unknown") 1289 << "\n"); 1290 DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language) 1291 << "\n"); 1292 } 1293 1294 tooling::Replacements format(bool *IncompleteFormat) { 1295 tooling::Replacements Result; 1296 FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding); 1297 1298 UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), 1299 *this); 1300 Parser.parse(); 1301 assert(UnwrappedLines.rbegin()->empty()); 1302 for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; 1303 ++Run) { 1304 DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); 1305 SmallVector<AnnotatedLine *, 16> AnnotatedLines; 1306 for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) { 1307 AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); 1308 } 1309 tooling::Replacements RunResult = 1310 format(AnnotatedLines, Tokens, IncompleteFormat); 1311 DEBUG({ 1312 llvm::dbgs() << "Replacements for run " << Run << ":\n"; 1313 for (tooling::Replacements::iterator I = RunResult.begin(), 1314 E = RunResult.end(); 1315 I != E; ++I) { 1316 llvm::dbgs() << I->toString() << "\n"; 1317 } 1318 }); 1319 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1320 delete AnnotatedLines[i]; 1321 } 1322 Result.insert(RunResult.begin(), RunResult.end()); 1323 Whitespaces.reset(); 1324 } 1325 return Result; 1326 } 1327 1328 tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, 1329 FormatTokenLexer &Tokens, 1330 bool *IncompleteFormat) { 1331 TokenAnnotator Annotator(Style, Tokens.getKeywords()); 1332 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1333 Annotator.annotate(*AnnotatedLines[i]); 1334 } 1335 deriveLocalStyle(AnnotatedLines); 1336 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1337 Annotator.calculateFormattingInformation(*AnnotatedLines[i]); 1338 } 1339 computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); 1340 1341 
Annotator.setCommentLineLevels(AnnotatedLines); 1342 ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr, 1343 Whitespaces, Encoding, 1344 BinPackInconclusiveFunctions); 1345 UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), 1346 IncompleteFormat) 1347 .format(AnnotatedLines); 1348 return Whitespaces.generateReplacements(); 1349 } 1350 1351 private: 1352 // Determines which lines are affected by the SourceRanges given as input. 1353 // Returns \c true if at least one line between I and E or one of their 1354 // children is affected. 1355 bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I, 1356 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1357 bool SomeLineAffected = false; 1358 const AnnotatedLine *PreviousLine = nullptr; 1359 while (I != E) { 1360 AnnotatedLine *Line = *I; 1361 Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); 1362 1363 // If a line is part of a preprocessor directive, it needs to be formatted 1364 // if any token within the directive is affected. 1365 if (Line->InPPDirective) { 1366 FormatToken *Last = Line->Last; 1367 SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1; 1368 while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) { 1369 Last = (*PPEnd)->Last; 1370 ++PPEnd; 1371 } 1372 1373 if (affectsTokenRange(*Line->First, *Last, 1374 /*IncludeLeadingNewlines=*/false)) { 1375 SomeLineAffected = true; 1376 markAllAsAffected(I, PPEnd); 1377 } 1378 I = PPEnd; 1379 continue; 1380 } 1381 1382 if (nonPPLineAffected(Line, PreviousLine)) 1383 SomeLineAffected = true; 1384 1385 PreviousLine = Line; 1386 ++I; 1387 } 1388 return SomeLineAffected; 1389 } 1390 1391 // Determines whether 'Line' is affected by the SourceRanges given as input. 1392 // Returns \c true if line or one if its children is affected. 
1393 bool nonPPLineAffected(AnnotatedLine *Line, 1394 const AnnotatedLine *PreviousLine) { 1395 bool SomeLineAffected = false; 1396 Line->ChildrenAffected = 1397 computeAffectedLines(Line->Children.begin(), Line->Children.end()); 1398 if (Line->ChildrenAffected) 1399 SomeLineAffected = true; 1400 1401 // Stores whether one of the line's tokens is directly affected. 1402 bool SomeTokenAffected = false; 1403 // Stores whether we need to look at the leading newlines of the next token 1404 // in order to determine whether it was affected. 1405 bool IncludeLeadingNewlines = false; 1406 1407 // Stores whether the first child line of any of this line's tokens is 1408 // affected. 1409 bool SomeFirstChildAffected = false; 1410 1411 for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { 1412 // Determine whether 'Tok' was affected. 1413 if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) 1414 SomeTokenAffected = true; 1415 1416 // Determine whether the first child of 'Tok' was affected. 1417 if (!Tok->Children.empty() && Tok->Children.front()->Affected) 1418 SomeFirstChildAffected = true; 1419 1420 IncludeLeadingNewlines = Tok->Children.empty(); 1421 } 1422 1423 // Was this line moved, i.e. has it previously been on the same line as an 1424 // affected line? 1425 bool LineMoved = PreviousLine && PreviousLine->Affected && 1426 Line->First->NewlinesBefore == 0; 1427 1428 bool IsContinuedComment = 1429 Line->First->is(tok::comment) && Line->First->Next == nullptr && 1430 Line->First->NewlinesBefore < 2 && PreviousLine && 1431 PreviousLine->Affected && PreviousLine->Last->is(tok::comment); 1432 1433 if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || 1434 IsContinuedComment) { 1435 Line->Affected = true; 1436 SomeLineAffected = true; 1437 } 1438 return SomeLineAffected; 1439 } 1440 1441 // Marks all lines between I and E as well as all their children as affected. 
1442 void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I, 1443 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1444 while (I != E) { 1445 (*I)->Affected = true; 1446 markAllAsAffected((*I)->Children.begin(), (*I)->Children.end()); 1447 ++I; 1448 } 1449 } 1450 1451 // Returns true if the range from 'First' to 'Last' intersects with one of the 1452 // input ranges. 1453 bool affectsTokenRange(const FormatToken &First, const FormatToken &Last, 1454 bool IncludeLeadingNewlines) { 1455 SourceLocation Start = First.WhitespaceRange.getBegin(); 1456 if (!IncludeLeadingNewlines) 1457 Start = Start.getLocWithOffset(First.LastNewlineOffset); 1458 SourceLocation End = Last.getStartOfNonWhitespace(); 1459 End = End.getLocWithOffset(Last.TokenText.size()); 1460 CharSourceRange Range = CharSourceRange::getCharRange(Start, End); 1461 return affectsCharSourceRange(Range); 1462 } 1463 1464 // Returns true if one of the input ranges intersect the leading empty lines 1465 // before 'Tok'. 1466 bool affectsLeadingEmptyLines(const FormatToken &Tok) { 1467 CharSourceRange EmptyLineRange = CharSourceRange::getCharRange( 1468 Tok.WhitespaceRange.getBegin(), 1469 Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset)); 1470 return affectsCharSourceRange(EmptyLineRange); 1471 } 1472 1473 // Returns true if 'Range' intersects with one of the input ranges. 
1474 bool affectsCharSourceRange(const CharSourceRange &Range) { 1475 for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(), 1476 E = Ranges.end(); 1477 I != E; ++I) { 1478 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && 1479 !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) 1480 return true; 1481 } 1482 return false; 1483 } 1484 1485 static bool inputUsesCRLF(StringRef Text) { 1486 return Text.count('\r') * 2 > Text.count('\n'); 1487 } 1488 1489 bool 1490 hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) { 1491 for (const AnnotatedLine* Line : Lines) { 1492 if (hasCpp03IncompatibleFormat(Line->Children)) 1493 return true; 1494 for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) { 1495 if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { 1496 if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener)) 1497 return true; 1498 if (Tok->is(TT_TemplateCloser) && 1499 Tok->Previous->is(TT_TemplateCloser)) 1500 return true; 1501 } 1502 } 1503 } 1504 return false; 1505 } 1506 1507 int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) { 1508 int AlignmentDiff = 0; 1509 for (const AnnotatedLine* Line : Lines) { 1510 AlignmentDiff += countVariableAlignments(Line->Children); 1511 for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) { 1512 if (!Tok->is(TT_PointerOrReference)) 1513 continue; 1514 bool SpaceBefore = 1515 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 1516 bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() != 1517 Tok->Next->WhitespaceRange.getEnd(); 1518 if (SpaceBefore && !SpaceAfter) 1519 ++AlignmentDiff; 1520 if (!SpaceBefore && SpaceAfter) 1521 --AlignmentDiff; 1522 } 1523 } 1524 return AlignmentDiff; 1525 } 1526 1527 void 1528 deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 1529 bool HasBinPackedFunction = false; 1530 bool 
HasOnePerLineFunction = false; 1531 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1532 if (!AnnotatedLines[i]->First->Next) 1533 continue; 1534 FormatToken *Tok = AnnotatedLines[i]->First->Next; 1535 while (Tok->Next) { 1536 if (Tok->PackingKind == PPK_BinPacked) 1537 HasBinPackedFunction = true; 1538 if (Tok->PackingKind == PPK_OnePerLine) 1539 HasOnePerLineFunction = true; 1540 1541 Tok = Tok->Next; 1542 } 1543 } 1544 if (Style.DerivePointerAlignment) 1545 Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0 1546 ? FormatStyle::PAS_Left 1547 : FormatStyle::PAS_Right; 1548 if (Style.Standard == FormatStyle::LS_Auto) 1549 Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines) 1550 ? FormatStyle::LS_Cpp11 1551 : FormatStyle::LS_Cpp03; 1552 BinPackInconclusiveFunctions = 1553 HasBinPackedFunction || !HasOnePerLineFunction; 1554 } 1555 1556 void consumeUnwrappedLine(const UnwrappedLine &TheLine) override { 1557 assert(!UnwrappedLines.empty()); 1558 UnwrappedLines.back().push_back(TheLine); 1559 } 1560 1561 void finishRun() override { 1562 UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>()); 1563 } 1564 1565 FormatStyle Style; 1566 FileID ID; 1567 SourceManager &SourceMgr; 1568 WhitespaceManager Whitespaces; 1569 SmallVector<CharSourceRange, 8> Ranges; 1570 SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines; 1571 1572 encoding::Encoding Encoding; 1573 bool BinPackInconclusiveFunctions; 1574 }; 1575 1576 } // end anonymous namespace 1577 1578 tooling::Replacements reformat(const FormatStyle &Style, 1579 SourceManager &SourceMgr, FileID ID, 1580 ArrayRef<CharSourceRange> Ranges, 1581 bool *IncompleteFormat) { 1582 if (Style.DisableFormat) 1583 return tooling::Replacements(); 1584 Formatter formatter(Style, SourceMgr, ID, Ranges); 1585 return formatter.format(IncompleteFormat); 1586 } 1587 1588 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 1589 ArrayRef<tooling::Range> Ranges, 1590 
StringRef FileName, bool *IncompleteFormat) { 1591 if (Style.DisableFormat) 1592 return tooling::Replacements(); 1593 1594 FileManager Files((FileSystemOptions())); 1595 DiagnosticsEngine Diagnostics( 1596 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 1597 new DiagnosticOptions); 1598 SourceManager SourceMgr(Diagnostics, Files); 1599 std::unique_ptr<llvm::MemoryBuffer> Buf = 1600 llvm::MemoryBuffer::getMemBuffer(Code, FileName); 1601 const clang::FileEntry *Entry = 1602 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 1603 SourceMgr.overrideFileContents(Entry, std::move(Buf)); 1604 FileID ID = 1605 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 1606 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 1607 std::vector<CharSourceRange> CharRanges; 1608 for (const tooling::Range &Range : Ranges) { 1609 SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); 1610 SourceLocation End = Start.getLocWithOffset(Range.getLength()); 1611 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 1612 } 1613 return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat); 1614 } 1615 1616 LangOptions getFormattingLangOpts(const FormatStyle &Style) { 1617 LangOptions LangOpts; 1618 LangOpts.CPlusPlus = 1; 1619 LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1620 LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1621 LangOpts.LineComment = 1; 1622 bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp; 1623 LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0; 1624 LangOpts.Bool = 1; 1625 LangOpts.ObjC1 = 1; 1626 LangOpts.ObjC2 = 1; 1627 LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. 
1628 return LangOpts; 1629 } 1630 1631 const char *StyleOptionHelpDescription = 1632 "Coding style, currently supports:\n" 1633 " LLVM, Google, Chromium, Mozilla, WebKit.\n" 1634 "Use -style=file to load style configuration from\n" 1635 ".clang-format file located in one of the parent\n" 1636 "directories of the source file (or current\n" 1637 "directory for stdin).\n" 1638 "Use -style=\"{key: value, ...}\" to set specific\n" 1639 "parameters, e.g.:\n" 1640 " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; 1641 1642 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { 1643 if (FileName.endswith(".java")) { 1644 return FormatStyle::LK_Java; 1645 } else if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) { 1646 // JavaScript or TypeScript. 1647 return FormatStyle::LK_JavaScript; 1648 } else if (FileName.endswith_lower(".proto") || 1649 FileName.endswith_lower(".protodevel")) { 1650 return FormatStyle::LK_Proto; 1651 } 1652 return FormatStyle::LK_Cpp; 1653 } 1654 1655 FormatStyle getStyle(StringRef StyleName, StringRef FileName, 1656 StringRef FallbackStyle) { 1657 FormatStyle Style = getLLVMStyle(); 1658 Style.Language = getLanguageByFileName(FileName); 1659 if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { 1660 llvm::errs() << "Invalid fallback style \"" << FallbackStyle 1661 << "\" using LLVM style\n"; 1662 return Style; 1663 } 1664 1665 if (StyleName.startswith("{")) { 1666 // Parse YAML/JSON style from the command line. 
1667 if (std::error_code ec = parseConfiguration(StyleName, &Style)) { 1668 llvm::errs() << "Error parsing -style: " << ec.message() << ", using " 1669 << FallbackStyle << " style\n"; 1670 } 1671 return Style; 1672 } 1673 1674 if (!StyleName.equals_lower("file")) { 1675 if (!getPredefinedStyle(StyleName, Style.Language, &Style)) 1676 llvm::errs() << "Invalid value for -style, using " << FallbackStyle 1677 << " style\n"; 1678 return Style; 1679 } 1680 1681 // Look for .clang-format/_clang-format file in the file's parent directories. 1682 SmallString<128> UnsuitableConfigFiles; 1683 SmallString<128> Path(FileName); 1684 llvm::sys::fs::make_absolute(Path); 1685 for (StringRef Directory = Path; !Directory.empty(); 1686 Directory = llvm::sys::path::parent_path(Directory)) { 1687 if (!llvm::sys::fs::is_directory(Directory)) 1688 continue; 1689 SmallString<128> ConfigFile(Directory); 1690 1691 llvm::sys::path::append(ConfigFile, ".clang-format"); 1692 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 1693 bool IsFile = false; 1694 // Ignore errors from is_regular_file: we only need to know if we can read 1695 // the file or not. 1696 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 1697 1698 if (!IsFile) { 1699 // Try _clang-format too, since dotfiles are not commonly used on Windows. 
1700 ConfigFile = Directory; 1701 llvm::sys::path::append(ConfigFile, "_clang-format"); 1702 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 1703 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 1704 } 1705 1706 if (IsFile) { 1707 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1708 llvm::MemoryBuffer::getFile(ConfigFile.c_str()); 1709 if (std::error_code EC = Text.getError()) { 1710 llvm::errs() << EC.message() << "\n"; 1711 break; 1712 } 1713 if (std::error_code ec = 1714 parseConfiguration(Text.get()->getBuffer(), &Style)) { 1715 if (ec == ParseError::Unsuitable) { 1716 if (!UnsuitableConfigFiles.empty()) 1717 UnsuitableConfigFiles.append(", "); 1718 UnsuitableConfigFiles.append(ConfigFile); 1719 continue; 1720 } 1721 llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message() 1722 << "\n"; 1723 break; 1724 } 1725 DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); 1726 return Style; 1727 } 1728 } 1729 if (!UnsuitableConfigFiles.empty()) { 1730 llvm::errs() << "Configuration file(s) do(es) not support " 1731 << getLanguageName(Style.Language) << ": " 1732 << UnsuitableConfigFiles << "\n"; 1733 } 1734 return Style; 1735 } 1736 1737 } // namespace format 1738 } // namespace clang 1739