1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "ContinuationIndenter.h" 17 #include "TokenAnnotator.h" 18 #include "UnwrappedLineFormatter.h" 19 #include "UnwrappedLineParser.h" 20 #include "WhitespaceManager.h" 21 #include "clang/Basic/Diagnostic.h" 22 #include "clang/Basic/DiagnosticOptions.h" 23 #include "clang/Basic/SourceManager.h" 24 #include "clang/Format/Format.h" 25 #include "clang/Lex/Lexer.h" 26 #include "llvm/ADT/STLExtras.h" 27 #include "llvm/Support/Allocator.h" 28 #include "llvm/Support/Debug.h" 29 #include "llvm/Support/Path.h" 30 #include "llvm/Support/Regex.h" 31 #include "llvm/Support/YAMLTraits.h" 32 #include <queue> 33 #include <string> 34 35 #define DEBUG_TYPE "format-formatter" 36 37 using clang::format::FormatStyle; 38 39 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) 40 41 namespace llvm { 42 namespace yaml { 43 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { 44 static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) { 45 IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); 46 IO.enumCase(Value, "Java", FormatStyle::LK_Java); 47 IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); 48 IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); 49 } 50 }; 51 52 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> { 53 static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) { 54 IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03); 55 IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03); 56 IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11); 57 IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11); 58 IO.enumCase(Value, "Auto", FormatStyle::LS_Auto); 59 } 60 }; 61 62 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> { 63 static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) { 64 IO.enumCase(Value, "Never", FormatStyle::UT_Never); 65 IO.enumCase(Value, "false", FormatStyle::UT_Never); 66 IO.enumCase(Value, "Always", FormatStyle::UT_Always); 67 IO.enumCase(Value, "true", FormatStyle::UT_Always); 68 IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation); 69 } 70 }; 71 72 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> { 73 static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) { 74 IO.enumCase(Value, "None", FormatStyle::SFS_None); 75 IO.enumCase(Value, "false", FormatStyle::SFS_None); 76 IO.enumCase(Value, "All", FormatStyle::SFS_All); 77 IO.enumCase(Value, "true", FormatStyle::SFS_All); 78 IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline); 79 IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty); 80 } 81 }; 82 83 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> { 84 static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) { 85 IO.enumCase(Value, "All", FormatStyle::BOS_All); 86 IO.enumCase(Value, "true", FormatStyle::BOS_All); 87 IO.enumCase(Value, "None", FormatStyle::BOS_None); 88 IO.enumCase(Value, "false", FormatStyle::BOS_None); 89 IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment); 90 } 91 }; 92 93 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> { 94 static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) { 95 IO.enumCase(Value, "Attach", FormatStyle::BS_Attach); 96 IO.enumCase(Value, "Linux", FormatStyle::BS_Linux); 97 IO.enumCase(Value, "Mozilla", FormatStyle::BS_Mozilla); 98 IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup); 99 IO.enumCase(Value, "Allman", FormatStyle::BS_Allman); 100 IO.enumCase(Value, "GNU", FormatStyle::BS_GNU); 101 } 102 }; 103 104 template <> struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> { 105 static void enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) { 106 IO.enumCase(Value, "None", FormatStyle::DRTBS_None); 107 IO.enumCase(Value, "All", FormatStyle::DRTBS_All); 108 IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel); 109 110 // For backward compatibility. 111 IO.enumCase(Value, "false", FormatStyle::DRTBS_None); 112 IO.enumCase(Value, "true", FormatStyle::DRTBS_All); 113 } 114 }; 115 116 template <> 117 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { 118 static void enumeration(IO &IO, 119 FormatStyle::NamespaceIndentationKind &Value) { 120 IO.enumCase(Value, "None", FormatStyle::NI_None); 121 IO.enumCase(Value, "Inner", FormatStyle::NI_Inner); 122 IO.enumCase(Value, "All", FormatStyle::NI_All); 123 } 124 }; 125 126 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { 127 static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) { 128 IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); 129 IO.enumCase(Value, "Left", FormatStyle::PAS_Left); 130 IO.enumCase(Value, "Right", FormatStyle::PAS_Right); 131 132 // For backward compatibility. 133 IO.enumCase(Value, "true", FormatStyle::PAS_Left); 134 IO.enumCase(Value, "false", FormatStyle::PAS_Right); 135 } 136 }; 137 138 template <> 139 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> { 140 static void enumeration(IO &IO, 141 FormatStyle::SpaceBeforeParensOptions &Value) { 142 IO.enumCase(Value, "Never", FormatStyle::SBPO_Never); 143 IO.enumCase(Value, "ControlStatements", 144 FormatStyle::SBPO_ControlStatements); 145 IO.enumCase(Value, "Always", FormatStyle::SBPO_Always); 146 147 // For backward compatibility. 148 IO.enumCase(Value, "false", FormatStyle::SBPO_Never); 149 IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements); 150 } 151 }; 152 153 template <> struct MappingTraits<FormatStyle> { 154 static void mapping(IO &IO, FormatStyle &Style) { 155 // When reading, read the language first, we need it for getPredefinedStyle. 156 IO.mapOptional("Language", Style.Language); 157 158 if (IO.outputting()) { 159 StringRef StylesArray[] = {"LLVM", "Google", "Chromium", 160 "Mozilla", "WebKit", "GNU"}; 161 ArrayRef<StringRef> Styles(StylesArray); 162 for (size_t i = 0, e = Styles.size(); i < e; ++i) { 163 StringRef StyleName(Styles[i]); 164 FormatStyle PredefinedStyle; 165 if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) && 166 Style == PredefinedStyle) { 167 IO.mapOptional("# BasedOnStyle", StyleName); 168 break; 169 } 170 } 171 } else { 172 StringRef BasedOnStyle; 173 IO.mapOptional("BasedOnStyle", BasedOnStyle); 174 if (!BasedOnStyle.empty()) { 175 FormatStyle::LanguageKind OldLanguage = Style.Language; 176 FormatStyle::LanguageKind Language = 177 ((FormatStyle *)IO.getContext())->Language; 178 if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) { 179 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); 180 return; 181 } 182 Style.Language = OldLanguage; 183 } 184 } 185 186 // For backward compatibility. 187 if (!IO.outputting()) { 188 IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment); 189 IO.mapOptional("IndentFunctionDeclarationAfterType", 190 Style.IndentWrappedFunctionNames); 191 IO.mapOptional("PointerBindsToType", Style.PointerAlignment); 192 IO.mapOptional("SpaceAfterControlStatementKeyword", 193 Style.SpaceBeforeParens); 194 } 195 196 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); 197 IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); 198 IO.mapOptional("AlignConsecutiveAssignments", 199 Style.AlignConsecutiveAssignments); 200 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); 201 IO.mapOptional("AlignOperands", Style.AlignOperands); 202 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); 203 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", 204 Style.AllowAllParametersOfDeclarationOnNextLine); 205 IO.mapOptional("AllowShortBlocksOnASingleLine", 206 Style.AllowShortBlocksOnASingleLine); 207 IO.mapOptional("AllowShortCaseLabelsOnASingleLine", 208 Style.AllowShortCaseLabelsOnASingleLine); 209 IO.mapOptional("AllowShortFunctionsOnASingleLine", 210 Style.AllowShortFunctionsOnASingleLine); 211 IO.mapOptional("AllowShortIfStatementsOnASingleLine", 212 Style.AllowShortIfStatementsOnASingleLine); 213 IO.mapOptional("AllowShortLoopsOnASingleLine", 214 Style.AllowShortLoopsOnASingleLine); 215 IO.mapOptional("AlwaysBreakAfterDefinitionReturnType", 216 Style.AlwaysBreakAfterDefinitionReturnType); 217 IO.mapOptional("AlwaysBreakBeforeMultilineStrings", 218 Style.AlwaysBreakBeforeMultilineStrings); 219 IO.mapOptional("AlwaysBreakTemplateDeclarations", 220 Style.AlwaysBreakTemplateDeclarations); 221 IO.mapOptional("BinPackArguments", Style.BinPackArguments); 222 IO.mapOptional("BinPackParameters", Style.BinPackParameters); 223 IO.mapOptional("BreakBeforeBinaryOperators", 224 Style.BreakBeforeBinaryOperators); 225 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); 226 IO.mapOptional("BreakBeforeTernaryOperators", 227 Style.BreakBeforeTernaryOperators); 228 IO.mapOptional("BreakConstructorInitializersBeforeComma", 229 Style.BreakConstructorInitializersBeforeComma); 230 IO.mapOptional("ColumnLimit", Style.ColumnLimit); 231 IO.mapOptional("CommentPragmas", Style.CommentPragmas); 232 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", 233 Style.ConstructorInitializerAllOnOneLineOrOnePerLine); 234 IO.mapOptional("ConstructorInitializerIndentWidth", 235 Style.ConstructorInitializerIndentWidth); 236 IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth); 237 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); 238 IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment); 239 IO.mapOptional("DisableFormat", Style.DisableFormat); 240 IO.mapOptional("ExperimentalAutoDetectBinPacking", 241 Style.ExperimentalAutoDetectBinPacking); 242 IO.mapOptional("ForEachMacros", Style.ForEachMacros); 243 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); 244 IO.mapOptional("IndentWidth", Style.IndentWidth); 245 IO.mapOptional("IndentWrappedFunctionNames", 246 Style.IndentWrappedFunctionNames); 247 IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks", 248 Style.KeepEmptyLinesAtTheStartOfBlocks); 249 IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin); 250 IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd); 251 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); 252 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); 253 IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth); 254 IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); 255 IO.mapOptional("ObjCSpaceBeforeProtocolList", 256 Style.ObjCSpaceBeforeProtocolList); 257 IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", 258 Style.PenaltyBreakBeforeFirstCallParameter); 259 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); 260 IO.mapOptional("PenaltyBreakFirstLessLess", 261 Style.PenaltyBreakFirstLessLess); 262 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); 263 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); 264 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", 265 Style.PenaltyReturnTypeOnItsOwnLine); 266 IO.mapOptional("PointerAlignment", Style.PointerAlignment); 267 IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); 268 IO.mapOptional("SpaceBeforeAssignmentOperators", 269 Style.SpaceBeforeAssignmentOperators); 270 IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); 271 IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses); 272 IO.mapOptional("SpacesBeforeTrailingComments", 273 Style.SpacesBeforeTrailingComments); 274 IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); 275 IO.mapOptional("SpacesInContainerLiterals", 276 Style.SpacesInContainerLiterals); 277 IO.mapOptional("SpacesInCStyleCastParentheses", 278 Style.SpacesInCStyleCastParentheses); 279 IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses); 280 IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets); 281 IO.mapOptional("Standard", Style.Standard); 282 IO.mapOptional("TabWidth", Style.TabWidth); 283 IO.mapOptional("UseTab", Style.UseTab); 284 } 285 }; 286 287 // Allows to read vector<FormatStyle> while keeping default values. 288 // IO.getContext() should contain a pointer to the FormatStyle structure, that 289 // will be used to get default values for missing keys. 290 // If the first element has no Language specified, it will be treated as the 291 // default one for the following elements. 292 template <> struct DocumentListTraits<std::vector<FormatStyle>> { 293 static size_t size(IO &IO, std::vector<FormatStyle> &Seq) { 294 return Seq.size(); 295 } 296 static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq, 297 size_t Index) { 298 if (Index >= Seq.size()) { 299 assert(Index == Seq.size()); 300 FormatStyle Template; 301 if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) { 302 Template = Seq[0]; 303 } else { 304 Template = *((const FormatStyle *)IO.getContext()); 305 Template.Language = FormatStyle::LK_None; 306 } 307 Seq.resize(Index + 1, Template); 308 } 309 return Seq[Index]; 310 } 311 }; 312 } 313 } 314 315 namespace clang { 316 namespace format { 317 318 const std::error_category &getParseCategory() { 319 static ParseErrorCategory C; 320 return C; 321 } 322 std::error_code make_error_code(ParseError e) { 323 return std::error_code(static_cast<int>(e), getParseCategory()); 324 } 325 326 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT { 327 return "clang-format.parse_error"; 328 } 329 330 std::string ParseErrorCategory::message(int EV) const { 331 switch (static_cast<ParseError>(EV)) { 332 case ParseError::Success: 333 return "Success"; 334 case ParseError::Error: 335 return "Invalid argument"; 336 case ParseError::Unsuitable: 337 return "Unsuitable"; 338 } 339 llvm_unreachable("unexpected parse error"); 340 } 341 342 FormatStyle getLLVMStyle() { 343 FormatStyle LLVMStyle; 344 LLVMStyle.Language = FormatStyle::LK_Cpp; 345 LLVMStyle.AccessModifierOffset = -2; 346 LLVMStyle.AlignEscapedNewlinesLeft = false; 347 LLVMStyle.AlignAfterOpenBracket = true; 348 LLVMStyle.AlignOperands = true; 349 LLVMStyle.AlignTrailingComments = true; 350 LLVMStyle.AlignConsecutiveAssignments = false; 351 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 352 LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; 353 LLVMStyle.AllowShortBlocksOnASingleLine = false; 354 LLVMStyle.AllowShortCaseLabelsOnASingleLine = false; 355 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 356 LLVMStyle.AllowShortLoopsOnASingleLine = false; 357 LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; 358 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 359 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 360 LLVMStyle.BinPackParameters = true; 361 LLVMStyle.BinPackArguments = true; 362 LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; 363 LLVMStyle.BreakBeforeTernaryOperators = true; 364 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 365 LLVMStyle.BreakConstructorInitializersBeforeComma = false; 366 LLVMStyle.ColumnLimit = 80; 367 LLVMStyle.CommentPragmas = "^ IWYU pragma:"; 368 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 369 LLVMStyle.ConstructorInitializerIndentWidth = 4; 370 LLVMStyle.ContinuationIndentWidth = 4; 371 LLVMStyle.Cpp11BracedListStyle = true; 372 LLVMStyle.DerivePointerAlignment = false; 373 LLVMStyle.ExperimentalAutoDetectBinPacking = false; 374 LLVMStyle.ForEachMacros.push_back("foreach"); 375 LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); 376 LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); 377 LLVMStyle.IndentCaseLabels = false; 378 LLVMStyle.IndentWrappedFunctionNames = false; 379 LLVMStyle.IndentWidth = 2; 380 LLVMStyle.TabWidth = 8; 381 LLVMStyle.MaxEmptyLinesToKeep = 1; 382 LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; 383 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; 384 LLVMStyle.ObjCBlockIndentWidth = 2; 385 LLVMStyle.ObjCSpaceAfterProperty = false; 386 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 387 LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; 388 LLVMStyle.SpacesBeforeTrailingComments = 1; 389 LLVMStyle.Standard = FormatStyle::LS_Cpp11; 390 LLVMStyle.UseTab = FormatStyle::UT_Never; 391 LLVMStyle.SpacesInParentheses = false; 392 LLVMStyle.SpacesInSquareBrackets = false; 393 LLVMStyle.SpaceInEmptyParentheses = false; 394 LLVMStyle.SpacesInContainerLiterals = true; 395 LLVMStyle.SpacesInCStyleCastParentheses = false; 396 LLVMStyle.SpaceAfterCStyleCast = false; 397 LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; 398 LLVMStyle.SpaceBeforeAssignmentOperators = true; 399 LLVMStyle.SpacesInAngles = false; 400 401 LLVMStyle.PenaltyBreakComment = 300; 402 LLVMStyle.PenaltyBreakFirstLessLess = 120; 403 LLVMStyle.PenaltyBreakString = 1000; 404 LLVMStyle.PenaltyExcessCharacter = 1000000; 405 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 406 LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; 407 408 LLVMStyle.DisableFormat = false; 409 410 return LLVMStyle; 411 } 412 413 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { 414 FormatStyle GoogleStyle = getLLVMStyle(); 415 GoogleStyle.Language = Language; 416 417 GoogleStyle.AccessModifierOffset = -1; 418 GoogleStyle.AlignEscapedNewlinesLeft = true; 419 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 420 GoogleStyle.AllowShortLoopsOnASingleLine = true; 421 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 422 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 423 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 424 GoogleStyle.DerivePointerAlignment = true; 425 GoogleStyle.IndentCaseLabels = true; 426 GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; 427 GoogleStyle.ObjCSpaceAfterProperty = false; 428 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 429 GoogleStyle.PointerAlignment = FormatStyle::PAS_Left; 430 GoogleStyle.SpacesBeforeTrailingComments = 2; 431 GoogleStyle.Standard = FormatStyle::LS_Auto; 432 433 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 434 GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; 435 436 if (Language == FormatStyle::LK_Java) { 437 GoogleStyle.AlignAfterOpenBracket = false; 438 GoogleStyle.AlignOperands = false; 439 GoogleStyle.AlignTrailingComments = false; 440 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; 441 GoogleStyle.AllowShortIfStatementsOnASingleLine = false; 442 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; 443 GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; 444 GoogleStyle.ColumnLimit = 100; 445 GoogleStyle.SpaceAfterCStyleCast = true; 446 GoogleStyle.SpacesBeforeTrailingComments = 1; 447 } else if (Language == FormatStyle::LK_JavaScript) { 448 GoogleStyle.BreakBeforeTernaryOperators = false; 449 GoogleStyle.MaxEmptyLinesToKeep = 3; 450 GoogleStyle.SpacesInContainerLiterals = false; 451 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 452 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; 453 } else if (Language == FormatStyle::LK_Proto) { 454 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; 455 GoogleStyle.SpacesInContainerLiterals = false; 456 } 457 458 return GoogleStyle; 459 } 460 461 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { 462 FormatStyle ChromiumStyle = getGoogleStyle(Language); 463 if (Language == FormatStyle::LK_Java) { 464 ChromiumStyle.AllowShortIfStatementsOnASingleLine = true; 465 ChromiumStyle.IndentWidth = 4; 466 ChromiumStyle.ContinuationIndentWidth = 8; 467 } else { 468 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 469 ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 470 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 471 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 472 ChromiumStyle.BinPackParameters = false; 473 ChromiumStyle.DerivePointerAlignment = false; 474 } 475 ChromiumStyle.MacroBlockBegin = "^IPC_BEGIN_MESSAGE_MAP$"; 476 ChromiumStyle.MacroBlockBegin = "^IPC_END_MESSAGE_MAP$"; 477 return ChromiumStyle; 478 } 479 480 FormatStyle getMozillaStyle() { 481 FormatStyle MozillaStyle = getLLVMStyle(); 482 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 483 MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 484 MozillaStyle.AlwaysBreakAfterDefinitionReturnType = 485 FormatStyle::DRTBS_TopLevel; 486 MozillaStyle.AlwaysBreakTemplateDeclarations = true; 487 MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla; 488 MozillaStyle.BreakConstructorInitializersBeforeComma = true; 489 MozillaStyle.ConstructorInitializerIndentWidth = 2; 490 MozillaStyle.ContinuationIndentWidth = 2; 491 MozillaStyle.Cpp11BracedListStyle = false; 492 MozillaStyle.IndentCaseLabels = true; 493 MozillaStyle.ObjCSpaceAfterProperty = true; 494 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 495 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 496 MozillaStyle.PointerAlignment = FormatStyle::PAS_Left; 497 return MozillaStyle; 498 } 499 500 FormatStyle getWebKitStyle() { 501 FormatStyle Style = getLLVMStyle(); 502 Style.AccessModifierOffset = -4; 503 Style.AlignAfterOpenBracket = false; 504 Style.AlignOperands = false; 505 Style.AlignTrailingComments = false; 506 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 507 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; 508 Style.BreakConstructorInitializersBeforeComma = true; 509 Style.Cpp11BracedListStyle = false; 510 Style.ColumnLimit = 0; 511 Style.IndentWidth = 4; 512 Style.NamespaceIndentation = FormatStyle::NI_Inner; 513 Style.ObjCBlockIndentWidth = 4; 514 Style.ObjCSpaceAfterProperty = true; 515 Style.PointerAlignment = FormatStyle::PAS_Left; 516 Style.Standard = FormatStyle::LS_Cpp03; 517 return Style; 518 } 519 520 FormatStyle getGNUStyle() { 521 FormatStyle Style = getLLVMStyle(); 522 Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All; 523 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 524 Style.BreakBeforeBraces = FormatStyle::BS_GNU; 525 Style.BreakBeforeTernaryOperators = true; 526 Style.Cpp11BracedListStyle = false; 527 Style.ColumnLimit = 79; 528 Style.SpaceBeforeParens = FormatStyle::SBPO_Always; 529 Style.Standard = FormatStyle::LS_Cpp03; 530 return Style; 531 } 532 533 FormatStyle getNoStyle() { 534 FormatStyle NoStyle = getLLVMStyle(); 535 NoStyle.DisableFormat = true; 536 return NoStyle; 537 } 538 539 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, 540 FormatStyle *Style) { 541 if (Name.equals_lower("llvm")) { 542 *Style = getLLVMStyle(); 543 } else if (Name.equals_lower("chromium")) { 544 *Style = getChromiumStyle(Language); 545 } else if (Name.equals_lower("mozilla")) { 546 *Style = getMozillaStyle(); 547 } else if (Name.equals_lower("google")) { 548 *Style = getGoogleStyle(Language); 549 } else if (Name.equals_lower("webkit")) { 550 *Style = getWebKitStyle(); 551 } else if (Name.equals_lower("gnu")) { 552 *Style = getGNUStyle(); 553 } else if (Name.equals_lower("none")) { 554 *Style = getNoStyle(); 555 } else { 556 return false; 557 } 558 559 Style->Language = Language; 560 return true; 561 } 562 563 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 564 assert(Style); 565 FormatStyle::LanguageKind Language = Style->Language; 566 assert(Language != FormatStyle::LK_None); 567 if (Text.trim().empty()) 568 return make_error_code(ParseError::Error); 569 570 std::vector<FormatStyle> Styles; 571 llvm::yaml::Input Input(Text); 572 // DocumentListTraits<vector<FormatStyle>> uses the context to get default 573 // values for the fields, keys for which are missing from the configuration. 574 // Mapping also uses the context to get the language to find the correct 575 // base style. 576 Input.setContext(Style); 577 Input >> Styles; 578 if (Input.error()) 579 return Input.error(); 580 581 for (unsigned i = 0; i < Styles.size(); ++i) { 582 // Ensures that only the first configuration can skip the Language option. 583 if (Styles[i].Language == FormatStyle::LK_None && i != 0) 584 return make_error_code(ParseError::Error); 585 // Ensure that each language is configured at most once. 586 for (unsigned j = 0; j < i; ++j) { 587 if (Styles[i].Language == Styles[j].Language) { 588 DEBUG(llvm::dbgs() 589 << "Duplicate languages in the config file on positions " << j 590 << " and " << i << "\n"); 591 return make_error_code(ParseError::Error); 592 } 593 } 594 } 595 // Look for a suitable configuration starting from the end, so we can 596 // find the configuration for the specific language first, and the default 597 // configuration (which can only be at slot 0) after it. 598 for (int i = Styles.size() - 1; i >= 0; --i) { 599 if (Styles[i].Language == Language || 600 Styles[i].Language == FormatStyle::LK_None) { 601 *Style = Styles[i]; 602 Style->Language = Language; 603 return make_error_code(ParseError::Success); 604 } 605 } 606 return make_error_code(ParseError::Unsuitable); 607 } 608 609 std::string configurationAsText(const FormatStyle &Style) { 610 std::string Text; 611 llvm::raw_string_ostream Stream(Text); 612 llvm::yaml::Output Output(Stream); 613 // We use the same mapping method for input and output, so we need a non-const 614 // reference here. 615 FormatStyle NonConstStyle = Style; 616 Output << NonConstStyle; 617 return Stream.str(); 618 } 619 620 namespace { 621 622 class FormatTokenLexer { 623 public: 624 FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style, 625 encoding::Encoding Encoding) 626 : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), 627 LessStashed(false), Column(0), TrailingWhitespace(0), 628 SourceMgr(SourceMgr), ID(ID), Style(Style), 629 IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), 630 Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false), 631 MacroBlockBeginRegex(Style.MacroBlockBegin), 632 MacroBlockEndRegex(Style.MacroBlockEnd) { 633 Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, 634 getFormattingLangOpts(Style))); 635 Lex->SetKeepWhitespaceMode(true); 636 637 for (const std::string &ForEachMacro : Style.ForEachMacros) 638 ForEachMacros.push_back(&IdentTable.get(ForEachMacro)); 639 std::sort(ForEachMacros.begin(), ForEachMacros.end()); 640 } 641 642 ArrayRef<FormatToken *> lex() { 643 assert(Tokens.empty()); 644 assert(FirstInLineIndex == 0); 645 do { 646 Tokens.push_back(getNextToken()); 647 tryMergePreviousTokens(); 648 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) 649 FirstInLineIndex = Tokens.size() - 1; 650 } while (Tokens.back()->Tok.isNot(tok::eof)); 651 return Tokens; 652 } 653 654 const AdditionalKeywords &getKeywords() { return Keywords; } 655 656 private: 657 void tryMergePreviousTokens() { 658 if (tryMerge_TMacro()) 659 return; 660 if (tryMergeConflictMarkers()) 661 return; 662 if (tryMergeLessLess()) 663 return; 664 665 if (Style.Language == FormatStyle::LK_JavaScript) { 666 if (tryMergeJSRegexLiteral()) 667 return; 668 if (tryMergeEscapeSequence()) 669 return; 670 if (tryMergeTemplateString()) 671 return; 672 673 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; 674 static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal, 675 tok::equal}; 676 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater, 677 tok::greaterequal}; 678 static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater}; 679 // FIXME: Investigate what token type gives the correct operator priority. 680 if (tryMergeTokens(JSIdentity, TT_BinaryOperator)) 681 return; 682 if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator)) 683 return; 684 if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator)) 685 return; 686 if (tryMergeTokens(JSRightArrow, TT_JsFatArrow)) 687 return; 688 } 689 } 690 691 bool tryMergeLessLess() { 692 // Merge X,less,less,Y into X,lessless,Y unless X or Y is less. 693 if (Tokens.size() < 3) 694 return false; 695 696 bool FourthTokenIsLess = false; 697 if (Tokens.size() > 3) 698 FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less); 699 700 auto First = Tokens.end() - 3; 701 if (First[2]->is(tok::less) || First[1]->isNot(tok::less) || 702 First[0]->isNot(tok::less) || FourthTokenIsLess) 703 return false; 704 705 // Only merge if there currently is no whitespace between the two "<". 706 if (First[1]->WhitespaceRange.getBegin() != 707 First[1]->WhitespaceRange.getEnd()) 708 return false; 709 710 First[0]->Tok.setKind(tok::lessless); 711 First[0]->TokenText = "<<"; 712 First[0]->ColumnWidth += 1; 713 Tokens.erase(Tokens.end() - 2); 714 return true; 715 } 716 717 bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) { 718 if (Tokens.size() < Kinds.size()) 719 return false; 720 721 SmallVectorImpl<FormatToken *>::const_iterator First = 722 Tokens.end() - Kinds.size(); 723 if (!First[0]->is(Kinds[0])) 724 return false; 725 unsigned AddLength = 0; 726 for (unsigned i = 1; i < Kinds.size(); ++i) { 727 if (!First[i]->is(Kinds[i]) || 728 First[i]->WhitespaceRange.getBegin() != 729 First[i]->WhitespaceRange.getEnd()) 730 return false; 731 AddLength += First[i]->TokenText.size(); 732 } 733 Tokens.resize(Tokens.size() - Kinds.size() + 1); 734 First[0]->TokenText = StringRef(First[0]->TokenText.data(), 735 First[0]->TokenText.size() + AddLength); 736 First[0]->ColumnWidth += AddLength; 737 First[0]->Type = NewType; 738 return true; 739 } 740 741 // Tries to merge an escape sequence, i.e. a "\\" and the following 742 // character. Use e.g. inside JavaScript regex literals. 743 bool tryMergeEscapeSequence() { 744 if (Tokens.size() < 2) 745 return false; 746 FormatToken *Previous = Tokens[Tokens.size() - 2]; 747 if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\") 748 return false; 749 ++Previous->ColumnWidth; 750 StringRef Text = Previous->TokenText; 751 Previous->TokenText = StringRef(Text.data(), Text.size() + 1); 752 resetLexer(SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 1); 753 Tokens.resize(Tokens.size() - 1); 754 Column = Previous->OriginalColumn + Previous->ColumnWidth; 755 return true; 756 } 757 758 // Try to determine whether the current token ends a JavaScript regex literal. 759 // We heuristically assume that this is a regex literal if we find two 760 // unescaped slashes on a line and the token before the first slash is one of 761 // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by 762 // a division. 763 bool tryMergeJSRegexLiteral() { 764 if (Tokens.size() < 2) 765 return false; 766 767 // If this is a string literal with a slash inside, compute the slash's 768 // offset and try to find the beginning of the regex literal. 769 // Also look at tok::unknown, as it can be an unterminated char literal. 770 size_t SlashInStringPos = StringRef::npos; 771 if (Tokens.back()->isOneOf(tok::string_literal, tok::char_constant, 772 tok::unknown)) { 773 // Start search from position 1 as otherwise, this is an unknown token 774 // for an unterminated /*-comment which is handled elsewhere. 775 SlashInStringPos = Tokens.back()->TokenText.find('/', 1); 776 if (SlashInStringPos == StringRef::npos) 777 return false; 778 } 779 780 // If a regex literal ends in "\//", this gets represented by an unknown 781 // token "\" and a comment. 782 bool MightEndWithEscapedSlash = 783 Tokens.back()->is(tok::comment) && 784 Tokens.back()->TokenText.startswith("//") && 785 Tokens[Tokens.size() - 2]->TokenText == "\\"; 786 if (!MightEndWithEscapedSlash && SlashInStringPos == StringRef::npos && 787 (Tokens.back()->isNot(tok::slash) || 788 (Tokens[Tokens.size() - 2]->is(tok::unknown) && 789 Tokens[Tokens.size() - 2]->TokenText == "\\"))) 790 return false; 791 792 unsigned TokenCount = 0; 793 for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) { 794 ++TokenCount; 795 auto Prev = I + 1; 796 while (Prev != E && Prev[0]->is(tok::comment)) 797 ++Prev; 798 if (I[0]->isOneOf(tok::slash, tok::slashequal) && 799 (Prev == E || 800 ((Prev[0]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, 801 tok::r_brace, tok::exclaim, tok::l_square, 802 tok::colon, tok::comma, tok::question, 803 tok::kw_return) || 804 Prev[0]->isBinaryOperator())))) { 805 unsigned LastColumn = Tokens.back()->OriginalColumn; 806 SourceLocation Loc = Tokens.back()->Tok.getLocation(); 807 if (MightEndWithEscapedSlash) { 808 // This regex literal ends in '\//'. Skip past the '//' of the last 809 // token and re-start lexing from there. 810 resetLexer(SourceMgr.getFileOffset(Loc) + 2); 811 } else if (SlashInStringPos != StringRef::npos) { 812 // This regex literal ends in a string_literal with a slash inside. 813 // Calculate end column and reset lexer appropriately. 814 resetLexer(SourceMgr.getFileOffset(Loc) + SlashInStringPos + 1); 815 LastColumn += SlashInStringPos; 816 } 817 Tokens.resize(Tokens.size() - TokenCount); 818 Tokens.back()->Tok.setKind(tok::unknown); 819 Tokens.back()->Type = TT_RegexLiteral; 820 // Treat regex literals like other string_literals. 821 Tokens.back()->Tok.setKind(tok::string_literal); 822 Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn; 823 return true; 824 } 825 826 // There can't be a newline inside a regex literal. 827 if (I[0]->NewlinesBefore > 0) 828 return false; 829 } 830 return false; 831 } 832 833 bool tryMergeTemplateString() { 834 if (Tokens.size() < 2) 835 return false; 836 837 FormatToken *EndBacktick = Tokens.back(); 838 // Backticks get lexed as tok::unknown tokens. If a template string contains 839 // a comment start, it gets lexed as a tok::comment, or tok::unknown if 840 // unterminated. 841 if (!EndBacktick->isOneOf(tok::comment, tok::string_literal, 842 tok::char_constant, tok::unknown)) 843 return false; 844 size_t CommentBacktickPos = EndBacktick->TokenText.find('`'); 845 // Unknown token that's not actually a backtick, or a comment that doesn't 846 // contain a backtick. 847 if (CommentBacktickPos == StringRef::npos) 848 return false; 849 850 unsigned TokenCount = 0; 851 bool IsMultiline = false; 852 unsigned EndColumnInFirstLine = 853 EndBacktick->OriginalColumn + EndBacktick->ColumnWidth; 854 for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) { 855 ++TokenCount; 856 if (I[0]->IsMultiline) 857 IsMultiline = true; 858 859 // If there was a preceding template string, this must be the start of a 860 // template string, not the end. 861 if (I[0]->is(TT_TemplateString)) 862 return false; 863 864 if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") { 865 // Keep track of the rhs offset of the last token to wrap across lines - 866 // its the rhs offset of the first line of the template string, used to 867 // determine its width. 868 if (I[0]->IsMultiline) 869 EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth; 870 // If the token has newlines, the token before it (if it exists) is the 871 // rhs end of the previous line. 872 if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) { 873 EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth; 874 IsMultiline = true; 875 } 876 continue; 877 } 878 879 Tokens.resize(Tokens.size() - TokenCount); 880 Tokens.back()->Type = TT_TemplateString; 881 const char *EndOffset = 882 EndBacktick->TokenText.data() + 1 + CommentBacktickPos; 883 if (CommentBacktickPos != 0) { 884 // If the backtick was not the first character (e.g. in a comment), 885 // re-lex after the backtick position. 886 SourceLocation Loc = EndBacktick->Tok.getLocation(); 887 resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1); 888 } 889 Tokens.back()->TokenText = 890 StringRef(Tokens.back()->TokenText.data(), 891 EndOffset - Tokens.back()->TokenText.data()); 892 893 unsigned EndOriginalColumn = EndBacktick->OriginalColumn; 894 if (EndOriginalColumn == 0) { 895 SourceLocation Loc = EndBacktick->Tok.getLocation(); 896 EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc); 897 } 898 // If the ` is further down within the token (e.g. in a comment). 899 EndOriginalColumn += CommentBacktickPos; 900 901 if (IsMultiline) { 902 // ColumnWidth is from backtick to last token in line. 903 // LastLineColumnWidth is 0 to backtick. 904 // x = `some content 905 // until here`; 906 Tokens.back()->ColumnWidth = 907 EndColumnInFirstLine - Tokens.back()->OriginalColumn; 908 // +1 for the ` itself. 909 Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1; 910 Tokens.back()->IsMultiline = true; 911 } else { 912 // Token simply spans from start to end, +1 for the ` itself. 913 Tokens.back()->ColumnWidth = 914 EndOriginalColumn - Tokens.back()->OriginalColumn + 1; 915 } 916 return true; 917 } 918 return false; 919 } 920 921 bool tryMerge_TMacro() { 922 if (Tokens.size() < 4) 923 return false; 924 FormatToken *Last = Tokens.back(); 925 if (!Last->is(tok::r_paren)) 926 return false; 927 928 FormatToken *String = Tokens[Tokens.size() - 2]; 929 if (!String->is(tok::string_literal) || String->IsMultiline) 930 return false; 931 932 if (!Tokens[Tokens.size() - 3]->is(tok::l_paren)) 933 return false; 934 935 FormatToken *Macro = Tokens[Tokens.size() - 4]; 936 if (Macro->TokenText != "_T") 937 return false; 938 939 const char *Start = Macro->TokenText.data(); 940 const char *End = Last->TokenText.data() + Last->TokenText.size(); 941 String->TokenText = StringRef(Start, End - Start); 942 String->IsFirst = Macro->IsFirst; 943 String->LastNewlineOffset = Macro->LastNewlineOffset; 944 String->WhitespaceRange = Macro->WhitespaceRange; 945 String->OriginalColumn = Macro->OriginalColumn; 946 String->ColumnWidth = encoding::columnWidthWithTabs( 947 String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); 948 String->NewlinesBefore = Macro->NewlinesBefore; 949 String->HasUnescapedNewline = Macro->HasUnescapedNewline; 950 951 Tokens.pop_back(); 952 Tokens.pop_back(); 953 Tokens.pop_back(); 954 Tokens.back() = String; 955 return true; 956 } 957 958 bool tryMergeConflictMarkers() { 959 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof)) 960 return false; 961 962 // Conflict lines look like: 963 // <marker> <text from the vcs> 964 // For example: 965 // >>>>>>> /file/in/file/system at revision 1234 966 // 967 // We merge all tokens in a line that starts with a conflict marker 968 // into a single token with a special token type that the unwrapped line 969 // parser will use to correctly rebuild the underlying code. 970 971 FileID ID; 972 // Get the position of the first token in the line. 973 unsigned FirstInLineOffset; 974 std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc( 975 Tokens[FirstInLineIndex]->getStartOfNonWhitespace()); 976 StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer(); 977 // Calculate the offset of the start of the current line. 978 auto LineOffset = Buffer.rfind('\n', FirstInLineOffset); 979 if (LineOffset == StringRef::npos) { 980 LineOffset = 0; 981 } else { 982 ++LineOffset; 983 } 984 985 auto FirstSpace = Buffer.find_first_of(" \n", LineOffset); 986 StringRef LineStart; 987 if (FirstSpace == StringRef::npos) { 988 LineStart = Buffer.substr(LineOffset); 989 } else { 990 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset); 991 } 992 993 TokenType Type = TT_Unknown; 994 if (LineStart == "<<<<<<<" || LineStart == ">>>>") { 995 Type = TT_ConflictStart; 996 } else if (LineStart == "|||||||" || LineStart == "=======" || 997 LineStart == "====") { 998 Type = TT_ConflictAlternative; 999 } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") { 1000 Type = TT_ConflictEnd; 1001 } 1002 1003 if (Type != TT_Unknown) { 1004 FormatToken *Next = Tokens.back(); 1005 1006 Tokens.resize(FirstInLineIndex + 1); 1007 // We do not need to build a complete token here, as we will skip it 1008 // during parsing anyway (as we must not touch whitespace around conflict 1009 // markers). 1010 Tokens.back()->Type = Type; 1011 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype); 1012 1013 Tokens.push_back(Next); 1014 return true; 1015 } 1016 1017 return false; 1018 } 1019 1020 FormatToken *getStashedToken() { 1021 // Create a synthesized second '>' or '<' token. 1022 Token Tok = FormatTok->Tok; 1023 StringRef TokenText = FormatTok->TokenText; 1024 1025 unsigned OriginalColumn = FormatTok->OriginalColumn; 1026 FormatTok = new (Allocator.Allocate()) FormatToken; 1027 FormatTok->Tok = Tok; 1028 SourceLocation TokLocation = 1029 FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1); 1030 FormatTok->Tok.setLocation(TokLocation); 1031 FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation); 1032 FormatTok->TokenText = TokenText; 1033 FormatTok->ColumnWidth = 1; 1034 FormatTok->OriginalColumn = OriginalColumn + 1; 1035 1036 return FormatTok; 1037 } 1038 1039 FormatToken *getNextToken() { 1040 if (GreaterStashed) { 1041 GreaterStashed = false; 1042 return getStashedToken(); 1043 } 1044 if (LessStashed) { 1045 LessStashed = false; 1046 return getStashedToken(); 1047 } 1048 1049 FormatTok = new (Allocator.Allocate()) FormatToken; 1050 readRawToken(*FormatTok); 1051 SourceLocation WhitespaceStart = 1052 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); 1053 FormatTok->IsFirst = IsFirstToken; 1054 IsFirstToken = false; 1055 1056 // Consume and record whitespace until we find a significant token. 1057 unsigned WhitespaceLength = TrailingWhitespace; 1058 while (FormatTok->Tok.is(tok::unknown)) { 1059 StringRef Text = FormatTok->TokenText; 1060 auto EscapesNewline = [&](int pos) { 1061 // A '\r' here is just part of '\r\n'. Skip it. 1062 if (pos >= 0 && Text[pos] == '\r') 1063 --pos; 1064 // See whether there is an odd number of '\' before this. 1065 unsigned count = 0; 1066 for (; pos >= 0; --pos, ++count) 1067 if (Text[pos] != '\\') 1068 break; 1069 return count & 1; 1070 }; 1071 // FIXME: This miscounts tok:unknown tokens that are not just 1072 // whitespace, e.g. a '`' character. 1073 for (int i = 0, e = Text.size(); i != e; ++i) { 1074 switch (Text[i]) { 1075 case '\n': 1076 ++FormatTok->NewlinesBefore; 1077 FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1); 1078 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; 1079 Column = 0; 1080 break; 1081 case '\r': 1082 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; 1083 Column = 0; 1084 break; 1085 case '\f': 1086 case '\v': 1087 Column = 0; 1088 break; 1089 case ' ': 1090 ++Column; 1091 break; 1092 case '\t': 1093 Column += Style.TabWidth - Column % Style.TabWidth; 1094 break; 1095 case '\\': 1096 if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n')) 1097 FormatTok->Type = TT_ImplicitStringLiteral; 1098 break; 1099 default: 1100 FormatTok->Type = TT_ImplicitStringLiteral; 1101 break; 1102 } 1103 } 1104 1105 if (FormatTok->is(TT_ImplicitStringLiteral)) 1106 break; 1107 WhitespaceLength += FormatTok->Tok.getLength(); 1108 1109 readRawToken(*FormatTok); 1110 } 1111 1112 // In case the token starts with escaped newlines, we want to 1113 // take them into account as whitespace - this pattern is quite frequent 1114 // in macro definitions. 1115 // FIXME: Add a more explicit test. 1116 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && 1117 FormatTok->TokenText[1] == '\n') { 1118 ++FormatTok->NewlinesBefore; 1119 WhitespaceLength += 2; 1120 FormatTok->LastNewlineOffset = 2; 1121 Column = 0; 1122 FormatTok->TokenText = FormatTok->TokenText.substr(2); 1123 } 1124 1125 FormatTok->WhitespaceRange = SourceRange( 1126 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); 1127 1128 FormatTok->OriginalColumn = Column; 1129 1130 TrailingWhitespace = 0; 1131 if (FormatTok->Tok.is(tok::comment)) { 1132 // FIXME: Add the trimmed whitespace to Column. 1133 StringRef UntrimmedText = FormatTok->TokenText; 1134 FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f"); 1135 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); 1136 } else if (FormatTok->Tok.is(tok::raw_identifier)) { 1137 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); 1138 FormatTok->Tok.setIdentifierInfo(&Info); 1139 FormatTok->Tok.setKind(Info.getTokenID()); 1140 if (Style.Language == FormatStyle::LK_Java && 1141 FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete)) { 1142 FormatTok->Tok.setKind(tok::identifier); 1143 FormatTok->Tok.setIdentifierInfo(nullptr); 1144 } 1145 } else if (FormatTok->Tok.is(tok::greatergreater)) { 1146 FormatTok->Tok.setKind(tok::greater); 1147 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1148 GreaterStashed = true; 1149 } else if (FormatTok->Tok.is(tok::lessless)) { 1150 FormatTok->Tok.setKind(tok::less); 1151 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1152 LessStashed = true; 1153 } 1154 1155 // Now FormatTok is the next non-whitespace token. 1156 1157 StringRef Text = FormatTok->TokenText; 1158 size_t FirstNewlinePos = Text.find('\n'); 1159 if (FirstNewlinePos == StringRef::npos) { 1160 // FIXME: ColumnWidth actually depends on the start column, we need to 1161 // take this into account when the token is moved. 1162 FormatTok->ColumnWidth = 1163 encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding); 1164 Column += FormatTok->ColumnWidth; 1165 } else { 1166 FormatTok->IsMultiline = true; 1167 // FIXME: ColumnWidth actually depends on the start column, we need to 1168 // take this into account when the token is moved. 1169 FormatTok->ColumnWidth = encoding::columnWidthWithTabs( 1170 Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding); 1171 1172 // The last line of the token always starts in column 0. 1173 // Thus, the length can be precomputed even in the presence of tabs. 1174 FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs( 1175 Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, 1176 Encoding); 1177 Column = FormatTok->LastLineColumnWidth; 1178 } 1179 1180 if (Style.Language == FormatStyle::LK_Cpp) { 1181 if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() && 1182 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() == 1183 tok::pp_define) && 1184 std::find(ForEachMacros.begin(), ForEachMacros.end(), 1185 FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) { 1186 FormatTok->Type = TT_ForEachMacro; 1187 } else if (FormatTok->is(tok::identifier)) { 1188 if (MacroBlockBeginRegex.match(Text)) { 1189 FormatTok->Type = TT_MacroBlockBegin; 1190 } else if (MacroBlockEndRegex.match(Text)) { 1191 FormatTok->Type = TT_MacroBlockEnd; 1192 } 1193 } 1194 } 1195 1196 return FormatTok; 1197 } 1198 1199 FormatToken *FormatTok; 1200 bool IsFirstToken; 1201 bool GreaterStashed, LessStashed; 1202 unsigned Column; 1203 unsigned TrailingWhitespace; 1204 std::unique_ptr<Lexer> Lex; 1205 SourceManager &SourceMgr; 1206 FileID ID; 1207 FormatStyle &Style; 1208 IdentifierTable IdentTable; 1209 AdditionalKeywords Keywords; 1210 encoding::Encoding Encoding; 1211 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 1212 // Index (in 'Tokens') of the last token that starts a new line. 1213 unsigned FirstInLineIndex; 1214 SmallVector<FormatToken *, 16> Tokens; 1215 SmallVector<IdentifierInfo *, 8> ForEachMacros; 1216 1217 bool FormattingDisabled; 1218 1219 llvm::Regex MacroBlockBeginRegex; 1220 llvm::Regex MacroBlockEndRegex; 1221 1222 void readRawToken(FormatToken &Tok) { 1223 Lex->LexFromRawLexer(Tok.Tok); 1224 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), 1225 Tok.Tok.getLength()); 1226 // For formatting, treat unterminated string literals like normal string 1227 // literals. 1228 if (Tok.is(tok::unknown)) { 1229 if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') { 1230 Tok.Tok.setKind(tok::string_literal); 1231 Tok.IsUnterminatedLiteral = true; 1232 } else if (Style.Language == FormatStyle::LK_JavaScript && 1233 Tok.TokenText == "''") { 1234 Tok.Tok.setKind(tok::char_constant); 1235 } 1236 } 1237 1238 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" || 1239 Tok.TokenText == "/* clang-format on */")) { 1240 FormattingDisabled = false; 1241 } 1242 1243 Tok.Finalized = FormattingDisabled; 1244 1245 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" || 1246 Tok.TokenText == "/* clang-format off */")) { 1247 FormattingDisabled = true; 1248 } 1249 } 1250 1251 void resetLexer(unsigned Offset) { 1252 StringRef Buffer = SourceMgr.getBufferData(ID); 1253 Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), 1254 getFormattingLangOpts(Style), Buffer.begin(), 1255 Buffer.begin() + Offset, Buffer.end())); 1256 Lex->SetKeepWhitespaceMode(true); 1257 TrailingWhitespace = 0; 1258 } 1259 }; 1260 1261 static StringRef getLanguageName(FormatStyle::LanguageKind Language) { 1262 switch (Language) { 1263 case FormatStyle::LK_Cpp: 1264 return "C++"; 1265 case FormatStyle::LK_Java: 1266 return "Java"; 1267 case FormatStyle::LK_JavaScript: 1268 return "JavaScript"; 1269 case FormatStyle::LK_Proto: 1270 return "Proto"; 1271 default: 1272 return "Unknown"; 1273 } 1274 } 1275 1276 class Formatter : public UnwrappedLineConsumer { 1277 public: 1278 Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID, 1279 ArrayRef<CharSourceRange> Ranges) 1280 : Style(Style), ID(ID), SourceMgr(SourceMgr), 1281 Whitespaces(SourceMgr, Style, 1282 inputUsesCRLF(SourceMgr.getBufferData(ID))), 1283 Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1), 1284 Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) { 1285 DEBUG(llvm::dbgs() << "File encoding: " 1286 << (Encoding == encoding::Encoding_UTF8 ? "UTF8" 1287 : "unknown") 1288 << "\n"); 1289 DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language) 1290 << "\n"); 1291 } 1292 1293 tooling::Replacements format(bool *IncompleteFormat) { 1294 tooling::Replacements Result; 1295 FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding); 1296 1297 UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), 1298 *this); 1299 Parser.parse(); 1300 assert(UnwrappedLines.rbegin()->empty()); 1301 for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; 1302 ++Run) { 1303 DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); 1304 SmallVector<AnnotatedLine *, 16> AnnotatedLines; 1305 for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) { 1306 AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); 1307 } 1308 tooling::Replacements RunResult = 1309 format(AnnotatedLines, Tokens, IncompleteFormat); 1310 DEBUG({ 1311 llvm::dbgs() << "Replacements for run " << Run << ":\n"; 1312 for (tooling::Replacements::iterator I = RunResult.begin(), 1313 E = RunResult.end(); 1314 I != E; ++I) { 1315 llvm::dbgs() << I->toString() << "\n"; 1316 } 1317 }); 1318 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1319 delete AnnotatedLines[i]; 1320 } 1321 Result.insert(RunResult.begin(), RunResult.end()); 1322 Whitespaces.reset(); 1323 } 1324 return Result; 1325 } 1326 1327 tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, 1328 FormatTokenLexer &Tokens, 1329 bool *IncompleteFormat) { 1330 TokenAnnotator Annotator(Style, Tokens.getKeywords()); 1331 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1332 Annotator.annotate(*AnnotatedLines[i]); 1333 } 1334 deriveLocalStyle(AnnotatedLines); 1335 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1336 Annotator.calculateFormattingInformation(*AnnotatedLines[i]); 1337 } 1338 computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); 1339 1340 Annotator.setCommentLineLevels(AnnotatedLines); 1341 ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr, 1342 Whitespaces, Encoding, 1343 BinPackInconclusiveFunctions); 1344 UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), 1345 IncompleteFormat) 1346 .format(AnnotatedLines); 1347 return Whitespaces.generateReplacements(); 1348 } 1349 1350 private: 1351 // Determines which lines are affected by the SourceRanges given as input. 1352 // Returns \c true if at least one line between I and E or one of their 1353 // children is affected. 1354 bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I, 1355 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1356 bool SomeLineAffected = false; 1357 const AnnotatedLine *PreviousLine = nullptr; 1358 while (I != E) { 1359 AnnotatedLine *Line = *I; 1360 Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); 1361 1362 // If a line is part of a preprocessor directive, it needs to be formatted 1363 // if any token within the directive is affected. 1364 if (Line->InPPDirective) { 1365 FormatToken *Last = Line->Last; 1366 SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1; 1367 while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) { 1368 Last = (*PPEnd)->Last; 1369 ++PPEnd; 1370 } 1371 1372 if (affectsTokenRange(*Line->First, *Last, 1373 /*IncludeLeadingNewlines=*/false)) { 1374 SomeLineAffected = true; 1375 markAllAsAffected(I, PPEnd); 1376 } 1377 I = PPEnd; 1378 continue; 1379 } 1380 1381 if (nonPPLineAffected(Line, PreviousLine)) 1382 SomeLineAffected = true; 1383 1384 PreviousLine = Line; 1385 ++I; 1386 } 1387 return SomeLineAffected; 1388 } 1389 1390 // Determines whether 'Line' is affected by the SourceRanges given as input. 1391 // Returns \c true if line or one if its children is affected. 1392 bool nonPPLineAffected(AnnotatedLine *Line, 1393 const AnnotatedLine *PreviousLine) { 1394 bool SomeLineAffected = false; 1395 Line->ChildrenAffected = 1396 computeAffectedLines(Line->Children.begin(), Line->Children.end()); 1397 if (Line->ChildrenAffected) 1398 SomeLineAffected = true; 1399 1400 // Stores whether one of the line's tokens is directly affected. 1401 bool SomeTokenAffected = false; 1402 // Stores whether we need to look at the leading newlines of the next token 1403 // in order to determine whether it was affected. 1404 bool IncludeLeadingNewlines = false; 1405 1406 // Stores whether the first child line of any of this line's tokens is 1407 // affected. 1408 bool SomeFirstChildAffected = false; 1409 1410 for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { 1411 // Determine whether 'Tok' was affected. 1412 if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) 1413 SomeTokenAffected = true; 1414 1415 // Determine whether the first child of 'Tok' was affected. 1416 if (!Tok->Children.empty() && Tok->Children.front()->Affected) 1417 SomeFirstChildAffected = true; 1418 1419 IncludeLeadingNewlines = Tok->Children.empty(); 1420 } 1421 1422 // Was this line moved, i.e. has it previously been on the same line as an 1423 // affected line? 1424 bool LineMoved = PreviousLine && PreviousLine->Affected && 1425 Line->First->NewlinesBefore == 0; 1426 1427 bool IsContinuedComment = 1428 Line->First->is(tok::comment) && Line->First->Next == nullptr && 1429 Line->First->NewlinesBefore < 2 && PreviousLine && 1430 PreviousLine->Affected && PreviousLine->Last->is(tok::comment); 1431 1432 if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || 1433 IsContinuedComment) { 1434 Line->Affected = true; 1435 SomeLineAffected = true; 1436 } 1437 return SomeLineAffected; 1438 } 1439 1440 // Marks all lines between I and E as well as all their children as affected. 1441 void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I, 1442 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1443 while (I != E) { 1444 (*I)->Affected = true; 1445 markAllAsAffected((*I)->Children.begin(), (*I)->Children.end()); 1446 ++I; 1447 } 1448 } 1449 1450 // Returns true if the range from 'First' to 'Last' intersects with one of the 1451 // input ranges. 1452 bool affectsTokenRange(const FormatToken &First, const FormatToken &Last, 1453 bool IncludeLeadingNewlines) { 1454 SourceLocation Start = First.WhitespaceRange.getBegin(); 1455 if (!IncludeLeadingNewlines) 1456 Start = Start.getLocWithOffset(First.LastNewlineOffset); 1457 SourceLocation End = Last.getStartOfNonWhitespace(); 1458 End = End.getLocWithOffset(Last.TokenText.size()); 1459 CharSourceRange Range = CharSourceRange::getCharRange(Start, End); 1460 return affectsCharSourceRange(Range); 1461 } 1462 1463 // Returns true if one of the input ranges intersect the leading empty lines 1464 // before 'Tok'. 1465 bool affectsLeadingEmptyLines(const FormatToken &Tok) { 1466 CharSourceRange EmptyLineRange = CharSourceRange::getCharRange( 1467 Tok.WhitespaceRange.getBegin(), 1468 Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset)); 1469 return affectsCharSourceRange(EmptyLineRange); 1470 } 1471 1472 // Returns true if 'Range' intersects with one of the input ranges. 1473 bool affectsCharSourceRange(const CharSourceRange &Range) { 1474 for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(), 1475 E = Ranges.end(); 1476 I != E; ++I) { 1477 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && 1478 !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) 1479 return true; 1480 } 1481 return false; 1482 } 1483 1484 static bool inputUsesCRLF(StringRef Text) { 1485 return Text.count('\r') * 2 > Text.count('\n'); 1486 } 1487 1488 bool 1489 hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) { 1490 for (const AnnotatedLine* Line : Lines) { 1491 if (hasCpp03IncompatibleFormat(Line->Children)) 1492 return true; 1493 for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) { 1494 if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { 1495 if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener)) 1496 return true; 1497 if (Tok->is(TT_TemplateCloser) && 1498 Tok->Previous->is(TT_TemplateCloser)) 1499 return true; 1500 } 1501 } 1502 } 1503 return false; 1504 } 1505 1506 int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) { 1507 int AlignmentDiff = 0; 1508 for (const AnnotatedLine* Line : Lines) { 1509 AlignmentDiff += countVariableAlignments(Line->Children); 1510 for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) { 1511 if (!Tok->is(TT_PointerOrReference)) 1512 continue; 1513 bool SpaceBefore = 1514 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 1515 bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() != 1516 Tok->Next->WhitespaceRange.getEnd(); 1517 if (SpaceBefore && !SpaceAfter) 1518 ++AlignmentDiff; 1519 if (!SpaceBefore && SpaceAfter) 1520 --AlignmentDiff; 1521 } 1522 } 1523 return AlignmentDiff; 1524 } 1525 1526 void 1527 deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 1528 bool HasBinPackedFunction = false; 1529 bool HasOnePerLineFunction = false; 1530 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1531 if (!AnnotatedLines[i]->First->Next) 1532 continue; 1533 FormatToken *Tok = AnnotatedLines[i]->First->Next; 1534 while (Tok->Next) { 1535 if (Tok->PackingKind == PPK_BinPacked) 1536 HasBinPackedFunction = true; 1537 if (Tok->PackingKind == PPK_OnePerLine) 1538 HasOnePerLineFunction = true; 1539 1540 Tok = Tok->Next; 1541 } 1542 } 1543 if (Style.DerivePointerAlignment) 1544 Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0 1545 ? FormatStyle::PAS_Left 1546 : FormatStyle::PAS_Right; 1547 if (Style.Standard == FormatStyle::LS_Auto) 1548 Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines) 1549 ? FormatStyle::LS_Cpp11 1550 : FormatStyle::LS_Cpp03; 1551 BinPackInconclusiveFunctions = 1552 HasBinPackedFunction || !HasOnePerLineFunction; 1553 } 1554 1555 void consumeUnwrappedLine(const UnwrappedLine &TheLine) override { 1556 assert(!UnwrappedLines.empty()); 1557 UnwrappedLines.back().push_back(TheLine); 1558 } 1559 1560 void finishRun() override { 1561 UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>()); 1562 } 1563 1564 FormatStyle Style; 1565 FileID ID; 1566 SourceManager &SourceMgr; 1567 WhitespaceManager Whitespaces; 1568 SmallVector<CharSourceRange, 8> Ranges; 1569 SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines; 1570 1571 encoding::Encoding Encoding; 1572 bool BinPackInconclusiveFunctions; 1573 }; 1574 1575 } // end anonymous namespace 1576 1577 tooling::Replacements reformat(const FormatStyle &Style, 1578 SourceManager &SourceMgr, FileID ID, 1579 ArrayRef<CharSourceRange> Ranges, 1580 bool *IncompleteFormat) { 1581 if (Style.DisableFormat) 1582 return tooling::Replacements(); 1583 Formatter formatter(Style, SourceMgr, ID, Ranges); 1584 return formatter.format(IncompleteFormat); 1585 } 1586 1587 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 1588 ArrayRef<tooling::Range> Ranges, 1589 StringRef FileName, bool *IncompleteFormat) { 1590 if (Style.DisableFormat) 1591 return tooling::Replacements(); 1592 1593 FileManager Files((FileSystemOptions())); 1594 DiagnosticsEngine Diagnostics( 1595 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 1596 new DiagnosticOptions); 1597 SourceManager SourceMgr(Diagnostics, Files); 1598 std::unique_ptr<llvm::MemoryBuffer> Buf = 1599 llvm::MemoryBuffer::getMemBuffer(Code, FileName); 1600 const clang::FileEntry *Entry = 1601 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 1602 SourceMgr.overrideFileContents(Entry, std::move(Buf)); 1603 FileID ID = 1604 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 1605 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 1606 std::vector<CharSourceRange> CharRanges; 1607 for (const tooling::Range &Range : Ranges) { 1608 SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); 1609 SourceLocation End = Start.getLocWithOffset(Range.getLength()); 1610 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 1611 } 1612 return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat); 1613 } 1614 1615 LangOptions getFormattingLangOpts(const FormatStyle &Style) { 1616 LangOptions LangOpts; 1617 LangOpts.CPlusPlus = 1; 1618 LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1619 LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1620 LangOpts.LineComment = 1; 1621 bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp; 1622 LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0; 1623 LangOpts.Bool = 1; 1624 LangOpts.ObjC1 = 1; 1625 LangOpts.ObjC2 = 1; 1626 LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. 1627 return LangOpts; 1628 } 1629 1630 const char *StyleOptionHelpDescription = 1631 "Coding style, currently supports:\n" 1632 " LLVM, Google, Chromium, Mozilla, WebKit.\n" 1633 "Use -style=file to load style configuration from\n" 1634 ".clang-format file located in one of the parent\n" 1635 "directories of the source file (or current\n" 1636 "directory for stdin).\n" 1637 "Use -style=\"{key: value, ...}\" to set specific\n" 1638 "parameters, e.g.:\n" 1639 " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; 1640 1641 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { 1642 if (FileName.endswith(".java")) { 1643 return FormatStyle::LK_Java; 1644 } else if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) { 1645 // JavaScript or TypeScript. 1646 return FormatStyle::LK_JavaScript; 1647 } else if (FileName.endswith_lower(".proto") || 1648 FileName.endswith_lower(".protodevel")) { 1649 return FormatStyle::LK_Proto; 1650 } 1651 return FormatStyle::LK_Cpp; 1652 } 1653 1654 FormatStyle getStyle(StringRef StyleName, StringRef FileName, 1655 StringRef FallbackStyle) { 1656 FormatStyle Style = getLLVMStyle(); 1657 Style.Language = getLanguageByFileName(FileName); 1658 if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { 1659 llvm::errs() << "Invalid fallback style \"" << FallbackStyle 1660 << "\" using LLVM style\n"; 1661 return Style; 1662 } 1663 1664 if (StyleName.startswith("{")) { 1665 // Parse YAML/JSON style from the command line. 1666 if (std::error_code ec = parseConfiguration(StyleName, &Style)) { 1667 llvm::errs() << "Error parsing -style: " << ec.message() << ", using " 1668 << FallbackStyle << " style\n"; 1669 } 1670 return Style; 1671 } 1672 1673 if (!StyleName.equals_lower("file")) { 1674 if (!getPredefinedStyle(StyleName, Style.Language, &Style)) 1675 llvm::errs() << "Invalid value for -style, using " << FallbackStyle 1676 << " style\n"; 1677 return Style; 1678 } 1679 1680 // Look for .clang-format/_clang-format file in the file's parent directories. 1681 SmallString<128> UnsuitableConfigFiles; 1682 SmallString<128> Path(FileName); 1683 llvm::sys::fs::make_absolute(Path); 1684 for (StringRef Directory = Path; !Directory.empty(); 1685 Directory = llvm::sys::path::parent_path(Directory)) { 1686 if (!llvm::sys::fs::is_directory(Directory)) 1687 continue; 1688 SmallString<128> ConfigFile(Directory); 1689 1690 llvm::sys::path::append(ConfigFile, ".clang-format"); 1691 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 1692 bool IsFile = false; 1693 // Ignore errors from is_regular_file: we only need to know if we can read 1694 // the file or not. 1695 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 1696 1697 if (!IsFile) { 1698 // Try _clang-format too, since dotfiles are not commonly used on Windows. 1699 ConfigFile = Directory; 1700 llvm::sys::path::append(ConfigFile, "_clang-format"); 1701 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 1702 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 1703 } 1704 1705 if (IsFile) { 1706 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1707 llvm::MemoryBuffer::getFile(ConfigFile.c_str()); 1708 if (std::error_code EC = Text.getError()) { 1709 llvm::errs() << EC.message() << "\n"; 1710 break; 1711 } 1712 if (std::error_code ec = 1713 parseConfiguration(Text.get()->getBuffer(), &Style)) { 1714 if (ec == ParseError::Unsuitable) { 1715 if (!UnsuitableConfigFiles.empty()) 1716 UnsuitableConfigFiles.append(", "); 1717 UnsuitableConfigFiles.append(ConfigFile); 1718 continue; 1719 } 1720 llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message() 1721 << "\n"; 1722 break; 1723 } 1724 DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); 1725 return Style; 1726 } 1727 } 1728 if (!UnsuitableConfigFiles.empty()) { 1729 llvm::errs() << "Configuration file(s) do(es) not support " 1730 << getLanguageName(Style.Language) << ": " 1731 << UnsuitableConfigFiles << "\n"; 1732 } 1733 return Style; 1734 } 1735 1736 } // namespace format 1737 } // namespace clang 1738