1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "ContinuationIndenter.h" 17 #include "TokenAnnotator.h" 18 #include "UnwrappedLineFormatter.h" 19 #include "UnwrappedLineParser.h" 20 #include "WhitespaceManager.h" 21 #include "clang/Basic/Diagnostic.h" 22 #include "clang/Basic/DiagnosticOptions.h" 23 #include "clang/Basic/SourceManager.h" 24 #include "clang/Format/Format.h" 25 #include "clang/Lex/Lexer.h" 26 #include "llvm/ADT/STLExtras.h" 27 #include "llvm/Support/Allocator.h" 28 #include "llvm/Support/Debug.h" 29 #include "llvm/Support/Path.h" 30 #include "llvm/Support/Regex.h" 31 #include "llvm/Support/YAMLTraits.h" 32 #include <queue> 33 #include <string> 34 35 #define DEBUG_TYPE "format-formatter" 36 37 using clang::format::FormatStyle; 38 39 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) 40 41 namespace llvm { 42 namespace yaml { 43 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { 44 static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) { 45 IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); 46 IO.enumCase(Value, "Java", FormatStyle::LK_Java); 47 IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); 48 IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); 49 } 50 }; 51 52 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> { 53 static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) { 54 IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03); 55 IO.enumCase(Value, 
"C++03", FormatStyle::LS_Cpp03); 56 IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11); 57 IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11); 58 IO.enumCase(Value, "Auto", FormatStyle::LS_Auto); 59 } 60 }; 61 62 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> { 63 static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) { 64 IO.enumCase(Value, "Never", FormatStyle::UT_Never); 65 IO.enumCase(Value, "false", FormatStyle::UT_Never); 66 IO.enumCase(Value, "Always", FormatStyle::UT_Always); 67 IO.enumCase(Value, "true", FormatStyle::UT_Always); 68 IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation); 69 } 70 }; 71 72 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> { 73 static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) { 74 IO.enumCase(Value, "None", FormatStyle::SFS_None); 75 IO.enumCase(Value, "false", FormatStyle::SFS_None); 76 IO.enumCase(Value, "All", FormatStyle::SFS_All); 77 IO.enumCase(Value, "true", FormatStyle::SFS_All); 78 IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline); 79 IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty); 80 } 81 }; 82 83 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> { 84 static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) { 85 IO.enumCase(Value, "All", FormatStyle::BOS_All); 86 IO.enumCase(Value, "true", FormatStyle::BOS_All); 87 IO.enumCase(Value, "None", FormatStyle::BOS_None); 88 IO.enumCase(Value, "false", FormatStyle::BOS_None); 89 IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment); 90 } 91 }; 92 93 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> { 94 static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) { 95 IO.enumCase(Value, "Attach", FormatStyle::BS_Attach); 96 IO.enumCase(Value, "Linux", FormatStyle::BS_Linux); 97 IO.enumCase(Value, "Mozilla", FormatStyle::BS_Mozilla); 98 IO.enumCase(Value, "Stroustrup", 
FormatStyle::BS_Stroustrup); 99 IO.enumCase(Value, "Allman", FormatStyle::BS_Allman); 100 IO.enumCase(Value, "GNU", FormatStyle::BS_GNU); 101 IO.enumCase(Value, "WebKit", FormatStyle::BS_WebKit); 102 } 103 }; 104 105 template <> struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> { 106 static void enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) { 107 IO.enumCase(Value, "None", FormatStyle::DRTBS_None); 108 IO.enumCase(Value, "All", FormatStyle::DRTBS_All); 109 IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel); 110 111 // For backward compatibility. 112 IO.enumCase(Value, "false", FormatStyle::DRTBS_None); 113 IO.enumCase(Value, "true", FormatStyle::DRTBS_All); 114 } 115 }; 116 117 template <> 118 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { 119 static void enumeration(IO &IO, 120 FormatStyle::NamespaceIndentationKind &Value) { 121 IO.enumCase(Value, "None", FormatStyle::NI_None); 122 IO.enumCase(Value, "Inner", FormatStyle::NI_Inner); 123 IO.enumCase(Value, "All", FormatStyle::NI_All); 124 } 125 }; 126 127 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { 128 static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) { 129 IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); 130 IO.enumCase(Value, "Left", FormatStyle::PAS_Left); 131 IO.enumCase(Value, "Right", FormatStyle::PAS_Right); 132 133 // For backward compatibility. 
134 IO.enumCase(Value, "true", FormatStyle::PAS_Left); 135 IO.enumCase(Value, "false", FormatStyle::PAS_Right); 136 } 137 }; 138 139 template <> 140 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> { 141 static void enumeration(IO &IO, 142 FormatStyle::SpaceBeforeParensOptions &Value) { 143 IO.enumCase(Value, "Never", FormatStyle::SBPO_Never); 144 IO.enumCase(Value, "ControlStatements", 145 FormatStyle::SBPO_ControlStatements); 146 IO.enumCase(Value, "Always", FormatStyle::SBPO_Always); 147 148 // For backward compatibility. 149 IO.enumCase(Value, "false", FormatStyle::SBPO_Never); 150 IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements); 151 } 152 }; 153 154 template <> struct MappingTraits<FormatStyle> { 155 static void mapping(IO &IO, FormatStyle &Style) { 156 // When reading, read the language first, we need it for getPredefinedStyle. 157 IO.mapOptional("Language", Style.Language); 158 159 if (IO.outputting()) { 160 StringRef StylesArray[] = {"LLVM", "Google", "Chromium", 161 "Mozilla", "WebKit", "GNU"}; 162 ArrayRef<StringRef> Styles(StylesArray); 163 for (size_t i = 0, e = Styles.size(); i < e; ++i) { 164 StringRef StyleName(Styles[i]); 165 FormatStyle PredefinedStyle; 166 if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) && 167 Style == PredefinedStyle) { 168 IO.mapOptional("# BasedOnStyle", StyleName); 169 break; 170 } 171 } 172 } else { 173 StringRef BasedOnStyle; 174 IO.mapOptional("BasedOnStyle", BasedOnStyle); 175 if (!BasedOnStyle.empty()) { 176 FormatStyle::LanguageKind OldLanguage = Style.Language; 177 FormatStyle::LanguageKind Language = 178 ((FormatStyle *)IO.getContext())->Language; 179 if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) { 180 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); 181 return; 182 } 183 Style.Language = OldLanguage; 184 } 185 } 186 187 // For backward compatibility. 
188 if (!IO.outputting()) { 189 IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment); 190 IO.mapOptional("IndentFunctionDeclarationAfterType", 191 Style.IndentWrappedFunctionNames); 192 IO.mapOptional("PointerBindsToType", Style.PointerAlignment); 193 IO.mapOptional("SpaceAfterControlStatementKeyword", 194 Style.SpaceBeforeParens); 195 } 196 197 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); 198 IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); 199 IO.mapOptional("AlignConsecutiveAssignments", 200 Style.AlignConsecutiveAssignments); 201 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); 202 IO.mapOptional("AlignOperands", Style.AlignOperands); 203 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); 204 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", 205 Style.AllowAllParametersOfDeclarationOnNextLine); 206 IO.mapOptional("AllowShortBlocksOnASingleLine", 207 Style.AllowShortBlocksOnASingleLine); 208 IO.mapOptional("AllowShortCaseLabelsOnASingleLine", 209 Style.AllowShortCaseLabelsOnASingleLine); 210 IO.mapOptional("AllowShortFunctionsOnASingleLine", 211 Style.AllowShortFunctionsOnASingleLine); 212 IO.mapOptional("AllowShortIfStatementsOnASingleLine", 213 Style.AllowShortIfStatementsOnASingleLine); 214 IO.mapOptional("AllowShortLoopsOnASingleLine", 215 Style.AllowShortLoopsOnASingleLine); 216 IO.mapOptional("AlwaysBreakAfterDefinitionReturnType", 217 Style.AlwaysBreakAfterDefinitionReturnType); 218 IO.mapOptional("AlwaysBreakBeforeMultilineStrings", 219 Style.AlwaysBreakBeforeMultilineStrings); 220 IO.mapOptional("AlwaysBreakTemplateDeclarations", 221 Style.AlwaysBreakTemplateDeclarations); 222 IO.mapOptional("BinPackArguments", Style.BinPackArguments); 223 IO.mapOptional("BinPackParameters", Style.BinPackParameters); 224 IO.mapOptional("BreakBeforeBinaryOperators", 225 Style.BreakBeforeBinaryOperators); 226 IO.mapOptional("BreakBeforeBraces", 
Style.BreakBeforeBraces); 227 IO.mapOptional("BreakBeforeTernaryOperators", 228 Style.BreakBeforeTernaryOperators); 229 IO.mapOptional("BreakConstructorInitializersBeforeComma", 230 Style.BreakConstructorInitializersBeforeComma); 231 IO.mapOptional("ColumnLimit", Style.ColumnLimit); 232 IO.mapOptional("CommentPragmas", Style.CommentPragmas); 233 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", 234 Style.ConstructorInitializerAllOnOneLineOrOnePerLine); 235 IO.mapOptional("ConstructorInitializerIndentWidth", 236 Style.ConstructorInitializerIndentWidth); 237 IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth); 238 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); 239 IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment); 240 IO.mapOptional("DisableFormat", Style.DisableFormat); 241 IO.mapOptional("ExperimentalAutoDetectBinPacking", 242 Style.ExperimentalAutoDetectBinPacking); 243 IO.mapOptional("ForEachMacros", Style.ForEachMacros); 244 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); 245 IO.mapOptional("IndentWidth", Style.IndentWidth); 246 IO.mapOptional("IndentWrappedFunctionNames", 247 Style.IndentWrappedFunctionNames); 248 IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks", 249 Style.KeepEmptyLinesAtTheStartOfBlocks); 250 IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin); 251 IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd); 252 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); 253 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); 254 IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth); 255 IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); 256 IO.mapOptional("ObjCSpaceBeforeProtocolList", 257 Style.ObjCSpaceBeforeProtocolList); 258 IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", 259 Style.PenaltyBreakBeforeFirstCallParameter); 260 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); 
261 IO.mapOptional("PenaltyBreakFirstLessLess", 262 Style.PenaltyBreakFirstLessLess); 263 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); 264 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); 265 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", 266 Style.PenaltyReturnTypeOnItsOwnLine); 267 IO.mapOptional("PointerAlignment", Style.PointerAlignment); 268 IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); 269 IO.mapOptional("SpaceBeforeAssignmentOperators", 270 Style.SpaceBeforeAssignmentOperators); 271 IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); 272 IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses); 273 IO.mapOptional("SpacesBeforeTrailingComments", 274 Style.SpacesBeforeTrailingComments); 275 IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); 276 IO.mapOptional("SpacesInContainerLiterals", 277 Style.SpacesInContainerLiterals); 278 IO.mapOptional("SpacesInCStyleCastParentheses", 279 Style.SpacesInCStyleCastParentheses); 280 IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses); 281 IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets); 282 IO.mapOptional("Standard", Style.Standard); 283 IO.mapOptional("TabWidth", Style.TabWidth); 284 IO.mapOptional("UseTab", Style.UseTab); 285 } 286 }; 287 288 // Allows to read vector<FormatStyle> while keeping default values. 289 // IO.getContext() should contain a pointer to the FormatStyle structure, that 290 // will be used to get default values for missing keys. 291 // If the first element has no Language specified, it will be treated as the 292 // default one for the following elements. 
293 template <> struct DocumentListTraits<std::vector<FormatStyle>> { 294 static size_t size(IO &IO, std::vector<FormatStyle> &Seq) { 295 return Seq.size(); 296 } 297 static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq, 298 size_t Index) { 299 if (Index >= Seq.size()) { 300 assert(Index == Seq.size()); 301 FormatStyle Template; 302 if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) { 303 Template = Seq[0]; 304 } else { 305 Template = *((const FormatStyle *)IO.getContext()); 306 Template.Language = FormatStyle::LK_None; 307 } 308 Seq.resize(Index + 1, Template); 309 } 310 return Seq[Index]; 311 } 312 }; 313 } 314 } 315 316 namespace clang { 317 namespace format { 318 319 const std::error_category &getParseCategory() { 320 static ParseErrorCategory C; 321 return C; 322 } 323 std::error_code make_error_code(ParseError e) { 324 return std::error_code(static_cast<int>(e), getParseCategory()); 325 } 326 327 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT { 328 return "clang-format.parse_error"; 329 } 330 331 std::string ParseErrorCategory::message(int EV) const { 332 switch (static_cast<ParseError>(EV)) { 333 case ParseError::Success: 334 return "Success"; 335 case ParseError::Error: 336 return "Invalid argument"; 337 case ParseError::Unsuitable: 338 return "Unsuitable"; 339 } 340 llvm_unreachable("unexpected parse error"); 341 } 342 343 FormatStyle getLLVMStyle() { 344 FormatStyle LLVMStyle; 345 LLVMStyle.Language = FormatStyle::LK_Cpp; 346 LLVMStyle.AccessModifierOffset = -2; 347 LLVMStyle.AlignEscapedNewlinesLeft = false; 348 LLVMStyle.AlignAfterOpenBracket = true; 349 LLVMStyle.AlignOperands = true; 350 LLVMStyle.AlignTrailingComments = true; 351 LLVMStyle.AlignConsecutiveAssignments = false; 352 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 353 LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; 354 LLVMStyle.AllowShortBlocksOnASingleLine = false; 355 LLVMStyle.AllowShortCaseLabelsOnASingleLine = 
false; 356 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 357 LLVMStyle.AllowShortLoopsOnASingleLine = false; 358 LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; 359 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 360 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 361 LLVMStyle.BinPackParameters = true; 362 LLVMStyle.BinPackArguments = true; 363 LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; 364 LLVMStyle.BreakBeforeTernaryOperators = true; 365 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 366 LLVMStyle.BreakConstructorInitializersBeforeComma = false; 367 LLVMStyle.ColumnLimit = 80; 368 LLVMStyle.CommentPragmas = "^ IWYU pragma:"; 369 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 370 LLVMStyle.ConstructorInitializerIndentWidth = 4; 371 LLVMStyle.ContinuationIndentWidth = 4; 372 LLVMStyle.Cpp11BracedListStyle = true; 373 LLVMStyle.DerivePointerAlignment = false; 374 LLVMStyle.ExperimentalAutoDetectBinPacking = false; 375 LLVMStyle.ForEachMacros.push_back("foreach"); 376 LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); 377 LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); 378 LLVMStyle.IndentCaseLabels = false; 379 LLVMStyle.IndentWrappedFunctionNames = false; 380 LLVMStyle.IndentWidth = 2; 381 LLVMStyle.TabWidth = 8; 382 LLVMStyle.MaxEmptyLinesToKeep = 1; 383 LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; 384 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; 385 LLVMStyle.ObjCBlockIndentWidth = 2; 386 LLVMStyle.ObjCSpaceAfterProperty = false; 387 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 388 LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; 389 LLVMStyle.SpacesBeforeTrailingComments = 1; 390 LLVMStyle.Standard = FormatStyle::LS_Cpp11; 391 LLVMStyle.UseTab = FormatStyle::UT_Never; 392 LLVMStyle.SpacesInParentheses = false; 393 LLVMStyle.SpacesInSquareBrackets = false; 394 LLVMStyle.SpaceInEmptyParentheses = false; 395 LLVMStyle.SpacesInContainerLiterals = true; 396 
LLVMStyle.SpacesInCStyleCastParentheses = false; 397 LLVMStyle.SpaceAfterCStyleCast = false; 398 LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; 399 LLVMStyle.SpaceBeforeAssignmentOperators = true; 400 LLVMStyle.SpacesInAngles = false; 401 402 LLVMStyle.PenaltyBreakComment = 300; 403 LLVMStyle.PenaltyBreakFirstLessLess = 120; 404 LLVMStyle.PenaltyBreakString = 1000; 405 LLVMStyle.PenaltyExcessCharacter = 1000000; 406 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 407 LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; 408 409 LLVMStyle.DisableFormat = false; 410 411 return LLVMStyle; 412 } 413 414 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { 415 FormatStyle GoogleStyle = getLLVMStyle(); 416 GoogleStyle.Language = Language; 417 418 GoogleStyle.AccessModifierOffset = -1; 419 GoogleStyle.AlignEscapedNewlinesLeft = true; 420 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 421 GoogleStyle.AllowShortLoopsOnASingleLine = true; 422 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 423 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 424 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 425 GoogleStyle.DerivePointerAlignment = true; 426 GoogleStyle.IndentCaseLabels = true; 427 GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; 428 GoogleStyle.ObjCSpaceAfterProperty = false; 429 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 430 GoogleStyle.PointerAlignment = FormatStyle::PAS_Left; 431 GoogleStyle.SpacesBeforeTrailingComments = 2; 432 GoogleStyle.Standard = FormatStyle::LS_Auto; 433 434 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 435 GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; 436 437 if (Language == FormatStyle::LK_Java) { 438 GoogleStyle.AlignAfterOpenBracket = false; 439 GoogleStyle.AlignOperands = false; 440 GoogleStyle.AlignTrailingComments = false; 441 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; 442 GoogleStyle.AllowShortIfStatementsOnASingleLine = 
false; 443 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; 444 GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; 445 GoogleStyle.ColumnLimit = 100; 446 GoogleStyle.SpaceAfterCStyleCast = true; 447 GoogleStyle.SpacesBeforeTrailingComments = 1; 448 } else if (Language == FormatStyle::LK_JavaScript) { 449 GoogleStyle.BreakBeforeTernaryOperators = false; 450 GoogleStyle.MaxEmptyLinesToKeep = 3; 451 GoogleStyle.SpacesInContainerLiterals = false; 452 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 453 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; 454 } else if (Language == FormatStyle::LK_Proto) { 455 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; 456 GoogleStyle.SpacesInContainerLiterals = false; 457 } 458 459 return GoogleStyle; 460 } 461 462 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { 463 FormatStyle ChromiumStyle = getGoogleStyle(Language); 464 if (Language == FormatStyle::LK_Java) { 465 ChromiumStyle.AllowShortIfStatementsOnASingleLine = true; 466 ChromiumStyle.IndentWidth = 4; 467 ChromiumStyle.ContinuationIndentWidth = 8; 468 } else { 469 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 470 ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 471 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 472 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 473 ChromiumStyle.BinPackParameters = false; 474 ChromiumStyle.DerivePointerAlignment = false; 475 } 476 return ChromiumStyle; 477 } 478 479 FormatStyle getMozillaStyle() { 480 FormatStyle MozillaStyle = getLLVMStyle(); 481 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 482 MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 483 MozillaStyle.AlwaysBreakAfterDefinitionReturnType = 484 FormatStyle::DRTBS_TopLevel; 485 MozillaStyle.AlwaysBreakTemplateDeclarations = true; 486 MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla; 
487 MozillaStyle.BreakConstructorInitializersBeforeComma = true; 488 MozillaStyle.ConstructorInitializerIndentWidth = 2; 489 MozillaStyle.ContinuationIndentWidth = 2; 490 MozillaStyle.Cpp11BracedListStyle = false; 491 MozillaStyle.IndentCaseLabels = true; 492 MozillaStyle.ObjCSpaceAfterProperty = true; 493 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 494 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 495 MozillaStyle.PointerAlignment = FormatStyle::PAS_Left; 496 return MozillaStyle; 497 } 498 499 FormatStyle getWebKitStyle() { 500 FormatStyle Style = getLLVMStyle(); 501 Style.AccessModifierOffset = -4; 502 Style.AlignAfterOpenBracket = false; 503 Style.AlignOperands = false; 504 Style.AlignTrailingComments = false; 505 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 506 Style.BreakBeforeBraces = FormatStyle::BS_WebKit; 507 Style.BreakConstructorInitializersBeforeComma = true; 508 Style.Cpp11BracedListStyle = false; 509 Style.ColumnLimit = 0; 510 Style.IndentWidth = 4; 511 Style.NamespaceIndentation = FormatStyle::NI_Inner; 512 Style.ObjCBlockIndentWidth = 4; 513 Style.ObjCSpaceAfterProperty = true; 514 Style.PointerAlignment = FormatStyle::PAS_Left; 515 Style.Standard = FormatStyle::LS_Cpp03; 516 return Style; 517 } 518 519 FormatStyle getGNUStyle() { 520 FormatStyle Style = getLLVMStyle(); 521 Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All; 522 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 523 Style.BreakBeforeBraces = FormatStyle::BS_GNU; 524 Style.BreakBeforeTernaryOperators = true; 525 Style.Cpp11BracedListStyle = false; 526 Style.ColumnLimit = 79; 527 Style.SpaceBeforeParens = FormatStyle::SBPO_Always; 528 Style.Standard = FormatStyle::LS_Cpp03; 529 return Style; 530 } 531 532 FormatStyle getNoStyle() { 533 FormatStyle NoStyle = getLLVMStyle(); 534 NoStyle.DisableFormat = true; 535 return NoStyle; 536 } 537 538 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, 539 FormatStyle *Style) { 
540 if (Name.equals_lower("llvm")) { 541 *Style = getLLVMStyle(); 542 } else if (Name.equals_lower("chromium")) { 543 *Style = getChromiumStyle(Language); 544 } else if (Name.equals_lower("mozilla")) { 545 *Style = getMozillaStyle(); 546 } else if (Name.equals_lower("google")) { 547 *Style = getGoogleStyle(Language); 548 } else if (Name.equals_lower("webkit")) { 549 *Style = getWebKitStyle(); 550 } else if (Name.equals_lower("gnu")) { 551 *Style = getGNUStyle(); 552 } else if (Name.equals_lower("none")) { 553 *Style = getNoStyle(); 554 } else { 555 return false; 556 } 557 558 Style->Language = Language; 559 return true; 560 } 561 562 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 563 assert(Style); 564 FormatStyle::LanguageKind Language = Style->Language; 565 assert(Language != FormatStyle::LK_None); 566 if (Text.trim().empty()) 567 return make_error_code(ParseError::Error); 568 569 std::vector<FormatStyle> Styles; 570 llvm::yaml::Input Input(Text); 571 // DocumentListTraits<vector<FormatStyle>> uses the context to get default 572 // values for the fields, keys for which are missing from the configuration. 573 // Mapping also uses the context to get the language to find the correct 574 // base style. 575 Input.setContext(Style); 576 Input >> Styles; 577 if (Input.error()) 578 return Input.error(); 579 580 for (unsigned i = 0; i < Styles.size(); ++i) { 581 // Ensures that only the first configuration can skip the Language option. 582 if (Styles[i].Language == FormatStyle::LK_None && i != 0) 583 return make_error_code(ParseError::Error); 584 // Ensure that each language is configured at most once. 
585 for (unsigned j = 0; j < i; ++j) { 586 if (Styles[i].Language == Styles[j].Language) { 587 DEBUG(llvm::dbgs() 588 << "Duplicate languages in the config file on positions " << j 589 << " and " << i << "\n"); 590 return make_error_code(ParseError::Error); 591 } 592 } 593 } 594 // Look for a suitable configuration starting from the end, so we can 595 // find the configuration for the specific language first, and the default 596 // configuration (which can only be at slot 0) after it. 597 for (int i = Styles.size() - 1; i >= 0; --i) { 598 if (Styles[i].Language == Language || 599 Styles[i].Language == FormatStyle::LK_None) { 600 *Style = Styles[i]; 601 Style->Language = Language; 602 return make_error_code(ParseError::Success); 603 } 604 } 605 return make_error_code(ParseError::Unsuitable); 606 } 607 608 std::string configurationAsText(const FormatStyle &Style) { 609 std::string Text; 610 llvm::raw_string_ostream Stream(Text); 611 llvm::yaml::Output Output(Stream); 612 // We use the same mapping method for input and output, so we need a non-const 613 // reference here. 
614 FormatStyle NonConstStyle = Style; 615 Output << NonConstStyle; 616 return Stream.str(); 617 } 618 619 namespace { 620 621 class FormatTokenLexer { 622 public: 623 FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style, 624 encoding::Encoding Encoding) 625 : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), 626 LessStashed(false), Column(0), TrailingWhitespace(0), 627 SourceMgr(SourceMgr), ID(ID), Style(Style), 628 IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), 629 Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false), 630 MacroBlockBeginRegex(Style.MacroBlockBegin), 631 MacroBlockEndRegex(Style.MacroBlockEnd) { 632 Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, 633 getFormattingLangOpts(Style))); 634 Lex->SetKeepWhitespaceMode(true); 635 636 for (const std::string &ForEachMacro : Style.ForEachMacros) 637 ForEachMacros.push_back(&IdentTable.get(ForEachMacro)); 638 std::sort(ForEachMacros.begin(), ForEachMacros.end()); 639 } 640 641 ArrayRef<FormatToken *> lex() { 642 assert(Tokens.empty()); 643 assert(FirstInLineIndex == 0); 644 do { 645 Tokens.push_back(getNextToken()); 646 tryMergePreviousTokens(); 647 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) 648 FirstInLineIndex = Tokens.size() - 1; 649 } while (Tokens.back()->Tok.isNot(tok::eof)); 650 return Tokens; 651 } 652 653 const AdditionalKeywords &getKeywords() { return Keywords; } 654 655 private: 656 void tryMergePreviousTokens() { 657 if (tryMerge_TMacro()) 658 return; 659 if (tryMergeConflictMarkers()) 660 return; 661 if (tryMergeLessLess()) 662 return; 663 664 if (Style.Language == FormatStyle::LK_JavaScript) { 665 if (tryMergeJSRegexLiteral()) 666 return; 667 if (tryMergeEscapeSequence()) 668 return; 669 if (tryMergeTemplateString()) 670 return; 671 672 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; 673 static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal, 674 
tok::equal};
      // '>' '>' '>=' with nothing in between, i.e. ">>>=".
      static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
                                                    tok::greaterequal};
      // '=' '>' with nothing in between, i.e. "=>".
      static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
      // FIXME: Investigate what token type gives the correct operator priority.
      if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
        return;
      if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
        return;
      if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
        return;
      if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
        return;
    }
  }

  // Looks at the last three tokens (and the one before them, if any) and
  // merges two adjacent "<" into a single "<<" token. Returns true if a merge
  // happened.
  bool tryMergeLessLess() {
    // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
    if (Tokens.size() < 3)
      return false;

    // X only exists when there are at least four tokens.
    bool FourthTokenIsLess = false;
    if (Tokens.size() > 3)
      FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);

    // First[0] and First[1] are the two "<" candidates, First[2] is Y.
    auto First = Tokens.end() - 3;
    if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
        First[0]->isNot(tok::less) || FourthTokenIsLess)
      return false;

    // Only merge if there currently is no whitespace between the two "<".
    if (First[1]->WhitespaceRange.getBegin() !=
        First[1]->WhitespaceRange.getEnd())
      return false;

    First[0]->Tok.setKind(tok::lessless);
    First[0]->TokenText = "<<";
    First[0]->ColumnWidth += 1;
    // Drop the second "<"; Y stays as the last token.
    Tokens.erase(Tokens.end() - 2);
    return true;
  }

  // Merges the last Kinds.size() tokens into one token of type NewType if
  // their kinds match Kinds in order and no whitespace separates them.
  // Returns true if a merge happened; the token list is untouched otherwise.
  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
    if (Tokens.size() < Kinds.size())
      return false;

    SmallVectorImpl<FormatToken *>::const_iterator First =
        Tokens.end() - Kinds.size();
    if (!First[0]->is(Kinds[0]))
      return false;
    // Total text length of the tokens that get folded into First[0].
    unsigned AddLength = 0;
    for (unsigned i = 1; i < Kinds.size(); ++i) {
      if (!First[i]->is(Kinds[i]) ||
          First[i]->WhitespaceRange.getBegin() !=
              First[i]->WhitespaceRange.getEnd())
        return false;
      AddLength += First[i]->TokenText.size();
    }
    Tokens.resize(Tokens.size() - Kinds.size() + 1);
    // Extend the first token's text over the merged tokens.
    // NOTE(review): this assumes the merged tokens directly follow First[0]
    // in the underlying buffer, which the no-whitespace check above suggests.
    First[0]->TokenText = StringRef(First[0]->TokenText.data(),
                                    First[0]->TokenText.size() + AddLength);
    First[0]->ColumnWidth += AddLength;
    First[0]->Type = NewType;
    return true;
  }

  // Tries to merge an escape sequence, i.e. a "\\" and the following
  // character. Use e.g. inside JavaScript regex literals.
  // On success the backslash token grows by one character, the newest token
  // is dropped, and lexing restarts one character after the start of that
  // dropped token.
  bool tryMergeEscapeSequence() {
    if (Tokens.size() < 2)
      return false;
    FormatToken *Previous = Tokens[Tokens.size() - 2];
    if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\")
      return false;
    ++Previous->ColumnWidth;
    StringRef Text = Previous->TokenText;
    Previous->TokenText = StringRef(Text.data(), Text.size() + 1);
    resetLexer(SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 1);
    Tokens.resize(Tokens.size() - 1);
    Column = Previous->OriginalColumn + Previous->ColumnWidth;
    return true;
  }

  // Try to determine whether the current token ends a JavaScript regex literal.
  // We heuristically assume that this is a regex literal if we find two
  // unescaped slashes on a line and the token before the first slash is one of
  // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
  // a division.
  // Returns true if the tokens from the opening slash up to the current token
  // were collapsed into a single TT_RegexLiteral token.
  bool tryMergeJSRegexLiteral() {
    if (Tokens.size() < 2)
      return false;

    // If this is a string literal with a slash inside, compute the slash's
    // offset and try to find the beginning of the regex literal.
    // Also look at tok::unknown, as it can be an unterminated char literal.
    size_t SlashInStringPos = StringRef::npos;
    if (Tokens.back()->isOneOf(tok::string_literal, tok::char_constant,
                               tok::unknown)) {
      // Start search from position 1 as otherwise, this is an unknown token
      // for an unterminated /*-comment which is handled elsewhere.
      SlashInStringPos = Tokens.back()->TokenText.find('/', 1);
      if (SlashInStringPos == StringRef::npos)
        return false;
    }

    // If a regex literal ends in "\//", this gets represented by an unknown
    // token "\" and a comment.
    bool MightEndWithEscapedSlash =
        Tokens.back()->is(tok::comment) &&
        Tokens.back()->TokenText.startswith("//") &&
        Tokens[Tokens.size() - 2]->TokenText == "\\";
    // Give up unless one of the special endings above applies or the current
    // token is a slash that is not preceded by a "\" token.
    if (!MightEndWithEscapedSlash && SlashInStringPos == StringRef::npos &&
        (Tokens.back()->isNot(tok::slash) ||
         (Tokens[Tokens.size() - 2]->is(tok::unknown) &&
          Tokens[Tokens.size() - 2]->TokenText == "\\")))
      return false;

    // Walk backwards from the token before the current one, looking for the
    // slash that opens the literal. TokenCount is the number of tokens that
    // will be dropped from the end if that slash is found at I.
    unsigned TokenCount = 0;
    for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
      ++TokenCount;
      // Prev is the nearest token before I that is not a comment (or E).
      auto Prev = I + 1;
      while (Prev != E && Prev[0]->is(tok::comment))
        ++Prev;
      if (I[0]->isOneOf(tok::slash, tok::slashequal) &&
          (Prev == E ||
           ((Prev[0]->isOneOf(tok::l_paren, tok::semi, tok::l_brace,
                              tok::r_brace, tok::exclaim, tok::l_square,
                              tok::colon, tok::comma, tok::question,
                              tok::kw_return) ||
             Prev[0]->isBinaryOperator())))) {
        unsigned LastColumn = Tokens.back()->OriginalColumn;
        SourceLocation Loc = Tokens.back()->Tok.getLocation();
        if (MightEndWithEscapedSlash) {
          // This regex literal ends in '\//'. Skip past the '//' of the last
          // token and re-start lexing from there.
          resetLexer(SourceMgr.getFileOffset(Loc) + 2);
        } else if (SlashInStringPos != StringRef::npos) {
          // This regex literal ends in a string_literal with a slash inside.
          // Calculate end column and reset lexer appropriately.
          resetLexer(SourceMgr.getFileOffset(Loc) + SlashInStringPos + 1);
          LastColumn += SlashInStringPos;
        }
        // Drop everything after the opening slash; that slash token becomes
        // the regex literal.
        Tokens.resize(Tokens.size() - TokenCount);
        // NOTE(review): this kind is overwritten with tok::string_literal two
        // statements below, so this assignment has no lasting effect.
        Tokens.back()->Tok.setKind(tok::unknown);
        Tokens.back()->Type = TT_RegexLiteral;
        // Treat regex literals like other string_literals.
        Tokens.back()->Tok.setKind(tok::string_literal);
        Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn;
        return true;
      }

      // There can't be a newline inside a regex literal.
      if (I[0]->NewlinesBefore > 0)
        return false;
    }
    return false;
  }

  bool tryMergeTemplateString() {
    if (Tokens.size() < 2)
      return false;

    FormatToken *EndBacktick = Tokens.back();
    // Backticks get lexed as tok::unknown tokens. If a template string contains
    // a comment start, it gets lexed as a tok::comment, or tok::unknown if
    // unterminated.
    if (!EndBacktick->isOneOf(tok::comment, tok::string_literal,
                              tok::char_constant, tok::unknown))
      return false;
    size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
    // Unknown token that's not actually a backtick, or a comment that doesn't
    // contain a backtick.
    if (CommentBacktickPos == StringRef::npos)
      return false;

    unsigned TokenCount = 0;
    bool IsMultiline = false;
    unsigned EndColumnInFirstLine =
        EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
    for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
      ++TokenCount;
      if (I[0]->IsMultiline)
        IsMultiline = true;

      // If there was a preceding template string, this must be the start of a
      // template string, not the end.
      if (I[0]->is(TT_TemplateString))
        return false;

      if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") {
        // Keep track of the rhs offset of the last token to wrap across lines -
        // it's the rhs offset of the first line of the template string, used to
        // determine its width.
        if (I[0]->IsMultiline)
          EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
        // If the token has newlines, the token before it (if it exists) is the
        // rhs end of the previous line.
        if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) {
          EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth;
          IsMultiline = true;
        }
        continue;
      }

      Tokens.resize(Tokens.size() - TokenCount);
      Tokens.back()->Type = TT_TemplateString;
      const char *EndOffset =
          EndBacktick->TokenText.data() + 1 + CommentBacktickPos;
      if (CommentBacktickPos != 0) {
        // If the backtick was not the first character (e.g. in a comment),
        // re-lex after the backtick position.
        SourceLocation Loc = EndBacktick->Tok.getLocation();
        resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1);
      }
      Tokens.back()->TokenText =
          StringRef(Tokens.back()->TokenText.data(),
                    EndOffset - Tokens.back()->TokenText.data());

      unsigned EndOriginalColumn = EndBacktick->OriginalColumn;
      if (EndOriginalColumn == 0) {
        SourceLocation Loc = EndBacktick->Tok.getLocation();
        EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc);
      }
      // If the ` is further down within the token (e.g. in a comment).
      EndOriginalColumn += CommentBacktickPos;

      if (IsMultiline) {
        // ColumnWidth is from backtick to last token in line.
        // LastLineColumnWidth is 0 to backtick.
        // x = `some content
        //     until here`;
        Tokens.back()->ColumnWidth =
            EndColumnInFirstLine - Tokens.back()->OriginalColumn;
        // +1 for the ` itself.
        Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1;
        Tokens.back()->IsMultiline = true;
      } else {
        // Token simply spans from start to end, +1 for the ` itself.
912 Tokens.back()->ColumnWidth = 913 EndOriginalColumn - Tokens.back()->OriginalColumn + 1; 914 } 915 return true; 916 } 917 return false; 918 } 919 920 bool tryMerge_TMacro() { 921 if (Tokens.size() < 4) 922 return false; 923 FormatToken *Last = Tokens.back(); 924 if (!Last->is(tok::r_paren)) 925 return false; 926 927 FormatToken *String = Tokens[Tokens.size() - 2]; 928 if (!String->is(tok::string_literal) || String->IsMultiline) 929 return false; 930 931 if (!Tokens[Tokens.size() - 3]->is(tok::l_paren)) 932 return false; 933 934 FormatToken *Macro = Tokens[Tokens.size() - 4]; 935 if (Macro->TokenText != "_T") 936 return false; 937 938 const char *Start = Macro->TokenText.data(); 939 const char *End = Last->TokenText.data() + Last->TokenText.size(); 940 String->TokenText = StringRef(Start, End - Start); 941 String->IsFirst = Macro->IsFirst; 942 String->LastNewlineOffset = Macro->LastNewlineOffset; 943 String->WhitespaceRange = Macro->WhitespaceRange; 944 String->OriginalColumn = Macro->OriginalColumn; 945 String->ColumnWidth = encoding::columnWidthWithTabs( 946 String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); 947 String->NewlinesBefore = Macro->NewlinesBefore; 948 String->HasUnescapedNewline = Macro->HasUnescapedNewline; 949 950 Tokens.pop_back(); 951 Tokens.pop_back(); 952 Tokens.pop_back(); 953 Tokens.back() = String; 954 return true; 955 } 956 957 bool tryMergeConflictMarkers() { 958 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof)) 959 return false; 960 961 // Conflict lines look like: 962 // <marker> <text from the vcs> 963 // For example: 964 // >>>>>>> /file/in/file/system at revision 1234 965 // 966 // We merge all tokens in a line that starts with a conflict marker 967 // into a single token with a special token type that the unwrapped line 968 // parser will use to correctly rebuild the underlying code. 969 970 FileID ID; 971 // Get the position of the first token in the line. 
972 unsigned FirstInLineOffset; 973 std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc( 974 Tokens[FirstInLineIndex]->getStartOfNonWhitespace()); 975 StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer(); 976 // Calculate the offset of the start of the current line. 977 auto LineOffset = Buffer.rfind('\n', FirstInLineOffset); 978 if (LineOffset == StringRef::npos) { 979 LineOffset = 0; 980 } else { 981 ++LineOffset; 982 } 983 984 auto FirstSpace = Buffer.find_first_of(" \n", LineOffset); 985 StringRef LineStart; 986 if (FirstSpace == StringRef::npos) { 987 LineStart = Buffer.substr(LineOffset); 988 } else { 989 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset); 990 } 991 992 TokenType Type = TT_Unknown; 993 if (LineStart == "<<<<<<<" || LineStart == ">>>>") { 994 Type = TT_ConflictStart; 995 } else if (LineStart == "|||||||" || LineStart == "=======" || 996 LineStart == "====") { 997 Type = TT_ConflictAlternative; 998 } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") { 999 Type = TT_ConflictEnd; 1000 } 1001 1002 if (Type != TT_Unknown) { 1003 FormatToken *Next = Tokens.back(); 1004 1005 Tokens.resize(FirstInLineIndex + 1); 1006 // We do not need to build a complete token here, as we will skip it 1007 // during parsing anyway (as we must not touch whitespace around conflict 1008 // markers). 1009 Tokens.back()->Type = Type; 1010 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype); 1011 1012 Tokens.push_back(Next); 1013 return true; 1014 } 1015 1016 return false; 1017 } 1018 1019 FormatToken *getStashedToken() { 1020 // Create a synthesized second '>' or '<' token. 
1021 Token Tok = FormatTok->Tok; 1022 StringRef TokenText = FormatTok->TokenText; 1023 1024 unsigned OriginalColumn = FormatTok->OriginalColumn; 1025 FormatTok = new (Allocator.Allocate()) FormatToken; 1026 FormatTok->Tok = Tok; 1027 SourceLocation TokLocation = 1028 FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1); 1029 FormatTok->Tok.setLocation(TokLocation); 1030 FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation); 1031 FormatTok->TokenText = TokenText; 1032 FormatTok->ColumnWidth = 1; 1033 FormatTok->OriginalColumn = OriginalColumn + 1; 1034 1035 return FormatTok; 1036 } 1037 1038 FormatToken *getNextToken() { 1039 if (GreaterStashed) { 1040 GreaterStashed = false; 1041 return getStashedToken(); 1042 } 1043 if (LessStashed) { 1044 LessStashed = false; 1045 return getStashedToken(); 1046 } 1047 1048 FormatTok = new (Allocator.Allocate()) FormatToken; 1049 readRawToken(*FormatTok); 1050 SourceLocation WhitespaceStart = 1051 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); 1052 FormatTok->IsFirst = IsFirstToken; 1053 IsFirstToken = false; 1054 1055 // Consume and record whitespace until we find a significant token. 1056 unsigned WhitespaceLength = TrailingWhitespace; 1057 while (FormatTok->Tok.is(tok::unknown)) { 1058 StringRef Text = FormatTok->TokenText; 1059 auto EscapesNewline = [&](int pos) { 1060 // A '\r' here is just part of '\r\n'. Skip it. 1061 if (pos >= 0 && Text[pos] == '\r') 1062 --pos; 1063 // See whether there is an odd number of '\' before this. 1064 unsigned count = 0; 1065 for (; pos >= 0; --pos, ++count) 1066 if (Text[pos] != '\\') 1067 break; 1068 return count & 1; 1069 }; 1070 // FIXME: This miscounts tok:unknown tokens that are not just 1071 // whitespace, e.g. a '`' character. 
1072 for (int i = 0, e = Text.size(); i != e; ++i) { 1073 switch (Text[i]) { 1074 case '\n': 1075 ++FormatTok->NewlinesBefore; 1076 FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1); 1077 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; 1078 Column = 0; 1079 break; 1080 case '\r': 1081 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; 1082 Column = 0; 1083 break; 1084 case '\f': 1085 case '\v': 1086 Column = 0; 1087 break; 1088 case ' ': 1089 ++Column; 1090 break; 1091 case '\t': 1092 Column += Style.TabWidth - Column % Style.TabWidth; 1093 break; 1094 case '\\': 1095 if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n')) 1096 FormatTok->Type = TT_ImplicitStringLiteral; 1097 break; 1098 default: 1099 FormatTok->Type = TT_ImplicitStringLiteral; 1100 break; 1101 } 1102 } 1103 1104 if (FormatTok->is(TT_ImplicitStringLiteral)) 1105 break; 1106 WhitespaceLength += FormatTok->Tok.getLength(); 1107 1108 readRawToken(*FormatTok); 1109 } 1110 1111 // In case the token starts with escaped newlines, we want to 1112 // take them into account as whitespace - this pattern is quite frequent 1113 // in macro definitions. 1114 // FIXME: Add a more explicit test. 1115 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && 1116 FormatTok->TokenText[1] == '\n') { 1117 ++FormatTok->NewlinesBefore; 1118 WhitespaceLength += 2; 1119 FormatTok->LastNewlineOffset = 2; 1120 Column = 0; 1121 FormatTok->TokenText = FormatTok->TokenText.substr(2); 1122 } 1123 1124 FormatTok->WhitespaceRange = SourceRange( 1125 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); 1126 1127 FormatTok->OriginalColumn = Column; 1128 1129 TrailingWhitespace = 0; 1130 if (FormatTok->Tok.is(tok::comment)) { 1131 // FIXME: Add the trimmed whitespace to Column. 
1132 StringRef UntrimmedText = FormatTok->TokenText; 1133 FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f"); 1134 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); 1135 } else if (FormatTok->Tok.is(tok::raw_identifier)) { 1136 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); 1137 FormatTok->Tok.setIdentifierInfo(&Info); 1138 FormatTok->Tok.setKind(Info.getTokenID()); 1139 if (Style.Language == FormatStyle::LK_Java && 1140 FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete)) { 1141 FormatTok->Tok.setKind(tok::identifier); 1142 FormatTok->Tok.setIdentifierInfo(nullptr); 1143 } 1144 } else if (FormatTok->Tok.is(tok::greatergreater)) { 1145 FormatTok->Tok.setKind(tok::greater); 1146 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1147 GreaterStashed = true; 1148 } else if (FormatTok->Tok.is(tok::lessless)) { 1149 FormatTok->Tok.setKind(tok::less); 1150 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1151 LessStashed = true; 1152 } 1153 1154 // Now FormatTok is the next non-whitespace token. 1155 1156 StringRef Text = FormatTok->TokenText; 1157 size_t FirstNewlinePos = Text.find('\n'); 1158 if (FirstNewlinePos == StringRef::npos) { 1159 // FIXME: ColumnWidth actually depends on the start column, we need to 1160 // take this into account when the token is moved. 1161 FormatTok->ColumnWidth = 1162 encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding); 1163 Column += FormatTok->ColumnWidth; 1164 } else { 1165 FormatTok->IsMultiline = true; 1166 // FIXME: ColumnWidth actually depends on the start column, we need to 1167 // take this into account when the token is moved. 1168 FormatTok->ColumnWidth = encoding::columnWidthWithTabs( 1169 Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding); 1170 1171 // The last line of the token always starts in column 0. 1172 // Thus, the length can be precomputed even in the presence of tabs. 
1173 FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs( 1174 Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, 1175 Encoding); 1176 Column = FormatTok->LastLineColumnWidth; 1177 } 1178 1179 if (Style.Language == FormatStyle::LK_Cpp) { 1180 if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() && 1181 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() == 1182 tok::pp_define) && 1183 std::find(ForEachMacros.begin(), ForEachMacros.end(), 1184 FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) { 1185 FormatTok->Type = TT_ForEachMacro; 1186 } else if (FormatTok->is(tok::identifier)) { 1187 if (MacroBlockBeginRegex.match(Text)) { 1188 FormatTok->Type = TT_MacroBlockBegin; 1189 } else if (MacroBlockEndRegex.match(Text)) { 1190 FormatTok->Type = TT_MacroBlockEnd; 1191 } 1192 } 1193 } 1194 1195 return FormatTok; 1196 } 1197 1198 FormatToken *FormatTok; 1199 bool IsFirstToken; 1200 bool GreaterStashed, LessStashed; 1201 unsigned Column; 1202 unsigned TrailingWhitespace; 1203 std::unique_ptr<Lexer> Lex; 1204 SourceManager &SourceMgr; 1205 FileID ID; 1206 FormatStyle &Style; 1207 IdentifierTable IdentTable; 1208 AdditionalKeywords Keywords; 1209 encoding::Encoding Encoding; 1210 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 1211 // Index (in 'Tokens') of the last token that starts a new line. 1212 unsigned FirstInLineIndex; 1213 SmallVector<FormatToken *, 16> Tokens; 1214 SmallVector<IdentifierInfo *, 8> ForEachMacros; 1215 1216 bool FormattingDisabled; 1217 1218 llvm::Regex MacroBlockBeginRegex; 1219 llvm::Regex MacroBlockEndRegex; 1220 1221 void readRawToken(FormatToken &Tok) { 1222 Lex->LexFromRawLexer(Tok.Tok); 1223 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), 1224 Tok.Tok.getLength()); 1225 // For formatting, treat unterminated string literals like normal string 1226 // literals. 
1227 if (Tok.is(tok::unknown)) { 1228 if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') { 1229 Tok.Tok.setKind(tok::string_literal); 1230 Tok.IsUnterminatedLiteral = true; 1231 } else if (Style.Language == FormatStyle::LK_JavaScript && 1232 Tok.TokenText == "''") { 1233 Tok.Tok.setKind(tok::char_constant); 1234 } 1235 } 1236 1237 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" || 1238 Tok.TokenText == "/* clang-format on */")) { 1239 FormattingDisabled = false; 1240 } 1241 1242 Tok.Finalized = FormattingDisabled; 1243 1244 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" || 1245 Tok.TokenText == "/* clang-format off */")) { 1246 FormattingDisabled = true; 1247 } 1248 } 1249 1250 void resetLexer(unsigned Offset) { 1251 StringRef Buffer = SourceMgr.getBufferData(ID); 1252 Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), 1253 getFormattingLangOpts(Style), Buffer.begin(), 1254 Buffer.begin() + Offset, Buffer.end())); 1255 Lex->SetKeepWhitespaceMode(true); 1256 TrailingWhitespace = 0; 1257 } 1258 }; 1259 1260 static StringRef getLanguageName(FormatStyle::LanguageKind Language) { 1261 switch (Language) { 1262 case FormatStyle::LK_Cpp: 1263 return "C++"; 1264 case FormatStyle::LK_Java: 1265 return "Java"; 1266 case FormatStyle::LK_JavaScript: 1267 return "JavaScript"; 1268 case FormatStyle::LK_Proto: 1269 return "Proto"; 1270 default: 1271 return "Unknown"; 1272 } 1273 } 1274 1275 class Formatter : public UnwrappedLineConsumer { 1276 public: 1277 Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID, 1278 ArrayRef<CharSourceRange> Ranges) 1279 : Style(Style), ID(ID), SourceMgr(SourceMgr), 1280 Whitespaces(SourceMgr, Style, 1281 inputUsesCRLF(SourceMgr.getBufferData(ID))), 1282 Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1), 1283 Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) { 1284 DEBUG(llvm::dbgs() << "File encoding: " 1285 << (Encoding == encoding::Encoding_UTF8 ? 
"UTF8" 1286 : "unknown") 1287 << "\n"); 1288 DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language) 1289 << "\n"); 1290 } 1291 1292 tooling::Replacements format(bool *IncompleteFormat) { 1293 tooling::Replacements Result; 1294 FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding); 1295 1296 UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), 1297 *this); 1298 Parser.parse(); 1299 assert(UnwrappedLines.rbegin()->empty()); 1300 for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; 1301 ++Run) { 1302 DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); 1303 SmallVector<AnnotatedLine *, 16> AnnotatedLines; 1304 for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) { 1305 AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); 1306 } 1307 tooling::Replacements RunResult = 1308 format(AnnotatedLines, Tokens, IncompleteFormat); 1309 DEBUG({ 1310 llvm::dbgs() << "Replacements for run " << Run << ":\n"; 1311 for (tooling::Replacements::iterator I = RunResult.begin(), 1312 E = RunResult.end(); 1313 I != E; ++I) { 1314 llvm::dbgs() << I->toString() << "\n"; 1315 } 1316 }); 1317 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1318 delete AnnotatedLines[i]; 1319 } 1320 Result.insert(RunResult.begin(), RunResult.end()); 1321 Whitespaces.reset(); 1322 } 1323 return Result; 1324 } 1325 1326 tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, 1327 FormatTokenLexer &Tokens, 1328 bool *IncompleteFormat) { 1329 TokenAnnotator Annotator(Style, Tokens.getKeywords()); 1330 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1331 Annotator.annotate(*AnnotatedLines[i]); 1332 } 1333 deriveLocalStyle(AnnotatedLines); 1334 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1335 Annotator.calculateFormattingInformation(*AnnotatedLines[i]); 1336 } 1337 computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); 1338 1339 
Annotator.setCommentLineLevels(AnnotatedLines); 1340 ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr, 1341 Whitespaces, Encoding, 1342 BinPackInconclusiveFunctions); 1343 UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), 1344 IncompleteFormat) 1345 .format(AnnotatedLines); 1346 return Whitespaces.generateReplacements(); 1347 } 1348 1349 private: 1350 // Determines which lines are affected by the SourceRanges given as input. 1351 // Returns \c true if at least one line between I and E or one of their 1352 // children is affected. 1353 bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I, 1354 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1355 bool SomeLineAffected = false; 1356 const AnnotatedLine *PreviousLine = nullptr; 1357 while (I != E) { 1358 AnnotatedLine *Line = *I; 1359 Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); 1360 1361 // If a line is part of a preprocessor directive, it needs to be formatted 1362 // if any token within the directive is affected. 1363 if (Line->InPPDirective) { 1364 FormatToken *Last = Line->Last; 1365 SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1; 1366 while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) { 1367 Last = (*PPEnd)->Last; 1368 ++PPEnd; 1369 } 1370 1371 if (affectsTokenRange(*Line->First, *Last, 1372 /*IncludeLeadingNewlines=*/false)) { 1373 SomeLineAffected = true; 1374 markAllAsAffected(I, PPEnd); 1375 } 1376 I = PPEnd; 1377 continue; 1378 } 1379 1380 if (nonPPLineAffected(Line, PreviousLine)) 1381 SomeLineAffected = true; 1382 1383 PreviousLine = Line; 1384 ++I; 1385 } 1386 return SomeLineAffected; 1387 } 1388 1389 // Determines whether 'Line' is affected by the SourceRanges given as input. 1390 // Returns \c true if line or one if its children is affected. 
1391 bool nonPPLineAffected(AnnotatedLine *Line, 1392 const AnnotatedLine *PreviousLine) { 1393 bool SomeLineAffected = false; 1394 Line->ChildrenAffected = 1395 computeAffectedLines(Line->Children.begin(), Line->Children.end()); 1396 if (Line->ChildrenAffected) 1397 SomeLineAffected = true; 1398 1399 // Stores whether one of the line's tokens is directly affected. 1400 bool SomeTokenAffected = false; 1401 // Stores whether we need to look at the leading newlines of the next token 1402 // in order to determine whether it was affected. 1403 bool IncludeLeadingNewlines = false; 1404 1405 // Stores whether the first child line of any of this line's tokens is 1406 // affected. 1407 bool SomeFirstChildAffected = false; 1408 1409 for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { 1410 // Determine whether 'Tok' was affected. 1411 if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) 1412 SomeTokenAffected = true; 1413 1414 // Determine whether the first child of 'Tok' was affected. 1415 if (!Tok->Children.empty() && Tok->Children.front()->Affected) 1416 SomeFirstChildAffected = true; 1417 1418 IncludeLeadingNewlines = Tok->Children.empty(); 1419 } 1420 1421 // Was this line moved, i.e. has it previously been on the same line as an 1422 // affected line? 1423 bool LineMoved = PreviousLine && PreviousLine->Affected && 1424 Line->First->NewlinesBefore == 0; 1425 1426 bool IsContinuedComment = 1427 Line->First->is(tok::comment) && Line->First->Next == nullptr && 1428 Line->First->NewlinesBefore < 2 && PreviousLine && 1429 PreviousLine->Affected && PreviousLine->Last->is(tok::comment); 1430 1431 if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || 1432 IsContinuedComment) { 1433 Line->Affected = true; 1434 SomeLineAffected = true; 1435 } 1436 return SomeLineAffected; 1437 } 1438 1439 // Marks all lines between I and E as well as all their children as affected. 
1440 void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I, 1441 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1442 while (I != E) { 1443 (*I)->Affected = true; 1444 markAllAsAffected((*I)->Children.begin(), (*I)->Children.end()); 1445 ++I; 1446 } 1447 } 1448 1449 // Returns true if the range from 'First' to 'Last' intersects with one of the 1450 // input ranges. 1451 bool affectsTokenRange(const FormatToken &First, const FormatToken &Last, 1452 bool IncludeLeadingNewlines) { 1453 SourceLocation Start = First.WhitespaceRange.getBegin(); 1454 if (!IncludeLeadingNewlines) 1455 Start = Start.getLocWithOffset(First.LastNewlineOffset); 1456 SourceLocation End = Last.getStartOfNonWhitespace(); 1457 End = End.getLocWithOffset(Last.TokenText.size()); 1458 CharSourceRange Range = CharSourceRange::getCharRange(Start, End); 1459 return affectsCharSourceRange(Range); 1460 } 1461 1462 // Returns true if one of the input ranges intersect the leading empty lines 1463 // before 'Tok'. 1464 bool affectsLeadingEmptyLines(const FormatToken &Tok) { 1465 CharSourceRange EmptyLineRange = CharSourceRange::getCharRange( 1466 Tok.WhitespaceRange.getBegin(), 1467 Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset)); 1468 return affectsCharSourceRange(EmptyLineRange); 1469 } 1470 1471 // Returns true if 'Range' intersects with one of the input ranges. 
1472 bool affectsCharSourceRange(const CharSourceRange &Range) { 1473 for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(), 1474 E = Ranges.end(); 1475 I != E; ++I) { 1476 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && 1477 !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) 1478 return true; 1479 } 1480 return false; 1481 } 1482 1483 static bool inputUsesCRLF(StringRef Text) { 1484 return Text.count('\r') * 2 > Text.count('\n'); 1485 } 1486 1487 bool 1488 hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) { 1489 for (const AnnotatedLine* Line : Lines) { 1490 if (hasCpp03IncompatibleFormat(Line->Children)) 1491 return true; 1492 for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) { 1493 if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { 1494 if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener)) 1495 return true; 1496 if (Tok->is(TT_TemplateCloser) && 1497 Tok->Previous->is(TT_TemplateCloser)) 1498 return true; 1499 } 1500 } 1501 } 1502 return false; 1503 } 1504 1505 int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) { 1506 int AlignmentDiff = 0; 1507 for (const AnnotatedLine* Line : Lines) { 1508 AlignmentDiff += countVariableAlignments(Line->Children); 1509 for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) { 1510 if (!Tok->is(TT_PointerOrReference)) 1511 continue; 1512 bool SpaceBefore = 1513 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 1514 bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() != 1515 Tok->Next->WhitespaceRange.getEnd(); 1516 if (SpaceBefore && !SpaceAfter) 1517 ++AlignmentDiff; 1518 if (!SpaceBefore && SpaceAfter) 1519 --AlignmentDiff; 1520 } 1521 } 1522 return AlignmentDiff; 1523 } 1524 1525 void 1526 deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 1527 bool HasBinPackedFunction = false; 1528 bool 
HasOnePerLineFunction = false; 1529 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1530 if (!AnnotatedLines[i]->First->Next) 1531 continue; 1532 FormatToken *Tok = AnnotatedLines[i]->First->Next; 1533 while (Tok->Next) { 1534 if (Tok->PackingKind == PPK_BinPacked) 1535 HasBinPackedFunction = true; 1536 if (Tok->PackingKind == PPK_OnePerLine) 1537 HasOnePerLineFunction = true; 1538 1539 Tok = Tok->Next; 1540 } 1541 } 1542 if (Style.DerivePointerAlignment) 1543 Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0 1544 ? FormatStyle::PAS_Left 1545 : FormatStyle::PAS_Right; 1546 if (Style.Standard == FormatStyle::LS_Auto) 1547 Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines) 1548 ? FormatStyle::LS_Cpp11 1549 : FormatStyle::LS_Cpp03; 1550 BinPackInconclusiveFunctions = 1551 HasBinPackedFunction || !HasOnePerLineFunction; 1552 } 1553 1554 void consumeUnwrappedLine(const UnwrappedLine &TheLine) override { 1555 assert(!UnwrappedLines.empty()); 1556 UnwrappedLines.back().push_back(TheLine); 1557 } 1558 1559 void finishRun() override { 1560 UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>()); 1561 } 1562 1563 FormatStyle Style; 1564 FileID ID; 1565 SourceManager &SourceMgr; 1566 WhitespaceManager Whitespaces; 1567 SmallVector<CharSourceRange, 8> Ranges; 1568 SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines; 1569 1570 encoding::Encoding Encoding; 1571 bool BinPackInconclusiveFunctions; 1572 }; 1573 1574 } // end anonymous namespace 1575 1576 tooling::Replacements reformat(const FormatStyle &Style, 1577 SourceManager &SourceMgr, FileID ID, 1578 ArrayRef<CharSourceRange> Ranges, 1579 bool *IncompleteFormat) { 1580 if (Style.DisableFormat) 1581 return tooling::Replacements(); 1582 Formatter formatter(Style, SourceMgr, ID, Ranges); 1583 return formatter.format(IncompleteFormat); 1584 } 1585 1586 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 1587 ArrayRef<tooling::Range> Ranges, 1588 
StringRef FileName, bool *IncompleteFormat) { 1589 if (Style.DisableFormat) 1590 return tooling::Replacements(); 1591 1592 FileManager Files((FileSystemOptions())); 1593 DiagnosticsEngine Diagnostics( 1594 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 1595 new DiagnosticOptions); 1596 SourceManager SourceMgr(Diagnostics, Files); 1597 std::unique_ptr<llvm::MemoryBuffer> Buf = 1598 llvm::MemoryBuffer::getMemBuffer(Code, FileName); 1599 const clang::FileEntry *Entry = 1600 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 1601 SourceMgr.overrideFileContents(Entry, std::move(Buf)); 1602 FileID ID = 1603 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 1604 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 1605 std::vector<CharSourceRange> CharRanges; 1606 for (const tooling::Range &Range : Ranges) { 1607 SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); 1608 SourceLocation End = Start.getLocWithOffset(Range.getLength()); 1609 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 1610 } 1611 return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat); 1612 } 1613 1614 LangOptions getFormattingLangOpts(const FormatStyle &Style) { 1615 LangOptions LangOpts; 1616 LangOpts.CPlusPlus = 1; 1617 LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1618 LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1619 LangOpts.LineComment = 1; 1620 bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp; 1621 LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0; 1622 LangOpts.Bool = 1; 1623 LangOpts.ObjC1 = 1; 1624 LangOpts.ObjC2 = 1; 1625 LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. 
1626 return LangOpts; 1627 } 1628 1629 const char *StyleOptionHelpDescription = 1630 "Coding style, currently supports:\n" 1631 " LLVM, Google, Chromium, Mozilla, WebKit.\n" 1632 "Use -style=file to load style configuration from\n" 1633 ".clang-format file located in one of the parent\n" 1634 "directories of the source file (or current\n" 1635 "directory for stdin).\n" 1636 "Use -style=\"{key: value, ...}\" to set specific\n" 1637 "parameters, e.g.:\n" 1638 " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; 1639 1640 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { 1641 if (FileName.endswith(".java")) { 1642 return FormatStyle::LK_Java; 1643 } else if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) { 1644 // JavaScript or TypeScript. 1645 return FormatStyle::LK_JavaScript; 1646 } else if (FileName.endswith_lower(".proto") || 1647 FileName.endswith_lower(".protodevel")) { 1648 return FormatStyle::LK_Proto; 1649 } 1650 return FormatStyle::LK_Cpp; 1651 } 1652 1653 FormatStyle getStyle(StringRef StyleName, StringRef FileName, 1654 StringRef FallbackStyle) { 1655 FormatStyle Style = getLLVMStyle(); 1656 Style.Language = getLanguageByFileName(FileName); 1657 if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { 1658 llvm::errs() << "Invalid fallback style \"" << FallbackStyle 1659 << "\" using LLVM style\n"; 1660 return Style; 1661 } 1662 1663 if (StyleName.startswith("{")) { 1664 // Parse YAML/JSON style from the command line. 
1665 if (std::error_code ec = parseConfiguration(StyleName, &Style)) { 1666 llvm::errs() << "Error parsing -style: " << ec.message() << ", using " 1667 << FallbackStyle << " style\n"; 1668 } 1669 return Style; 1670 } 1671 1672 if (!StyleName.equals_lower("file")) { 1673 if (!getPredefinedStyle(StyleName, Style.Language, &Style)) 1674 llvm::errs() << "Invalid value for -style, using " << FallbackStyle 1675 << " style\n"; 1676 return Style; 1677 } 1678 1679 // Look for .clang-format/_clang-format file in the file's parent directories. 1680 SmallString<128> UnsuitableConfigFiles; 1681 SmallString<128> Path(FileName); 1682 llvm::sys::fs::make_absolute(Path); 1683 for (StringRef Directory = Path; !Directory.empty(); 1684 Directory = llvm::sys::path::parent_path(Directory)) { 1685 if (!llvm::sys::fs::is_directory(Directory)) 1686 continue; 1687 SmallString<128> ConfigFile(Directory); 1688 1689 llvm::sys::path::append(ConfigFile, ".clang-format"); 1690 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 1691 bool IsFile = false; 1692 // Ignore errors from is_regular_file: we only need to know if we can read 1693 // the file or not. 1694 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 1695 1696 if (!IsFile) { 1697 // Try _clang-format too, since dotfiles are not commonly used on Windows. 
1698 ConfigFile = Directory; 1699 llvm::sys::path::append(ConfigFile, "_clang-format"); 1700 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 1701 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 1702 } 1703 1704 if (IsFile) { 1705 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1706 llvm::MemoryBuffer::getFile(ConfigFile.c_str()); 1707 if (std::error_code EC = Text.getError()) { 1708 llvm::errs() << EC.message() << "\n"; 1709 break; 1710 } 1711 if (std::error_code ec = 1712 parseConfiguration(Text.get()->getBuffer(), &Style)) { 1713 if (ec == ParseError::Unsuitable) { 1714 if (!UnsuitableConfigFiles.empty()) 1715 UnsuitableConfigFiles.append(", "); 1716 UnsuitableConfigFiles.append(ConfigFile); 1717 continue; 1718 } 1719 llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message() 1720 << "\n"; 1721 break; 1722 } 1723 DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); 1724 return Style; 1725 } 1726 } 1727 if (!UnsuitableConfigFiles.empty()) { 1728 llvm::errs() << "Configuration file(s) do(es) not support " 1729 << getLanguageName(Style.Language) << ": " 1730 << UnsuitableConfigFiles << "\n"; 1731 } 1732 return Style; 1733 } 1734 1735 } // namespace format 1736 } // namespace clang 1737