1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "ContinuationIndenter.h" 17 #include "TokenAnnotator.h" 18 #include "UnwrappedLineFormatter.h" 19 #include "UnwrappedLineParser.h" 20 #include "WhitespaceManager.h" 21 #include "clang/Basic/Diagnostic.h" 22 #include "clang/Basic/DiagnosticOptions.h" 23 #include "clang/Basic/SourceManager.h" 24 #include "clang/Format/Format.h" 25 #include "clang/Lex/Lexer.h" 26 #include "llvm/ADT/STLExtras.h" 27 #include "llvm/Support/Allocator.h" 28 #include "llvm/Support/Debug.h" 29 #include "llvm/Support/Path.h" 30 #include "llvm/Support/YAMLTraits.h" 31 #include <queue> 32 #include <string> 33 34 #define DEBUG_TYPE "format-formatter" 35 36 using clang::format::FormatStyle; 37 38 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) 39 40 namespace llvm { 41 namespace yaml { 42 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { 43 static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) { 44 IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); 45 IO.enumCase(Value, "Java", FormatStyle::LK_Java); 46 IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); 47 IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); 48 } 49 }; 50 51 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> { 52 static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) { 53 IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03); 54 IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03); 55 IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11); 56 IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11); 57 IO.enumCase(Value, "Auto", FormatStyle::LS_Auto); 58 } 59 }; 60 61 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> { 62 static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) { 63 IO.enumCase(Value, "Never", FormatStyle::UT_Never); 64 IO.enumCase(Value, "false", FormatStyle::UT_Never); 65 IO.enumCase(Value, "Always", FormatStyle::UT_Always); 66 IO.enumCase(Value, "true", FormatStyle::UT_Always); 67 IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation); 68 } 69 }; 70 71 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> { 72 static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) { 73 IO.enumCase(Value, "None", FormatStyle::SFS_None); 74 IO.enumCase(Value, "false", FormatStyle::SFS_None); 75 IO.enumCase(Value, "All", FormatStyle::SFS_All); 76 IO.enumCase(Value, "true", FormatStyle::SFS_All); 77 IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline); 78 IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty); 79 } 80 }; 81 82 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> { 83 static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) { 84 IO.enumCase(Value, "All", FormatStyle::BOS_All); 85 IO.enumCase(Value, "true", FormatStyle::BOS_All); 86 IO.enumCase(Value, "None", FormatStyle::BOS_None); 87 IO.enumCase(Value, "false", FormatStyle::BOS_None); 88 IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment); 89 } 90 }; 91 92 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> { 93 static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) { 94 IO.enumCase(Value, "Attach", FormatStyle::BS_Attach); 95 IO.enumCase(Value, "Linux", FormatStyle::BS_Linux); 96 IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup); 97 IO.enumCase(Value, "Allman", FormatStyle::BS_Allman); 98 IO.enumCase(Value, "GNU", FormatStyle::BS_GNU); 99 } 100 }; 101 102 template <> struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> { 103 static void enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) { 104 IO.enumCase(Value, "None", FormatStyle::DRTBS_None); 105 IO.enumCase(Value, "All", FormatStyle::DRTBS_All); 106 IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel); 107 108 // For backward compatibility. 109 IO.enumCase(Value, "false", FormatStyle::DRTBS_None); 110 IO.enumCase(Value, "true", FormatStyle::DRTBS_All); 111 } 112 }; 113 114 template <> 115 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { 116 static void enumeration(IO &IO, 117 FormatStyle::NamespaceIndentationKind &Value) { 118 IO.enumCase(Value, "None", FormatStyle::NI_None); 119 IO.enumCase(Value, "Inner", FormatStyle::NI_Inner); 120 IO.enumCase(Value, "All", FormatStyle::NI_All); 121 } 122 }; 123 124 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { 125 static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) { 126 IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); 127 IO.enumCase(Value, "Left", FormatStyle::PAS_Left); 128 IO.enumCase(Value, "Right", FormatStyle::PAS_Right); 129 130 // For backward compatibility. 131 IO.enumCase(Value, "true", FormatStyle::PAS_Left); 132 IO.enumCase(Value, "false", FormatStyle::PAS_Right); 133 } 134 }; 135 136 template <> 137 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> { 138 static void enumeration(IO &IO, 139 FormatStyle::SpaceBeforeParensOptions &Value) { 140 IO.enumCase(Value, "Never", FormatStyle::SBPO_Never); 141 IO.enumCase(Value, "ControlStatements", 142 FormatStyle::SBPO_ControlStatements); 143 IO.enumCase(Value, "Always", FormatStyle::SBPO_Always); 144 145 // For backward compatibility. 146 IO.enumCase(Value, "false", FormatStyle::SBPO_Never); 147 IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements); 148 } 149 }; 150 151 template <> struct MappingTraits<FormatStyle> { 152 static void mapping(IO &IO, FormatStyle &Style) { 153 // When reading, read the language first, we need it for getPredefinedStyle. 154 IO.mapOptional("Language", Style.Language); 155 156 if (IO.outputting()) { 157 StringRef StylesArray[] = {"LLVM", "Google", "Chromium", 158 "Mozilla", "WebKit", "GNU"}; 159 ArrayRef<StringRef> Styles(StylesArray); 160 for (size_t i = 0, e = Styles.size(); i < e; ++i) { 161 StringRef StyleName(Styles[i]); 162 FormatStyle PredefinedStyle; 163 if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) && 164 Style == PredefinedStyle) { 165 IO.mapOptional("# BasedOnStyle", StyleName); 166 break; 167 } 168 } 169 } else { 170 StringRef BasedOnStyle; 171 IO.mapOptional("BasedOnStyle", BasedOnStyle); 172 if (!BasedOnStyle.empty()) { 173 FormatStyle::LanguageKind OldLanguage = Style.Language; 174 FormatStyle::LanguageKind Language = 175 ((FormatStyle *)IO.getContext())->Language; 176 if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) { 177 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); 178 return; 179 } 180 Style.Language = OldLanguage; 181 } 182 } 183 184 // For backward compatibility. 185 if (!IO.outputting()) { 186 IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment); 187 IO.mapOptional("IndentFunctionDeclarationAfterType", 188 Style.IndentWrappedFunctionNames); 189 IO.mapOptional("PointerBindsToType", Style.PointerAlignment); 190 IO.mapOptional("SpaceAfterControlStatementKeyword", 191 Style.SpaceBeforeParens); 192 } 193 194 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); 195 IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); 196 IO.mapOptional("AlignConsecutiveAssignments", 197 Style.AlignConsecutiveAssignments); 198 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); 199 IO.mapOptional("AlignOperands", Style.AlignOperands); 200 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); 201 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", 202 Style.AllowAllParametersOfDeclarationOnNextLine); 203 IO.mapOptional("AllowShortBlocksOnASingleLine", 204 Style.AllowShortBlocksOnASingleLine); 205 IO.mapOptional("AllowShortCaseLabelsOnASingleLine", 206 Style.AllowShortCaseLabelsOnASingleLine); 207 IO.mapOptional("AllowShortFunctionsOnASingleLine", 208 Style.AllowShortFunctionsOnASingleLine); 209 IO.mapOptional("AllowShortIfStatementsOnASingleLine", 210 Style.AllowShortIfStatementsOnASingleLine); 211 IO.mapOptional("AllowShortLoopsOnASingleLine", 212 Style.AllowShortLoopsOnASingleLine); 213 IO.mapOptional("AlwaysBreakAfterDefinitionReturnType", 214 Style.AlwaysBreakAfterDefinitionReturnType); 215 IO.mapOptional("AlwaysBreakBeforeMultilineStrings", 216 Style.AlwaysBreakBeforeMultilineStrings); 217 IO.mapOptional("AlwaysBreakTemplateDeclarations", 218 Style.AlwaysBreakTemplateDeclarations); 219 IO.mapOptional("BinPackArguments", Style.BinPackArguments); 220 IO.mapOptional("BinPackParameters", Style.BinPackParameters); 221 IO.mapOptional("BreakBeforeBinaryOperators", 222 Style.BreakBeforeBinaryOperators); 223 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); 224 IO.mapOptional("BreakBeforeTernaryOperators", 225 Style.BreakBeforeTernaryOperators); 226 IO.mapOptional("BreakConstructorInitializersBeforeComma", 227 Style.BreakConstructorInitializersBeforeComma); 228 IO.mapOptional("ColumnLimit", Style.ColumnLimit); 229 IO.mapOptional("CommentPragmas", Style.CommentPragmas); 230 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", 231 Style.ConstructorInitializerAllOnOneLineOrOnePerLine); 232 IO.mapOptional("ConstructorInitializerIndentWidth", 233 Style.ConstructorInitializerIndentWidth); 234 IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth); 235 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); 236 IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment); 237 IO.mapOptional("DisableFormat", Style.DisableFormat); 238 IO.mapOptional("ExperimentalAutoDetectBinPacking", 239 Style.ExperimentalAutoDetectBinPacking); 240 IO.mapOptional("ForEachMacros", Style.ForEachMacros); 241 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); 242 IO.mapOptional("IndentWidth", Style.IndentWidth); 243 IO.mapOptional("IndentWrappedFunctionNames", 244 Style.IndentWrappedFunctionNames); 245 IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks", 246 Style.KeepEmptyLinesAtTheStartOfBlocks); 247 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); 248 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); 249 IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth); 250 IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); 251 IO.mapOptional("ObjCSpaceBeforeProtocolList", 252 Style.ObjCSpaceBeforeProtocolList); 253 IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", 254 Style.PenaltyBreakBeforeFirstCallParameter); 255 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); 256 IO.mapOptional("PenaltyBreakFirstLessLess", 257 Style.PenaltyBreakFirstLessLess); 258 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); 259 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); 260 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", 261 Style.PenaltyReturnTypeOnItsOwnLine); 262 IO.mapOptional("PointerAlignment", Style.PointerAlignment); 263 IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); 264 IO.mapOptional("SpaceBeforeAssignmentOperators", 265 Style.SpaceBeforeAssignmentOperators); 266 IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); 267 IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses); 268 IO.mapOptional("SpacesBeforeTrailingComments", 269 Style.SpacesBeforeTrailingComments); 270 IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); 271 IO.mapOptional("SpacesInContainerLiterals", 272 Style.SpacesInContainerLiterals); 273 IO.mapOptional("SpacesInCStyleCastParentheses", 274 Style.SpacesInCStyleCastParentheses); 275 IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses); 276 IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets); 277 IO.mapOptional("Standard", Style.Standard); 278 IO.mapOptional("TabWidth", Style.TabWidth); 279 IO.mapOptional("UseTab", Style.UseTab); 280 } 281 }; 282 283 // Allows to read vector<FormatStyle> while keeping default values. 284 // IO.getContext() should contain a pointer to the FormatStyle structure, that 285 // will be used to get default values for missing keys. 286 // If the first element has no Language specified, it will be treated as the 287 // default one for the following elements. 288 template <> struct DocumentListTraits<std::vector<FormatStyle>> { 289 static size_t size(IO &IO, std::vector<FormatStyle> &Seq) { 290 return Seq.size(); 291 } 292 static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq, 293 size_t Index) { 294 if (Index >= Seq.size()) { 295 assert(Index == Seq.size()); 296 FormatStyle Template; 297 if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) { 298 Template = Seq[0]; 299 } else { 300 Template = *((const FormatStyle *)IO.getContext()); 301 Template.Language = FormatStyle::LK_None; 302 } 303 Seq.resize(Index + 1, Template); 304 } 305 return Seq[Index]; 306 } 307 }; 308 } 309 } 310 311 namespace clang { 312 namespace format { 313 314 const std::error_category &getParseCategory() { 315 static ParseErrorCategory C; 316 return C; 317 } 318 std::error_code make_error_code(ParseError e) { 319 return std::error_code(static_cast<int>(e), getParseCategory()); 320 } 321 322 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT { 323 return "clang-format.parse_error"; 324 } 325 326 std::string ParseErrorCategory::message(int EV) const { 327 switch (static_cast<ParseError>(EV)) { 328 case ParseError::Success: 329 return "Success"; 330 case ParseError::Error: 331 return "Invalid argument"; 332 case ParseError::Unsuitable: 333 return "Unsuitable"; 334 } 335 llvm_unreachable("unexpected parse error"); 336 } 337 338 FormatStyle getLLVMStyle() { 339 FormatStyle LLVMStyle; 340 LLVMStyle.Language = FormatStyle::LK_Cpp; 341 LLVMStyle.AccessModifierOffset = -2; 342 LLVMStyle.AlignEscapedNewlinesLeft = false; 343 LLVMStyle.AlignAfterOpenBracket = true; 344 LLVMStyle.AlignOperands = true; 345 LLVMStyle.AlignTrailingComments = true; 346 LLVMStyle.AlignConsecutiveAssignments = false; 347 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 348 LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; 349 LLVMStyle.AllowShortBlocksOnASingleLine = false; 350 LLVMStyle.AllowShortCaseLabelsOnASingleLine = false; 351 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 352 LLVMStyle.AllowShortLoopsOnASingleLine = false; 353 LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; 354 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 355 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 356 LLVMStyle.BinPackParameters = true; 357 LLVMStyle.BinPackArguments = true; 358 LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; 359 LLVMStyle.BreakBeforeTernaryOperators = true; 360 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 361 LLVMStyle.BreakConstructorInitializersBeforeComma = false; 362 LLVMStyle.ColumnLimit = 80; 363 LLVMStyle.CommentPragmas = "^ IWYU pragma:"; 364 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 365 LLVMStyle.ConstructorInitializerIndentWidth = 4; 366 LLVMStyle.ContinuationIndentWidth = 4; 367 LLVMStyle.Cpp11BracedListStyle = true; 368 LLVMStyle.DerivePointerAlignment = false; 369 LLVMStyle.ExperimentalAutoDetectBinPacking = false; 370 LLVMStyle.ForEachMacros.push_back("foreach"); 371 LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); 372 LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); 373 LLVMStyle.IndentCaseLabels = false; 374 LLVMStyle.IndentWrappedFunctionNames = false; 375 LLVMStyle.IndentWidth = 2; 376 LLVMStyle.TabWidth = 8; 377 LLVMStyle.MaxEmptyLinesToKeep = 1; 378 LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; 379 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; 380 LLVMStyle.ObjCBlockIndentWidth = 2; 381 LLVMStyle.ObjCSpaceAfterProperty = false; 382 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 383 LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; 384 LLVMStyle.SpacesBeforeTrailingComments = 1; 385 LLVMStyle.Standard = FormatStyle::LS_Cpp11; 386 LLVMStyle.UseTab = FormatStyle::UT_Never; 387 LLVMStyle.SpacesInParentheses = false; 388 LLVMStyle.SpacesInSquareBrackets = false; 389 LLVMStyle.SpaceInEmptyParentheses = false; 390 LLVMStyle.SpacesInContainerLiterals = true; 391 LLVMStyle.SpacesInCStyleCastParentheses = false; 392 LLVMStyle.SpaceAfterCStyleCast = false; 393 LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; 394 LLVMStyle.SpaceBeforeAssignmentOperators = true; 395 LLVMStyle.SpacesInAngles = false; 396 397 LLVMStyle.PenaltyBreakComment = 300; 398 LLVMStyle.PenaltyBreakFirstLessLess = 120; 399 LLVMStyle.PenaltyBreakString = 1000; 400 LLVMStyle.PenaltyExcessCharacter = 1000000; 401 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 402 LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; 403 404 LLVMStyle.DisableFormat = false; 405 406 return LLVMStyle; 407 } 408 409 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { 410 FormatStyle GoogleStyle = getLLVMStyle(); 411 GoogleStyle.Language = Language; 412 413 GoogleStyle.AccessModifierOffset = -1; 414 GoogleStyle.AlignEscapedNewlinesLeft = true; 415 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 416 GoogleStyle.AllowShortLoopsOnASingleLine = true; 417 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 418 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 419 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 420 GoogleStyle.DerivePointerAlignment = true; 421 GoogleStyle.IndentCaseLabels = true; 422 GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; 423 GoogleStyle.ObjCSpaceAfterProperty = false; 424 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 425 GoogleStyle.PointerAlignment = FormatStyle::PAS_Left; 426 GoogleStyle.SpacesBeforeTrailingComments = 2; 427 GoogleStyle.Standard = FormatStyle::LS_Auto; 428 429 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 430 GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; 431 432 if (Language == FormatStyle::LK_Java) { 433 GoogleStyle.AlignAfterOpenBracket = false; 434 GoogleStyle.AlignOperands = false; 435 GoogleStyle.AlignTrailingComments = false; 436 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; 437 GoogleStyle.AllowShortIfStatementsOnASingleLine = false; 438 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; 439 GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; 440 GoogleStyle.ColumnLimit = 100; 441 GoogleStyle.SpaceAfterCStyleCast = true; 442 GoogleStyle.SpacesBeforeTrailingComments = 1; 443 } else if (Language == FormatStyle::LK_JavaScript) { 444 GoogleStyle.BreakBeforeTernaryOperators = false; 445 GoogleStyle.MaxEmptyLinesToKeep = 3; 446 GoogleStyle.SpacesInContainerLiterals = false; 447 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 448 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; 449 } else if (Language == FormatStyle::LK_Proto) { 450 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; 451 GoogleStyle.SpacesInContainerLiterals = false; 452 } 453 454 return GoogleStyle; 455 } 456 457 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { 458 FormatStyle ChromiumStyle = getGoogleStyle(Language); 459 if (Language == FormatStyle::LK_Java) { 460 ChromiumStyle.AllowShortIfStatementsOnASingleLine = true; 461 ChromiumStyle.IndentWidth = 4; 462 ChromiumStyle.ContinuationIndentWidth = 8; 463 } else { 464 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 465 ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 466 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 467 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 468 ChromiumStyle.BinPackParameters = false; 469 ChromiumStyle.DerivePointerAlignment = false; 470 } 471 return ChromiumStyle; 472 } 473 474 FormatStyle getMozillaStyle() { 475 FormatStyle MozillaStyle = getLLVMStyle(); 476 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 477 MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 478 MozillaStyle.AlwaysBreakAfterDefinitionReturnType = 479 FormatStyle::DRTBS_TopLevel; 480 MozillaStyle.AlwaysBreakTemplateDeclarations = true; 481 MozillaStyle.BreakConstructorInitializersBeforeComma = true; 482 MozillaStyle.ConstructorInitializerIndentWidth = 2; 483 MozillaStyle.ContinuationIndentWidth = 2; 484 MozillaStyle.Cpp11BracedListStyle = false; 485 MozillaStyle.IndentCaseLabels = true; 486 MozillaStyle.ObjCSpaceAfterProperty = true; 487 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 488 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 489 MozillaStyle.PointerAlignment = FormatStyle::PAS_Left; 490 return MozillaStyle; 491 } 492 493 FormatStyle getWebKitStyle() { 494 FormatStyle Style = getLLVMStyle(); 495 Style.AccessModifierOffset = -4; 496 Style.AlignAfterOpenBracket = false; 497 Style.AlignOperands = false; 498 Style.AlignTrailingComments = false; 499 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 500 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; 501 Style.BreakConstructorInitializersBeforeComma = true; 502 Style.Cpp11BracedListStyle = false; 503 Style.ColumnLimit = 0; 504 Style.IndentWidth = 4; 505 Style.NamespaceIndentation = FormatStyle::NI_Inner; 506 Style.ObjCBlockIndentWidth = 4; 507 Style.ObjCSpaceAfterProperty = true; 508 Style.PointerAlignment = FormatStyle::PAS_Left; 509 Style.Standard = FormatStyle::LS_Cpp03; 510 return Style; 511 } 512 513 FormatStyle getGNUStyle() { 514 FormatStyle Style = getLLVMStyle(); 515 Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All; 516 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 517 Style.BreakBeforeBraces = FormatStyle::BS_GNU; 518 Style.BreakBeforeTernaryOperators = true; 519 Style.Cpp11BracedListStyle = false; 520 Style.ColumnLimit = 79; 521 Style.SpaceBeforeParens = FormatStyle::SBPO_Always; 522 Style.Standard = FormatStyle::LS_Cpp03; 523 return Style; 524 } 525 526 FormatStyle getNoStyle() { 527 FormatStyle NoStyle = getLLVMStyle(); 528 NoStyle.DisableFormat = true; 529 return NoStyle; 530 } 531 532 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, 533 FormatStyle *Style) { 534 if (Name.equals_lower("llvm")) { 535 *Style = getLLVMStyle(); 536 } else if (Name.equals_lower("chromium")) { 537 *Style = getChromiumStyle(Language); 538 } else if (Name.equals_lower("mozilla")) { 539 *Style = getMozillaStyle(); 540 } else if (Name.equals_lower("google")) { 541 *Style = getGoogleStyle(Language); 542 } else if (Name.equals_lower("webkit")) { 543 *Style = getWebKitStyle(); 544 } else if (Name.equals_lower("gnu")) { 545 *Style = getGNUStyle(); 546 } else if (Name.equals_lower("none")) { 547 *Style = getNoStyle(); 548 } else { 549 return false; 550 } 551 552 Style->Language = Language; 553 return true; 554 } 555 556 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 557 assert(Style); 558 FormatStyle::LanguageKind Language = Style->Language; 559 assert(Language != FormatStyle::LK_None); 560 if (Text.trim().empty()) 561 return make_error_code(ParseError::Error); 562 563 std::vector<FormatStyle> Styles; 564 llvm::yaml::Input Input(Text); 565 // DocumentListTraits<vector<FormatStyle>> uses the context to get default 566 // values for the fields, keys for which are missing from the configuration. 567 // Mapping also uses the context to get the language to find the correct 568 // base style. 569 Input.setContext(Style); 570 Input >> Styles; 571 if (Input.error()) 572 return Input.error(); 573 574 for (unsigned i = 0; i < Styles.size(); ++i) { 575 // Ensures that only the first configuration can skip the Language option. 576 if (Styles[i].Language == FormatStyle::LK_None && i != 0) 577 return make_error_code(ParseError::Error); 578 // Ensure that each language is configured at most once. 579 for (unsigned j = 0; j < i; ++j) { 580 if (Styles[i].Language == Styles[j].Language) { 581 DEBUG(llvm::dbgs() 582 << "Duplicate languages in the config file on positions " << j 583 << " and " << i << "\n"); 584 return make_error_code(ParseError::Error); 585 } 586 } 587 } 588 // Look for a suitable configuration starting from the end, so we can 589 // find the configuration for the specific language first, and the default 590 // configuration (which can only be at slot 0) after it. 591 for (int i = Styles.size() - 1; i >= 0; --i) { 592 if (Styles[i].Language == Language || 593 Styles[i].Language == FormatStyle::LK_None) { 594 *Style = Styles[i]; 595 Style->Language = Language; 596 return make_error_code(ParseError::Success); 597 } 598 } 599 return make_error_code(ParseError::Unsuitable); 600 } 601 602 std::string configurationAsText(const FormatStyle &Style) { 603 std::string Text; 604 llvm::raw_string_ostream Stream(Text); 605 llvm::yaml::Output Output(Stream); 606 // We use the same mapping method for input and output, so we need a non-const 607 // reference here. 608 FormatStyle NonConstStyle = Style; 609 Output << NonConstStyle; 610 return Stream.str(); 611 } 612 613 namespace { 614 615 class FormatTokenLexer { 616 public: 617 FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style, 618 encoding::Encoding Encoding) 619 : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), 620 LessStashed(false), Column(0), TrailingWhitespace(0), 621 SourceMgr(SourceMgr), ID(ID), Style(Style), 622 IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), 623 Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false) { 624 Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, 625 getFormattingLangOpts(Style))); 626 Lex->SetKeepWhitespaceMode(true); 627 628 for (const std::string &ForEachMacro : Style.ForEachMacros) 629 ForEachMacros.push_back(&IdentTable.get(ForEachMacro)); 630 std::sort(ForEachMacros.begin(), ForEachMacros.end()); 631 } 632 633 ArrayRef<FormatToken *> lex() { 634 assert(Tokens.empty()); 635 assert(FirstInLineIndex == 0); 636 do { 637 Tokens.push_back(getNextToken()); 638 tryMergePreviousTokens(); 639 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) 640 FirstInLineIndex = Tokens.size() - 1; 641 } while (Tokens.back()->Tok.isNot(tok::eof)); 642 return Tokens; 643 } 644 645 const AdditionalKeywords &getKeywords() { return Keywords; } 646 647 private: 648 void tryMergePreviousTokens() { 649 if (tryMerge_TMacro()) 650 return; 651 if (tryMergeConflictMarkers()) 652 return; 653 if (tryMergeLessLess()) 654 return; 655 656 if (Style.Language == FormatStyle::LK_JavaScript) { 657 if (tryMergeJSRegexLiteral()) 658 return; 659 if (tryMergeEscapeSequence()) 660 return; 661 if (tryMergeTemplateString()) 662 return; 663 664 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; 665 static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal, 666 tok::equal}; 667 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater, 668 tok::greaterequal}; 669 static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater}; 670 // FIXME: Investigate what token type gives the correct operator priority. 671 if (tryMergeTokens(JSIdentity, TT_BinaryOperator)) 672 return; 673 if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator)) 674 return; 675 if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator)) 676 return; 677 if (tryMergeTokens(JSRightArrow, TT_JsFatArrow)) 678 return; 679 } 680 } 681 682 bool tryMergeLessLess() { 683 // Merge X,less,less,Y into X,lessless,Y unless X or Y is less. 684 if (Tokens.size() < 3) 685 return false; 686 687 bool FourthTokenIsLess = false; 688 if (Tokens.size() > 3) 689 FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less); 690 691 auto First = Tokens.end() - 3; 692 if (First[2]->is(tok::less) || First[1]->isNot(tok::less) || 693 First[0]->isNot(tok::less) || FourthTokenIsLess) 694 return false; 695 696 // Only merge if there currently is no whitespace between the two "<". 697 if (First[1]->WhitespaceRange.getBegin() != 698 First[1]->WhitespaceRange.getEnd()) 699 return false; 700 701 First[0]->Tok.setKind(tok::lessless); 702 First[0]->TokenText = "<<"; 703 First[0]->ColumnWidth += 1; 704 Tokens.erase(Tokens.end() - 2); 705 return true; 706 } 707 708 bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) { 709 if (Tokens.size() < Kinds.size()) 710 return false; 711 712 SmallVectorImpl<FormatToken *>::const_iterator First = 713 Tokens.end() - Kinds.size(); 714 if (!First[0]->is(Kinds[0])) 715 return false; 716 unsigned AddLength = 0; 717 for (unsigned i = 1; i < Kinds.size(); ++i) { 718 if (!First[i]->is(Kinds[i]) || 719 First[i]->WhitespaceRange.getBegin() != 720 First[i]->WhitespaceRange.getEnd()) 721 return false; 722 AddLength += First[i]->TokenText.size(); 723 } 724 Tokens.resize(Tokens.size() - Kinds.size() + 1); 725 First[0]->TokenText = StringRef(First[0]->TokenText.data(), 726 First[0]->TokenText.size() + AddLength); 727 First[0]->ColumnWidth += AddLength; 728 First[0]->Type = NewType; 729 return true; 730 } 731 732 // Tries to merge an escape sequence, i.e. a "\\" and the following 733 // character. Use e.g. inside JavaScript regex literals. 734 bool tryMergeEscapeSequence() { 735 if (Tokens.size() < 2) 736 return false; 737 FormatToken *Previous = Tokens[Tokens.size() - 2]; 738 if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\") 739 return false; 740 ++Previous->ColumnWidth; 741 StringRef Text = Previous->TokenText; 742 Previous->TokenText = StringRef(Text.data(), Text.size() + 1); 743 resetLexer(SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 1); 744 Tokens.resize(Tokens.size() - 1); 745 Column = Previous->OriginalColumn + Previous->ColumnWidth; 746 return true; 747 } 748 749 // Try to determine whether the current token ends a JavaScript regex literal. 750 // We heuristically assume that this is a regex literal if we find two 751 // unescaped slashes on a line and the token before the first slash is one of 752 // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by 753 // a division. 754 bool tryMergeJSRegexLiteral() { 755 if (Tokens.size() < 2) 756 return false; 757 758 // If this is a string literal with a slash inside, compute the slash's 759 // offset and try to find the beginning of the regex literal. 760 // Also look at tok::unknown, as it can be an unterminated char literal. 761 size_t SlashInStringPos = StringRef::npos; 762 if (Tokens.back()->isOneOf(tok::string_literal, tok::char_constant, 763 tok::unknown)) { 764 // Start search from position 1 as otherwise, this is an unknown token 765 // for an unterminated /*-comment which is handled elsewhere. 766 SlashInStringPos = Tokens.back()->TokenText.find('/', 1); 767 if (SlashInStringPos == StringRef::npos) 768 return false; 769 } 770 771 // If a regex literal ends in "\//", this gets represented by an unknown 772 // token "\" and a comment. 773 bool MightEndWithEscapedSlash = 774 Tokens.back()->is(tok::comment) && 775 Tokens.back()->TokenText.startswith("//") && 776 Tokens[Tokens.size() - 2]->TokenText == "\\"; 777 if (!MightEndWithEscapedSlash && SlashInStringPos == StringRef::npos && 778 (Tokens.back()->isNot(tok::slash) || 779 (Tokens[Tokens.size() - 2]->is(tok::unknown) && 780 Tokens[Tokens.size() - 2]->TokenText == "\\"))) 781 return false; 782 783 unsigned TokenCount = 0; 784 for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) { 785 ++TokenCount; 786 if (I[0]->isOneOf(tok::slash, tok::slashequal) && I + 1 != E && 787 (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace, 788 tok::exclaim, tok::l_square, tok::colon, tok::comma, 789 tok::question, tok::kw_return) || 790 I[1]->isBinaryOperator())) { 791 unsigned LastColumn = Tokens.back()->OriginalColumn; 792 SourceLocation Loc = Tokens.back()->Tok.getLocation(); 793 if (MightEndWithEscapedSlash) { 794 // This regex literal ends in '\//'. Skip past the '//' of the last 795 // token and re-start lexing from there. 796 resetLexer(SourceMgr.getFileOffset(Loc) + 2); 797 } else if (SlashInStringPos != StringRef::npos) { 798 // This regex literal ends in a string_literal with a slash inside. 799 // Calculate end column and reset lexer appropriately. 800 resetLexer(SourceMgr.getFileOffset(Loc) + SlashInStringPos + 1); 801 LastColumn += SlashInStringPos; 802 } 803 Tokens.resize(Tokens.size() - TokenCount); 804 Tokens.back()->Tok.setKind(tok::unknown); 805 Tokens.back()->Type = TT_RegexLiteral; 806 Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn; 807 return true; 808 } 809 810 // There can't be a newline inside a regex literal. 811 if (I[0]->NewlinesBefore > 0) 812 return false; 813 } 814 return false; 815 } 816 817 bool tryMergeTemplateString() { 818 if (Tokens.size() < 2) 819 return false; 820 821 FormatToken *EndBacktick = Tokens.back(); 822 // Backticks get lexed as tok::unknown tokens. If a template string contains 823 // a comment start, it gets lexed as a tok::comment, or tok::unknown if 824 // unterminated. 825 if (!EndBacktick->isOneOf(tok::comment, tok::string_literal, 826 tok::char_constant, tok::unknown)) 827 return false; 828 size_t CommentBacktickPos = EndBacktick->TokenText.find('`'); 829 // Unknown token that's not actually a backtick, or a comment that doesn't 830 // contain a backtick. 831 if (CommentBacktickPos == StringRef::npos) 832 return false; 833 834 unsigned TokenCount = 0; 835 bool IsMultiline = false; 836 unsigned EndColumnInFirstLine = 837 EndBacktick->OriginalColumn + EndBacktick->ColumnWidth; 838 for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) { 839 ++TokenCount; 840 if (I[0]->IsMultiline) 841 IsMultiline = true; 842 843 // If there was a preceding template string, this must be the start of a 844 // template string, not the end. 845 if (I[0]->is(TT_TemplateString)) 846 return false; 847 848 if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") { 849 // Keep track of the rhs offset of the last token to wrap across lines - 850 // its the rhs offset of the first line of the template string, used to 851 // determine its width. 852 if (I[0]->IsMultiline) 853 EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth; 854 // If the token has newlines, the token before it (if it exists) is the 855 // rhs end of the previous line. 856 if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) { 857 EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth; 858 IsMultiline = true; 859 } 860 continue; 861 } 862 863 Tokens.resize(Tokens.size() - TokenCount); 864 Tokens.back()->Type = TT_TemplateString; 865 const char *EndOffset = 866 EndBacktick->TokenText.data() + 1 + CommentBacktickPos; 867 if (CommentBacktickPos != 0) { 868 // If the backtick was not the first character (e.g. in a comment), 869 // re-lex after the backtick position. 870 SourceLocation Loc = EndBacktick->Tok.getLocation(); 871 resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1); 872 } 873 Tokens.back()->TokenText = 874 StringRef(Tokens.back()->TokenText.data(), 875 EndOffset - Tokens.back()->TokenText.data()); 876 877 unsigned EndOriginalColumn = EndBacktick->OriginalColumn; 878 if (EndOriginalColumn == 0) { 879 SourceLocation Loc = EndBacktick->Tok.getLocation(); 880 EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc); 881 } 882 // If the ` is further down within the token (e.g. in a comment). 883 EndOriginalColumn += CommentBacktickPos; 884 885 if (IsMultiline) { 886 // ColumnWidth is from backtick to last token in line. 887 // LastLineColumnWidth is 0 to backtick. 888 // x = `some content 889 // until here`; 890 Tokens.back()->ColumnWidth = 891 EndColumnInFirstLine - Tokens.back()->OriginalColumn; 892 // +1 for the ` itself. 893 Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1; 894 Tokens.back()->IsMultiline = true; 895 } else { 896 // Token simply spans from start to end, +1 for the ` itself. 897 Tokens.back()->ColumnWidth = 898 EndOriginalColumn - Tokens.back()->OriginalColumn + 1; 899 } 900 return true; 901 } 902 return false; 903 } 904 905 bool tryMerge_TMacro() { 906 if (Tokens.size() < 4) 907 return false; 908 FormatToken *Last = Tokens.back(); 909 if (!Last->is(tok::r_paren)) 910 return false; 911 912 FormatToken *String = Tokens[Tokens.size() - 2]; 913 if (!String->is(tok::string_literal) || String->IsMultiline) 914 return false; 915 916 if (!Tokens[Tokens.size() - 3]->is(tok::l_paren)) 917 return false; 918 919 FormatToken *Macro = Tokens[Tokens.size() - 4]; 920 if (Macro->TokenText != "_T") 921 return false; 922 923 const char *Start = Macro->TokenText.data(); 924 const char *End = Last->TokenText.data() + Last->TokenText.size(); 925 String->TokenText = StringRef(Start, End - Start); 926 String->IsFirst = Macro->IsFirst; 927 String->LastNewlineOffset = Macro->LastNewlineOffset; 928 String->WhitespaceRange = Macro->WhitespaceRange; 929 String->OriginalColumn = Macro->OriginalColumn; 930 String->ColumnWidth = encoding::columnWidthWithTabs( 931 String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); 932 String->NewlinesBefore = Macro->NewlinesBefore; 933 String->HasUnescapedNewline = Macro->HasUnescapedNewline; 934 935 Tokens.pop_back(); 936 Tokens.pop_back(); 937 Tokens.pop_back(); 938 Tokens.back() = String; 939 return true; 940 } 941 942 bool tryMergeConflictMarkers() { 943 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof)) 944 return false; 945 946 // Conflict lines look like: 947 // <marker> <text from the vcs> 948 // For example: 949 // >>>>>>> /file/in/file/system at revision 1234 950 // 951 // We merge all tokens in a line that starts with a conflict marker 952 // into a single token with a special token type that the unwrapped line 953 // parser will use to correctly rebuild the underlying code. 954 955 FileID ID; 956 // Get the position of the first token in the line. 957 unsigned FirstInLineOffset; 958 std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc( 959 Tokens[FirstInLineIndex]->getStartOfNonWhitespace()); 960 StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer(); 961 // Calculate the offset of the start of the current line. 962 auto LineOffset = Buffer.rfind('\n', FirstInLineOffset); 963 if (LineOffset == StringRef::npos) { 964 LineOffset = 0; 965 } else { 966 ++LineOffset; 967 } 968 969 auto FirstSpace = Buffer.find_first_of(" \n", LineOffset); 970 StringRef LineStart; 971 if (FirstSpace == StringRef::npos) { 972 LineStart = Buffer.substr(LineOffset); 973 } else { 974 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset); 975 } 976 977 TokenType Type = TT_Unknown; 978 if (LineStart == "<<<<<<<" || LineStart == ">>>>") { 979 Type = TT_ConflictStart; 980 } else if (LineStart == "|||||||" || LineStart == "=======" || 981 LineStart == "====") { 982 Type = TT_ConflictAlternative; 983 } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") { 984 Type = TT_ConflictEnd; 985 } 986 987 if (Type != TT_Unknown) { 988 FormatToken *Next = Tokens.back(); 989 990 Tokens.resize(FirstInLineIndex + 1); 991 // We do not need to build a complete token here, as we will skip it 992 // during parsing anyway (as we must not touch whitespace around conflict 993 // markers). 994 Tokens.back()->Type = Type; 995 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype); 996 997 Tokens.push_back(Next); 998 return true; 999 } 1000 1001 return false; 1002 } 1003 1004 FormatToken *getStashedToken() { 1005 // Create a synthesized second '>' or '<' token. 1006 Token Tok = FormatTok->Tok; 1007 StringRef TokenText = FormatTok->TokenText; 1008 1009 unsigned OriginalColumn = FormatTok->OriginalColumn; 1010 FormatTok = new (Allocator.Allocate()) FormatToken; 1011 FormatTok->Tok = Tok; 1012 SourceLocation TokLocation = 1013 FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1); 1014 FormatTok->Tok.setLocation(TokLocation); 1015 FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation); 1016 FormatTok->TokenText = TokenText; 1017 FormatTok->ColumnWidth = 1; 1018 FormatTok->OriginalColumn = OriginalColumn + 1; 1019 1020 return FormatTok; 1021 } 1022 1023 FormatToken *getNextToken() { 1024 if (GreaterStashed) { 1025 GreaterStashed = false; 1026 return getStashedToken(); 1027 } 1028 if (LessStashed) { 1029 LessStashed = false; 1030 return getStashedToken(); 1031 } 1032 1033 FormatTok = new (Allocator.Allocate()) FormatToken; 1034 readRawToken(*FormatTok); 1035 SourceLocation WhitespaceStart = 1036 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); 1037 FormatTok->IsFirst = IsFirstToken; 1038 IsFirstToken = false; 1039 1040 // Consume and record whitespace until we find a significant token. 1041 unsigned WhitespaceLength = TrailingWhitespace; 1042 while (FormatTok->Tok.is(tok::unknown)) { 1043 StringRef Text = FormatTok->TokenText; 1044 auto EscapesNewline = [&](int pos) { 1045 // A '\r' here is just part of '\r\n'. Skip it. 1046 if (pos >= 0 && Text[pos] == '\r') 1047 --pos; 1048 // See whether there is an odd number of '\' before this. 1049 unsigned count = 0; 1050 for (; pos >= 0; --pos, ++count) 1051 if (Text[pos] != '\\') 1052 break; 1053 return count & 1; 1054 }; 1055 // FIXME: This miscounts tok:unknown tokens that are not just 1056 // whitespace, e.g. a '`' character. 1057 for (int i = 0, e = Text.size(); i != e; ++i) { 1058 switch (Text[i]) { 1059 case '\n': 1060 ++FormatTok->NewlinesBefore; 1061 FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1); 1062 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; 1063 Column = 0; 1064 break; 1065 case '\r': 1066 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; 1067 Column = 0; 1068 break; 1069 case '\f': 1070 case '\v': 1071 Column = 0; 1072 break; 1073 case ' ': 1074 ++Column; 1075 break; 1076 case '\t': 1077 Column += Style.TabWidth - Column % Style.TabWidth; 1078 break; 1079 case '\\': 1080 if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n')) 1081 FormatTok->Type = TT_ImplicitStringLiteral; 1082 break; 1083 default: 1084 FormatTok->Type = TT_ImplicitStringLiteral; 1085 break; 1086 } 1087 } 1088 1089 if (FormatTok->is(TT_ImplicitStringLiteral)) 1090 break; 1091 WhitespaceLength += FormatTok->Tok.getLength(); 1092 1093 readRawToken(*FormatTok); 1094 } 1095 1096 // In case the token starts with escaped newlines, we want to 1097 // take them into account as whitespace - this pattern is quite frequent 1098 // in macro definitions. 1099 // FIXME: Add a more explicit test. 1100 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && 1101 FormatTok->TokenText[1] == '\n') { 1102 ++FormatTok->NewlinesBefore; 1103 WhitespaceLength += 2; 1104 FormatTok->LastNewlineOffset = 2; 1105 Column = 0; 1106 FormatTok->TokenText = FormatTok->TokenText.substr(2); 1107 } 1108 1109 FormatTok->WhitespaceRange = SourceRange( 1110 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); 1111 1112 FormatTok->OriginalColumn = Column; 1113 1114 TrailingWhitespace = 0; 1115 if (FormatTok->Tok.is(tok::comment)) { 1116 // FIXME: Add the trimmed whitespace to Column. 1117 StringRef UntrimmedText = FormatTok->TokenText; 1118 FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f"); 1119 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); 1120 } else if (FormatTok->Tok.is(tok::raw_identifier)) { 1121 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); 1122 FormatTok->Tok.setIdentifierInfo(&Info); 1123 FormatTok->Tok.setKind(Info.getTokenID()); 1124 if (Style.Language == FormatStyle::LK_Java && 1125 FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete)) { 1126 FormatTok->Tok.setKind(tok::identifier); 1127 FormatTok->Tok.setIdentifierInfo(nullptr); 1128 } 1129 } else if (FormatTok->Tok.is(tok::greatergreater)) { 1130 FormatTok->Tok.setKind(tok::greater); 1131 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1132 GreaterStashed = true; 1133 } else if (FormatTok->Tok.is(tok::lessless)) { 1134 FormatTok->Tok.setKind(tok::less); 1135 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1136 LessStashed = true; 1137 } 1138 1139 // Now FormatTok is the next non-whitespace token. 1140 1141 StringRef Text = FormatTok->TokenText; 1142 size_t FirstNewlinePos = Text.find('\n'); 1143 if (FirstNewlinePos == StringRef::npos) { 1144 // FIXME: ColumnWidth actually depends on the start column, we need to 1145 // take this into account when the token is moved. 1146 FormatTok->ColumnWidth = 1147 encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding); 1148 Column += FormatTok->ColumnWidth; 1149 } else { 1150 FormatTok->IsMultiline = true; 1151 // FIXME: ColumnWidth actually depends on the start column, we need to 1152 // take this into account when the token is moved. 1153 FormatTok->ColumnWidth = encoding::columnWidthWithTabs( 1154 Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding); 1155 1156 // The last line of the token always starts in column 0. 1157 // Thus, the length can be precomputed even in the presence of tabs. 1158 FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs( 1159 Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, 1160 Encoding); 1161 Column = FormatTok->LastLineColumnWidth; 1162 } 1163 1164 if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() && 1165 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() == 1166 tok::pp_define) && 1167 std::find(ForEachMacros.begin(), ForEachMacros.end(), 1168 FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) 1169 FormatTok->Type = TT_ForEachMacro; 1170 1171 return FormatTok; 1172 } 1173 1174 FormatToken *FormatTok; 1175 bool IsFirstToken; 1176 bool GreaterStashed, LessStashed; 1177 unsigned Column; 1178 unsigned TrailingWhitespace; 1179 std::unique_ptr<Lexer> Lex; 1180 SourceManager &SourceMgr; 1181 FileID ID; 1182 FormatStyle &Style; 1183 IdentifierTable IdentTable; 1184 AdditionalKeywords Keywords; 1185 encoding::Encoding Encoding; 1186 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 1187 // Index (in 'Tokens') of the last token that starts a new line. 1188 unsigned FirstInLineIndex; 1189 SmallVector<FormatToken *, 16> Tokens; 1190 SmallVector<IdentifierInfo *, 8> ForEachMacros; 1191 1192 bool FormattingDisabled; 1193 1194 void readRawToken(FormatToken &Tok) { 1195 Lex->LexFromRawLexer(Tok.Tok); 1196 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), 1197 Tok.Tok.getLength()); 1198 // For formatting, treat unterminated string literals like normal string 1199 // literals. 1200 if (Tok.is(tok::unknown)) { 1201 if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') { 1202 Tok.Tok.setKind(tok::string_literal); 1203 Tok.IsUnterminatedLiteral = true; 1204 } else if (Style.Language == FormatStyle::LK_JavaScript && 1205 Tok.TokenText == "''") { 1206 Tok.Tok.setKind(tok::char_constant); 1207 } 1208 } 1209 1210 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" || 1211 Tok.TokenText == "/* clang-format on */")) { 1212 FormattingDisabled = false; 1213 } 1214 1215 Tok.Finalized = FormattingDisabled; 1216 1217 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" || 1218 Tok.TokenText == "/* clang-format off */")) { 1219 FormattingDisabled = true; 1220 } 1221 } 1222 1223 void resetLexer(unsigned Offset) { 1224 StringRef Buffer = SourceMgr.getBufferData(ID); 1225 Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), 1226 getFormattingLangOpts(Style), Buffer.begin(), 1227 Buffer.begin() + Offset, Buffer.end())); 1228 Lex->SetKeepWhitespaceMode(true); 1229 } 1230 }; 1231 1232 static StringRef getLanguageName(FormatStyle::LanguageKind Language) { 1233 switch (Language) { 1234 case FormatStyle::LK_Cpp: 1235 return "C++"; 1236 case FormatStyle::LK_Java: 1237 return "Java"; 1238 case FormatStyle::LK_JavaScript: 1239 return "JavaScript"; 1240 case FormatStyle::LK_Proto: 1241 return "Proto"; 1242 default: 1243 return "Unknown"; 1244 } 1245 } 1246 1247 class Formatter : public UnwrappedLineConsumer { 1248 public: 1249 Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID, 1250 ArrayRef<CharSourceRange> Ranges) 1251 : Style(Style), ID(ID), SourceMgr(SourceMgr), 1252 Whitespaces(SourceMgr, Style, 1253 inputUsesCRLF(SourceMgr.getBufferData(ID))), 1254 Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1), 1255 Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) { 1256 DEBUG(llvm::dbgs() << "File encoding: " 1257 << (Encoding == encoding::Encoding_UTF8 ? "UTF8" 1258 : "unknown") 1259 << "\n"); 1260 DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language) 1261 << "\n"); 1262 } 1263 1264 tooling::Replacements format(bool *IncompleteFormat) { 1265 tooling::Replacements Result; 1266 FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding); 1267 1268 UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), 1269 *this); 1270 Parser.parse(); 1271 assert(UnwrappedLines.rbegin()->empty()); 1272 for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; 1273 ++Run) { 1274 DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); 1275 SmallVector<AnnotatedLine *, 16> AnnotatedLines; 1276 for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) { 1277 AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); 1278 } 1279 tooling::Replacements RunResult = 1280 format(AnnotatedLines, Tokens, IncompleteFormat); 1281 DEBUG({ 1282 llvm::dbgs() << "Replacements for run " << Run << ":\n"; 1283 for (tooling::Replacements::iterator I = RunResult.begin(), 1284 E = RunResult.end(); 1285 I != E; ++I) { 1286 llvm::dbgs() << I->toString() << "\n"; 1287 } 1288 }); 1289 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1290 delete AnnotatedLines[i]; 1291 } 1292 Result.insert(RunResult.begin(), RunResult.end()); 1293 Whitespaces.reset(); 1294 } 1295 return Result; 1296 } 1297 1298 tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, 1299 FormatTokenLexer &Tokens, 1300 bool *IncompleteFormat) { 1301 TokenAnnotator Annotator(Style, Tokens.getKeywords()); 1302 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1303 Annotator.annotate(*AnnotatedLines[i]); 1304 } 1305 deriveLocalStyle(AnnotatedLines); 1306 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1307 Annotator.calculateFormattingInformation(*AnnotatedLines[i]); 1308 } 1309 computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); 1310 1311 Annotator.setCommentLineLevels(AnnotatedLines); 1312 ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr, 1313 Whitespaces, Encoding, 1314 BinPackInconclusiveFunctions); 1315 UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), 1316 IncompleteFormat) 1317 .format(AnnotatedLines); 1318 return Whitespaces.generateReplacements(); 1319 } 1320 1321 private: 1322 // Determines which lines are affected by the SourceRanges given as input. 1323 // Returns \c true if at least one line between I and E or one of their 1324 // children is affected. 1325 bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I, 1326 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1327 bool SomeLineAffected = false; 1328 const AnnotatedLine *PreviousLine = nullptr; 1329 while (I != E) { 1330 AnnotatedLine *Line = *I; 1331 Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); 1332 1333 // If a line is part of a preprocessor directive, it needs to be formatted 1334 // if any token within the directive is affected. 1335 if (Line->InPPDirective) { 1336 FormatToken *Last = Line->Last; 1337 SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1; 1338 while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) { 1339 Last = (*PPEnd)->Last; 1340 ++PPEnd; 1341 } 1342 1343 if (affectsTokenRange(*Line->First, *Last, 1344 /*IncludeLeadingNewlines=*/false)) { 1345 SomeLineAffected = true; 1346 markAllAsAffected(I, PPEnd); 1347 } 1348 I = PPEnd; 1349 continue; 1350 } 1351 1352 if (nonPPLineAffected(Line, PreviousLine)) 1353 SomeLineAffected = true; 1354 1355 PreviousLine = Line; 1356 ++I; 1357 } 1358 return SomeLineAffected; 1359 } 1360 1361 // Determines whether 'Line' is affected by the SourceRanges given as input. 1362 // Returns \c true if line or one if its children is affected. 1363 bool nonPPLineAffected(AnnotatedLine *Line, 1364 const AnnotatedLine *PreviousLine) { 1365 bool SomeLineAffected = false; 1366 Line->ChildrenAffected = 1367 computeAffectedLines(Line->Children.begin(), Line->Children.end()); 1368 if (Line->ChildrenAffected) 1369 SomeLineAffected = true; 1370 1371 // Stores whether one of the line's tokens is directly affected. 1372 bool SomeTokenAffected = false; 1373 // Stores whether we need to look at the leading newlines of the next token 1374 // in order to determine whether it was affected. 1375 bool IncludeLeadingNewlines = false; 1376 1377 // Stores whether the first child line of any of this line's tokens is 1378 // affected. 1379 bool SomeFirstChildAffected = false; 1380 1381 for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { 1382 // Determine whether 'Tok' was affected. 1383 if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) 1384 SomeTokenAffected = true; 1385 1386 // Determine whether the first child of 'Tok' was affected. 1387 if (!Tok->Children.empty() && Tok->Children.front()->Affected) 1388 SomeFirstChildAffected = true; 1389 1390 IncludeLeadingNewlines = Tok->Children.empty(); 1391 } 1392 1393 // Was this line moved, i.e. has it previously been on the same line as an 1394 // affected line? 1395 bool LineMoved = PreviousLine && PreviousLine->Affected && 1396 Line->First->NewlinesBefore == 0; 1397 1398 bool IsContinuedComment = 1399 Line->First->is(tok::comment) && Line->First->Next == nullptr && 1400 Line->First->NewlinesBefore < 2 && PreviousLine && 1401 PreviousLine->Affected && PreviousLine->Last->is(tok::comment); 1402 1403 if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || 1404 IsContinuedComment) { 1405 Line->Affected = true; 1406 SomeLineAffected = true; 1407 } 1408 return SomeLineAffected; 1409 } 1410 1411 // Marks all lines between I and E as well as all their children as affected. 1412 void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I, 1413 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1414 while (I != E) { 1415 (*I)->Affected = true; 1416 markAllAsAffected((*I)->Children.begin(), (*I)->Children.end()); 1417 ++I; 1418 } 1419 } 1420 1421 // Returns true if the range from 'First' to 'Last' intersects with one of the 1422 // input ranges. 1423 bool affectsTokenRange(const FormatToken &First, const FormatToken &Last, 1424 bool IncludeLeadingNewlines) { 1425 SourceLocation Start = First.WhitespaceRange.getBegin(); 1426 if (!IncludeLeadingNewlines) 1427 Start = Start.getLocWithOffset(First.LastNewlineOffset); 1428 SourceLocation End = Last.getStartOfNonWhitespace(); 1429 End = End.getLocWithOffset(Last.TokenText.size()); 1430 CharSourceRange Range = CharSourceRange::getCharRange(Start, End); 1431 return affectsCharSourceRange(Range); 1432 } 1433 1434 // Returns true if one of the input ranges intersect the leading empty lines 1435 // before 'Tok'. 1436 bool affectsLeadingEmptyLines(const FormatToken &Tok) { 1437 CharSourceRange EmptyLineRange = CharSourceRange::getCharRange( 1438 Tok.WhitespaceRange.getBegin(), 1439 Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset)); 1440 return affectsCharSourceRange(EmptyLineRange); 1441 } 1442 1443 // Returns true if 'Range' intersects with one of the input ranges. 1444 bool affectsCharSourceRange(const CharSourceRange &Range) { 1445 for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(), 1446 E = Ranges.end(); 1447 I != E; ++I) { 1448 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && 1449 !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) 1450 return true; 1451 } 1452 return false; 1453 } 1454 1455 static bool inputUsesCRLF(StringRef Text) { 1456 return Text.count('\r') * 2 > Text.count('\n'); 1457 } 1458 1459 void 1460 deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 1461 unsigned CountBoundToVariable = 0; 1462 unsigned CountBoundToType = 0; 1463 bool HasCpp03IncompatibleFormat = false; 1464 bool HasBinPackedFunction = false; 1465 bool HasOnePerLineFunction = false; 1466 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1467 if (!AnnotatedLines[i]->First->Next) 1468 continue; 1469 FormatToken *Tok = AnnotatedLines[i]->First->Next; 1470 while (Tok->Next) { 1471 if (Tok->is(TT_PointerOrReference)) { 1472 bool SpacesBefore = 1473 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 1474 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != 1475 Tok->Next->WhitespaceRange.getEnd(); 1476 if (SpacesBefore && !SpacesAfter) 1477 ++CountBoundToVariable; 1478 else if (!SpacesBefore && SpacesAfter) 1479 ++CountBoundToType; 1480 } 1481 1482 if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { 1483 if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener)) 1484 HasCpp03IncompatibleFormat = true; 1485 if (Tok->is(TT_TemplateCloser) && 1486 Tok->Previous->is(TT_TemplateCloser)) 1487 HasCpp03IncompatibleFormat = true; 1488 } 1489 1490 if (Tok->PackingKind == PPK_BinPacked) 1491 HasBinPackedFunction = true; 1492 if (Tok->PackingKind == PPK_OnePerLine) 1493 HasOnePerLineFunction = true; 1494 1495 Tok = Tok->Next; 1496 } 1497 } 1498 if (Style.DerivePointerAlignment) { 1499 if (CountBoundToType > CountBoundToVariable) 1500 Style.PointerAlignment = FormatStyle::PAS_Left; 1501 else if (CountBoundToType < CountBoundToVariable) 1502 Style.PointerAlignment = FormatStyle::PAS_Right; 1503 } 1504 if (Style.Standard == FormatStyle::LS_Auto) { 1505 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 1506 : FormatStyle::LS_Cpp03; 1507 } 1508 BinPackInconclusiveFunctions = 1509 HasBinPackedFunction || !HasOnePerLineFunction; 1510 } 1511 1512 void consumeUnwrappedLine(const UnwrappedLine &TheLine) override { 1513 assert(!UnwrappedLines.empty()); 1514 UnwrappedLines.back().push_back(TheLine); 1515 } 1516 1517 void finishRun() override { 1518 UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>()); 1519 } 1520 1521 FormatStyle Style; 1522 FileID ID; 1523 SourceManager &SourceMgr; 1524 WhitespaceManager Whitespaces; 1525 SmallVector<CharSourceRange, 8> Ranges; 1526 SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines; 1527 1528 encoding::Encoding Encoding; 1529 bool BinPackInconclusiveFunctions; 1530 }; 1531 1532 } // end anonymous namespace 1533 1534 tooling::Replacements reformat(const FormatStyle &Style, 1535 SourceManager &SourceMgr, FileID ID, 1536 ArrayRef<CharSourceRange> Ranges, 1537 bool *IncompleteFormat) { 1538 if (Style.DisableFormat) 1539 return tooling::Replacements(); 1540 Formatter formatter(Style, SourceMgr, ID, Ranges); 1541 return formatter.format(IncompleteFormat); 1542 } 1543 1544 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 1545 ArrayRef<tooling::Range> Ranges, 1546 StringRef FileName, bool *IncompleteFormat) { 1547 if (Style.DisableFormat) 1548 return tooling::Replacements(); 1549 1550 FileManager Files((FileSystemOptions())); 1551 DiagnosticsEngine Diagnostics( 1552 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 1553 new DiagnosticOptions); 1554 SourceManager SourceMgr(Diagnostics, Files); 1555 std::unique_ptr<llvm::MemoryBuffer> Buf = 1556 llvm::MemoryBuffer::getMemBuffer(Code, FileName); 1557 const clang::FileEntry *Entry = 1558 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 1559 SourceMgr.overrideFileContents(Entry, std::move(Buf)); 1560 FileID ID = 1561 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 1562 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 1563 std::vector<CharSourceRange> CharRanges; 1564 for (const tooling::Range &Range : Ranges) { 1565 SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); 1566 SourceLocation End = Start.getLocWithOffset(Range.getLength()); 1567 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 1568 } 1569 return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat); 1570 } 1571 1572 LangOptions getFormattingLangOpts(const FormatStyle &Style) { 1573 LangOptions LangOpts; 1574 LangOpts.CPlusPlus = 1; 1575 LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1576 LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1577 LangOpts.LineComment = 1; 1578 bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp; 1579 LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0; 1580 LangOpts.Bool = 1; 1581 LangOpts.ObjC1 = 1; 1582 LangOpts.ObjC2 = 1; 1583 LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. 1584 return LangOpts; 1585 } 1586 1587 const char *StyleOptionHelpDescription = 1588 "Coding style, currently supports:\n" 1589 " LLVM, Google, Chromium, Mozilla, WebKit.\n" 1590 "Use -style=file to load style configuration from\n" 1591 ".clang-format file located in one of the parent\n" 1592 "directories of the source file (or current\n" 1593 "directory for stdin).\n" 1594 "Use -style=\"{key: value, ...}\" to set specific\n" 1595 "parameters, e.g.:\n" 1596 " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; 1597 1598 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { 1599 if (FileName.endswith(".java")) { 1600 return FormatStyle::LK_Java; 1601 } else if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) { 1602 // JavaScript or TypeScript. 1603 return FormatStyle::LK_JavaScript; 1604 } else if (FileName.endswith_lower(".proto") || 1605 FileName.endswith_lower(".protodevel")) { 1606 return FormatStyle::LK_Proto; 1607 } 1608 return FormatStyle::LK_Cpp; 1609 } 1610 1611 FormatStyle getStyle(StringRef StyleName, StringRef FileName, 1612 StringRef FallbackStyle) { 1613 FormatStyle Style = getLLVMStyle(); 1614 Style.Language = getLanguageByFileName(FileName); 1615 if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { 1616 llvm::errs() << "Invalid fallback style \"" << FallbackStyle 1617 << "\" using LLVM style\n"; 1618 return Style; 1619 } 1620 1621 if (StyleName.startswith("{")) { 1622 // Parse YAML/JSON style from the command line. 1623 if (std::error_code ec = parseConfiguration(StyleName, &Style)) { 1624 llvm::errs() << "Error parsing -style: " << ec.message() << ", using " 1625 << FallbackStyle << " style\n"; 1626 } 1627 return Style; 1628 } 1629 1630 if (!StyleName.equals_lower("file")) { 1631 if (!getPredefinedStyle(StyleName, Style.Language, &Style)) 1632 llvm::errs() << "Invalid value for -style, using " << FallbackStyle 1633 << " style\n"; 1634 return Style; 1635 } 1636 1637 // Look for .clang-format/_clang-format file in the file's parent directories. 1638 SmallString<128> UnsuitableConfigFiles; 1639 SmallString<128> Path(FileName); 1640 llvm::sys::fs::make_absolute(Path); 1641 for (StringRef Directory = Path; !Directory.empty(); 1642 Directory = llvm::sys::path::parent_path(Directory)) { 1643 if (!llvm::sys::fs::is_directory(Directory)) 1644 continue; 1645 SmallString<128> ConfigFile(Directory); 1646 1647 llvm::sys::path::append(ConfigFile, ".clang-format"); 1648 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 1649 bool IsFile = false; 1650 // Ignore errors from is_regular_file: we only need to know if we can read 1651 // the file or not. 1652 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 1653 1654 if (!IsFile) { 1655 // Try _clang-format too, since dotfiles are not commonly used on Windows. 1656 ConfigFile = Directory; 1657 llvm::sys::path::append(ConfigFile, "_clang-format"); 1658 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 1659 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 1660 } 1661 1662 if (IsFile) { 1663 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1664 llvm::MemoryBuffer::getFile(ConfigFile.c_str()); 1665 if (std::error_code EC = Text.getError()) { 1666 llvm::errs() << EC.message() << "\n"; 1667 break; 1668 } 1669 if (std::error_code ec = 1670 parseConfiguration(Text.get()->getBuffer(), &Style)) { 1671 if (ec == ParseError::Unsuitable) { 1672 if (!UnsuitableConfigFiles.empty()) 1673 UnsuitableConfigFiles.append(", "); 1674 UnsuitableConfigFiles.append(ConfigFile); 1675 continue; 1676 } 1677 llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message() 1678 << "\n"; 1679 break; 1680 } 1681 DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); 1682 return Style; 1683 } 1684 } 1685 if (!UnsuitableConfigFiles.empty()) { 1686 llvm::errs() << "Configuration file(s) do(es) not support " 1687 << getLanguageName(Style.Language) << ": " 1688 << UnsuitableConfigFiles << "\n"; 1689 } 1690 return Style; 1691 } 1692 1693 } // namespace format 1694 } // namespace clang 1695