1 //===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the SarifDiagnostics object. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "clang/Analysis/PathDiagnostic.h" 14 #include "clang/Basic/FileManager.h" 15 #include "clang/Basic/Version.h" 16 #include "clang/Lex/Preprocessor.h" 17 #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" 18 #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/StringMap.h" 21 #include "llvm/Support/ConvertUTF.h" 22 #include "llvm/Support/JSON.h" 23 #include "llvm/Support/Path.h" 24 25 using namespace llvm; 26 using namespace clang; 27 using namespace ento; 28 29 namespace { 30 class SarifDiagnostics : public PathDiagnosticConsumer { 31 std::string OutputFile; 32 const LangOptions &LO; 33 34 public: 35 SarifDiagnostics(AnalyzerOptions &, const std::string &Output, 36 const LangOptions &LO) 37 : OutputFile(Output), LO(LO) {} 38 ~SarifDiagnostics() override = default; 39 40 void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags, 41 FilesMade *FM) override; 42 43 StringRef getName() const override { return "SarifDiagnostics"; } 44 PathGenerationScheme getGenerationScheme() const override { return Minimal; } 45 bool supportsLogicalOpControlFlow() const override { return true; } 46 bool supportsCrossFileDiagnostics() const override { return true; } 47 }; 48 } // end anonymous namespace 49 50 void ento::createSarifDiagnosticConsumer( 51 AnalyzerOptions &AnalyzerOpts, PathDiagnosticConsumers &C, 52 const std::string &Output, const Preprocessor &PP, 53 const cross_tu::CrossTranslationUnitContext &) { 54 C.push_back(new SarifDiagnostics(AnalyzerOpts, Output, PP.getLangOpts())); 55 } 56 57 static StringRef getFileName(const FileEntry &FE) { 58 StringRef Filename = FE.tryGetRealPathName(); 59 if (Filename.empty()) 60 Filename = FE.getName(); 61 return Filename; 62 } 63 64 static std::string percentEncodeURICharacter(char C) { 65 // RFC 3986 claims alpha, numeric, and this handful of 66 // characters are not reserved for the path component and 67 // should be written out directly. Otherwise, percent 68 // encode the character and write that out instead of the 69 // reserved character. 70 if (llvm::isAlnum(C) || 71 StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) 72 return std::string(&C, 1); 73 return "%" + llvm::toHex(StringRef(&C, 1)); 74 } 75 76 static std::string fileNameToURI(StringRef Filename) { 77 llvm::SmallString<32> Ret = StringRef("file://"); 78 79 // Get the root name to see if it has a URI authority. 80 StringRef Root = sys::path::root_name(Filename); 81 if (Root.startswith("//")) { 82 // There is an authority, so add it to the URI. 83 Ret += Root.drop_front(2).str(); 84 } else if (!Root.empty()) { 85 // There is no authority, so end the component and add the root to the URI. 86 Ret += Twine("/" + Root).str(); 87 } 88 89 auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); 90 assert(Iter != End && "Expected there to be a non-root path component."); 91 // Add the rest of the path components, encoding any reserved characters; 92 // we skip past the first path component, as it was handled it above. 93 std::for_each(++Iter, End, [&Ret](StringRef Component) { 94 // For reasons unknown to me, we may get a backslash with Windows native 95 // paths for the initial backslash following the drive component, which 96 // we need to ignore as a URI path part. 97 if (Component == "\\") 98 return; 99 100 // Add the separator between the previous path part and the one being 101 // currently processed. 102 Ret += "/"; 103 104 // URI encode the part. 105 for (char C : Component) { 106 Ret += percentEncodeURICharacter(C); 107 } 108 }); 109 110 return std::string(Ret); 111 } 112 113 static json::Object createArtifactLocation(const FileEntry &FE) { 114 return json::Object{{"uri", fileNameToURI(getFileName(FE))}}; 115 } 116 117 static json::Object createArtifact(const FileEntry &FE) { 118 return json::Object{{"location", createArtifactLocation(FE)}, 119 {"roles", json::Array{"resultFile"}}, 120 {"length", FE.getSize()}, 121 {"mimeType", "text/plain"}}; 122 } 123 124 static json::Object createArtifactLocation(const FileEntry &FE, 125 json::Array &Artifacts) { 126 std::string FileURI = fileNameToURI(getFileName(FE)); 127 128 // See if the Artifacts array contains this URI already. If it does not, 129 // create a new artifact object to add to the array. 130 auto I = llvm::find_if(Artifacts, [&](const json::Value &File) { 131 if (const json::Object *Obj = File.getAsObject()) { 132 if (const json::Object *FileLoc = Obj->getObject("location")) { 133 Optional<StringRef> URI = FileLoc->getString("uri"); 134 return URI && URI->equals(FileURI); 135 } 136 } 137 return false; 138 }); 139 140 // Calculate the index within the artifact array so it can be stored in 141 // the JSON object. 142 auto Index = static_cast<unsigned>(std::distance(Artifacts.begin(), I)); 143 if (I == Artifacts.end()) 144 Artifacts.push_back(createArtifact(FE)); 145 146 return json::Object{{"uri", FileURI}, {"index", Index}}; 147 } 148 149 static unsigned int adjustColumnPos(const SourceManager &SM, SourceLocation Loc, 150 unsigned int TokenLen = 0) { 151 assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); 152 153 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedExpansionLoc(Loc); 154 assert(LocInfo.second > SM.getExpansionColumnNumber(Loc) && 155 "position in file is before column number?"); 156 157 bool InvalidBuffer = false; 158 const MemoryBuffer *Buf = SM.getBuffer(LocInfo.first, &InvalidBuffer); 159 assert(!InvalidBuffer && "got an invalid buffer for the location's file"); 160 assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && 161 "token extends past end of buffer?"); 162 163 // Adjust the offset to be the start of the line, since we'll be counting 164 // Unicode characters from there until our column offset. 165 unsigned int Off = LocInfo.second - (SM.getExpansionColumnNumber(Loc) - 1); 166 unsigned int Ret = 1; 167 while (Off < (LocInfo.second + TokenLen)) { 168 Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); 169 Ret++; 170 } 171 172 return Ret; 173 } 174 175 static json::Object createTextRegion(const LangOptions &LO, SourceRange R, 176 const SourceManager &SM) { 177 json::Object Region{ 178 {"startLine", SM.getExpansionLineNumber(R.getBegin())}, 179 {"startColumn", adjustColumnPos(SM, R.getBegin())}, 180 }; 181 if (R.getBegin() == R.getEnd()) { 182 Region["endColumn"] = adjustColumnPos(SM, R.getBegin()); 183 } else { 184 Region["endLine"] = SM.getExpansionLineNumber(R.getEnd()); 185 Region["endColumn"] = adjustColumnPos( 186 SM, R.getEnd(), 187 Lexer::MeasureTokenLength(R.getEnd(), SM, LO)); 188 } 189 return Region; 190 } 191 192 static json::Object createPhysicalLocation(const LangOptions &LO, 193 SourceRange R, const FileEntry &FE, 194 const SourceManager &SMgr, 195 json::Array &Artifacts) { 196 return json::Object{ 197 {{"artifactLocation", createArtifactLocation(FE, Artifacts)}, 198 {"region", createTextRegion(LO, R, SMgr)}}}; 199 } 200 201 enum class Importance { Important, Essential, Unimportant }; 202 203 static StringRef importanceToStr(Importance I) { 204 switch (I) { 205 case Importance::Important: 206 return "important"; 207 case Importance::Essential: 208 return "essential"; 209 case Importance::Unimportant: 210 return "unimportant"; 211 } 212 llvm_unreachable("Fully covered switch is not so fully covered"); 213 } 214 215 static json::Object createThreadFlowLocation(json::Object &&Location, 216 Importance I) { 217 return json::Object{{"location", std::move(Location)}, 218 {"importance", importanceToStr(I)}}; 219 } 220 221 static json::Object createMessage(StringRef Text) { 222 return json::Object{{"text", Text.str()}}; 223 } 224 225 static json::Object createLocation(json::Object &&PhysicalLocation, 226 StringRef Message = "") { 227 json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; 228 if (!Message.empty()) 229 Ret.insert({"message", createMessage(Message)}); 230 return Ret; 231 } 232 233 static Importance calculateImportance(const PathDiagnosticPiece &Piece) { 234 switch (Piece.getKind()) { 235 case PathDiagnosticPiece::Call: 236 case PathDiagnosticPiece::Macro: 237 case PathDiagnosticPiece::Note: 238 case PathDiagnosticPiece::PopUp: 239 // FIXME: What should be reported here? 240 break; 241 case PathDiagnosticPiece::Event: 242 return Piece.getTagStr() == "ConditionBRVisitor" ? Importance::Important 243 : Importance::Essential; 244 case PathDiagnosticPiece::ControlFlow: 245 return Importance::Unimportant; 246 } 247 return Importance::Unimportant; 248 } 249 250 static json::Object createThreadFlow(const LangOptions &LO, 251 const PathPieces &Pieces, 252 json::Array &Artifacts) { 253 const SourceManager &SMgr = Pieces.front()->getLocation().getManager(); 254 json::Array Locations; 255 for (const auto &Piece : Pieces) { 256 const PathDiagnosticLocation &P = Piece->getLocation(); 257 Locations.push_back(createThreadFlowLocation( 258 createLocation(createPhysicalLocation( 259 LO, P.asRange(), 260 *P.asLocation().getExpansionLoc().getFileEntry(), 261 SMgr, Artifacts), 262 Piece->getString()), 263 calculateImportance(*Piece))); 264 } 265 return json::Object{{"locations", std::move(Locations)}}; 266 } 267 268 static json::Object createCodeFlow(const LangOptions &LO, 269 const PathPieces &Pieces, 270 json::Array &Artifacts) { 271 return json::Object{ 272 {"threadFlows", json::Array{createThreadFlow(LO, Pieces, Artifacts)}}}; 273 } 274 275 static json::Object createResult(const LangOptions &LO, 276 const PathDiagnostic &Diag, 277 json::Array &Artifacts, 278 const StringMap<unsigned> &RuleMapping) { 279 const PathPieces &Path = Diag.path.flatten(false); 280 const SourceManager &SMgr = Path.front()->getLocation().getManager(); 281 282 auto Iter = RuleMapping.find(Diag.getCheckerName()); 283 assert(Iter != RuleMapping.end() && "Rule ID is not in the array index map?"); 284 285 return json::Object{ 286 {"message", createMessage(Diag.getVerboseDescription())}, 287 {"codeFlows", json::Array{createCodeFlow(LO, Path, Artifacts)}}, 288 {"locations", 289 json::Array{createLocation(createPhysicalLocation( 290 LO, Diag.getLocation().asRange(), 291 *Diag.getLocation().asLocation().getExpansionLoc().getFileEntry(), 292 SMgr, Artifacts))}}, 293 {"ruleIndex", Iter->getValue()}, 294 {"ruleId", Diag.getCheckerName()}}; 295 } 296 297 static StringRef getRuleDescription(StringRef CheckName) { 298 return llvm::StringSwitch<StringRef>(CheckName) 299 #define GET_CHECKERS 300 #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \ 301 .Case(FULLNAME, HELPTEXT) 302 #include "clang/StaticAnalyzer/Checkers/Checkers.inc" 303 #undef CHECKER 304 #undef GET_CHECKERS 305 ; 306 } 307 308 static StringRef getRuleHelpURIStr(StringRef CheckName) { 309 return llvm::StringSwitch<StringRef>(CheckName) 310 #define GET_CHECKERS 311 #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \ 312 .Case(FULLNAME, DOC_URI) 313 #include "clang/StaticAnalyzer/Checkers/Checkers.inc" 314 #undef CHECKER 315 #undef GET_CHECKERS 316 ; 317 } 318 319 static json::Object createRule(const PathDiagnostic &Diag) { 320 StringRef CheckName = Diag.getCheckerName(); 321 json::Object Ret{ 322 {"fullDescription", createMessage(getRuleDescription(CheckName))}, 323 {"name", CheckName}, 324 {"id", CheckName}}; 325 326 std::string RuleURI = std::string(getRuleHelpURIStr(CheckName)); 327 if (!RuleURI.empty()) 328 Ret["helpUri"] = RuleURI; 329 330 return Ret; 331 } 332 333 static json::Array createRules(std::vector<const PathDiagnostic *> &Diags, 334 StringMap<unsigned> &RuleMapping) { 335 json::Array Rules; 336 llvm::StringSet<> Seen; 337 338 llvm::for_each(Diags, [&](const PathDiagnostic *D) { 339 StringRef RuleID = D->getCheckerName(); 340 std::pair<llvm::StringSet<>::iterator, bool> P = Seen.insert(RuleID); 341 if (P.second) { 342 RuleMapping[RuleID] = Rules.size(); // Maps RuleID to an Array Index. 343 Rules.push_back(createRule(*D)); 344 } 345 }); 346 347 return Rules; 348 } 349 350 static json::Object createTool(std::vector<const PathDiagnostic *> &Diags, 351 StringMap<unsigned> &RuleMapping) { 352 return json::Object{ 353 {"driver", json::Object{{"name", "clang"}, 354 {"fullName", "clang static analyzer"}, 355 {"language", "en-US"}, 356 {"version", getClangFullVersion()}, 357 {"rules", createRules(Diags, RuleMapping)}}}}; 358 } 359 360 static json::Object createRun(const LangOptions &LO, 361 std::vector<const PathDiagnostic *> &Diags) { 362 json::Array Results, Artifacts; 363 StringMap<unsigned> RuleMapping; 364 json::Object Tool = createTool(Diags, RuleMapping); 365 366 llvm::for_each(Diags, [&](const PathDiagnostic *D) { 367 Results.push_back(createResult(LO, *D, Artifacts, RuleMapping)); 368 }); 369 370 return json::Object{{"tool", std::move(Tool)}, 371 {"results", std::move(Results)}, 372 {"artifacts", std::move(Artifacts)}, 373 {"columnKind", "unicodeCodePoints"}}; 374 } 375 376 void SarifDiagnostics::FlushDiagnosticsImpl( 377 std::vector<const PathDiagnostic *> &Diags, FilesMade *) { 378 // We currently overwrite the file if it already exists. However, it may be 379 // useful to add a feature someday that allows the user to append a run to an 380 // existing SARIF file. One danger from that approach is that the size of the 381 // file can become large very quickly, so decoding into JSON to append a run 382 // may be an expensive operation. 383 std::error_code EC; 384 llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::OF_Text); 385 if (EC) { 386 llvm::errs() << "warning: could not create file: " << EC.message() << '\n'; 387 return; 388 } 389 json::Object Sarif{ 390 {"$schema", 391 "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"}, 392 {"version", "2.1.0"}, 393 {"runs", json::Array{createRun(LO, Diags)}}}; 394 OS << llvm::formatv("{0:2}\n", json::Value(std::move(Sarif))); 395 } 396