1 //===- YAMLRemarkParser.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides utility methods used by clients that want to use the
10 // parser for remark diagnostics in LLVM.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "YAMLRemarkParser.h"
15 #include "llvm/ADT/StringSwitch.h"
16 #include "llvm/Remarks/RemarkParser.h"
17 #include "llvm/Support/Endian.h"
18 
19 using namespace llvm;
20 using namespace llvm::remarks;
21 
// Out-of-line definition of the static ID member of YAMLParseError.
// NOTE(review): presumably this is the per-class identifier used by LLVM's
// Error/ErrorInfo machinery -- confirm against YAMLRemarkParser.h.
char YAMLParseError::ID = 0;
23 
24 static void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
25   assert(Ctx && "Expected non-null Ctx in diagnostic handler.");
26   std::string &Message = *static_cast<std::string *>(Ctx);
27   assert(Message.empty() && "Expected an empty string.");
28   raw_string_ostream OS(Message);
29   Diag.print(/*ProgName=*/nullptr, OS, /*ShowColors*/ false,
30              /*ShowKindLabels*/ true);
31   OS << '\n';
32   OS.flush();
33 }
34 
35 YAMLParseError::YAMLParseError(StringRef Msg, SourceMgr &SM,
36                                yaml::Stream &Stream, yaml::Node &Node) {
37   // 1) Set up a diagnostic handler to avoid errors being printed out to
38   // stderr.
39   // 2) Use the stream to print the error with the associated node.
40   // 3) The stream will use the source manager to print the error, which will
41   // call the diagnostic handler.
42   // 4) The diagnostic handler will stream the error directly into this object's
43   // Message member, which is used when logging is asked for.
44   auto OldDiagHandler = SM.getDiagHandler();
45   auto OldDiagCtx = SM.getDiagContext();
46   SM.setDiagHandler(handleDiagnostic, &Message);
47   Stream.printError(&Node, Twine(Msg) + Twine('\n'));
48   // Restore the old handlers.
49   SM.setDiagHandler(OldDiagHandler, OldDiagCtx);
50 }
51 
52 static SourceMgr setupSM(std::string &LastErrorMessage) {
53   SourceMgr SM;
54   SM.setDiagHandler(handleDiagnostic, &LastErrorMessage);
55   return SM;
56 }
57 
58 // Parse the magic number. This function returns true if this represents remark
59 // metadata, false otherwise.
60 static Expected<bool> parseMagic(StringRef &Buf) {
61   if (!Buf.consume_front(remarks::Magic))
62     return false;
63 
64   if (Buf.size() < 1 || !Buf.consume_front(StringRef("\0", 1)))
65     return createStringError(std::errc::illegal_byte_sequence,
66                              "Expecting \\0 after magic number.");
67   return true;
68 }
69 
70 static Expected<uint64_t> parseVersion(StringRef &Buf) {
71   if (Buf.size() < sizeof(uint64_t))
72     return createStringError(std::errc::illegal_byte_sequence,
73                              "Expecting version number.");
74 
75   uint64_t Version =
76       support::endian::read<uint64_t, support::little, support::unaligned>(
77           Buf.data());
78   if (Version != remarks::CurrentRemarkVersion)
79     return createStringError(std::errc::illegal_byte_sequence,
80                              "Mismatching remark version. Got %" PRId64
81                              ", expected %" PRId64 ".",
82                              Version, remarks::CurrentRemarkVersion);
83   Buf = Buf.drop_front(sizeof(uint64_t));
84   return Version;
85 }
86 
87 static Expected<uint64_t> parseStrTabSize(StringRef &Buf) {
88   if (Buf.size() < sizeof(uint64_t))
89     return createStringError(std::errc::illegal_byte_sequence,
90                              "Expecting string table size.");
91   uint64_t StrTabSize =
92       support::endian::read<uint64_t, support::little, support::unaligned>(
93           Buf.data());
94   Buf = Buf.drop_front(sizeof(uint64_t));
95   return StrTabSize;
96 }
97 
98 static Expected<ParsedStringTable> parseStrTab(StringRef &Buf,
99                                                uint64_t StrTabSize) {
100   if (Buf.size() < StrTabSize)
101     return createStringError(std::errc::illegal_byte_sequence,
102                              "Expecting string table.");
103 
104   // Attach the string table to the parser.
105   ParsedStringTable Result(StringRef(Buf.data(), StrTabSize));
106   Buf = Buf.drop_front(StrTabSize);
107   return Expected<ParsedStringTable>(std::move(Result));
108 }
109 
110 Expected<std::unique_ptr<YAMLRemarkParser>>
111 remarks::createYAMLParserFromMeta(StringRef Buf,
112                                   Optional<ParsedStringTable> StrTab) {
113   // We now have a magic number. The metadata has to be correct.
114   Expected<bool> isMeta = parseMagic(Buf);
115   if (!isMeta)
116     return isMeta.takeError();
117   // If it's not recognized as metadata, roll back.
118   std::unique_ptr<MemoryBuffer> SeparateBuf;
119   if (*isMeta) {
120     Expected<uint64_t> Version = parseVersion(Buf);
121     if (!Version)
122       return Version.takeError();
123 
124     Expected<uint64_t> StrTabSize = parseStrTabSize(Buf);
125     if (!StrTabSize)
126       return StrTabSize.takeError();
127 
128     // If the size of string table is not 0, try to build one.
129     if (*StrTabSize != 0) {
130       if (StrTab)
131         return createStringError(std::errc::illegal_byte_sequence,
132                                  "String table already provided.");
133       Expected<ParsedStringTable> MaybeStrTab = parseStrTab(Buf, *StrTabSize);
134       if (!MaybeStrTab)
135         return MaybeStrTab.takeError();
136       StrTab = std::move(*MaybeStrTab);
137     }
138     // If it starts with "---", there is no external file.
139     if (!Buf.startswith("---")) {
140       // At this point, we expect Buf to contain the external file path.
141       // Try to open the file and start parsing from there.
142       ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
143           MemoryBuffer::getFile(Buf);
144       if (std::error_code EC = BufferOrErr.getError())
145         return errorCodeToError(EC);
146 
147       // Keep the buffer alive.
148       SeparateBuf = std::move(*BufferOrErr);
149       Buf = SeparateBuf->getBuffer();
150     }
151   }
152 
153   std::unique_ptr<YAMLRemarkParser> Result =
154       StrTab
155           ? std::make_unique<YAMLStrTabRemarkParser>(Buf, std::move(*StrTab))
156           : std::make_unique<YAMLRemarkParser>(Buf);
157   if (SeparateBuf)
158     Result->SeparateBuf = std::move(SeparateBuf);
159   return std::move(Result);
160 }
161 
// Construct a parser over Buf without a string table; delegates to the
// two-argument constructor with None.
YAMLRemarkParser::YAMLRemarkParser(StringRef Buf)
    : YAMLRemarkParser(Buf, None) {}

// Construct a parser over Buf with an optional string table.
//
// The source manager is created by setupSM, which registers LastErrorMessage
// as the destination for diagnostics; the YAML stream is built on Buf and that
// source manager, and YAMLIt starts at the stream's first document.
// NOTE(review): members are initialized in the order they are declared in the
// class, not the order written here. SM depends on LastErrorMessage, Stream on
// SM, and YAMLIt on Stream -- confirm the header declares them in that order.
YAMLRemarkParser::YAMLRemarkParser(StringRef Buf,
                                   Optional<ParsedStringTable> StrTab)
    : RemarkParser{Format::YAML}, StrTab(std::move(StrTab)), LastErrorMessage(),
      SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {}
169 
170 Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) {
171   return make_error<YAMLParseError>(Message, SM, Stream, Node);
172 }
173 
174 Error YAMLRemarkParser::error() {
175   if (LastErrorMessage.empty())
176     return Error::success();
177   Error E = make_error<YAMLParseError>(LastErrorMessage);
178   LastErrorMessage.clear();
179   return E;
180 }
181 
182 Expected<std::unique_ptr<Remark>>
183 YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) {
184   if (Error E = error())
185     return std::move(E);
186 
187   yaml::Node *YAMLRoot = RemarkEntry.getRoot();
188   if (!YAMLRoot) {
189     return createStringError(std::make_error_code(std::errc::invalid_argument),
190                              "not a valid YAML file.");
191   }
192 
193   auto *Root = dyn_cast<yaml::MappingNode>(YAMLRoot);
194   if (!Root)
195     return error("document root is not of mapping type.", *YAMLRoot);
196 
197   std::unique_ptr<Remark> Result = std::make_unique<Remark>();
198   Remark &TheRemark = *Result;
199 
200   // First, the type. It needs special handling since is not part of the
201   // key-value stream.
202   Expected<Type> T = parseType(*Root);
203   if (!T)
204     return T.takeError();
205   else
206     TheRemark.RemarkType = *T;
207 
208   // Then, parse the fields, one by one.
209   for (yaml::KeyValueNode &RemarkField : *Root) {
210     Expected<StringRef> MaybeKey = parseKey(RemarkField);
211     if (!MaybeKey)
212       return MaybeKey.takeError();
213     StringRef KeyName = *MaybeKey;
214 
215     if (KeyName == "Pass") {
216       if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
217         TheRemark.PassName = *MaybeStr;
218       else
219         return MaybeStr.takeError();
220     } else if (KeyName == "Name") {
221       if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
222         TheRemark.RemarkName = *MaybeStr;
223       else
224         return MaybeStr.takeError();
225     } else if (KeyName == "Function") {
226       if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
227         TheRemark.FunctionName = *MaybeStr;
228       else
229         return MaybeStr.takeError();
230     } else if (KeyName == "Hotness") {
231       if (Expected<unsigned> MaybeU = parseUnsigned(RemarkField))
232         TheRemark.Hotness = *MaybeU;
233       else
234         return MaybeU.takeError();
235     } else if (KeyName == "DebugLoc") {
236       if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(RemarkField))
237         TheRemark.Loc = *MaybeLoc;
238       else
239         return MaybeLoc.takeError();
240     } else if (KeyName == "Args") {
241       auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
242       if (!Args)
243         return error("wrong value type for key.", RemarkField);
244 
245       for (yaml::Node &Arg : *Args) {
246         if (Expected<Argument> MaybeArg = parseArg(Arg))
247           TheRemark.Args.push_back(*MaybeArg);
248         else
249           return MaybeArg.takeError();
250       }
251     } else {
252       return error("unknown key.", RemarkField);
253     }
254   }
255 
256   // Check if any of the mandatory fields are missing.
257   if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() ||
258       TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty())
259     return error("Type, Pass, Name or Function missing.",
260                  *RemarkEntry.getRoot());
261 
262   return std::move(Result);
263 }
264 
265 Expected<Type> YAMLRemarkParser::parseType(yaml::MappingNode &Node) {
266   auto Type = StringSwitch<remarks::Type>(Node.getRawTag())
267                   .Case("!Passed", remarks::Type::Passed)
268                   .Case("!Missed", remarks::Type::Missed)
269                   .Case("!Analysis", remarks::Type::Analysis)
270                   .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute)
271                   .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing)
272                   .Case("!Failure", remarks::Type::Failure)
273                   .Default(remarks::Type::Unknown);
274   if (Type == remarks::Type::Unknown)
275     return error("expected a remark tag.", Node);
276   return Type;
277 }
278 
279 Expected<StringRef> YAMLRemarkParser::parseKey(yaml::KeyValueNode &Node) {
280   if (auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey()))
281     return Key->getRawValue();
282 
283   return error("key is not a string.", Node);
284 }
285 
286 Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
287   auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
288   if (!Value)
289     return error("expected a value of scalar type.", Node);
290   StringRef Result = Value->getRawValue();
291 
292   if (Result.front() == '\'')
293     Result = Result.drop_front();
294 
295   if (Result.back() == '\'')
296     Result = Result.drop_back();
297 
298   return Result;
299 }
300 
301 Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
302   SmallVector<char, 4> Tmp;
303   auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
304   if (!Value)
305     return error("expected a value of scalar type.", Node);
306   unsigned UnsignedValue = 0;
307   if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue))
308     return error("expected a value of integer type.", *Value);
309   return UnsignedValue;
310 }
311 
312 Expected<RemarkLocation>
313 YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) {
314   auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
315   if (!DebugLoc)
316     return error("expected a value of mapping type.", Node);
317 
318   Optional<StringRef> File;
319   Optional<unsigned> Line;
320   Optional<unsigned> Column;
321 
322   for (yaml::KeyValueNode &DLNode : *DebugLoc) {
323     Expected<StringRef> MaybeKey = parseKey(DLNode);
324     if (!MaybeKey)
325       return MaybeKey.takeError();
326     StringRef KeyName = *MaybeKey;
327 
328     if (KeyName == "File") {
329       if (Expected<StringRef> MaybeStr = parseStr(DLNode))
330         File = *MaybeStr;
331       else
332         return MaybeStr.takeError();
333     } else if (KeyName == "Column") {
334       if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
335         Column = *MaybeU;
336       else
337         return MaybeU.takeError();
338     } else if (KeyName == "Line") {
339       if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
340         Line = *MaybeU;
341       else
342         return MaybeU.takeError();
343     } else {
344       return error("unknown entry in DebugLoc map.", DLNode);
345     }
346   }
347 
348   // If any of the debug loc fields is missing, return an error.
349   if (!File || !Line || !Column)
350     return error("DebugLoc node incomplete.", Node);
351 
352   return RemarkLocation{*File, *Line, *Column};
353 }
354 
355 Expected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) {
356   auto *ArgMap = dyn_cast<yaml::MappingNode>(&Node);
357   if (!ArgMap)
358     return error("expected a value of mapping type.", Node);
359 
360   Optional<StringRef> KeyStr;
361   Optional<StringRef> ValueStr;
362   Optional<RemarkLocation> Loc;
363 
364   for (yaml::KeyValueNode &ArgEntry : *ArgMap) {
365     Expected<StringRef> MaybeKey = parseKey(ArgEntry);
366     if (!MaybeKey)
367       return MaybeKey.takeError();
368     StringRef KeyName = *MaybeKey;
369 
370     // Try to parse debug locs.
371     if (KeyName == "DebugLoc") {
372       // Can't have multiple DebugLoc entries per argument.
373       if (Loc)
374         return error("only one DebugLoc entry is allowed per argument.",
375                      ArgEntry);
376 
377       if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(ArgEntry)) {
378         Loc = *MaybeLoc;
379         continue;
380       } else
381         return MaybeLoc.takeError();
382     }
383 
384     // If we already have a string, error out.
385     if (ValueStr)
386       return error("only one string entry is allowed per argument.", ArgEntry);
387 
388     // Try to parse the value.
389     if (Expected<StringRef> MaybeStr = parseStr(ArgEntry))
390       ValueStr = *MaybeStr;
391     else
392       return MaybeStr.takeError();
393 
394     // Keep the key from the string.
395     KeyStr = KeyName;
396   }
397 
398   if (!KeyStr)
399     return error("argument key is missing.", *ArgMap);
400   if (!ValueStr)
401     return error("argument value is missing.", *ArgMap);
402 
403   return Argument{*KeyStr, *ValueStr, Loc};
404 }
405 
406 Expected<std::unique_ptr<Remark>> YAMLRemarkParser::next() {
407   if (YAMLIt == Stream.end())
408     return make_error<EndOfFileError>();
409 
410   Expected<std::unique_ptr<Remark>> MaybeResult = parseRemark(*YAMLIt);
411   if (!MaybeResult) {
412     // Avoid garbage input, set the iterator to the end.
413     YAMLIt = Stream.end();
414     return MaybeResult.takeError();
415   }
416 
417   ++YAMLIt;
418 
419   return std::move(*MaybeResult);
420 }
421 
422 Expected<StringRef> YAMLStrTabRemarkParser::parseStr(yaml::KeyValueNode &Node) {
423   auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
424   if (!Value)
425     return error("expected a value of scalar type.", Node);
426   StringRef Result;
427   // If we have a string table, parse it as an unsigned.
428   unsigned StrID = 0;
429   if (Expected<unsigned> MaybeStrID = parseUnsigned(Node))
430     StrID = *MaybeStrID;
431   else
432     return MaybeStrID.takeError();
433 
434   if (Expected<StringRef> Str = (*StrTab)[StrID])
435     Result = *Str;
436   else
437     return Str.takeError();
438 
439   if (Result.front() == '\'')
440     Result = Result.drop_front();
441 
442   if (Result.back() == '\'')
443     Result = Result.drop_back();
444 
445   return Result;
446 }
447