1 //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the log symbolizer markup data model and parser.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/DebugInfo/Symbolize/Markup.h"
15 
16 #include "llvm/ADT/StringExtras.h"
17 
18 namespace llvm {
19 namespace symbolize {
20 
// Regular expression for the SGR control sequences that the parser splits out
// of ordinary text. It accepts exactly these sequences:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
26 
// Constructs a parser, initializing the SGRSyntax matcher from the
// SGRSyntaxStr pattern.
MarkupParser::MarkupParser() : SGRSyntax(SGRSyntaxStr) {}
28 
29 static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
30   return Str.take_front(Pos - Str.begin());
31 }
32 static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
33   Str = Str.drop_front(Pos - Str.begin());
34 }
35 
36 void MarkupParser::parseLine(StringRef Line) {
37   Buffer.clear();
38   while (!Line.empty()) {
39     // Find the first valid markup element, if any.
40     if (Optional<MarkupNode> Element = parseElement(Line)) {
41       parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
42       Buffer.push_back(std::move(*Element));
43       advanceTo(Line, Element->Text.end());
44     } else {
45       // The line doesn't contain any more markup elements, so emit it as text.
46       parseTextOutsideMarkup(Line);
47       return;
48     }
49   }
50 }
51 
52 // Finds and returns the next valid markup element in the given line. Returns
53 // None if the line contains no valid elements.
54 Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
55   while (true) {
56     // Find next element using begin and end markers.
57     size_t BeginPos = Line.find("{{{");
58     if (BeginPos == StringRef::npos)
59       return None;
60     size_t EndPos = Line.find("}}}", BeginPos + 3);
61     if (EndPos == StringRef::npos)
62       return None;
63     EndPos += 3;
64     MarkupNode Element;
65     Element.Text = Line.slice(BeginPos, EndPos);
66     Line = Line.substr(EndPos);
67 
68     // Parse tag.
69     StringRef Content = Element.Text.drop_front(3).drop_back(3);
70     StringRef FieldsContent;
71     std::tie(Element.Tag, FieldsContent) = Content.split(':');
72     if (Element.Tag.empty())
73       continue;
74 
75     // Parse fields.
76     if (!FieldsContent.empty())
77       FieldsContent.split(Element.Fields, ":");
78     else if (Content.back() == ':')
79       Element.Fields.push_back(FieldsContent);
80 
81     return Element;
82   }
83 }
84 
85 static MarkupNode textNode(StringRef Text) {
86   MarkupNode Node;
87   Node.Text = Text;
88   return Node;
89 }
90 
91 // Parses a region of text known to be outside any markup elements. Such text
92 // may still contain SGR control codes, so the region is further subdivided into
93 // control codes and true text regions.
94 void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
95   if (Text.empty())
96     return;
97   SmallVector<StringRef> Matches;
98   while (SGRSyntax.match(Text, &Matches)) {
99     // Emit any text before the SGR element.
100     if (Matches.begin()->begin() != Text.begin())
101       Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
102 
103     Buffer.push_back(textNode(*Matches.begin()));
104     advanceTo(Text, Matches.begin()->end());
105   }
106   if (!Text.empty())
107     Buffer.push_back(textNode(Text));
108 }
109 
110 } // end namespace symbolize
111 } // end namespace llvm
112