//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the log symbolizer markup data model and parser.
///
//===----------------------------------------------------------------------===//
132040b6dfSDaniel Thornburgh
142040b6dfSDaniel Thornburgh #include "llvm/DebugInfo/Symbolize/Markup.h"
152040b6dfSDaniel Thornburgh
168bd078b5SDaniel Thornburgh #include "llvm/ADT/STLExtras.h"
172040b6dfSDaniel Thornburgh #include "llvm/ADT/StringExtras.h"
182040b6dfSDaniel Thornburgh
192040b6dfSDaniel Thornburgh namespace llvm {
202040b6dfSDaniel Thornburgh namespace symbolize {
212040b6dfSDaniel Thornburgh
// Regular expression for the SGR control codes that are recognized inside
// plain text. It accepts an escape character followed by '[', one of the
// codes below, and a terminating 'm':
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
272040b6dfSDaniel Thornburgh
MarkupParser(StringSet<> MultilineTags)288bd078b5SDaniel Thornburgh MarkupParser::MarkupParser(StringSet<> MultilineTags)
298bd078b5SDaniel Thornburgh : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
302040b6dfSDaniel Thornburgh
takeTo(StringRef Str,StringRef::iterator Pos)312040b6dfSDaniel Thornburgh static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
322040b6dfSDaniel Thornburgh return Str.take_front(Pos - Str.begin());
332040b6dfSDaniel Thornburgh }
advanceTo(StringRef & Str,StringRef::iterator Pos)342040b6dfSDaniel Thornburgh static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
352040b6dfSDaniel Thornburgh Str = Str.drop_front(Pos - Str.begin());
362040b6dfSDaniel Thornburgh }
372040b6dfSDaniel Thornburgh
parseLine(StringRef Line)382040b6dfSDaniel Thornburgh void MarkupParser::parseLine(StringRef Line) {
392040b6dfSDaniel Thornburgh Buffer.clear();
408bd078b5SDaniel Thornburgh NextIdx = 0;
418bd078b5SDaniel Thornburgh FinishedMultiline.clear();
428bd078b5SDaniel Thornburgh this->Line = Line;
438bd078b5SDaniel Thornburgh }
448bd078b5SDaniel Thornburgh
nextNode()458bd078b5SDaniel Thornburgh Optional<MarkupNode> MarkupParser::nextNode() {
468bd078b5SDaniel Thornburgh // Pull something out of the buffer if possible.
478bd078b5SDaniel Thornburgh if (!Buffer.empty()) {
488bd078b5SDaniel Thornburgh if (NextIdx < Buffer.size())
498bd078b5SDaniel Thornburgh return std::move(Buffer[NextIdx++]);
508bd078b5SDaniel Thornburgh NextIdx = 0;
518bd078b5SDaniel Thornburgh Buffer.clear();
528bd078b5SDaniel Thornburgh }
538bd078b5SDaniel Thornburgh
548bd078b5SDaniel Thornburgh // The buffer is empty, so parse the next bit of the line.
558bd078b5SDaniel Thornburgh
568bd078b5SDaniel Thornburgh if (Line.empty())
578bd078b5SDaniel Thornburgh return None;
588bd078b5SDaniel Thornburgh
598bd078b5SDaniel Thornburgh if (!InProgressMultiline.empty()) {
608bd078b5SDaniel Thornburgh if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
618bd078b5SDaniel Thornburgh llvm::append_range(InProgressMultiline, *MultilineEnd);
628bd078b5SDaniel Thornburgh assert(FinishedMultiline.empty() &&
638bd078b5SDaniel Thornburgh "At most one multi-line element can be finished at a time.");
648bd078b5SDaniel Thornburgh FinishedMultiline.swap(InProgressMultiline);
658bd078b5SDaniel Thornburgh // Parse the multi-line element as if it were contiguous.
668bd078b5SDaniel Thornburgh advanceTo(Line, MultilineEnd->end());
678bd078b5SDaniel Thornburgh return *parseElement(FinishedMultiline);
688bd078b5SDaniel Thornburgh }
698bd078b5SDaniel Thornburgh
708bd078b5SDaniel Thornburgh // The whole line is part of the multi-line element.
718bd078b5SDaniel Thornburgh llvm::append_range(InProgressMultiline, Line);
728bd078b5SDaniel Thornburgh Line = Line.drop_front(Line.size());
738bd078b5SDaniel Thornburgh return None;
748bd078b5SDaniel Thornburgh }
758bd078b5SDaniel Thornburgh
762040b6dfSDaniel Thornburgh // Find the first valid markup element, if any.
772040b6dfSDaniel Thornburgh if (Optional<MarkupNode> Element = parseElement(Line)) {
782040b6dfSDaniel Thornburgh parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
792040b6dfSDaniel Thornburgh Buffer.push_back(std::move(*Element));
802040b6dfSDaniel Thornburgh advanceTo(Line, Element->Text.end());
818bd078b5SDaniel Thornburgh return nextNode();
828bd078b5SDaniel Thornburgh }
838bd078b5SDaniel Thornburgh
848bd078b5SDaniel Thornburgh // Since there were no valid elements remaining, see if the line opens a
858bd078b5SDaniel Thornburgh // multi-line element.
868bd078b5SDaniel Thornburgh if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
878bd078b5SDaniel Thornburgh // Emit any text before the element.
888bd078b5SDaniel Thornburgh parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
898bd078b5SDaniel Thornburgh
908bd078b5SDaniel Thornburgh // Begin recording the multi-line element.
918bd078b5SDaniel Thornburgh llvm::append_range(InProgressMultiline, *MultilineBegin);
928bd078b5SDaniel Thornburgh Line = Line.drop_front(Line.size());
938bd078b5SDaniel Thornburgh return nextNode();
948bd078b5SDaniel Thornburgh }
958bd078b5SDaniel Thornburgh
962040b6dfSDaniel Thornburgh // The line doesn't contain any more markup elements, so emit it as text.
972040b6dfSDaniel Thornburgh parseTextOutsideMarkup(Line);
988bd078b5SDaniel Thornburgh Line = Line.drop_front(Line.size());
998bd078b5SDaniel Thornburgh return nextNode();
1008bd078b5SDaniel Thornburgh }
1018bd078b5SDaniel Thornburgh
flush()1028bd078b5SDaniel Thornburgh void MarkupParser::flush() {
103*17e4c217SDaniel Thornburgh Buffer.clear();
104*17e4c217SDaniel Thornburgh NextIdx = 0;
105*17e4c217SDaniel Thornburgh Line = {};
1068bd078b5SDaniel Thornburgh if (InProgressMultiline.empty())
1072040b6dfSDaniel Thornburgh return;
1088bd078b5SDaniel Thornburgh FinishedMultiline.swap(InProgressMultiline);
1098bd078b5SDaniel Thornburgh parseTextOutsideMarkup(FinishedMultiline);
1102040b6dfSDaniel Thornburgh }
1112040b6dfSDaniel Thornburgh
1122040b6dfSDaniel Thornburgh // Finds and returns the next valid markup element in the given line. Returns
1132040b6dfSDaniel Thornburgh // None if the line contains no valid elements.
parseElement(StringRef Line)1142040b6dfSDaniel Thornburgh Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
1152040b6dfSDaniel Thornburgh while (true) {
1162040b6dfSDaniel Thornburgh // Find next element using begin and end markers.
1172040b6dfSDaniel Thornburgh size_t BeginPos = Line.find("{{{");
1182040b6dfSDaniel Thornburgh if (BeginPos == StringRef::npos)
1192040b6dfSDaniel Thornburgh return None;
1202040b6dfSDaniel Thornburgh size_t EndPos = Line.find("}}}", BeginPos + 3);
1212040b6dfSDaniel Thornburgh if (EndPos == StringRef::npos)
1222040b6dfSDaniel Thornburgh return None;
1232040b6dfSDaniel Thornburgh EndPos += 3;
1242040b6dfSDaniel Thornburgh MarkupNode Element;
1252040b6dfSDaniel Thornburgh Element.Text = Line.slice(BeginPos, EndPos);
1262040b6dfSDaniel Thornburgh Line = Line.substr(EndPos);
1272040b6dfSDaniel Thornburgh
1282040b6dfSDaniel Thornburgh // Parse tag.
1292040b6dfSDaniel Thornburgh StringRef Content = Element.Text.drop_front(3).drop_back(3);
1302040b6dfSDaniel Thornburgh StringRef FieldsContent;
1312040b6dfSDaniel Thornburgh std::tie(Element.Tag, FieldsContent) = Content.split(':');
1322040b6dfSDaniel Thornburgh if (Element.Tag.empty())
1332040b6dfSDaniel Thornburgh continue;
1342040b6dfSDaniel Thornburgh
1352040b6dfSDaniel Thornburgh // Parse fields.
1362040b6dfSDaniel Thornburgh if (!FieldsContent.empty())
1372040b6dfSDaniel Thornburgh FieldsContent.split(Element.Fields, ":");
1382040b6dfSDaniel Thornburgh else if (Content.back() == ':')
1392040b6dfSDaniel Thornburgh Element.Fields.push_back(FieldsContent);
1402040b6dfSDaniel Thornburgh
1412040b6dfSDaniel Thornburgh return Element;
1422040b6dfSDaniel Thornburgh }
1432040b6dfSDaniel Thornburgh }
1442040b6dfSDaniel Thornburgh
textNode(StringRef Text)1452040b6dfSDaniel Thornburgh static MarkupNode textNode(StringRef Text) {
1462040b6dfSDaniel Thornburgh MarkupNode Node;
1472040b6dfSDaniel Thornburgh Node.Text = Text;
1482040b6dfSDaniel Thornburgh return Node;
1492040b6dfSDaniel Thornburgh }
1502040b6dfSDaniel Thornburgh
1512040b6dfSDaniel Thornburgh // Parses a region of text known to be outside any markup elements. Such text
1522040b6dfSDaniel Thornburgh // may still contain SGR control codes, so the region is further subdivided into
1532040b6dfSDaniel Thornburgh // control codes and true text regions.
parseTextOutsideMarkup(StringRef Text)1542040b6dfSDaniel Thornburgh void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
1552040b6dfSDaniel Thornburgh if (Text.empty())
1562040b6dfSDaniel Thornburgh return;
1572040b6dfSDaniel Thornburgh SmallVector<StringRef> Matches;
1582040b6dfSDaniel Thornburgh while (SGRSyntax.match(Text, &Matches)) {
1592040b6dfSDaniel Thornburgh // Emit any text before the SGR element.
1602040b6dfSDaniel Thornburgh if (Matches.begin()->begin() != Text.begin())
1612040b6dfSDaniel Thornburgh Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
1622040b6dfSDaniel Thornburgh
1632040b6dfSDaniel Thornburgh Buffer.push_back(textNode(*Matches.begin()));
1642040b6dfSDaniel Thornburgh advanceTo(Text, Matches.begin()->end());
1652040b6dfSDaniel Thornburgh }
1662040b6dfSDaniel Thornburgh if (!Text.empty())
1672040b6dfSDaniel Thornburgh Buffer.push_back(textNode(Text));
1682040b6dfSDaniel Thornburgh }
1692040b6dfSDaniel Thornburgh
1708bd078b5SDaniel Thornburgh // Given that a line doesn't contain any valid markup, see if it ends with the
1718bd078b5SDaniel Thornburgh // start of a multi-line element. If so, returns the beginning.
parseMultiLineBegin(StringRef Line)1728bd078b5SDaniel Thornburgh Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
1738bd078b5SDaniel Thornburgh // A multi-line begin marker must be the last one on the line.
1748bd078b5SDaniel Thornburgh size_t BeginPos = Line.rfind("{{{");
1758bd078b5SDaniel Thornburgh if (BeginPos == StringRef::npos)
1768bd078b5SDaniel Thornburgh return None;
1778bd078b5SDaniel Thornburgh size_t BeginTagPos = BeginPos + 3;
1788bd078b5SDaniel Thornburgh
1798bd078b5SDaniel Thornburgh // If there are any end markers afterwards, the begin marker cannot belong to
1808bd078b5SDaniel Thornburgh // a multi-line element.
1818bd078b5SDaniel Thornburgh size_t EndPos = Line.find("}}}", BeginTagPos);
1828bd078b5SDaniel Thornburgh if (EndPos != StringRef::npos)
1838bd078b5SDaniel Thornburgh return None;
1848bd078b5SDaniel Thornburgh
1858bd078b5SDaniel Thornburgh // Check whether the tag is registered multi-line.
1868bd078b5SDaniel Thornburgh size_t EndTagPos = Line.find(':', BeginTagPos);
1878bd078b5SDaniel Thornburgh if (EndTagPos == StringRef::npos)
1888bd078b5SDaniel Thornburgh return None;
1898bd078b5SDaniel Thornburgh StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
1908bd078b5SDaniel Thornburgh if (!MultilineTags.contains(Tag))
1918bd078b5SDaniel Thornburgh return None;
1928bd078b5SDaniel Thornburgh return Line.substr(BeginPos);
1938bd078b5SDaniel Thornburgh }
1948bd078b5SDaniel Thornburgh
1958bd078b5SDaniel Thornburgh // See if the line begins with the ending of an in-progress multi-line element.
1968bd078b5SDaniel Thornburgh // If so, return the ending.
parseMultiLineEnd(StringRef Line)1978bd078b5SDaniel Thornburgh Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
1988bd078b5SDaniel Thornburgh size_t EndPos = Line.find("}}}");
1998bd078b5SDaniel Thornburgh if (EndPos == StringRef::npos)
2008bd078b5SDaniel Thornburgh return None;
2018bd078b5SDaniel Thornburgh return Line.take_front(EndPos + 3);
2028bd078b5SDaniel Thornburgh }
2038bd078b5SDaniel Thornburgh
2042040b6dfSDaniel Thornburgh } // end namespace symbolize
2052040b6dfSDaniel Thornburgh } // end namespace llvm
206