//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the log symbolizer markup data model and parser.
///
//===----------------------------------------------------------------------===//
132040b6dfSDaniel Thornburgh 
142040b6dfSDaniel Thornburgh #include "llvm/DebugInfo/Symbolize/Markup.h"
152040b6dfSDaniel Thornburgh 
168bd078b5SDaniel Thornburgh #include "llvm/ADT/STLExtras.h"
172040b6dfSDaniel Thornburgh #include "llvm/ADT/StringExtras.h"
182040b6dfSDaniel Thornburgh 
192040b6dfSDaniel Thornburgh namespace llvm {
202040b6dfSDaniel Thornburgh namespace symbolize {
212040b6dfSDaniel Thornburgh 
// Regular expression source for the SGR (Select Graphic Rendition) control
// codes that are recognized in text outside of markup elements. It accepts
// exactly these sequences:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
272040b6dfSDaniel Thornburgh 
MarkupParser(StringSet<> MultilineTags)288bd078b5SDaniel Thornburgh MarkupParser::MarkupParser(StringSet<> MultilineTags)
298bd078b5SDaniel Thornburgh     : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
302040b6dfSDaniel Thornburgh 
takeTo(StringRef Str,StringRef::iterator Pos)312040b6dfSDaniel Thornburgh static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
322040b6dfSDaniel Thornburgh   return Str.take_front(Pos - Str.begin());
332040b6dfSDaniel Thornburgh }
advanceTo(StringRef & Str,StringRef::iterator Pos)342040b6dfSDaniel Thornburgh static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
352040b6dfSDaniel Thornburgh   Str = Str.drop_front(Pos - Str.begin());
362040b6dfSDaniel Thornburgh }
372040b6dfSDaniel Thornburgh 
parseLine(StringRef Line)382040b6dfSDaniel Thornburgh void MarkupParser::parseLine(StringRef Line) {
392040b6dfSDaniel Thornburgh   Buffer.clear();
408bd078b5SDaniel Thornburgh   NextIdx = 0;
418bd078b5SDaniel Thornburgh   FinishedMultiline.clear();
428bd078b5SDaniel Thornburgh   this->Line = Line;
438bd078b5SDaniel Thornburgh }
448bd078b5SDaniel Thornburgh 
nextNode()458bd078b5SDaniel Thornburgh Optional<MarkupNode> MarkupParser::nextNode() {
468bd078b5SDaniel Thornburgh   // Pull something out of the buffer if possible.
478bd078b5SDaniel Thornburgh   if (!Buffer.empty()) {
488bd078b5SDaniel Thornburgh     if (NextIdx < Buffer.size())
498bd078b5SDaniel Thornburgh       return std::move(Buffer[NextIdx++]);
508bd078b5SDaniel Thornburgh     NextIdx = 0;
518bd078b5SDaniel Thornburgh     Buffer.clear();
528bd078b5SDaniel Thornburgh   }
538bd078b5SDaniel Thornburgh 
548bd078b5SDaniel Thornburgh   // The buffer is empty, so parse the next bit of the line.
558bd078b5SDaniel Thornburgh 
568bd078b5SDaniel Thornburgh   if (Line.empty())
578bd078b5SDaniel Thornburgh     return None;
588bd078b5SDaniel Thornburgh 
598bd078b5SDaniel Thornburgh   if (!InProgressMultiline.empty()) {
608bd078b5SDaniel Thornburgh     if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
618bd078b5SDaniel Thornburgh       llvm::append_range(InProgressMultiline, *MultilineEnd);
628bd078b5SDaniel Thornburgh       assert(FinishedMultiline.empty() &&
638bd078b5SDaniel Thornburgh              "At most one multi-line element can be finished at a time.");
648bd078b5SDaniel Thornburgh       FinishedMultiline.swap(InProgressMultiline);
658bd078b5SDaniel Thornburgh       // Parse the multi-line element as if it were contiguous.
668bd078b5SDaniel Thornburgh       advanceTo(Line, MultilineEnd->end());
678bd078b5SDaniel Thornburgh       return *parseElement(FinishedMultiline);
688bd078b5SDaniel Thornburgh     }
698bd078b5SDaniel Thornburgh 
708bd078b5SDaniel Thornburgh     // The whole line is part of the multi-line element.
718bd078b5SDaniel Thornburgh     llvm::append_range(InProgressMultiline, Line);
728bd078b5SDaniel Thornburgh     Line = Line.drop_front(Line.size());
738bd078b5SDaniel Thornburgh     return None;
748bd078b5SDaniel Thornburgh   }
758bd078b5SDaniel Thornburgh 
762040b6dfSDaniel Thornburgh   // Find the first valid markup element, if any.
772040b6dfSDaniel Thornburgh   if (Optional<MarkupNode> Element = parseElement(Line)) {
782040b6dfSDaniel Thornburgh     parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
792040b6dfSDaniel Thornburgh     Buffer.push_back(std::move(*Element));
802040b6dfSDaniel Thornburgh     advanceTo(Line, Element->Text.end());
818bd078b5SDaniel Thornburgh     return nextNode();
828bd078b5SDaniel Thornburgh   }
838bd078b5SDaniel Thornburgh 
848bd078b5SDaniel Thornburgh   // Since there were no valid elements remaining, see if the line opens a
858bd078b5SDaniel Thornburgh   // multi-line element.
868bd078b5SDaniel Thornburgh   if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
878bd078b5SDaniel Thornburgh     // Emit any text before the element.
888bd078b5SDaniel Thornburgh     parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
898bd078b5SDaniel Thornburgh 
908bd078b5SDaniel Thornburgh     // Begin recording the multi-line element.
918bd078b5SDaniel Thornburgh     llvm::append_range(InProgressMultiline, *MultilineBegin);
928bd078b5SDaniel Thornburgh     Line = Line.drop_front(Line.size());
938bd078b5SDaniel Thornburgh     return nextNode();
948bd078b5SDaniel Thornburgh   }
958bd078b5SDaniel Thornburgh 
962040b6dfSDaniel Thornburgh   // The line doesn't contain any more markup elements, so emit it as text.
972040b6dfSDaniel Thornburgh   parseTextOutsideMarkup(Line);
988bd078b5SDaniel Thornburgh   Line = Line.drop_front(Line.size());
998bd078b5SDaniel Thornburgh   return nextNode();
1008bd078b5SDaniel Thornburgh }
1018bd078b5SDaniel Thornburgh 
flush()1028bd078b5SDaniel Thornburgh void MarkupParser::flush() {
103*17e4c217SDaniel Thornburgh   Buffer.clear();
104*17e4c217SDaniel Thornburgh   NextIdx = 0;
105*17e4c217SDaniel Thornburgh   Line = {};
1068bd078b5SDaniel Thornburgh   if (InProgressMultiline.empty())
1072040b6dfSDaniel Thornburgh     return;
1088bd078b5SDaniel Thornburgh   FinishedMultiline.swap(InProgressMultiline);
1098bd078b5SDaniel Thornburgh   parseTextOutsideMarkup(FinishedMultiline);
1102040b6dfSDaniel Thornburgh }
1112040b6dfSDaniel Thornburgh 
1122040b6dfSDaniel Thornburgh // Finds and returns the next valid markup element in the given line. Returns
1132040b6dfSDaniel Thornburgh // None if the line contains no valid elements.
parseElement(StringRef Line)1142040b6dfSDaniel Thornburgh Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
1152040b6dfSDaniel Thornburgh   while (true) {
1162040b6dfSDaniel Thornburgh     // Find next element using begin and end markers.
1172040b6dfSDaniel Thornburgh     size_t BeginPos = Line.find("{{{");
1182040b6dfSDaniel Thornburgh     if (BeginPos == StringRef::npos)
1192040b6dfSDaniel Thornburgh       return None;
1202040b6dfSDaniel Thornburgh     size_t EndPos = Line.find("}}}", BeginPos + 3);
1212040b6dfSDaniel Thornburgh     if (EndPos == StringRef::npos)
1222040b6dfSDaniel Thornburgh       return None;
1232040b6dfSDaniel Thornburgh     EndPos += 3;
1242040b6dfSDaniel Thornburgh     MarkupNode Element;
1252040b6dfSDaniel Thornburgh     Element.Text = Line.slice(BeginPos, EndPos);
1262040b6dfSDaniel Thornburgh     Line = Line.substr(EndPos);
1272040b6dfSDaniel Thornburgh 
1282040b6dfSDaniel Thornburgh     // Parse tag.
1292040b6dfSDaniel Thornburgh     StringRef Content = Element.Text.drop_front(3).drop_back(3);
1302040b6dfSDaniel Thornburgh     StringRef FieldsContent;
1312040b6dfSDaniel Thornburgh     std::tie(Element.Tag, FieldsContent) = Content.split(':');
1322040b6dfSDaniel Thornburgh     if (Element.Tag.empty())
1332040b6dfSDaniel Thornburgh       continue;
1342040b6dfSDaniel Thornburgh 
1352040b6dfSDaniel Thornburgh     // Parse fields.
1362040b6dfSDaniel Thornburgh     if (!FieldsContent.empty())
1372040b6dfSDaniel Thornburgh       FieldsContent.split(Element.Fields, ":");
1382040b6dfSDaniel Thornburgh     else if (Content.back() == ':')
1392040b6dfSDaniel Thornburgh       Element.Fields.push_back(FieldsContent);
1402040b6dfSDaniel Thornburgh 
1412040b6dfSDaniel Thornburgh     return Element;
1422040b6dfSDaniel Thornburgh   }
1432040b6dfSDaniel Thornburgh }
1442040b6dfSDaniel Thornburgh 
textNode(StringRef Text)1452040b6dfSDaniel Thornburgh static MarkupNode textNode(StringRef Text) {
1462040b6dfSDaniel Thornburgh   MarkupNode Node;
1472040b6dfSDaniel Thornburgh   Node.Text = Text;
1482040b6dfSDaniel Thornburgh   return Node;
1492040b6dfSDaniel Thornburgh }
1502040b6dfSDaniel Thornburgh 
1512040b6dfSDaniel Thornburgh // Parses a region of text known to be outside any markup elements. Such text
1522040b6dfSDaniel Thornburgh // may still contain SGR control codes, so the region is further subdivided into
1532040b6dfSDaniel Thornburgh // control codes and true text regions.
parseTextOutsideMarkup(StringRef Text)1542040b6dfSDaniel Thornburgh void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
1552040b6dfSDaniel Thornburgh   if (Text.empty())
1562040b6dfSDaniel Thornburgh     return;
1572040b6dfSDaniel Thornburgh   SmallVector<StringRef> Matches;
1582040b6dfSDaniel Thornburgh   while (SGRSyntax.match(Text, &Matches)) {
1592040b6dfSDaniel Thornburgh     // Emit any text before the SGR element.
1602040b6dfSDaniel Thornburgh     if (Matches.begin()->begin() != Text.begin())
1612040b6dfSDaniel Thornburgh       Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
1622040b6dfSDaniel Thornburgh 
1632040b6dfSDaniel Thornburgh     Buffer.push_back(textNode(*Matches.begin()));
1642040b6dfSDaniel Thornburgh     advanceTo(Text, Matches.begin()->end());
1652040b6dfSDaniel Thornburgh   }
1662040b6dfSDaniel Thornburgh   if (!Text.empty())
1672040b6dfSDaniel Thornburgh     Buffer.push_back(textNode(Text));
1682040b6dfSDaniel Thornburgh }
1692040b6dfSDaniel Thornburgh 
1708bd078b5SDaniel Thornburgh // Given that a line doesn't contain any valid markup, see if it ends with the
1718bd078b5SDaniel Thornburgh // start of a multi-line element. If so, returns the beginning.
parseMultiLineBegin(StringRef Line)1728bd078b5SDaniel Thornburgh Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
1738bd078b5SDaniel Thornburgh   // A multi-line begin marker must be the last one on the line.
1748bd078b5SDaniel Thornburgh   size_t BeginPos = Line.rfind("{{{");
1758bd078b5SDaniel Thornburgh   if (BeginPos == StringRef::npos)
1768bd078b5SDaniel Thornburgh     return None;
1778bd078b5SDaniel Thornburgh   size_t BeginTagPos = BeginPos + 3;
1788bd078b5SDaniel Thornburgh 
1798bd078b5SDaniel Thornburgh   // If there are any end markers afterwards, the begin marker cannot belong to
1808bd078b5SDaniel Thornburgh   // a multi-line element.
1818bd078b5SDaniel Thornburgh   size_t EndPos = Line.find("}}}", BeginTagPos);
1828bd078b5SDaniel Thornburgh   if (EndPos != StringRef::npos)
1838bd078b5SDaniel Thornburgh     return None;
1848bd078b5SDaniel Thornburgh 
1858bd078b5SDaniel Thornburgh   // Check whether the tag is registered multi-line.
1868bd078b5SDaniel Thornburgh   size_t EndTagPos = Line.find(':', BeginTagPos);
1878bd078b5SDaniel Thornburgh   if (EndTagPos == StringRef::npos)
1888bd078b5SDaniel Thornburgh     return None;
1898bd078b5SDaniel Thornburgh   StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
1908bd078b5SDaniel Thornburgh   if (!MultilineTags.contains(Tag))
1918bd078b5SDaniel Thornburgh     return None;
1928bd078b5SDaniel Thornburgh   return Line.substr(BeginPos);
1938bd078b5SDaniel Thornburgh }
1948bd078b5SDaniel Thornburgh 
1958bd078b5SDaniel Thornburgh // See if the line begins with the ending of an in-progress multi-line element.
1968bd078b5SDaniel Thornburgh // If so, return the ending.
parseMultiLineEnd(StringRef Line)1978bd078b5SDaniel Thornburgh Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
1988bd078b5SDaniel Thornburgh   size_t EndPos = Line.find("}}}");
1998bd078b5SDaniel Thornburgh   if (EndPos == StringRef::npos)
2008bd078b5SDaniel Thornburgh     return None;
2018bd078b5SDaniel Thornburgh   return Line.take_front(EndPos + 3);
2028bd078b5SDaniel Thornburgh }
2038bd078b5SDaniel Thornburgh 
2042040b6dfSDaniel Thornburgh } // end namespace symbolize
2052040b6dfSDaniel Thornburgh } // end namespace llvm
206