1 //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines the log symbolizer markup data model and parser. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/DebugInfo/Symbolize/Markup.h" 15 16 #include "llvm/ADT/StringExtras.h" 17 18 namespace llvm { 19 namespace symbolize { 20 21 // Matches the following: 22 // "\033[0m" 23 // "\033[1m" 24 // "\033[30m" -- "\033[37m" 25 static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m"; 26 27 MarkupParser::MarkupParser() : SGRSyntax(SGRSyntaxStr) {} 28 29 static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { 30 return Str.take_front(Pos - Str.begin()); 31 } 32 static void advanceTo(StringRef &Str, StringRef::iterator Pos) { 33 Str = Str.drop_front(Pos - Str.begin()); 34 } 35 36 void MarkupParser::parseLine(StringRef Line) { 37 Buffer.clear(); 38 while (!Line.empty()) { 39 // Find the first valid markup element, if any. 40 if (Optional<MarkupNode> Element = parseElement(Line)) { 41 parseTextOutsideMarkup(takeTo(Line, Element->Text.begin())); 42 Buffer.push_back(std::move(*Element)); 43 advanceTo(Line, Element->Text.end()); 44 } else { 45 // The line doesn't contain any more markup elements, so emit it as text. 46 parseTextOutsideMarkup(Line); 47 return; 48 } 49 } 50 } 51 52 // Finds and returns the next valid markup element in the given line. Returns 53 // None if the line contains no valid elements. 54 Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) { 55 while (true) { 56 // Find next element using begin and end markers. 57 size_t BeginPos = Line.find("{{{"); 58 if (BeginPos == StringRef::npos) 59 return None; 60 size_t EndPos = Line.find("}}}", BeginPos + 3); 61 if (EndPos == StringRef::npos) 62 return None; 63 EndPos += 3; 64 MarkupNode Element; 65 Element.Text = Line.slice(BeginPos, EndPos); 66 Line = Line.substr(EndPos); 67 68 // Parse tag. 69 StringRef Content = Element.Text.drop_front(3).drop_back(3); 70 StringRef FieldsContent; 71 std::tie(Element.Tag, FieldsContent) = Content.split(':'); 72 if (Element.Tag.empty()) 73 continue; 74 75 // Parse fields. 76 if (!FieldsContent.empty()) 77 FieldsContent.split(Element.Fields, ":"); 78 else if (Content.back() == ':') 79 Element.Fields.push_back(FieldsContent); 80 81 return Element; 82 } 83 } 84 85 static MarkupNode textNode(StringRef Text) { 86 MarkupNode Node; 87 Node.Text = Text; 88 return Node; 89 } 90 91 // Parses a region of text known to be outside any markup elements. Such text 92 // may still contain SGR control codes, so the region is further subdivided into 93 // control codes and true text regions. 94 void MarkupParser::parseTextOutsideMarkup(StringRef Text) { 95 if (Text.empty()) 96 return; 97 SmallVector<StringRef> Matches; 98 while (SGRSyntax.match(Text, &Matches)) { 99 // Emit any text before the SGR element. 100 if (Matches.begin()->begin() != Text.begin()) 101 Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin()))); 102 103 Buffer.push_back(textNode(*Matches.begin())); 104 advanceTo(Text, Matches.begin()->end()); 105 } 106 if (!Text.empty()) 107 Buffer.push_back(textNode(Text)); 108 } 109 110 } // end namespace symbolize 111 } // end namespace llvm 112