1f678e45dSDimitry Andric //===-- RegularExpression.cpp -----------------------------------*- C++ -*-===//
2f678e45dSDimitry Andric //
3f678e45dSDimitry Andric // The LLVM Compiler Infrastructure
4f678e45dSDimitry Andric //
5f678e45dSDimitry Andric // This file is distributed under the University of Illinois Open Source
6f678e45dSDimitry Andric // License. See LICENSE.TXT for details.
7f678e45dSDimitry Andric //
8f678e45dSDimitry Andric //===----------------------------------------------------------------------===//
9f678e45dSDimitry Andric
10f678e45dSDimitry Andric #include "lldb/Utility/RegularExpression.h"
11f678e45dSDimitry Andric
12f678e45dSDimitry Andric #include "llvm/ADT/StringRef.h"
13f678e45dSDimitry Andric
14f678e45dSDimitry Andric #include <string>
15f678e45dSDimitry Andric
16f678e45dSDimitry Andric //----------------------------------------------------------------------
17*4ba319b5SDimitry Andric // Enable enhanced mode if it is available. This allows for things like \d for
18*4ba319b5SDimitry Andric // digit, \s for space, and many more, but it isn't available everywhere.
19f678e45dSDimitry Andric //----------------------------------------------------------------------
#ifndef REG_ENHANCED
// Enhanced mode is not offered by this regex implementation; fall back to
// plain POSIX extended syntax.
#define DEFAULT_COMPILE_FLAGS (REG_EXTENDED)
#else
// Enhanced mode is available; request it on top of the extended syntax.
#define DEFAULT_COMPILE_FLAGS (REG_ENHANCED | REG_EXTENDED)
#endif
25f678e45dSDimitry Andric
26f678e45dSDimitry Andric using namespace lldb_private;
27f678e45dSDimitry Andric
RegularExpression()28f678e45dSDimitry Andric RegularExpression::RegularExpression() : m_re(), m_comp_err(1), m_preg() {
29f678e45dSDimitry Andric memset(&m_preg, 0, sizeof(m_preg));
30f678e45dSDimitry Andric }
31f678e45dSDimitry Andric
32f678e45dSDimitry Andric //----------------------------------------------------------------------
// Constructor that compiles "str" using the default compile flags and stores
// the resulting compiled regular expression into this object.
35f678e45dSDimitry Andric //----------------------------------------------------------------------
RegularExpression(llvm::StringRef str)36f678e45dSDimitry Andric RegularExpression::RegularExpression(llvm::StringRef str)
37f678e45dSDimitry Andric : m_re(), m_comp_err(1), m_preg() {
38f678e45dSDimitry Andric memset(&m_preg, 0, sizeof(m_preg));
39f678e45dSDimitry Andric Compile(str);
40f678e45dSDimitry Andric }
41f678e45dSDimitry Andric
// Copy constructor: recompiles the text held by "rhs" into this object rather
// than copying the compiled regex_t.
//
// The members must be initialized before Compile() runs, because Compile()
// starts by calling Free(), which inspects m_comp_err. Leaving m_comp_err
// uninitialized would make that check read an indeterminate value and could
// hand a never-compiled regex_t to regfree().
RegularExpression::RegularExpression(const RegularExpression &rhs)
    : m_re(), m_comp_err(1), m_preg() {
  memset(&m_preg, 0, sizeof(m_preg));
  Compile(rhs.GetText());
}
46f678e45dSDimitry Andric
47f678e45dSDimitry Andric const RegularExpression &RegularExpression::
operator =(const RegularExpression & rhs)48f678e45dSDimitry Andric operator=(const RegularExpression &rhs) {
49f678e45dSDimitry Andric if (&rhs != this)
50f678e45dSDimitry Andric Compile(rhs.GetText());
51f678e45dSDimitry Andric return *this;
52f678e45dSDimitry Andric }
53f678e45dSDimitry Andric
54f678e45dSDimitry Andric //----------------------------------------------------------------------
55f678e45dSDimitry Andric // Destructor
56f678e45dSDimitry Andric //
57*4ba319b5SDimitry Andric // Any previously compiled regular expression contained in this object will be
58*4ba319b5SDimitry Andric // freed.
59f678e45dSDimitry Andric //----------------------------------------------------------------------
~RegularExpression()60f678e45dSDimitry Andric RegularExpression::~RegularExpression() { Free(); }
61f678e45dSDimitry Andric
62f678e45dSDimitry Andric //----------------------------------------------------------------------
// Compile a regular expression using the supplied regular expression text and
// the default compile flags. The compiled regular expression lives in this
// object so that it can be readily used for regular expression matches.
// Execute() can be called after the regular expression is compiled. Any
// previously compiled regular expression contained in this object will be
// freed.
68f678e45dSDimitry Andric //
69f678e45dSDimitry Andric // RETURNS
70f678e45dSDimitry Andric // True if the regular expression compiles successfully, false
71f678e45dSDimitry Andric // otherwise.
72f678e45dSDimitry Andric //----------------------------------------------------------------------
Compile(llvm::StringRef str)73f678e45dSDimitry Andric bool RegularExpression::Compile(llvm::StringRef str) {
74f678e45dSDimitry Andric Free();
75f678e45dSDimitry Andric
76*4ba319b5SDimitry Andric // regcomp() on darwin does not recognize "" as a valid regular expression,
77*4ba319b5SDimitry Andric // so we substitute it with an equivalent non-empty one.
78f678e45dSDimitry Andric m_re = str.empty() ? "()" : str;
79f678e45dSDimitry Andric m_comp_err = ::regcomp(&m_preg, m_re.c_str(), DEFAULT_COMPILE_FLAGS);
80f678e45dSDimitry Andric return m_comp_err == 0;
81f678e45dSDimitry Andric }
82f678e45dSDimitry Andric
83f678e45dSDimitry Andric //----------------------------------------------------------------------
// Execute a regular expression match using the compiled regular expression
// that is already in this object against the match string "str". If
// parenthesized subexpression matches are wanted, "match" should point to a
// Match object, which receives the regmatch_t values; pass nullptr when only
// the match result is needed.
89f678e45dSDimitry Andric //---------------------------------------------------------------------
Execute(llvm::StringRef str,Match * match) const90f678e45dSDimitry Andric bool RegularExpression::Execute(llvm::StringRef str, Match *match) const {
91f678e45dSDimitry Andric int err = 1;
92f678e45dSDimitry Andric if (m_comp_err == 0) {
93f678e45dSDimitry Andric // Argument to regexec must be null-terminated.
94f678e45dSDimitry Andric std::string reg_str = str;
95f678e45dSDimitry Andric if (match) {
96f678e45dSDimitry Andric err = ::regexec(&m_preg, reg_str.c_str(), match->GetSize(),
97f678e45dSDimitry Andric match->GetData(), 0);
98f678e45dSDimitry Andric } else {
99f678e45dSDimitry Andric err = ::regexec(&m_preg, reg_str.c_str(), 0, nullptr, 0);
100f678e45dSDimitry Andric }
101f678e45dSDimitry Andric }
102f678e45dSDimitry Andric
103f678e45dSDimitry Andric if (err != 0) {
104f678e45dSDimitry Andric // The regular expression didn't compile, so clear the matches
105f678e45dSDimitry Andric if (match)
106f678e45dSDimitry Andric match->Clear();
107f678e45dSDimitry Andric return false;
108f678e45dSDimitry Andric }
109f678e45dSDimitry Andric return true;
110f678e45dSDimitry Andric }
111f678e45dSDimitry Andric
GetMatchAtIndex(llvm::StringRef s,uint32_t idx,std::string & match_str) const112f678e45dSDimitry Andric bool RegularExpression::Match::GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
113f678e45dSDimitry Andric std::string &match_str) const {
114f678e45dSDimitry Andric llvm::StringRef match_str_ref;
115f678e45dSDimitry Andric if (GetMatchAtIndex(s, idx, match_str_ref)) {
116f678e45dSDimitry Andric match_str = match_str_ref.str();
117f678e45dSDimitry Andric return true;
118f678e45dSDimitry Andric }
119f678e45dSDimitry Andric return false;
120f678e45dSDimitry Andric }
121f678e45dSDimitry Andric
GetMatchAtIndex(llvm::StringRef s,uint32_t idx,llvm::StringRef & match_str) const122f678e45dSDimitry Andric bool RegularExpression::Match::GetMatchAtIndex(
123f678e45dSDimitry Andric llvm::StringRef s, uint32_t idx, llvm::StringRef &match_str) const {
124f678e45dSDimitry Andric if (idx < m_matches.size()) {
125f678e45dSDimitry Andric if (m_matches[idx].rm_eo == -1 && m_matches[idx].rm_so == -1)
126f678e45dSDimitry Andric return false;
127f678e45dSDimitry Andric
128f678e45dSDimitry Andric if (m_matches[idx].rm_eo == m_matches[idx].rm_so) {
129f678e45dSDimitry Andric // Matched the empty string...
130f678e45dSDimitry Andric match_str = llvm::StringRef();
131f678e45dSDimitry Andric return true;
132f678e45dSDimitry Andric } else if (m_matches[idx].rm_eo > m_matches[idx].rm_so) {
133f678e45dSDimitry Andric match_str = s.substr(m_matches[idx].rm_so,
134f678e45dSDimitry Andric m_matches[idx].rm_eo - m_matches[idx].rm_so);
135f678e45dSDimitry Andric return true;
136f678e45dSDimitry Andric }
137f678e45dSDimitry Andric }
138f678e45dSDimitry Andric return false;
139f678e45dSDimitry Andric }
140f678e45dSDimitry Andric
GetMatchSpanningIndices(llvm::StringRef s,uint32_t idx1,uint32_t idx2,llvm::StringRef & match_str) const141f678e45dSDimitry Andric bool RegularExpression::Match::GetMatchSpanningIndices(
142f678e45dSDimitry Andric llvm::StringRef s, uint32_t idx1, uint32_t idx2,
143f678e45dSDimitry Andric llvm::StringRef &match_str) const {
144f678e45dSDimitry Andric if (idx1 < m_matches.size() && idx2 < m_matches.size()) {
145f678e45dSDimitry Andric if (m_matches[idx1].rm_so == m_matches[idx2].rm_eo) {
146f678e45dSDimitry Andric // Matched the empty string...
147f678e45dSDimitry Andric match_str = llvm::StringRef();
148f678e45dSDimitry Andric return true;
149f678e45dSDimitry Andric } else if (m_matches[idx1].rm_so < m_matches[idx2].rm_eo) {
150f678e45dSDimitry Andric match_str = s.substr(m_matches[idx1].rm_so,
151f678e45dSDimitry Andric m_matches[idx2].rm_eo - m_matches[idx1].rm_so);
152f678e45dSDimitry Andric return true;
153f678e45dSDimitry Andric }
154f678e45dSDimitry Andric }
155f678e45dSDimitry Andric return false;
156f678e45dSDimitry Andric }
157f678e45dSDimitry Andric
158f678e45dSDimitry Andric //----------------------------------------------------------------------
159*4ba319b5SDimitry Andric // Returns true if the regular expression compiled and is ready for execution.
160f678e45dSDimitry Andric //----------------------------------------------------------------------
IsValid() const161f678e45dSDimitry Andric bool RegularExpression::IsValid() const { return m_comp_err == 0; }
162f678e45dSDimitry Andric
163f678e45dSDimitry Andric //----------------------------------------------------------------------
164*4ba319b5SDimitry Andric // Returns the text that was used to compile the current regular expression.
165f678e45dSDimitry Andric //----------------------------------------------------------------------
GetText() const166f678e45dSDimitry Andric llvm::StringRef RegularExpression::GetText() const { return m_re; }
167f678e45dSDimitry Andric
168f678e45dSDimitry Andric //----------------------------------------------------------------------
169f678e45dSDimitry Andric // Free any contained compiled regular expressions.
170f678e45dSDimitry Andric //----------------------------------------------------------------------
Free()171f678e45dSDimitry Andric void RegularExpression::Free() {
172f678e45dSDimitry Andric if (m_comp_err == 0) {
173f678e45dSDimitry Andric m_re.clear();
174f678e45dSDimitry Andric regfree(&m_preg);
175f678e45dSDimitry Andric // Set a compile error since we no longer have a valid regex
176f678e45dSDimitry Andric m_comp_err = 1;
177f678e45dSDimitry Andric }
178f678e45dSDimitry Andric }
179f678e45dSDimitry Andric
GetErrorAsCString(char * err_str,size_t err_str_max_len) const180f678e45dSDimitry Andric size_t RegularExpression::GetErrorAsCString(char *err_str,
181f678e45dSDimitry Andric size_t err_str_max_len) const {
182f678e45dSDimitry Andric if (m_comp_err == 0) {
183f678e45dSDimitry Andric if (err_str && err_str_max_len)
184f678e45dSDimitry Andric *err_str = '\0';
185f678e45dSDimitry Andric return 0;
186f678e45dSDimitry Andric }
187f678e45dSDimitry Andric
188f678e45dSDimitry Andric return ::regerror(m_comp_err, &m_preg, err_str, err_str_max_len);
189f678e45dSDimitry Andric }
190f678e45dSDimitry Andric
operator <(const RegularExpression & rhs) const191f678e45dSDimitry Andric bool RegularExpression::operator<(const RegularExpression &rhs) const {
192f678e45dSDimitry Andric return (m_re < rhs.m_re);
193f678e45dSDimitry Andric }
194