1 //===-- RegularExpression.cpp -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/RegularExpression.h"
10 
11 #include "llvm/ADT/StringRef.h"
12 
13 #include <string>
14 
//----------------------------------------------------------------------
// Flags handed to regcomp(). Extended syntax is always requested. Where the
// platform defines REG_ENHANCED it is requested as well, which allows things
// like \d for digit and \s for space; it is not available everywhere, hence
// the conditional.
//----------------------------------------------------------------------
#ifdef REG_ENHANCED
#define DEFAULT_COMPILE_FLAGS (REG_EXTENDED | REG_ENHANCED)
#else
#define DEFAULT_COMPILE_FLAGS (REG_EXTENDED)
#endif
24 
25 using namespace lldb_private;
26 
27 RegularExpression::RegularExpression() : m_re(), m_comp_err(1), m_preg() {
28   memset(&m_preg, 0, sizeof(m_preg));
29 }
30 
31 //----------------------------------------------------------------------
32 // Constructor that compiles "re" using "flags" and stores the resulting
33 // compiled regular expression into this object.
34 //----------------------------------------------------------------------
35 RegularExpression::RegularExpression(llvm::StringRef str)
36     : m_re(), m_comp_err(1), m_preg() {
37   memset(&m_preg, 0, sizeof(m_preg));
38   Compile(str);
39 }
40 
41 RegularExpression::RegularExpression(const RegularExpression &rhs) {
42   memset(&m_preg, 0, sizeof(m_preg));
43   Compile(rhs.GetText());
44 }
45 
46 const RegularExpression &RegularExpression::
47 operator=(const RegularExpression &rhs) {
48   if (&rhs != this)
49     Compile(rhs.GetText());
50   return *this;
51 }
52 
53 //----------------------------------------------------------------------
54 // Destructor
55 //
56 // Any previously compiled regular expression contained in this object will be
57 // freed.
58 //----------------------------------------------------------------------
59 RegularExpression::~RegularExpression() { Free(); }
60 
61 //----------------------------------------------------------------------
62 // Compile a regular expression using the supplied regular expression text and
63 // flags. The compiled regular expression lives in this object so that it can
64 // be readily used for regular expression matches. Execute() can be called
65 // after the regular expression is compiled. Any previously compiled regular
66 // expression contained in this object will be freed.
67 //
68 // RETURNS
69 //  True if the regular expression compiles successfully, false
70 //  otherwise.
71 //----------------------------------------------------------------------
72 bool RegularExpression::Compile(llvm::StringRef str) {
73   Free();
74 
75   // regcomp() on darwin does not recognize "" as a valid regular expression,
76   // so we substitute it with an equivalent non-empty one.
77   m_re = str.empty() ? "()" : str;
78   m_comp_err = ::regcomp(&m_preg, m_re.c_str(), DEFAULT_COMPILE_FLAGS);
79   return m_comp_err == 0;
80 }
81 
82 //----------------------------------------------------------------------
83 // Execute a regular expression match using the compiled regular expression
84 // that is already in this object against the match string "s". If any parens
85 // are used for regular expression matches "match_count" should indicate the
86 // number of regmatch_t values that are present in "match_ptr". The regular
87 // expression will be executed using the "execute_flags".
88 //---------------------------------------------------------------------
89 bool RegularExpression::Execute(llvm::StringRef str, Match *match) const {
90   int err = 1;
91   if (m_comp_err == 0) {
92     // Argument to regexec must be null-terminated.
93     std::string reg_str = str;
94     if (match) {
95       err = ::regexec(&m_preg, reg_str.c_str(), match->GetSize(),
96                       match->GetData(), 0);
97     } else {
98       err = ::regexec(&m_preg, reg_str.c_str(), 0, nullptr, 0);
99     }
100   }
101 
102   if (err != 0) {
103     // The regular expression didn't compile, so clear the matches
104     if (match)
105       match->Clear();
106     return false;
107   }
108   return true;
109 }
110 
111 bool RegularExpression::Match::GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
112                                                std::string &match_str) const {
113   llvm::StringRef match_str_ref;
114   if (GetMatchAtIndex(s, idx, match_str_ref)) {
115     match_str = match_str_ref.str();
116     return true;
117   }
118   return false;
119 }
120 
121 bool RegularExpression::Match::GetMatchAtIndex(
122     llvm::StringRef s, uint32_t idx, llvm::StringRef &match_str) const {
123   if (idx < m_matches.size()) {
124     if (m_matches[idx].rm_eo == -1 && m_matches[idx].rm_so == -1)
125       return false;
126 
127     if (m_matches[idx].rm_eo == m_matches[idx].rm_so) {
128       // Matched the empty string...
129       match_str = llvm::StringRef();
130       return true;
131     } else if (m_matches[idx].rm_eo > m_matches[idx].rm_so) {
132       match_str = s.substr(m_matches[idx].rm_so,
133                            m_matches[idx].rm_eo - m_matches[idx].rm_so);
134       return true;
135     }
136   }
137   return false;
138 }
139 
140 bool RegularExpression::Match::GetMatchSpanningIndices(
141     llvm::StringRef s, uint32_t idx1, uint32_t idx2,
142     llvm::StringRef &match_str) const {
143   if (idx1 < m_matches.size() && idx2 < m_matches.size()) {
144     if (m_matches[idx1].rm_so == m_matches[idx2].rm_eo) {
145       // Matched the empty string...
146       match_str = llvm::StringRef();
147       return true;
148     } else if (m_matches[idx1].rm_so < m_matches[idx2].rm_eo) {
149       match_str = s.substr(m_matches[idx1].rm_so,
150                            m_matches[idx2].rm_eo - m_matches[idx1].rm_so);
151       return true;
152     }
153   }
154   return false;
155 }
156 
157 //----------------------------------------------------------------------
158 // Returns true if the regular expression compiled and is ready for execution.
159 //----------------------------------------------------------------------
160 bool RegularExpression::IsValid() const { return m_comp_err == 0; }
161 
162 //----------------------------------------------------------------------
163 // Returns the text that was used to compile the current regular expression.
164 //----------------------------------------------------------------------
165 llvm::StringRef RegularExpression::GetText() const { return m_re; }
166 
167 //----------------------------------------------------------------------
168 // Free any contained compiled regular expressions.
169 //----------------------------------------------------------------------
170 void RegularExpression::Free() {
171   if (m_comp_err == 0) {
172     m_re.clear();
173     regfree(&m_preg);
174     // Set a compile error since we no longer have a valid regex
175     m_comp_err = 1;
176   }
177 }
178 
179 size_t RegularExpression::GetErrorAsCString(char *err_str,
180                                             size_t err_str_max_len) const {
181   if (m_comp_err == 0) {
182     if (err_str && err_str_max_len)
183       *err_str = '\0';
184     return 0;
185   }
186 
187   return ::regerror(m_comp_err, &m_preg, err_str, err_str_max_len);
188 }
189 
190 bool RegularExpression::operator<(const RegularExpression &rhs) const {
191   return (m_re < rhs.m_re);
192 }
193