1 //===-- RegularExpression.cpp -----------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "lldb/Utility/RegularExpression.h"
11 
12 // C Includes
13 // C++ Includes
14 #include <cstring>
15 
16 // Other libraries and framework includes
17 #include "llvm/ADT/StringRef.h"
18 
19 // Project includes
20 #include "lldb/Utility/Error.h"
21 
//----------------------------------------------------------------------
// Flags handed to regcomp() for every pattern compiled in this file.
// Extended syntax is always requested. Where the platform's regex.h
// also offers REG_ENHANCED we turn it on as well, which adds escapes
// such as \d (digit) and \s (space); it is not available everywhere.
//----------------------------------------------------------------------
#ifdef REG_ENHANCED
#define DEFAULT_COMPILE_FLAGS (REG_EXTENDED | REG_ENHANCED)
#else
#define DEFAULT_COMPILE_FLAGS (REG_EXTENDED)
#endif
32 
33 using namespace lldb_private;
34 
35 RegularExpression::RegularExpression() : m_re(), m_comp_err(1), m_preg() {
36   memset(&m_preg, 0, sizeof(m_preg));
37 }
38 
39 //----------------------------------------------------------------------
40 // Constructor that compiles "re" using "flags" and stores the
41 // resulting compiled regular expression into this object.
42 //----------------------------------------------------------------------
43 RegularExpression::RegularExpression(llvm::StringRef str)
44     : m_re(), m_comp_err(1), m_preg() {
45   memset(&m_preg, 0, sizeof(m_preg));
46   Compile(str);
47 }
48 
49 RegularExpression::RegularExpression(const RegularExpression &rhs) {
50   memset(&m_preg, 0, sizeof(m_preg));
51   Compile(rhs.GetText());
52 }
53 
54 const RegularExpression &RegularExpression::
55 operator=(const RegularExpression &rhs) {
56   if (&rhs != this)
57     Compile(rhs.GetText());
58   return *this;
59 }
60 
61 //----------------------------------------------------------------------
62 // Destructor
63 //
64 // Any previously compiled regular expression contained in this
65 // object will be freed.
66 //----------------------------------------------------------------------
67 RegularExpression::~RegularExpression() { Free(); }
68 
69 //----------------------------------------------------------------------
70 // Compile a regular expression using the supplied regular
71 // expression text and flags. The compiled regular expression lives
72 // in this object so that it can be readily used for regular
73 // expression matches. Execute() can be called after the regular
74 // expression is compiled. Any previously compiled regular
75 // expression contained in this object will be freed.
76 //
77 // RETURNS
78 //  True if the regular expression compiles successfully, false
79 //  otherwise.
80 //----------------------------------------------------------------------
81 bool RegularExpression::Compile(llvm::StringRef str) {
82   Free();
83 
84   // regcomp() on darwin does not recognize "" as a valid regular expression, so
85   // we substitute it with an equivalent non-empty one.
86   m_re = str.empty() ? "()" : str;
87   m_comp_err = ::regcomp(&m_preg, m_re.c_str(), DEFAULT_COMPILE_FLAGS);
88   return m_comp_err == 0;
89 }
90 
91 //----------------------------------------------------------------------
92 // Execute a regular expression match using the compiled regular
93 // expression that is already in this object against the match
94 // string "s". If any parens are used for regular expression
95 // matches "match_count" should indicate the number of regmatch_t
96 // values that are present in "match_ptr". The regular expression
97 // will be executed using the "execute_flags".
98 //---------------------------------------------------------------------
99 bool RegularExpression::Execute(llvm::StringRef str, Match *match) const {
100   int err = 1;
101   if (m_comp_err == 0) {
102     // Argument to regexec must be null-terminated.
103     std::string reg_str = str;
104     if (match) {
105       err = ::regexec(&m_preg, reg_str.c_str(), match->GetSize(),
106                       match->GetData(), 0);
107     } else {
108       err = ::regexec(&m_preg, reg_str.c_str(), 0, nullptr, 0);
109     }
110   }
111 
112   if (err != 0) {
113     // The regular expression didn't compile, so clear the matches
114     if (match)
115       match->Clear();
116     return false;
117   }
118   return true;
119 }
120 
121 bool RegularExpression::Match::GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
122                                                std::string &match_str) const {
123   llvm::StringRef match_str_ref;
124   if (GetMatchAtIndex(s, idx, match_str_ref)) {
125     match_str = match_str_ref.str();
126     return true;
127   }
128   return false;
129 }
130 
131 bool RegularExpression::Match::GetMatchAtIndex(
132     llvm::StringRef s, uint32_t idx, llvm::StringRef &match_str) const {
133   if (idx < m_matches.size()) {
134     if (m_matches[idx].rm_eo == -1 && m_matches[idx].rm_so == -1)
135       return false;
136 
137     if (m_matches[idx].rm_eo == m_matches[idx].rm_so) {
138       // Matched the empty string...
139       match_str = llvm::StringRef();
140       return true;
141     } else if (m_matches[idx].rm_eo > m_matches[idx].rm_so) {
142       match_str = s.substr(m_matches[idx].rm_so,
143                            m_matches[idx].rm_eo - m_matches[idx].rm_so);
144       return true;
145     }
146   }
147   return false;
148 }
149 
150 bool RegularExpression::Match::GetMatchSpanningIndices(
151     llvm::StringRef s, uint32_t idx1, uint32_t idx2,
152     llvm::StringRef &match_str) const {
153   if (idx1 < m_matches.size() && idx2 < m_matches.size()) {
154     if (m_matches[idx1].rm_so == m_matches[idx2].rm_eo) {
155       // Matched the empty string...
156       match_str = llvm::StringRef();
157       return true;
158     } else if (m_matches[idx1].rm_so < m_matches[idx2].rm_eo) {
159       match_str = s.substr(m_matches[idx1].rm_so,
160                            m_matches[idx2].rm_eo - m_matches[idx1].rm_so);
161       return true;
162     }
163   }
164   return false;
165 }
166 
167 //----------------------------------------------------------------------
168 // Returns true if the regular expression compiled and is ready
169 // for execution.
170 //----------------------------------------------------------------------
171 bool RegularExpression::IsValid() const { return m_comp_err == 0; }
172 
173 //----------------------------------------------------------------------
174 // Returns the text that was used to compile the current regular
175 // expression.
176 //----------------------------------------------------------------------
177 llvm::StringRef RegularExpression::GetText() const { return m_re; }
178 
179 //----------------------------------------------------------------------
180 // Free any contained compiled regular expressions.
181 //----------------------------------------------------------------------
182 void RegularExpression::Free() {
183   if (m_comp_err == 0) {
184     m_re.clear();
185     regfree(&m_preg);
186     // Set a compile error since we no longer have a valid regex
187     m_comp_err = 1;
188   }
189 }
190 
191 size_t RegularExpression::GetErrorAsCString(char *err_str,
192                                             size_t err_str_max_len) const {
193   if (m_comp_err == 0) {
194     if (err_str && err_str_max_len)
195       *err_str = '\0';
196     return 0;
197   }
198 
199   return ::regerror(m_comp_err, &m_preg, err_str, err_str_max_len);
200 }
201 
202 bool RegularExpression::operator<(const RegularExpression &rhs) const {
203   return (m_re < rhs.m_re);
204 }
205