1 //===-- RegularExpression.cpp -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Utility/RegularExpression.h" 10 11 #include "llvm/ADT/StringRef.h" 12 13 #include <string> 14 15 //---------------------------------------------------------------------- 16 // Enable enhanced mode if it is available. This allows for things like \d for 17 // digit, \s for space, and many more, but it isn't available everywhere. 18 //---------------------------------------------------------------------- 19 #if defined(REG_ENHANCED) 20 #define DEFAULT_COMPILE_FLAGS (REG_ENHANCED | REG_EXTENDED) 21 #else 22 #define DEFAULT_COMPILE_FLAGS (REG_EXTENDED) 23 #endif 24 25 using namespace lldb_private; 26 27 RegularExpression::RegularExpression() : m_re(), m_comp_err(1), m_preg() { 28 memset(&m_preg, 0, sizeof(m_preg)); 29 } 30 31 //---------------------------------------------------------------------- 32 // Constructor that compiles "re" using "flags" and stores the resulting 33 // compiled regular expression into this object. 34 //---------------------------------------------------------------------- 35 RegularExpression::RegularExpression(llvm::StringRef str) 36 : m_re(), m_comp_err(1), m_preg() { 37 memset(&m_preg, 0, sizeof(m_preg)); 38 Compile(str); 39 } 40 41 RegularExpression::RegularExpression(const RegularExpression &rhs) { 42 memset(&m_preg, 0, sizeof(m_preg)); 43 Compile(rhs.GetText()); 44 } 45 46 const RegularExpression &RegularExpression:: 47 operator=(const RegularExpression &rhs) { 48 if (&rhs != this) 49 Compile(rhs.GetText()); 50 return *this; 51 } 52 53 //---------------------------------------------------------------------- 54 // Destructor 55 // 56 // Any previously compiled regular expression contained in this object will be 57 // freed. 58 //---------------------------------------------------------------------- 59 RegularExpression::~RegularExpression() { Free(); } 60 61 //---------------------------------------------------------------------- 62 // Compile a regular expression using the supplied regular expression text and 63 // flags. The compiled regular expression lives in this object so that it can 64 // be readily used for regular expression matches. Execute() can be called 65 // after the regular expression is compiled. Any previously compiled regular 66 // expression contained in this object will be freed. 67 // 68 // RETURNS 69 // True if the regular expression compiles successfully, false 70 // otherwise. 71 //---------------------------------------------------------------------- 72 bool RegularExpression::Compile(llvm::StringRef str) { 73 Free(); 74 75 // regcomp() on darwin does not recognize "" as a valid regular expression, 76 // so we substitute it with an equivalent non-empty one. 77 m_re = str.empty() ? "()" : str; 78 m_comp_err = ::regcomp(&m_preg, m_re.c_str(), DEFAULT_COMPILE_FLAGS); 79 return m_comp_err == 0; 80 } 81 82 //---------------------------------------------------------------------- 83 // Execute a regular expression match using the compiled regular expression 84 // that is already in this object against the match string "s". If any parens 85 // are used for regular expression matches "match_count" should indicate the 86 // number of regmatch_t values that are present in "match_ptr". The regular 87 // expression will be executed using the "execute_flags". 88 //--------------------------------------------------------------------- 89 bool RegularExpression::Execute(llvm::StringRef str, Match *match) const { 90 int err = 1; 91 if (m_comp_err == 0) { 92 // Argument to regexec must be null-terminated. 93 std::string reg_str = str; 94 if (match) { 95 err = ::regexec(&m_preg, reg_str.c_str(), match->GetSize(), 96 match->GetData(), 0); 97 } else { 98 err = ::regexec(&m_preg, reg_str.c_str(), 0, nullptr, 0); 99 } 100 } 101 102 if (err != 0) { 103 // The regular expression didn't compile, so clear the matches 104 if (match) 105 match->Clear(); 106 return false; 107 } 108 return true; 109 } 110 111 bool RegularExpression::Match::GetMatchAtIndex(llvm::StringRef s, uint32_t idx, 112 std::string &match_str) const { 113 llvm::StringRef match_str_ref; 114 if (GetMatchAtIndex(s, idx, match_str_ref)) { 115 match_str = match_str_ref.str(); 116 return true; 117 } 118 return false; 119 } 120 121 bool RegularExpression::Match::GetMatchAtIndex( 122 llvm::StringRef s, uint32_t idx, llvm::StringRef &match_str) const { 123 if (idx < m_matches.size()) { 124 if (m_matches[idx].rm_eo == -1 && m_matches[idx].rm_so == -1) 125 return false; 126 127 if (m_matches[idx].rm_eo == m_matches[idx].rm_so) { 128 // Matched the empty string... 129 match_str = llvm::StringRef(); 130 return true; 131 } else if (m_matches[idx].rm_eo > m_matches[idx].rm_so) { 132 match_str = s.substr(m_matches[idx].rm_so, 133 m_matches[idx].rm_eo - m_matches[idx].rm_so); 134 return true; 135 } 136 } 137 return false; 138 } 139 140 bool RegularExpression::Match::GetMatchSpanningIndices( 141 llvm::StringRef s, uint32_t idx1, uint32_t idx2, 142 llvm::StringRef &match_str) const { 143 if (idx1 < m_matches.size() && idx2 < m_matches.size()) { 144 if (m_matches[idx1].rm_so == m_matches[idx2].rm_eo) { 145 // Matched the empty string... 146 match_str = llvm::StringRef(); 147 return true; 148 } else if (m_matches[idx1].rm_so < m_matches[idx2].rm_eo) { 149 match_str = s.substr(m_matches[idx1].rm_so, 150 m_matches[idx2].rm_eo - m_matches[idx1].rm_so); 151 return true; 152 } 153 } 154 return false; 155 } 156 157 //---------------------------------------------------------------------- 158 // Returns true if the regular expression compiled and is ready for execution. 159 //---------------------------------------------------------------------- 160 bool RegularExpression::IsValid() const { return m_comp_err == 0; } 161 162 //---------------------------------------------------------------------- 163 // Returns the text that was used to compile the current regular expression. 164 //---------------------------------------------------------------------- 165 llvm::StringRef RegularExpression::GetText() const { return m_re; } 166 167 //---------------------------------------------------------------------- 168 // Free any contained compiled regular expressions. 169 //---------------------------------------------------------------------- 170 void RegularExpression::Free() { 171 if (m_comp_err == 0) { 172 m_re.clear(); 173 regfree(&m_preg); 174 // Set a compile error since we no longer have a valid regex 175 m_comp_err = 1; 176 } 177 } 178 179 size_t RegularExpression::GetErrorAsCString(char *err_str, 180 size_t err_str_max_len) const { 181 if (m_comp_err == 0) { 182 if (err_str && err_str_max_len) 183 *err_str = '\0'; 184 return 0; 185 } 186 187 return ::regerror(m_comp_err, &m_preg, err_str, err_str_max_len); 188 } 189 190 bool RegularExpression::operator<(const RegularExpression &rhs) const { 191 return (m_re < rhs.m_re); 192 } 193