1 //===-- RegularExpression.cpp -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Utility/RegularExpression.h" 10 11 #include "llvm/ADT/StringRef.h" 12 13 #include <string> 14 15 // Enable enhanced mode if it is available. This allows for things like \d for 16 // digit, \s for space, and many more, but it isn't available everywhere. 17 #if defined(REG_ENHANCED) 18 #define DEFAULT_COMPILE_FLAGS (REG_ENHANCED | REG_EXTENDED) 19 #else 20 #define DEFAULT_COMPILE_FLAGS (REG_EXTENDED) 21 #endif 22 23 using namespace lldb_private; 24 25 RegularExpression::RegularExpression() : m_re(), m_comp_err(1), m_preg() { 26 memset(&m_preg, 0, sizeof(m_preg)); 27 } 28 29 // Constructor that compiles "re" using "flags" and stores the resulting 30 // compiled regular expression into this object. 31 RegularExpression::RegularExpression(llvm::StringRef str) 32 : m_re(), m_comp_err(1), m_preg() { 33 memset(&m_preg, 0, sizeof(m_preg)); 34 Compile(str); 35 } 36 37 RegularExpression::RegularExpression(const RegularExpression &rhs) { 38 memset(&m_preg, 0, sizeof(m_preg)); 39 Compile(rhs.GetText()); 40 } 41 42 const RegularExpression &RegularExpression:: 43 operator=(const RegularExpression &rhs) { 44 if (&rhs != this) 45 Compile(rhs.GetText()); 46 return *this; 47 } 48 49 // Destructor 50 // 51 // Any previously compiled regular expression contained in this object will be 52 // freed. 53 RegularExpression::~RegularExpression() { Free(); } 54 55 // Compile a regular expression using the supplied regular expression text and 56 // flags. The compiled regular expression lives in this object so that it can 57 // be readily used for regular expression matches. Execute() can be called 58 // after the regular expression is compiled. Any previously compiled regular 59 // expression contained in this object will be freed. 60 // 61 // RETURNS 62 // True if the regular expression compiles successfully, false 63 // otherwise. 64 bool RegularExpression::Compile(llvm::StringRef str) { 65 Free(); 66 67 // regcomp() on darwin does not recognize "" as a valid regular expression, 68 // so we substitute it with an equivalent non-empty one. 69 m_re = str.empty() ? "()" : str; 70 m_comp_err = ::regcomp(&m_preg, m_re.c_str(), DEFAULT_COMPILE_FLAGS); 71 return m_comp_err == 0; 72 } 73 74 // Execute a regular expression match using the compiled regular expression 75 // that is already in this object against the match string "s". If any parens 76 // are used for regular expression matches "match_count" should indicate the 77 // number of regmatch_t values that are present in "match_ptr". The regular 78 // expression will be executed using the "execute_flags". 79 bool RegularExpression::Execute(llvm::StringRef str, Match *match) const { 80 int err = 1; 81 if (m_comp_err == 0) { 82 // Argument to regexec must be null-terminated. 83 std::string reg_str = str; 84 if (match) { 85 err = ::regexec(&m_preg, reg_str.c_str(), match->GetSize(), 86 match->GetData(), 0); 87 } else { 88 err = ::regexec(&m_preg, reg_str.c_str(), 0, nullptr, 0); 89 } 90 } 91 92 if (err != 0) { 93 // The regular expression didn't compile, so clear the matches 94 if (match) 95 match->Clear(); 96 return false; 97 } 98 return true; 99 } 100 101 bool RegularExpression::Match::GetMatchAtIndex(llvm::StringRef s, uint32_t idx, 102 std::string &match_str) const { 103 llvm::StringRef match_str_ref; 104 if (GetMatchAtIndex(s, idx, match_str_ref)) { 105 match_str = match_str_ref.str(); 106 return true; 107 } 108 return false; 109 } 110 111 bool RegularExpression::Match::GetMatchAtIndex( 112 llvm::StringRef s, uint32_t idx, llvm::StringRef &match_str) const { 113 if (idx < m_matches.size()) { 114 if (m_matches[idx].rm_eo == -1 && m_matches[idx].rm_so == -1) 115 return false; 116 117 if (m_matches[idx].rm_eo == m_matches[idx].rm_so) { 118 // Matched the empty string... 119 match_str = llvm::StringRef(); 120 return true; 121 } else if (m_matches[idx].rm_eo > m_matches[idx].rm_so) { 122 match_str = s.substr(m_matches[idx].rm_so, 123 m_matches[idx].rm_eo - m_matches[idx].rm_so); 124 return true; 125 } 126 } 127 return false; 128 } 129 130 bool RegularExpression::Match::GetMatchSpanningIndices( 131 llvm::StringRef s, uint32_t idx1, uint32_t idx2, 132 llvm::StringRef &match_str) const { 133 if (idx1 < m_matches.size() && idx2 < m_matches.size()) { 134 if (m_matches[idx1].rm_so == m_matches[idx2].rm_eo) { 135 // Matched the empty string... 136 match_str = llvm::StringRef(); 137 return true; 138 } else if (m_matches[idx1].rm_so < m_matches[idx2].rm_eo) { 139 match_str = s.substr(m_matches[idx1].rm_so, 140 m_matches[idx2].rm_eo - m_matches[idx1].rm_so); 141 return true; 142 } 143 } 144 return false; 145 } 146 147 // Returns true if the regular expression compiled and is ready for execution. 148 bool RegularExpression::IsValid() const { return m_comp_err == 0; } 149 150 // Returns the text that was used to compile the current regular expression. 151 llvm::StringRef RegularExpression::GetText() const { return m_re; } 152 153 // Free any contained compiled regular expressions. 154 void RegularExpression::Free() { 155 if (m_comp_err == 0) { 156 m_re.clear(); 157 regfree(&m_preg); 158 // Set a compile error since we no longer have a valid regex 159 m_comp_err = 1; 160 } 161 } 162 163 size_t RegularExpression::GetErrorAsCString(char *err_str, 164 size_t err_str_max_len) const { 165 if (m_comp_err == 0) { 166 if (err_str && err_str_max_len) 167 *err_str = '\0'; 168 return 0; 169 } 170 171 return ::regerror(m_comp_err, &m_preg, err_str, err_str_max_len); 172 } 173 174 bool RegularExpression::operator<(const RegularExpression &rhs) const { 175 return (m_re < rhs.m_re); 176 } 177