112c1cd33SDouglas Gregor //===- StringMatcher.cpp - Generate a matcher for input strings -----------===// 212c1cd33SDouglas Gregor // 312c1cd33SDouglas Gregor // The LLVM Compiler Infrastructure 412c1cd33SDouglas Gregor // 512c1cd33SDouglas Gregor // This file is distributed under the University of Illinois Open Source 612c1cd33SDouglas Gregor // License. See LICENSE.TXT for details. 712c1cd33SDouglas Gregor // 812c1cd33SDouglas Gregor //===----------------------------------------------------------------------===// 912c1cd33SDouglas Gregor // 1012c1cd33SDouglas Gregor // This file implements the StringMatcher class. 1112c1cd33SDouglas Gregor // 1212c1cd33SDouglas Gregor //===----------------------------------------------------------------------===// 1312c1cd33SDouglas Gregor 14*b2ca1b3fSEugene Zelenko #include "llvm/ADT/StringRef.h" 1512c1cd33SDouglas Gregor #include "llvm/Support/raw_ostream.h" 16*b2ca1b3fSEugene Zelenko #include "llvm/TableGen/StringMatcher.h" 17*b2ca1b3fSEugene Zelenko #include <cassert> 1812c1cd33SDouglas Gregor #include <map> 19*b2ca1b3fSEugene Zelenko #include <string> 20*b2ca1b3fSEugene Zelenko #include <utility> 21*b2ca1b3fSEugene Zelenko #include <vector> 22*b2ca1b3fSEugene Zelenko 2312c1cd33SDouglas Gregor using namespace llvm; 2412c1cd33SDouglas Gregor 2512c1cd33SDouglas Gregor /// FindFirstNonCommonLetter - Find the first character in the keys of the 2612c1cd33SDouglas Gregor /// string pairs that is not shared across the whole set of strings. All 2712c1cd33SDouglas Gregor /// strings are assumed to have the same length. 2812c1cd33SDouglas Gregor static unsigned 2912c1cd33SDouglas Gregor FindFirstNonCommonLetter(const std::vector<const 3012c1cd33SDouglas Gregor StringMatcher::StringPair*> &Matches) { 3112c1cd33SDouglas Gregor assert(!Matches.empty()); 3212c1cd33SDouglas Gregor for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) { 3312c1cd33SDouglas Gregor // Check to see if letter i is the same across the set. 3412c1cd33SDouglas Gregor char Letter = Matches[0]->first[i]; 3512c1cd33SDouglas Gregor 3612c1cd33SDouglas Gregor for (unsigned str = 0, e = Matches.size(); str != e; ++str) 3712c1cd33SDouglas Gregor if (Matches[str]->first[i] != Letter) 3812c1cd33SDouglas Gregor return i; 3912c1cd33SDouglas Gregor } 4012c1cd33SDouglas Gregor 4112c1cd33SDouglas Gregor return Matches[0]->first.size(); 4212c1cd33SDouglas Gregor } 4312c1cd33SDouglas Gregor 4412c1cd33SDouglas Gregor /// EmitStringMatcherForChar - Given a set of strings that are known to be the 4512c1cd33SDouglas Gregor /// same length and whose characters leading up to CharNo are the same, emit 4612c1cd33SDouglas Gregor /// code to verify that CharNo and later are the same. 4712c1cd33SDouglas Gregor /// 4812c1cd33SDouglas Gregor /// \return - True if control can leave the emitted code fragment. 4912c1cd33SDouglas Gregor bool StringMatcher:: 5012c1cd33SDouglas Gregor EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches, 5112c1cd33SDouglas Gregor unsigned CharNo, unsigned IndentCount) const { 5212c1cd33SDouglas Gregor assert(!Matches.empty() && "Must have at least one string to match!"); 5312c1cd33SDouglas Gregor std::string Indent(IndentCount*2+4, ' '); 5412c1cd33SDouglas Gregor 5512c1cd33SDouglas Gregor // If we have verified that the entire string matches, we're done: output the 5612c1cd33SDouglas Gregor // matching code. 5712c1cd33SDouglas Gregor if (CharNo == Matches[0]->first.size()) { 5812c1cd33SDouglas Gregor assert(Matches.size() == 1 && "Had duplicate keys to match on"); 5912c1cd33SDouglas Gregor 6012c1cd33SDouglas Gregor // If the to-execute code has \n's in it, indent each subsequent line. 6112c1cd33SDouglas Gregor StringRef Code = Matches[0]->second; 6212c1cd33SDouglas Gregor 6312c1cd33SDouglas Gregor std::pair<StringRef, StringRef> Split = Code.split('\n'); 6412c1cd33SDouglas Gregor OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n"; 6512c1cd33SDouglas Gregor 6612c1cd33SDouglas Gregor Code = Split.second; 6712c1cd33SDouglas Gregor while (!Code.empty()) { 6812c1cd33SDouglas Gregor Split = Code.split('\n'); 6912c1cd33SDouglas Gregor OS << Indent << Split.first << "\n"; 7012c1cd33SDouglas Gregor Code = Split.second; 7112c1cd33SDouglas Gregor } 7212c1cd33SDouglas Gregor return false; 7312c1cd33SDouglas Gregor } 7412c1cd33SDouglas Gregor 7512c1cd33SDouglas Gregor // Bucket the matches by the character we are comparing. 7612c1cd33SDouglas Gregor std::map<char, std::vector<const StringPair*>> MatchesByLetter; 7712c1cd33SDouglas Gregor 7812c1cd33SDouglas Gregor for (unsigned i = 0, e = Matches.size(); i != e; ++i) 7912c1cd33SDouglas Gregor MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); 8012c1cd33SDouglas Gregor 8112c1cd33SDouglas Gregor 8212c1cd33SDouglas Gregor // If we have exactly one bucket to match, see how many characters are common 8312c1cd33SDouglas Gregor // across the whole set and match all of them at once. 8412c1cd33SDouglas Gregor if (MatchesByLetter.size() == 1) { 8512c1cd33SDouglas Gregor unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches); 8612c1cd33SDouglas Gregor unsigned NumChars = FirstNonCommonLetter-CharNo; 8712c1cd33SDouglas Gregor 8812c1cd33SDouglas Gregor // Emit code to break out if the prefix doesn't match. 8912c1cd33SDouglas Gregor if (NumChars == 1) { 9012c1cd33SDouglas Gregor // Do the comparison with if (Str[1] != 'f') 9112c1cd33SDouglas Gregor // FIXME: Need to escape general characters. 9212c1cd33SDouglas Gregor OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '" 9312c1cd33SDouglas Gregor << Matches[0]->first[CharNo] << "')\n"; 9412c1cd33SDouglas Gregor OS << Indent << " break;\n"; 9512c1cd33SDouglas Gregor } else { 96a4055582SBenjamin Kramer // Do the comparison with if memcmp(Str.data()+1, "foo", 3). 9712c1cd33SDouglas Gregor // FIXME: Need to escape general strings. 98a4055582SBenjamin Kramer OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo 99a4055582SBenjamin Kramer << ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", " 100*b2ca1b3fSEugene Zelenko << NumChars << ") != 0)\n"; 10112c1cd33SDouglas Gregor OS << Indent << " break;\n"; 10212c1cd33SDouglas Gregor } 10312c1cd33SDouglas Gregor 10412c1cd33SDouglas Gregor return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount); 10512c1cd33SDouglas Gregor } 10612c1cd33SDouglas Gregor 10712c1cd33SDouglas Gregor // Otherwise, we have multiple possible things, emit a switch on the 10812c1cd33SDouglas Gregor // character. 10912c1cd33SDouglas Gregor OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; 11012c1cd33SDouglas Gregor OS << Indent << "default: break;\n"; 11112c1cd33SDouglas Gregor 11212c1cd33SDouglas Gregor for (std::map<char, std::vector<const StringPair*>>::iterator LI = 11312c1cd33SDouglas Gregor MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { 11412c1cd33SDouglas Gregor // TODO: escape hard stuff (like \n) if we ever care about it. 11512c1cd33SDouglas Gregor OS << Indent << "case '" << LI->first << "':\t // " 11612c1cd33SDouglas Gregor << LI->second.size() << " string"; 11712c1cd33SDouglas Gregor if (LI->second.size() != 1) OS << 's'; 11812c1cd33SDouglas Gregor OS << " to match.\n"; 11912c1cd33SDouglas Gregor if (EmitStringMatcherForChar(LI->second, CharNo+1, IndentCount+1)) 12012c1cd33SDouglas Gregor OS << Indent << " break;\n"; 12112c1cd33SDouglas Gregor } 12212c1cd33SDouglas Gregor 12312c1cd33SDouglas Gregor OS << Indent << "}\n"; 12412c1cd33SDouglas Gregor return true; 12512c1cd33SDouglas Gregor } 12612c1cd33SDouglas Gregor 12712c1cd33SDouglas Gregor /// Emit - Top level entry point. 12812c1cd33SDouglas Gregor /// 12912c1cd33SDouglas Gregor void StringMatcher::Emit(unsigned Indent) const { 13012c1cd33SDouglas Gregor // If nothing to match, just fall through. 13112c1cd33SDouglas Gregor if (Matches.empty()) return; 13212c1cd33SDouglas Gregor 13312c1cd33SDouglas Gregor // First level categorization: group strings by length. 13412c1cd33SDouglas Gregor std::map<unsigned, std::vector<const StringPair*>> MatchesByLength; 13512c1cd33SDouglas Gregor 13612c1cd33SDouglas Gregor for (unsigned i = 0, e = Matches.size(); i != e; ++i) 13712c1cd33SDouglas Gregor MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); 13812c1cd33SDouglas Gregor 13912c1cd33SDouglas Gregor // Output a switch statement on length and categorize the elements within each 14012c1cd33SDouglas Gregor // bin. 14112c1cd33SDouglas Gregor OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n"; 14212c1cd33SDouglas Gregor OS.indent(Indent*2+2) << "default: break;\n"; 14312c1cd33SDouglas Gregor 14412c1cd33SDouglas Gregor for (std::map<unsigned, std::vector<const StringPair*>>::iterator LI = 14512c1cd33SDouglas Gregor MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { 14612c1cd33SDouglas Gregor OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " 14712c1cd33SDouglas Gregor << LI->second.size() 14812c1cd33SDouglas Gregor << " string" << (LI->second.size() == 1 ? "" : "s") << " to match.\n"; 14912c1cd33SDouglas Gregor if (EmitStringMatcherForChar(LI->second, 0, Indent)) 15012c1cd33SDouglas Gregor OS.indent(Indent*2+4) << "break;\n"; 15112c1cd33SDouglas Gregor } 15212c1cd33SDouglas Gregor 15312c1cd33SDouglas Gregor OS.indent(Indent*2+2) << "}\n"; 15412c1cd33SDouglas Gregor } 155