//===- llvm/unittest/Support/RegexTest.cpp - Regex tests --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

9e14d4cdbSTorok Edwin #include "llvm/Support/Regex.h"
10f08d2db9SChris Lattner #include "llvm/ADT/SmallVector.h"
11130cec21SChandler Carruth #include "gtest/gtest.h"
12e14d4cdbSTorok Edwin #include <cstring>
13e14d4cdbSTorok Edwin 
14e14d4cdbSTorok Edwin using namespace llvm;
15e14d4cdbSTorok Edwin namespace {
16e14d4cdbSTorok Edwin 
17e14d4cdbSTorok Edwin class RegexTest : public ::testing::Test {
18e14d4cdbSTorok Edwin };
19e14d4cdbSTorok Edwin 
// Covers anchored and unanchored matching, capture-group extraction, and NUL
// bytes embedded in both the subject string and the pattern.
TEST_F(RegexTest, Basics) {
  // Anchored at both ends: the whole input has to be digits.
  Regex r1("^[0-9]+$");
  EXPECT_TRUE(r1.match("916"));
  EXPECT_TRUE(r1.match("9"));
  EXPECT_FALSE(r1.match("9a"));

  // Unanchored: Matches[0] receives the matched substring.
  SmallVector<StringRef, 1> Matches;
  Regex r2("[0-9]+");
  EXPECT_TRUE(r2.match("aa216b", &Matches));
  EXPECT_EQ(1u, Matches.size());
  EXPECT_EQ("216", Matches[0].str());

  // Two capture groups, the first of them optional.
  Regex r3("[0-9]+([a-f])?:([0-9]+)");
  EXPECT_TRUE(r3.match("9a:513b", &Matches));
  EXPECT_EQ(3u, Matches.size());
  EXPECT_EQ("9a:513", Matches[0].str());
  EXPECT_EQ("a", Matches[1].str());
  EXPECT_EQ("513", Matches[2].str());

  // When the optional group does not take part in the match, its slot is
  // still present and is expected to be empty.
  EXPECT_TRUE(r3.match("9:513b", &Matches));
  EXPECT_EQ(3u, Matches.size());
  EXPECT_EQ("9:513", Matches[0].str());
  EXPECT_EQ("", Matches[1].str());
  EXPECT_EQ("513", Matches[2].str());

  // A NUL byte inside the subject is expected to match [^b] like any other
  // character, so the whole four-byte string is the match.
  Regex r4("a[^b]+b");
  std::string String = "axxb";
  String[2] = '\0';
  EXPECT_FALSE(r4.match("abb"));
  EXPECT_TRUE(r4.match(String, &Matches));
  EXPECT_EQ(1u, Matches.size());
  EXPECT_EQ(String, Matches[0].str());

  // A NUL byte inside the pattern (replacing the second 'X') must only match
  // a NUL byte at the corresponding position of the subject.
  std::string NulPattern = "X[0-9]+X([a-f])?:([0-9]+)";
  String = "YX99a:513b";
  NulPattern[7] = '\0';
  Regex r5(NulPattern);
  EXPECT_FALSE(r5.match(String));
  EXPECT_FALSE(r5.match("X9"));
  String[3] = '\0';
  EXPECT_TRUE(r5.match(String));
}

// Checks that \N inside a pattern only matches the text captured by group N.
TEST_F(RegexTest, Backreferences) {
  // One group, referenced once.
  Regex r1("([a-z]+)_\\1");
  SmallVector<StringRef, 4> Matches;
  EXPECT_TRUE(r1.match("abc_abc", &Matches));
  EXPECT_EQ(2u, Matches.size());
  EXPECT_FALSE(r1.match("abc_ab", &Matches));

  // One group, referenced twice.
  Regex r2("a([0-9])b\\1c\\1");
  EXPECT_TRUE(r2.match("a4b4c4", &Matches));
  EXPECT_EQ(2u, Matches.size());
  EXPECT_EQ("4", Matches[1].str());
  EXPECT_FALSE(r2.match("a2b2c3"));

  // Two groups, each referenced once; a mismatch in either must fail.
  Regex r3("a([0-9])([a-z])b\\1\\2");
  EXPECT_TRUE(r3.match("a6zb6z", &Matches));
  EXPECT_EQ(3u, Matches.size());
  EXPECT_EQ("6", Matches[1].str());
  EXPECT_EQ("z", Matches[2].str());
  EXPECT_FALSE(r3.match("a6zb6y"));
  EXPECT_FALSE(r3.match("a6zb7z"));
}

TEST_F(RegexTest,Substitution)85eb85711eSDaniel Dunbar TEST_F(RegexTest, Substitution) {
86eb85711eSDaniel Dunbar   std::string Error;
87eb85711eSDaniel Dunbar 
88eb85711eSDaniel Dunbar   EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", "a1234ber"));
89eb85711eSDaniel Dunbar 
90eb85711eSDaniel Dunbar   // Standard Escapes
91eb85711eSDaniel Dunbar   EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", "a1234ber", &Error));
924b739894SAlp Toker   EXPECT_EQ("", Error);
93eb85711eSDaniel Dunbar   EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", "a1234ber", &Error));
944b739894SAlp Toker   EXPECT_EQ("", Error);
95eb85711eSDaniel Dunbar   EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", "a1234ber", &Error));
964b739894SAlp Toker   EXPECT_EQ("", Error);
97eb85711eSDaniel Dunbar   EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", "a1234ber", &Error));
984b739894SAlp Toker   EXPECT_EQ("", Error);
99eb85711eSDaniel Dunbar 
100eb85711eSDaniel Dunbar   EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", "a1234ber", &Error));
101eb85711eSDaniel Dunbar   EXPECT_EQ(Error, "replacement string contained trailing backslash");
102eb85711eSDaniel Dunbar 
103eb85711eSDaniel Dunbar   // Backreferences
104eb85711eSDaniel Dunbar   EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error));
1054b739894SAlp Toker   EXPECT_EQ("", Error);
106eb85711eSDaniel Dunbar 
107eb85711eSDaniel Dunbar   EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error));
1084b739894SAlp Toker   EXPECT_EQ("", Error);
109eb85711eSDaniel Dunbar 
110eb85711eSDaniel Dunbar   EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error));
111eb85711eSDaniel Dunbar   EXPECT_EQ(Error, "invalid backreference string '100'");
112eb85711eSDaniel Dunbar }
113eb85711eSDaniel Dunbar 
// Regex::isLiteralERE must accept a plain string and reject any string that
// contains one of the metacharacters exercised below.
TEST_F(RegexTest, IsLiteralERE) {
  EXPECT_TRUE(Regex::isLiteralERE("abc"));
  EXPECT_FALSE(Regex::isLiteralERE("a(bc)"));
  EXPECT_FALSE(Regex::isLiteralERE("^abc"));
  EXPECT_FALSE(Regex::isLiteralERE("abc$"));
  EXPECT_FALSE(Regex::isLiteralERE("a|bc"));
  EXPECT_FALSE(Regex::isLiteralERE("abc*"));
  EXPECT_FALSE(Regex::isLiteralERE("abc+"));
  EXPECT_FALSE(Regex::isLiteralERE("abc?"));
  EXPECT_FALSE(Regex::isLiteralERE("abc."));
  EXPECT_FALSE(Regex::isLiteralERE("a[bc]"));
  EXPECT_FALSE(Regex::isLiteralERE("abc\\1"));
  EXPECT_FALSE(Regex::isLiteralERE("abc{1,2}"));
}

// Regex::escape must put a backslash in front of brackets, braces and
// backslashes, and leave the other characters used here untouched.
TEST_F(RegexTest, Escape) {
  EXPECT_EQ("a\\[bc\\]", Regex::escape("a[bc]"));
  EXPECT_EQ("abc\\{1\\\\,2\\}", Regex::escape("abc{1\\,2}"));
}

// Malformed patterns must be reported as invalid, with a specific message
// written to the Error argument of isValid.
TEST_F(RegexTest, IsValid) {
  std::string Error;
  EXPECT_FALSE(Regex("(foo").isValid(Error));
  EXPECT_EQ("parentheses not balanced", Error);
  EXPECT_FALSE(Regex("a[b-").isValid(Error));
  EXPECT_EQ("invalid character range", Error);
}

// A Regex built by move construction must match like the object it was
// constructed from.
TEST_F(RegexTest, MoveConstruct) {
  Regex r1("^[0-9]+$");
  Regex r2(std::move(r1));
  EXPECT_TRUE(r2.match("916"));
}

// Move assignment must transfer the compiled pattern to the destination and
// leave the source reporting itself as invalid.
TEST_F(RegexTest, MoveAssign) {
  Regex r1("^[0-9]+$");
  Regex r2("abc");
  r2 = std::move(r1);
  EXPECT_TRUE(r2.match("916"));
  // r1 is inspected after the move on purpose: the moved-from state is what
  // this test pins down.
  std::string Error;
  EXPECT_FALSE(r1.isValid(Error));
}

// A default-constructed Regex must be invalid with a fixed error message, and
// must become valid once a real pattern is assigned to it.
TEST_F(RegexTest, NoArgConstructor) {
  std::string Error;
  Regex r1;
  EXPECT_FALSE(r1.isValid(Error));
  EXPECT_EQ("invalid regular expression", Error);
  r1 = Regex("abc");
  EXPECT_TRUE(r1.isValid(Error));
}

// Calling match() on an invalid (default-constructed) Regex must return false.
TEST_F(RegexTest, MatchInvalid) {
  Regex r1;
  std::string Error;
  EXPECT_FALSE(r1.isValid(Error));
  EXPECT_FALSE(r1.match("X"));
}

173b42db156SVlad Tsyrklevich // https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=3727
TEST_F(RegexTest,OssFuzz3727Regression)174b42db156SVlad Tsyrklevich TEST_F(RegexTest, OssFuzz3727Regression) {
175b42db156SVlad Tsyrklevich   // Wrap in a StringRef so the NUL byte doesn't terminate the string
176b42db156SVlad Tsyrklevich   Regex r(StringRef("[[[=GS\x00[=][", 10));
177b42db156SVlad Tsyrklevich   std::string Error;
178b42db156SVlad Tsyrklevich   EXPECT_FALSE(r.isValid(Error));
179b42db156SVlad Tsyrklevich }
180b42db156SVlad Tsyrklevich 
} // end anonymous namespace