1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // UNSUPPORTED: c++03, c++11, c++14, c++17
10 // UNSUPPORTED: libcpp-has-no-incomplete-ranges
11 
12 // template <class View, class Pattern>
13 // class std::ranges::lazy_split_view;
14 //
// These tests check the output `lazy_split_view` produces for a variety of inputs, including many corner cases, with no
// restrictions on which member functions can be called.
17 
18 #include <ranges>
19 
20 #include <algorithm>
21 #include <array>
22 #include <cassert>
23 #include <map>
24 #include <string>
25 #include <string_view>
26 #include <utility>
27 #include <vector>
28 #include "types.h"
29 
30 // A constexpr-friendly lightweight string, primarily useful for comparisons.
31 // Unlike `std::string_view`, it copies the given string into an
32 // internal buffer and can work with non-contiguous inputs.
33 template <class Char>
34 class BasicSmallString {
35   std::basic_string<Char> buffer_{};
36 
37 public:
BasicSmallString(std::basic_string_view<Char> v)38   constexpr BasicSmallString(std::basic_string_view<Char> v) : buffer_(v) {}
39 
40   template <class I, class S>
BasicSmallString(I b,const S & e)41   constexpr BasicSmallString(I b, const S& e) {
42     for (; b != e; ++b) {
43       buffer_ += *b;
44     }
45   }
46 
47   template <std::ranges::range R>
BasicSmallString(R && from)48   constexpr BasicSmallString(R&& from) : BasicSmallString(from.begin(), from.end()) {}
49 
operator ==(const BasicSmallString & lhs,const BasicSmallString & rhs)50   friend constexpr bool operator==(const BasicSmallString& lhs, const BasicSmallString& rhs) {
51     return lhs.buffer_ == rhs.buffer_;
52   }
53 };
54 
55 template <std::ranges::view View, std::ranges::range Expected>
is_equal(View & view,const Expected & expected)56 constexpr bool is_equal(View& view, const Expected& expected) {
57   using Char = std::ranges::range_value_t<std::ranges::range_value_t<View>>;
58   using Str = BasicSmallString<Char>;
59 
60   auto actual_it = view.begin();
61   auto expected_it = expected.begin();
62   for (; actual_it != view.end() && expected_it != expected.end(); ++actual_it, ++expected_it) {
63     if (Str(*actual_it) != Str(*expected_it))
64       return false;
65   }
66 
67   return actual_it == view.end() && expected_it == expected.end();
68 }
69 
70 template <class T, class Separator, class U, size_t M>
test_function_call(T && input,Separator && separator,std::array<U,M> expected)71 constexpr bool test_function_call(T&& input, Separator&& separator, std::array<U, M> expected) {
72   std::ranges::lazy_split_view v(input, separator);
73   return is_equal(v, expected);
74 }
75 
76 template <class T, class Separator, class U, size_t M>
test_with_piping(T && input,Separator && separator,std::array<U,M> expected)77 constexpr bool test_with_piping(T&& input, Separator&& separator, std::array<U, M> expected) {
78   auto expected_it = expected.begin();
79   for (auto e : input | std::ranges::views::lazy_split(separator)) {
80     if (expected_it == expected.end())
81       return false;
82     if (!std::ranges::equal(e, *expected_it))
83       return false;
84 
85     ++expected_it;
86   }
87 
88   return expected_it == expected.end();
89 }
90 
test_l_r_values()91 constexpr bool test_l_r_values() {
92   using namespace std::string_view_literals;
93 
94   // Both lvalues and rvalues can be used as input.
95   {
96     // Lvalues.
97     {
98       auto input = "abc"sv;
99       auto sep = " "sv;
100       [[maybe_unused]] std::ranges::lazy_split_view v(input, sep);
101     }
102 
103     // Const lvalues.
104     {
105       const auto input = "abc"sv;
106       const auto sep = " "sv;
107       [[maybe_unused]] std::ranges::lazy_split_view v(input, sep);
108     }
109 
110     // Rvalues.
111     {
112       auto input = "abc"sv;
113       auto sep = " "sv;
114       [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep));
115     }
116 
117     // Const rvalues.
118     {
119       const auto input = "abc"sv;
120       const auto sep = " "sv;
121       [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep));
122     }
123   }
124 
125   return true;
126 }
127 
test_string_literal_separator()128 constexpr bool test_string_literal_separator() {
129   using namespace std::string_view_literals;
130 
131   // Splitting works as expected when the separator is a single character literal.
132   {
133     std::ranges::lazy_split_view v("abc def"sv, ' ');
134     assert(is_equal(v, std::array{"abc"sv, "def"sv}));
135   }
136 
137   // Counterintuitively, a seemingly equivalent separator expressed as a string literal doesn't match anything. This is
138   // because of the implicit terminating null in the literal.
139   {
140     std::ranges::lazy_split_view v("abc def"sv, " ");
141     assert(is_equal(v, std::array{"abc def"sv}));
142   }
143 
144   // To illustrate the previous point further, the separator is actually a two-character string literal: `{' ', '\0'}`.
145   // Should the input string contain that two-character sequence, the separator would match.
146   {
147     std::ranges::lazy_split_view v("abc \0def"sv, " ");
148     assert(is_equal(v, std::array{"abc"sv, "def"sv}));
149   }
150 
151   return true;
152 }
153 
// Converts `str` to a `std::string_view` by explicit construction. This helps make sure that a string literal and a
// `string_view` produce the same results (which isn't always the case, see below): when `str` is a string literal,
// the resulting view ends at the first null character, whereas using the literal directly as a range would also
// include the terminating null.
template <class T>
constexpr std::string_view sv(T&& str) {
  return std::string_view(str);
}
160 
// Checks that splitting `input` on `separator` produces `expected`, both when the view is constructed directly and
// when it is created through the pipe syntax. Failures are reported through `assert`.
template <class T, class Separator, class U, size_t M>
constexpr void test_one(T&& input, Separator&& separator, std::array<U, M> expected) {
  assert(test_function_call(input, separator, expected));
  assert(test_with_piping(input, separator, expected));

  // Besides the `(ForwardView, ForwardView)` combination above, also cover `(ForwardView, tiny-range)` and
  // `(InputView, tiny-range)`, each of which goes through its own code path. The wrapper views come from "types.h"
  // and are only used when the separator is a plain `char`.
  using SeparatorValue = std::remove_reference_t<Separator>;
  if constexpr (std::is_same_v<SeparatorValue, char>) {
    // Forward input, tiny-range separator.
    assert(test_function_call(CopyableView(input), ForwardTinyView(separator), expected));
    assert(test_with_piping(CopyableView(input), ForwardTinyView(separator), expected));

    // Input-only input, tiny-range separator.
    assert(test_function_call(InputView(input), ForwardTinyView(separator), expected));
    assert(test_with_piping(InputView(input), ForwardTinyView(separator), expected));
  }
}
176 
test_string_literals()177 constexpr bool test_string_literals() {
178   // These tests show characteristic examples of how using string literals with `lazy_split_view` produces unexpected
179   // results due to the implicit terminating null that is treated as part of the range.
180 
181   using namespace std::string_view_literals;
182 
183   char short_sep = ' ';
184   auto long_sep = "12"sv;
185 
186   // When splitting a string literal, only the last segment will be null-terminated (getting the terminating null from
187   // the original range).
188   {
189     std::array expected = {"abc"sv, std::string_view("def", sizeof("def"))};
190 
191     assert(test_function_call("abc def", short_sep, expected));
192     assert(test_with_piping("abc def", short_sep, expected));
193     assert(test_function_call("abc12def", long_sep, expected));
194     assert(test_with_piping("abc12def", long_sep, expected));
195   }
196 
197   // Empty string.
198   {
199     // Because an empty string literal contains an implicit terminating null, the output will contain one segment.
200     std::array expected = {std::string_view("", 1)};
201 
202     assert(test_function_call("", short_sep, expected));
203     assert(test_with_piping("", short_sep, expected));
204     assert(test_function_call("", long_sep, expected));
205     assert(test_with_piping("", long_sep, expected));
206   }
207 
208   // Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "`
209   // are treated differently due to the presence of an implicit `\0` in the latter.
210   {
211     const char input[] = "abc def";
212     std::array expected_unsplit = {std::string_view(input, sizeof(input))};
213     std::array expected_split = {"abc"sv, std::string_view("def", sizeof("def"))};
214 
215     assert(test_function_call(input, " ", expected_unsplit));
216     assert(test_function_call("abc \0def", " ", expected_split));
217     // Note: string literals don't work with piping because arrays decay to pointers, and pointers don't model `range`.
218   }
219 
220   // Empty separator.
221   {
222     auto empty_sep = ""sv;
223     std::array expected = {"a"sv, "b"sv, "c"sv, "\0"sv};
224 
225     assert(test_function_call("abc", empty_sep, expected));
226     assert(test_with_piping("abc", empty_sep, expected));
227   }
228 
229   return true;
230 }
231 
test_nontrivial_characters()232 bool test_nontrivial_characters() {
233   // Try a deliberately heavyweight "character" type to see if it triggers any corner cases.
234 
235   using Map = std::map<std::string, int>;
236   using Vec = std::vector<Map>;
237 
238   Map sep = {{"yyy", 999}};
239   Map m1 = {
240     {"a", 1},
241     {"bc", 2},
242   };
243   Map m2 = {
244     {"def", 3},
245   };
246   Map m3 = {
247     {"g", 4},
248     {"hijk", 5},
249   };
250 
251   Vec expected1 = {m1, m2};
252   Vec expected2 = {m3};
253 
254   std::ranges::lazy_split_view v(Vec{m1, m2, sep, m3}, sep);
255 
256   // Segment 1: {m1, m2}
257   auto outer = v.begin();
258   assert(outer != v.end());
259   auto inner = (*outer).begin();
260   assert(*inner++ == m1);
261   assert(*inner++ == m2);
262   assert(inner == (*outer).end());
263 
264   // Segment 2: {m3}
265   ++outer;
266   assert(outer != v.end());
267   inner = (*outer).begin();
268   assert(*inner++ == m3);
269   assert(inner == (*outer).end());
270 
271   ++outer;
272   assert(outer == v.end());
273 
274   return true;
275 }
276 
main_test()277 constexpr bool main_test() {
278   using namespace std::string_view_literals;
279 
280   char short_sep = ' ';
281   auto long_sep = "12"sv;
282 
283   // One separator.
284   {
285     std::array expected = {"abc"sv, "def"sv};
286     test_one("abc def"sv, short_sep, expected);
287     test_one("abc12def"sv, long_sep, expected);
288   }
289 
290   // Several separators in a row.
291   {
292     std::array expected = {"abc"sv, ""sv, ""sv, ""sv, "def"sv};
293     test_one("abc    def"sv, short_sep, expected);
294     test_one("abc12121212def"sv, long_sep, expected);
295   }
296 
297   // Trailing separator.
298   {
299     std::array expected = {"abc"sv, "def"sv, ""sv};
300     test_one("abc def "sv, short_sep, expected);
301     test_one("abc12def12"sv, long_sep, expected);
302   }
303 
304   // Leading separator.
305   {
306     std::array expected = {""sv, "abc"sv, "def"sv};
307     test_one(" abc def"sv, short_sep, expected);
308     test_one("12abc12def"sv, long_sep, expected);
309   }
310 
311   // No separator.
312   {
313     std::array expected = {"abc"sv};
314     test_one("abc"sv, short_sep, expected);
315     test_one("abc"sv, long_sep, expected);
316   }
317 
318   // Input consisting of a single separator.
319   {
320     std::array expected = {""sv, ""sv};
321     test_one(" "sv, short_sep, expected);
322     test_one("12"sv, long_sep, expected);
323   }
324 
325   // Input consisting of only separators.
326   {
327     std::array expected = {""sv, ""sv, ""sv, ""sv};
328     test_one("   "sv, short_sep, expected);
329     test_one("121212"sv, long_sep, expected);
330   }
331 
332   // The separator and the string use the same character only.
333   {
334     auto overlapping_sep = "aaa"sv;
335     std::array expected = {""sv, "aa"sv};
336     test_one("aaaaa"sv, overlapping_sep, expected);
337   }
338 
339   // Many redundant separators.
340   {
341     std::array expected = {""sv, ""sv, "abc"sv, ""sv, ""sv, "def"sv, ""sv, ""sv};
342     test_one("  abc   def  "sv, short_sep, expected);
343     test_one("1212abc121212def1212"sv, long_sep, expected);
344   }
345 
346   // Separators after every character.
347   {
348     std::array expected = {""sv, "a"sv, "b"sv, "c"sv, ""sv};
349     test_one(" a b c "sv, short_sep, expected);
350     test_one("12a12b12c12"sv, long_sep, expected);
351   }
352 
353   // Overlap between the separator and the string (see https://wg21.link/lwg3505).
354   {
355     auto overlapping_sep = "ab"sv;
356     std::array expected = {"a"sv, "aa"sv, ""sv, "b"sv};
357     test_one("aabaaababb"sv, overlapping_sep, expected);
358   }
359 
360   // Empty input.
361   {
362     std::array<std::string_view, 0> expected = {};
363     test_one(""sv, short_sep, expected);
364     test_one(""sv, long_sep, expected);
365   }
366 
367   // Empty separator.
368   {
369     auto empty_sep = ""sv;
370     std::array expected = {"a"sv, "b"sv, "c"sv};
371     test_one("abc"sv, empty_sep, expected);
372     test_one("abc"sv, empty_sep, expected);
373   }
374 
375   // Terminating null as a separator.
376   {
377     std::array expected = {"abc"sv, "def"sv};
378     test_one("abc\0def"sv, '\0', expected);
379     test_one("abc\0\0def"sv, "\0\0"sv, expected);
380   }
381 
382   // Different character types.
383   {
384     // `char`.
385     test_function_call("abc def", ' ', std::array{"abc", "def"});
386 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
387     // `wchar_t`.
388     test_function_call(L"abc def", L' ', std::array{L"abc", L"def"});
389 #endif
390     // `char8_t`.
391     test_function_call(u8"abc def", u8' ', std::array{u8"abc", u8"def"});
392     // `char16_t`.
393     test_function_call(u"abc def", u' ', std::array{u"abc", u"def"});
394     // `char32_t`.
395     test_function_call(U"abc def", U' ', std::array{U"abc", U"def"});
396   }
397 
398   // Non-character input.
399   {
400     std::array expected = {std::array{1, 2, 3}, std::array{4, 5, 6}};
401     test_one(std::array{1, 2, 3, 0, 4, 5, 6}, 0, expected);
402     test_one(std::array{1, 2, 3, 0, 0, 0, 4, 5, 6}, std::array{0, 0, 0}, expected);
403   }
404 
405   return true;
406 }
407 
408 int main(int, char**) {
409   main_test();
410   static_assert(main_test());
411 
412   test_string_literals();
413   static_assert(test_string_literals());
414 
415   test_l_r_values();
416   static_assert(test_l_r_values());
417 
418   test_string_literal_separator();
419   static_assert(test_string_literal_separator());
420 
421   // Note: map is not `constexpr`, so this test is runtime-only.
422   test_nontrivial_characters();
423 
424   return 0;
425 }
426