1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 // UNSUPPORTED: c++03, c++11, c++14, c++17
10 // UNSUPPORTED: libcpp-has-no-incomplete-ranges
11
12 // template <class View, class Pattern>
13 // class std::ranges::lazy_split_view;
14 //
// These tests check the output `lazy_split_view` produces for a variety of inputs, including many corner cases, with no
// restrictions on which member functions can be called.
17
18 #include <ranges>
19
20 #include <algorithm>
21 #include <array>
22 #include <cassert>
23 #include <map>
24 #include <string>
25 #include <string_view>
26 #include <utility>
27 #include <vector>
28 #include "types.h"
29
30 // A constexpr-friendly lightweight string, primarily useful for comparisons.
31 // Unlike `std::string_view`, it copies the given string into an
32 // internal buffer and can work with non-contiguous inputs.
33 template <class Char>
34 class BasicSmallString {
35 std::basic_string<Char> buffer_{};
36
37 public:
BasicSmallString(std::basic_string_view<Char> v)38 constexpr BasicSmallString(std::basic_string_view<Char> v) : buffer_(v) {}
39
40 template <class I, class S>
BasicSmallString(I b,const S & e)41 constexpr BasicSmallString(I b, const S& e) {
42 for (; b != e; ++b) {
43 buffer_ += *b;
44 }
45 }
46
47 template <std::ranges::range R>
BasicSmallString(R && from)48 constexpr BasicSmallString(R&& from) : BasicSmallString(from.begin(), from.end()) {}
49
operator ==(const BasicSmallString & lhs,const BasicSmallString & rhs)50 friend constexpr bool operator==(const BasicSmallString& lhs, const BasicSmallString& rhs) {
51 return lhs.buffer_ == rhs.buffer_;
52 }
53 };
54
55 template <std::ranges::view View, std::ranges::range Expected>
is_equal(View & view,const Expected & expected)56 constexpr bool is_equal(View& view, const Expected& expected) {
57 using Char = std::ranges::range_value_t<std::ranges::range_value_t<View>>;
58 using Str = BasicSmallString<Char>;
59
60 auto actual_it = view.begin();
61 auto expected_it = expected.begin();
62 for (; actual_it != view.end() && expected_it != expected.end(); ++actual_it, ++expected_it) {
63 if (Str(*actual_it) != Str(*expected_it))
64 return false;
65 }
66
67 return actual_it == view.end() && expected_it == expected.end();
68 }
69
70 template <class T, class Separator, class U, size_t M>
test_function_call(T && input,Separator && separator,std::array<U,M> expected)71 constexpr bool test_function_call(T&& input, Separator&& separator, std::array<U, M> expected) {
72 std::ranges::lazy_split_view v(input, separator);
73 return is_equal(v, expected);
74 }
75
76 template <class T, class Separator, class U, size_t M>
test_with_piping(T && input,Separator && separator,std::array<U,M> expected)77 constexpr bool test_with_piping(T&& input, Separator&& separator, std::array<U, M> expected) {
78 auto expected_it = expected.begin();
79 for (auto e : input | std::ranges::views::lazy_split(separator)) {
80 if (expected_it == expected.end())
81 return false;
82 if (!std::ranges::equal(e, *expected_it))
83 return false;
84
85 ++expected_it;
86 }
87
88 return expected_it == expected.end();
89 }
90
test_l_r_values()91 constexpr bool test_l_r_values() {
92 using namespace std::string_view_literals;
93
94 // Both lvalues and rvalues can be used as input.
95 {
96 // Lvalues.
97 {
98 auto input = "abc"sv;
99 auto sep = " "sv;
100 [[maybe_unused]] std::ranges::lazy_split_view v(input, sep);
101 }
102
103 // Const lvalues.
104 {
105 const auto input = "abc"sv;
106 const auto sep = " "sv;
107 [[maybe_unused]] std::ranges::lazy_split_view v(input, sep);
108 }
109
110 // Rvalues.
111 {
112 auto input = "abc"sv;
113 auto sep = " "sv;
114 [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep));
115 }
116
117 // Const rvalues.
118 {
119 const auto input = "abc"sv;
120 const auto sep = " "sv;
121 [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep));
122 }
123 }
124
125 return true;
126 }
127
test_string_literal_separator()128 constexpr bool test_string_literal_separator() {
129 using namespace std::string_view_literals;
130
131 // Splitting works as expected when the separator is a single character literal.
132 {
133 std::ranges::lazy_split_view v("abc def"sv, ' ');
134 assert(is_equal(v, std::array{"abc"sv, "def"sv}));
135 }
136
137 // Counterintuitively, a seemingly equivalent separator expressed as a string literal doesn't match anything. This is
138 // because of the implicit terminating null in the literal.
139 {
140 std::ranges::lazy_split_view v("abc def"sv, " ");
141 assert(is_equal(v, std::array{"abc def"sv}));
142 }
143
144 // To illustrate the previous point further, the separator is actually a two-character string literal: `{' ', '\0'}`.
145 // Should the input string contain that two-character sequence, the separator would match.
146 {
147 std::ranges::lazy_split_view v("abc \0def"sv, " ");
148 assert(is_equal(v, std::array{"abc"sv, "def"sv}));
149 }
150
151 return true;
152 }
153
// Make sure that a string literal and a `string_view` produce the same results (which isn't always the case, see
// below).
//
// Converts `str` to a `std::string_view` by explicit construction. Nothing is copied: the returned view refers to the
// characters owned by the argument. A string literal goes through the `const char*` constructor, so the resulting
// view ends at the first null character.
template <class T>
constexpr std::string_view sv(T&& str) {
  return std::string_view(str);
}
160
// Runs one input/separator/expected triple through both ways of creating the view (direct construction and piping).
// When the separator is a single `char`, the same check is repeated with the input wrapped in `CopyableView` and in
// `InputView`, each paired with a `ForwardTinyView` separator.
template <class T, class Separator, class U, size_t M>
constexpr void test_one(T&& input, Separator&& separator, std::array<U, M> expected) {
  // True for a `char` separator regardless of whether it was passed as an lvalue or an rvalue.
  constexpr bool has_char_separator = std::is_same_v<std::remove_reference_t<Separator>, char>;

  assert(test_function_call(input, separator, expected));
  assert(test_with_piping(input, separator, expected));

  // Besides the `(ForwardView, ForwardView)` case above, cover the `(ForwardView, tiny-range)` and the
  // `(InputView, tiny-range)` cases, each of which takes its own code path.
  if constexpr (has_char_separator) {
    // Forward input with a tiny-range separator.
    assert(test_function_call(CopyableView(input), ForwardTinyView(separator), expected));
    assert(test_with_piping(CopyableView(input), ForwardTinyView(separator), expected));

    // Input-only input with a tiny-range separator.
    assert(test_function_call(InputView(input), ForwardTinyView(separator), expected));
    assert(test_with_piping(InputView(input), ForwardTinyView(separator), expected));
  }
}
176
test_string_literals()177 constexpr bool test_string_literals() {
178 // These tests show characteristic examples of how using string literals with `lazy_split_view` produces unexpected
179 // results due to the implicit terminating null that is treated as part of the range.
180
181 using namespace std::string_view_literals;
182
183 char short_sep = ' ';
184 auto long_sep = "12"sv;
185
186 // When splitting a string literal, only the last segment will be null-terminated (getting the terminating null from
187 // the original range).
188 {
189 std::array expected = {"abc"sv, std::string_view("def", sizeof("def"))};
190
191 assert(test_function_call("abc def", short_sep, expected));
192 assert(test_with_piping("abc def", short_sep, expected));
193 assert(test_function_call("abc12def", long_sep, expected));
194 assert(test_with_piping("abc12def", long_sep, expected));
195 }
196
197 // Empty string.
198 {
199 // Because an empty string literal contains an implicit terminating null, the output will contain one segment.
200 std::array expected = {std::string_view("", 1)};
201
202 assert(test_function_call("", short_sep, expected));
203 assert(test_with_piping("", short_sep, expected));
204 assert(test_function_call("", long_sep, expected));
205 assert(test_with_piping("", long_sep, expected));
206 }
207
208 // Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "`
209 // are treated differently due to the presence of an implicit `\0` in the latter.
210 {
211 const char input[] = "abc def";
212 std::array expected_unsplit = {std::string_view(input, sizeof(input))};
213 std::array expected_split = {"abc"sv, std::string_view("def", sizeof("def"))};
214
215 assert(test_function_call(input, " ", expected_unsplit));
216 assert(test_function_call("abc \0def", " ", expected_split));
217 // Note: string literals don't work with piping because arrays decay to pointers, and pointers don't model `range`.
218 }
219
220 // Empty separator.
221 {
222 auto empty_sep = ""sv;
223 std::array expected = {"a"sv, "b"sv, "c"sv, "\0"sv};
224
225 assert(test_function_call("abc", empty_sep, expected));
226 assert(test_with_piping("abc", empty_sep, expected));
227 }
228
229 return true;
230 }
231
test_nontrivial_characters()232 bool test_nontrivial_characters() {
233 // Try a deliberately heavyweight "character" type to see if it triggers any corner cases.
234
235 using Map = std::map<std::string, int>;
236 using Vec = std::vector<Map>;
237
238 Map sep = {{"yyy", 999}};
239 Map m1 = {
240 {"a", 1},
241 {"bc", 2},
242 };
243 Map m2 = {
244 {"def", 3},
245 };
246 Map m3 = {
247 {"g", 4},
248 {"hijk", 5},
249 };
250
251 Vec expected1 = {m1, m2};
252 Vec expected2 = {m3};
253
254 std::ranges::lazy_split_view v(Vec{m1, m2, sep, m3}, sep);
255
256 // Segment 1: {m1, m2}
257 auto outer = v.begin();
258 assert(outer != v.end());
259 auto inner = (*outer).begin();
260 assert(*inner++ == m1);
261 assert(*inner++ == m2);
262 assert(inner == (*outer).end());
263
264 // Segment 2: {m3}
265 ++outer;
266 assert(outer != v.end());
267 inner = (*outer).begin();
268 assert(*inner++ == m3);
269 assert(inner == (*outer).end());
270
271 ++outer;
272 assert(outer == v.end());
273
274 return true;
275 }
276
main_test()277 constexpr bool main_test() {
278 using namespace std::string_view_literals;
279
280 char short_sep = ' ';
281 auto long_sep = "12"sv;
282
283 // One separator.
284 {
285 std::array expected = {"abc"sv, "def"sv};
286 test_one("abc def"sv, short_sep, expected);
287 test_one("abc12def"sv, long_sep, expected);
288 }
289
290 // Several separators in a row.
291 {
292 std::array expected = {"abc"sv, ""sv, ""sv, ""sv, "def"sv};
293 test_one("abc def"sv, short_sep, expected);
294 test_one("abc12121212def"sv, long_sep, expected);
295 }
296
297 // Trailing separator.
298 {
299 std::array expected = {"abc"sv, "def"sv, ""sv};
300 test_one("abc def "sv, short_sep, expected);
301 test_one("abc12def12"sv, long_sep, expected);
302 }
303
304 // Leading separator.
305 {
306 std::array expected = {""sv, "abc"sv, "def"sv};
307 test_one(" abc def"sv, short_sep, expected);
308 test_one("12abc12def"sv, long_sep, expected);
309 }
310
311 // No separator.
312 {
313 std::array expected = {"abc"sv};
314 test_one("abc"sv, short_sep, expected);
315 test_one("abc"sv, long_sep, expected);
316 }
317
318 // Input consisting of a single separator.
319 {
320 std::array expected = {""sv, ""sv};
321 test_one(" "sv, short_sep, expected);
322 test_one("12"sv, long_sep, expected);
323 }
324
325 // Input consisting of only separators.
326 {
327 std::array expected = {""sv, ""sv, ""sv, ""sv};
328 test_one(" "sv, short_sep, expected);
329 test_one("121212"sv, long_sep, expected);
330 }
331
332 // The separator and the string use the same character only.
333 {
334 auto overlapping_sep = "aaa"sv;
335 std::array expected = {""sv, "aa"sv};
336 test_one("aaaaa"sv, overlapping_sep, expected);
337 }
338
339 // Many redundant separators.
340 {
341 std::array expected = {""sv, ""sv, "abc"sv, ""sv, ""sv, "def"sv, ""sv, ""sv};
342 test_one(" abc def "sv, short_sep, expected);
343 test_one("1212abc121212def1212"sv, long_sep, expected);
344 }
345
346 // Separators after every character.
347 {
348 std::array expected = {""sv, "a"sv, "b"sv, "c"sv, ""sv};
349 test_one(" a b c "sv, short_sep, expected);
350 test_one("12a12b12c12"sv, long_sep, expected);
351 }
352
353 // Overlap between the separator and the string (see https://wg21.link/lwg3505).
354 {
355 auto overlapping_sep = "ab"sv;
356 std::array expected = {"a"sv, "aa"sv, ""sv, "b"sv};
357 test_one("aabaaababb"sv, overlapping_sep, expected);
358 }
359
360 // Empty input.
361 {
362 std::array<std::string_view, 0> expected = {};
363 test_one(""sv, short_sep, expected);
364 test_one(""sv, long_sep, expected);
365 }
366
367 // Empty separator.
368 {
369 auto empty_sep = ""sv;
370 std::array expected = {"a"sv, "b"sv, "c"sv};
371 test_one("abc"sv, empty_sep, expected);
372 test_one("abc"sv, empty_sep, expected);
373 }
374
375 // Terminating null as a separator.
376 {
377 std::array expected = {"abc"sv, "def"sv};
378 test_one("abc\0def"sv, '\0', expected);
379 test_one("abc\0\0def"sv, "\0\0"sv, expected);
380 }
381
382 // Different character types.
383 {
384 // `char`.
385 test_function_call("abc def", ' ', std::array{"abc", "def"});
386 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
387 // `wchar_t`.
388 test_function_call(L"abc def", L' ', std::array{L"abc", L"def"});
389 #endif
390 // `char8_t`.
391 test_function_call(u8"abc def", u8' ', std::array{u8"abc", u8"def"});
392 // `char16_t`.
393 test_function_call(u"abc def", u' ', std::array{u"abc", u"def"});
394 // `char32_t`.
395 test_function_call(U"abc def", U' ', std::array{U"abc", U"def"});
396 }
397
398 // Non-character input.
399 {
400 std::array expected = {std::array{1, 2, 3}, std::array{4, 5, 6}};
401 test_one(std::array{1, 2, 3, 0, 4, 5, 6}, 0, expected);
402 test_one(std::array{1, 2, 3, 0, 0, 0, 4, 5, 6}, std::array{0, 0, 0}, expected);
403 }
404
405 return true;
406 }
407
408 int main(int, char**) {
409 main_test();
410 static_assert(main_test());
411
412 test_string_literals();
413 static_assert(test_string_literals());
414
415 test_l_r_values();
416 static_assert(test_l_r_values());
417
418 test_string_literal_separator();
419 static_assert(test_string_literal_separator());
420
421 // Note: map is not `constexpr`, so this test is runtime-only.
422 test_nontrivial_characters();
423
424 return 0;
425 }
426