//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: libcpp-has-no-incomplete-ranges

// template <class View, class Pattern>
// class std::ranges::lazy_split_view;
//
// These tests check the output `lazy_split_view` produces for a variety of inputs, including many corner cases, with no
// restrictions on which member functions can be called.

#include <ranges>

#include <algorithm>
#include <array>
#include <cassert>
#include <map>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "types.h"

// A constexpr-friendly lightweight string, primarily useful for comparisons.
// Unlike `std::string_view`, it copies the given string into an
// internal buffer and can work with non-contiguous inputs.
33 template <class Char> 34 class BasicSmallString { 35 std::basic_string<Char> buffer_{}; 36 37 public: 38 constexpr BasicSmallString(std::basic_string_view<Char> v) : buffer_(v) {} 39 40 template <class I, class S> 41 constexpr BasicSmallString(I b, const S& e) { 42 for (; b != e; ++b) { 43 buffer_ += *b; 44 } 45 } 46 47 template <std::ranges::range R> 48 constexpr BasicSmallString(R&& from) : BasicSmallString(from.begin(), from.end()) {} 49 50 friend constexpr bool operator==(const BasicSmallString& lhs, const BasicSmallString& rhs) { 51 return lhs.buffer_ == rhs.buffer_; 52 } 53 }; 54 55 template <std::ranges::view View, std::ranges::range Expected> 56 constexpr bool is_equal(View& view, const Expected& expected) { 57 using Char = std::ranges::range_value_t<std::ranges::range_value_t<View>>; 58 using Str = BasicSmallString<Char>; 59 60 auto actual_it = view.begin(); 61 auto expected_it = expected.begin(); 62 for (; actual_it != view.end() && expected_it != expected.end(); ++actual_it, ++expected_it) { 63 if (Str(*actual_it) != Str(*expected_it)) 64 return false; 65 } 66 67 return actual_it == view.end() && expected_it == expected.end(); 68 } 69 70 template <class T, class Separator, class U, size_t M> 71 constexpr bool test_function_call(T&& input, Separator&& separator, std::array<U, M> expected) { 72 std::ranges::lazy_split_view v(input, separator); 73 return is_equal(v, expected); 74 } 75 76 template <class T, class Separator, class U, size_t M> 77 constexpr bool test_with_piping(T&& input, Separator&& separator, std::array<U, M> expected) { 78 auto expected_it = expected.begin(); 79 for (auto e : input | std::ranges::views::lazy_split(separator)) { 80 if (expected_it == expected.end()) 81 return false; 82 if (!std::ranges::equal(e, *expected_it)) 83 return false; 84 85 ++expected_it; 86 } 87 88 return expected_it == expected.end(); 89 } 90 91 constexpr bool test_l_r_values() { 92 using namespace std::string_view_literals; 93 94 // Both lvalues and rvalues 
can be used as input. 95 { 96 // Lvalues. 97 { 98 auto input = "abc"sv; 99 auto sep = " "sv; 100 [[maybe_unused]] std::ranges::lazy_split_view v(input, sep); 101 } 102 103 // Const lvalues. 104 { 105 const auto input = "abc"sv; 106 const auto sep = " "sv; 107 [[maybe_unused]] std::ranges::lazy_split_view v(input, sep); 108 } 109 110 // Rvalues. 111 { 112 auto input = "abc"sv; 113 auto sep = " "sv; 114 [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep)); 115 } 116 117 // Const rvalues. 118 { 119 const auto input = "abc"sv; 120 const auto sep = " "sv; 121 [[maybe_unused]] std::ranges::lazy_split_view v(std::move(input), std::move(sep)); 122 } 123 } 124 125 return true; 126 } 127 128 constexpr bool test_string_literal_separator() { 129 using namespace std::string_view_literals; 130 131 // Splitting works as expected when the separator is a single character literal. 132 { 133 std::ranges::lazy_split_view v("abc def"sv, ' '); 134 assert(is_equal(v, std::array{"abc"sv, "def"sv})); 135 } 136 137 // Counterintuitively, a seemingly equivalent separator expressed as a string literal doesn't match anything. This is 138 // because of the implicit terminating null in the literal. 139 { 140 std::ranges::lazy_split_view v("abc def"sv, " "); 141 assert(is_equal(v, std::array{"abc def"sv})); 142 } 143 144 // To illustrate the previous point further, the separator is actually a two-character string literal: `{' ', '\0'}`. 145 // Should the input string contain that two-character sequence, the separator would match. 146 { 147 std::ranges::lazy_split_view v("abc \0def"sv, " "); 148 assert(is_equal(v, std::array{"abc"sv, "def"sv})); 149 } 150 151 return true; 152 } 153 154 // Make sure that a string literal and a `string_view` produce the same results (which isn't always the case, see 155 // below). 
156 template <class T> 157 constexpr std::string_view sv(T&& str) { 158 return std::string_view(str); 159 }; 160 161 template <class T, class Separator, class U, size_t M> 162 constexpr void test_one(T&& input, Separator&& separator, std::array<U, M> expected) { 163 assert(test_function_call(input, separator, expected)); 164 assert(test_with_piping(input, separator, expected)); 165 166 // In addition to the `(ForwardView, ForwardView)` case, test the `(ForwardView, tiny-range)` and `(InputView, 167 // tiny-range)` cases (all of which have unique code paths). 168 if constexpr (std::is_same_v<std::remove_reference_t<Separator>, char>) { 169 assert(test_function_call(CopyableView(input), ForwardTinyView(separator), expected)); 170 assert(test_with_piping(CopyableView(input), ForwardTinyView(separator), expected)); 171 172 assert(test_function_call(InputView(input), ForwardTinyView(separator), expected)); 173 assert(test_with_piping(InputView(input), ForwardTinyView(separator), expected)); 174 } 175 } 176 177 constexpr bool test_string_literals() { 178 // These tests show characteristic examples of how using string literals with `lazy_split_view` produces unexpected 179 // results due to the implicit terminating null that is treated as part of the range. 180 181 using namespace std::string_view_literals; 182 183 char short_sep = ' '; 184 auto long_sep = "12"sv; 185 186 // When splitting a string literal, only the last segment will be null-terminated (getting the terminating null from 187 // the original range). 188 { 189 std::array expected = {"abc"sv, std::string_view("def", sizeof("def"))}; 190 191 assert(test_function_call("abc def", short_sep, expected)); 192 assert(test_with_piping("abc def", short_sep, expected)); 193 assert(test_function_call("abc12def", long_sep, expected)); 194 assert(test_with_piping("abc12def", long_sep, expected)); 195 } 196 197 // Empty string. 
198 { 199 // Because an empty string literal contains an implicit terminating null, the output will contain one segment. 200 std::array expected = {std::string_view("", 1)}; 201 202 assert(test_function_call("", short_sep, expected)); 203 assert(test_with_piping("", short_sep, expected)); 204 assert(test_function_call("", long_sep, expected)); 205 assert(test_with_piping("", long_sep, expected)); 206 } 207 208 // Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "` 209 // are treated differently due to the presence of an implicit `\0` in the latter. 210 { 211 const char input[] = "abc def"; 212 std::array expected_unsplit = {std::string_view(input, sizeof(input))}; 213 std::array expected_split = {"abc"sv, std::string_view("def", sizeof("def"))}; 214 215 assert(test_function_call(input, " ", expected_unsplit)); 216 assert(test_function_call("abc \0def", " ", expected_split)); 217 // Note: string literals don't work with piping because arrays decay to pointers, and pointers don't model `range`. 218 } 219 220 // Empty separator. 221 { 222 auto empty_sep = ""sv; 223 std::array expected = {"a"sv, "b"sv, "c"sv, "\0"sv}; 224 225 assert(test_function_call("abc", empty_sep, expected)); 226 assert(test_with_piping("abc", empty_sep, expected)); 227 } 228 229 return true; 230 } 231 232 bool test_nontrivial_characters() { 233 // Try a deliberately heavyweight "character" type to see if it triggers any corner cases. 
234 235 using Map = std::map<std::string, int>; 236 using Vec = std::vector<Map>; 237 238 Map sep = {{"yyy", 999}}; 239 Map m1 = { 240 {"a", 1}, 241 {"bc", 2}, 242 }; 243 Map m2 = { 244 {"def", 3}, 245 }; 246 Map m3 = { 247 {"g", 4}, 248 {"hijk", 5}, 249 }; 250 251 Vec expected1 = {m1, m2}; 252 Vec expected2 = {m3}; 253 254 std::ranges::lazy_split_view v(Vec{m1, m2, sep, m3}, sep); 255 256 // Segment 1: {m1, m2} 257 auto outer = v.begin(); 258 assert(outer != v.end()); 259 auto inner = (*outer).begin(); 260 assert(*inner++ == m1); 261 assert(*inner++ == m2); 262 assert(inner == (*outer).end()); 263 264 // Segment 2: {m3} 265 ++outer; 266 assert(outer != v.end()); 267 inner = (*outer).begin(); 268 assert(*inner++ == m3); 269 assert(inner == (*outer).end()); 270 271 ++outer; 272 assert(outer == v.end()); 273 274 return true; 275 } 276 277 constexpr bool main_test() { 278 using namespace std::string_view_literals; 279 280 char short_sep = ' '; 281 auto long_sep = "12"sv; 282 283 // One separator. 284 { 285 std::array expected = {"abc"sv, "def"sv}; 286 test_one("abc def"sv, short_sep, expected); 287 test_one("abc12def"sv, long_sep, expected); 288 } 289 290 // Several separators in a row. 291 { 292 std::array expected = {"abc"sv, ""sv, ""sv, ""sv, "def"sv}; 293 test_one("abc def"sv, short_sep, expected); 294 test_one("abc12121212def"sv, long_sep, expected); 295 } 296 297 // Trailing separator. 298 { 299 std::array expected = {"abc"sv, "def"sv, ""sv}; 300 test_one("abc def "sv, short_sep, expected); 301 test_one("abc12def12"sv, long_sep, expected); 302 } 303 304 // Leading separator. 305 { 306 std::array expected = {""sv, "abc"sv, "def"sv}; 307 test_one(" abc def"sv, short_sep, expected); 308 test_one("12abc12def"sv, long_sep, expected); 309 } 310 311 // No separator. 312 { 313 std::array expected = {"abc"sv}; 314 test_one("abc"sv, short_sep, expected); 315 test_one("abc"sv, long_sep, expected); 316 } 317 318 // Input consisting of a single separator. 
319 { 320 std::array expected = {""sv, ""sv}; 321 test_one(" "sv, short_sep, expected); 322 test_one("12"sv, long_sep, expected); 323 } 324 325 // Input consisting of only separators. 326 { 327 std::array expected = {""sv, ""sv, ""sv, ""sv}; 328 test_one(" "sv, short_sep, expected); 329 test_one("121212"sv, long_sep, expected); 330 } 331 332 // The separator and the string use the same character only. 333 { 334 auto overlapping_sep = "aaa"sv; 335 std::array expected = {""sv, "aa"sv}; 336 test_one("aaaaa"sv, overlapping_sep, expected); 337 } 338 339 // Many redundant separators. 340 { 341 std::array expected = {""sv, ""sv, "abc"sv, ""sv, ""sv, "def"sv, ""sv, ""sv}; 342 test_one(" abc def "sv, short_sep, expected); 343 test_one("1212abc121212def1212"sv, long_sep, expected); 344 } 345 346 // Separators after every character. 347 { 348 std::array expected = {""sv, "a"sv, "b"sv, "c"sv, ""sv}; 349 test_one(" a b c "sv, short_sep, expected); 350 test_one("12a12b12c12"sv, long_sep, expected); 351 } 352 353 // Overlap between the separator and the string (see https://wg21.link/lwg3505). 354 { 355 auto overlapping_sep = "ab"sv; 356 std::array expected = {"a"sv, "aa"sv, ""sv, "b"sv}; 357 test_one("aabaaababb"sv, overlapping_sep, expected); 358 } 359 360 // Empty input. 361 { 362 std::array<std::string_view, 0> expected = {}; 363 test_one(""sv, short_sep, expected); 364 test_one(""sv, long_sep, expected); 365 } 366 367 // Empty separator. 368 { 369 auto empty_sep = ""sv; 370 std::array expected = {"a"sv, "b"sv, "c"sv}; 371 test_one("abc"sv, empty_sep, expected); 372 test_one("abc"sv, empty_sep, expected); 373 } 374 375 // Terminating null as a separator. 376 { 377 std::array expected = {"abc"sv, "def"sv}; 378 test_one("abc\0def"sv, '\0', expected); 379 test_one("abc\0\0def"sv, "\0\0"sv, expected); 380 } 381 382 // Different character types. 383 { 384 // `char`. 
385 test_function_call("abc def", ' ', std::array{"abc", "def"}); 386 #ifndef TEST_HAS_NO_WIDE_CHARACTERS 387 // `wchar_t`. 388 test_function_call(L"abc def", L' ', std::array{L"abc", L"def"}); 389 #endif 390 // `char8_t`. 391 test_function_call(u8"abc def", u8' ', std::array{u8"abc", u8"def"}); 392 // `char16_t`. 393 test_function_call(u"abc def", u' ', std::array{u"abc", u"def"}); 394 // `char32_t`. 395 test_function_call(U"abc def", U' ', std::array{U"abc", U"def"}); 396 } 397 398 // Non-character input. 399 { 400 std::array expected = {std::array{1, 2, 3}, std::array{4, 5, 6}}; 401 test_one(std::array{1, 2, 3, 0, 4, 5, 6}, 0, expected); 402 test_one(std::array{1, 2, 3, 0, 0, 0, 4, 5, 6}, std::array{0, 0, 0}, expected); 403 } 404 405 return true; 406 } 407 408 int main(int, char**) { 409 main_test(); 410 static_assert(main_test()); 411 412 test_string_literals(); 413 static_assert(test_string_literals()); 414 415 test_l_r_values(); 416 static_assert(test_l_r_values()); 417 418 test_string_literal_separator(); 419 static_assert(test_string_literal_separator()); 420 421 // Note: map is not `constexpr`, so this test is runtime-only. 422 test_nontrivial_characters(); 423 424 return 0; 425 } 426