1 //===----------------------------------------------------------------------===//
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 //===----------------------------------------------------------------------===//
7 
8 // UNSUPPORTED: c++03, c++11, c++14, c++17
9 // UNSUPPORTED: libcpp-has-no-incomplete-format
10 // TODO FMT Fix this test using GCC, it currently times out.
11 // UNSUPPORTED: gcc-12
12 
13 // <format>
14 
15 // Tests the implementation of the extended grapheme cluster boundaries per
16 // https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
17 //
18 // The tests are based on the test data provided by Unicode
19 // https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
20 
21 #include <cassert>
22 #include <format>
23 #include <functional>
24 #include <numeric>
25 
26 #include "extended_grapheme_cluster.h"
27 
// Validates that the number of code points per property in our "database"
// matches the number in the Unicode data. The assumption is that when the
// number of items per property matches, the code points themselves also match.
namespace {
// Short alias for the namespace holding the property table and the property
// enumeration used below.
// NOTE(review): the spelling "custer" is the name this file refers to;
// presumably it matches the library's declaration - confirm before renaming.
namespace cluster = std::__extended_grapheme_custer_property_boundary;

// Returns the sum, over every element of cluster::__entries whose low four
// bits equal `property`, of one plus the seven-bit field stored in bits 4-10
// of that element.
//
// NOTE(review): presumably each element packs a run of consecutive code points
// and bits 4-10 hold the run length minus one, so the result would be the
// number of code points carrying `property` - confirm against the table's
// generator.
constexpr int count_entries(cluster::__property property) {
  int total = 0;
  for (const auto packed : cluster::__entries) {
    // Low four bits: the property tag of this element.
    const auto tag = static_cast<cluster::__property>(packed & 0xf);
    if (tag == property)
      total += 1 + static_cast<int>((packed >> 4) & 0x7f);
  }
  return total;
}

// Expected totals per property. Per the comment on this namespace these are
// meant to equal the numbers in the Unicode data; they need to be revisited
// whenever the table is regenerated.
static_assert(count_entries(cluster::__property::__Prepend) == 26);
static_assert(count_entries(cluster::__property::__CR) == 1);
static_assert(count_entries(cluster::__property::__LF) == 1);
static_assert(count_entries(cluster::__property::__Control) == 3886);
static_assert(count_entries(cluster::__property::__Extend) == 2095);
static_assert(count_entries(cluster::__property::__Regional_Indicator) == 26);
static_assert(count_entries(cluster::__property::__SpacingMark) == 388);
static_assert(count_entries(cluster::__property::__L) == 125);
static_assert(count_entries(cluster::__property::__V) == 95);
static_assert(count_entries(cluster::__property::__T) == 137);
static_assert(count_entries(cluster::__property::__LV) == 399);
static_assert(count_entries(cluster::__property::__LVT) == 10773);
static_assert(count_entries(cluster::__property::__ZWJ) == 1);
static_assert(count_entries(cluster::__property::__Extended_Pictographic) == 3537);

} // namespace
59 
// Runs every test case in `data` through the extended grapheme cluster view.
//
// Each element of `data` is expected to expose `input`, `code_points` and
// `breaks`. For the i-th cluster consumed from `input`, the reported code
// point must equal `code_points[i]` and the reported end position must equal
// the start of `input` advanced by `breaks[i]`.
template <class Data>
constexpr void test(const Data& data) {
  for (const auto& test_case : data) {
    // One expected code point per expected break.
    assert(test_case.code_points.size() == test_case.breaks.size());

    const auto first = test_case.input.data();
    std::__unicode::__extended_grapheme_cluster_view view{first, first + test_case.input.size()};

    for (size_t index = 0; index < test_case.breaks.size(); ++index) {
      const auto consumed = view.__consume();
      assert(consumed.__code_point_ == test_case.code_points[index]);
      assert(consumed.__last_ == first + test_case.breaks[index]);
    }
  }
}
73 
test()74 constexpr bool test() {
75   test(data_utf8);
76 
77 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
78   if constexpr (sizeof(wchar_t) == 2)
79     test(data_utf16);
80   else
81     test(data_utf32);
82 #endif
83 
84   return true;
85 }
86 
main(int,char **)87 int main(int, char**) {
88   test();
89   // static_assert(test());
90 
91   return 0;
92 }
93