1*857a78c0SMark de Wever //===----------------------------------------------------------------------===//
2*857a78c0SMark de Wever // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3*857a78c0SMark de Wever // See https://llvm.org/LICENSE.txt for license information.
4*857a78c0SMark de Wever // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5*857a78c0SMark de Wever //
6*857a78c0SMark de Wever //===----------------------------------------------------------------------===//
7*857a78c0SMark de Wever 
8*857a78c0SMark de Wever // UNSUPPORTED: c++03, c++11, c++14, c++17
9*857a78c0SMark de Wever // UNSUPPORTED: libcpp-has-no-incomplete-format
10*857a78c0SMark de Wever 
11*857a78c0SMark de Wever // This version runs the test when the platform has Unicode support.
12*857a78c0SMark de Wever // UNSUPPORTED: libcpp-has-no-unicode
13*857a78c0SMark de Wever 
14*857a78c0SMark de Wever // <format>
15*857a78c0SMark de Wever 
16*857a78c0SMark de Wever // Tests the Unicode width support of the standard format specifiers.
17*857a78c0SMark de Wever // It tests [format.string.std]/8 - 11:
// - Properly determining the estimated width of a Unicode string.
19*857a78c0SMark de Wever // - Properly truncating to the wanted maximum width.
20*857a78c0SMark de Wever 
// More specific extended grapheme cluster boundary rules are tested in
// test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp.
// That test is based on test data provided by the Unicode Consortium.
24*857a78c0SMark de Wever 
25*857a78c0SMark de Wever #include <format>
26*857a78c0SMark de Wever #include <cassert>
27*857a78c0SMark de Wever #include <vector>
28*857a78c0SMark de Wever 
29*857a78c0SMark de Wever #include "make_string.h"
30*857a78c0SMark de Wever #include "test_macros.h"
31*857a78c0SMark de Wever #include "string_literal.h"
32*857a78c0SMark de Wever 
33*857a78c0SMark de Wever #ifndef TEST_HAS_NO_LOCALIZATION
34*857a78c0SMark de Wever #  include <iostream>
35*857a78c0SMark de Wever #  include <type_traits>
36*857a78c0SMark de Wever #endif
37*857a78c0SMark de Wever 
38*857a78c0SMark de Wever #define SV(S) MAKE_STRING_VIEW(CharT, S)
39*857a78c0SMark de Wever 
// Formats `args` with the compile-time format string `fmt` and asserts that
// the result equals `expected`. The character type is deduced from `expected`.
//
// When localization is available and the character type is `char`, a mismatch
// is additionally reported on std::cerr before the assertion fires, so the
// format string and both outputs are visible in the test log.
auto check = []<string_literal fmt, class CharT, class... Args>(
    std::basic_string_view<CharT> expected, const Args&... args) constexpr {
  const std::basic_string<CharT> out = std::format(fmt.template sv<CharT>(), args...);
#ifndef TEST_HAS_NO_LOCALIZATION
  if constexpr (std::same_as<CharT, char>) {
    if (out != expected) {
      std::cerr << "\nFormat string   " << fmt.template sv<char>();
      std::cerr << "\nExpected output " << expected;
      std::cerr << "\nActual output   " << out << '\n';
    }
  }
#endif
  assert(out == expected);
};
51*857a78c0SMark de Wever 
// Centers a single code point in a field padded with '*'.
//
// Code points expected to occupy one column are formatted with a field width
// of 3, code points expected to occupy two columns with a field width of 4. In
// both cases the expected output has exactly one fill character on each side.
template <class CharT>
static void test_single_code_point_fill() {
  using view = std::basic_string_view<CharT>;

  // Field width 3: used for code points with an estimated width of one column.
  const auto one_column = [](view expected, view input) {
    check.template operator()<"{:*^3}">(expected, input);
  };
  // Field width 4: used for code points with an estimated width of two columns.
  const auto two_columns = [](view expected, view input) {
    check.template operator()<"{:*^4}">(expected, input);
  };

  //*** 1-byte code points ***
  one_column(SV("* *"), SV(" "));
  one_column(SV("*~*"), SV("~"));

  //*** 2-byte code points ***
  one_column(SV("*\u00a1*"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
  one_column(SV("*\u07ff*"), SV("\u07ff")); // NKO TAMAN SIGN

  //*** 3-byte code points ***
  one_column(SV("*\u0800*"), SV("\u0800")); // SAMARITAN LETTER ALAF
  one_column(SV("*\ufffd*"), SV("\ufffd")); // REPLACEMENT CHARACTER

  // 2 column ranges
  two_columns(SV("*\u1100*"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
  two_columns(SV("*\u115f*"), SV("\u115f")); // HANGUL CHOSEONG FILLER

  two_columns(SV("*\u2329*"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
  two_columns(SV("*\u232a*"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET

  two_columns(SV("*\u2e80*"), SV("\u2e80")); // CJK RADICAL REPEAT
  two_columns(SV("*\u303e*"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR

  two_columns(SV("*\u3040*"), SV("\u3040")); // U+3041 HIRAGANA LETTER SMALL A
  two_columns(SV("*\ua4cf*"), SV("\ua4cf")); // U+A4D0 LISU LETTER BA

  two_columns(SV("*\uac00*"), SV("\uac00")); // <Hangul Syllable, First>
  two_columns(SV("*\ud7a3*"), SV("\ud7a3")); // Hangul Syllable Hih

  two_columns(SV("*\uf900*"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
  two_columns(SV("*\ufaff*"), SV("\ufaff")); // U+FB00 LATIN SMALL LIGATURE FF

  two_columns(SV("*\ufe10*"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
  two_columns(SV("*\ufe19*"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

  two_columns(SV("*\ufe30*"), SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  two_columns(SV("*\ufe6f*"), SV("\ufe6f")); // U+FE70 ARABIC FATHATAN ISOLATED FORM

  two_columns(SV("*\uff00*"), SV("\uff00")); // U+FF01 FULLWIDTH EXCLAMATION MARK
  two_columns(SV("*\uff60*"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS

  two_columns(SV("*\uffe0*"), SV("\uffe0")); // FULLWIDTH CENT SIGN
  two_columns(SV("*\uffe6*"), SV("\uffe6")); // FULLWIDTH WON SIGN

  //*** 4-byte code points ***
  one_column(SV("*\U00010000*"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
  one_column(SV("*\U0010FFFF*"), SV("\U0010FFFF")); // Undefined Character

  // 2 column ranges
  two_columns(SV("*\U0001f300*"), SV("\U0001f300")); // CYCLONE
  two_columns(SV("*\U0001f64f*"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
  two_columns(SV("*\U0001f900*"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
  two_columns(SV("*\U0001f9ff*"), SV("\U0001f9ff")); // NAZAR AMULET
  two_columns(SV("*\U00020000*"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
  two_columns(SV("*\U0002fffd*"), SV("\U0002fffd")); // Undefined Character
  two_columns(SV("*\U00030000*"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
  two_columns(SV("*\U0003fffd*"), SV("\U0003fffd")); // Undefined Character
}
112*857a78c0SMark de Wever 
// Formats a single code point with a field width of 3 and a precision of 1.
//
// One column output is unaffected by the precision.
// Two column output does not fit and is removed, thus the result consists of
// the fill character only.
template <class CharT>
static void test_single_code_point_truncate() {
  using view = std::basic_string_view<CharT>;

  // The code point is expected to survive the truncation.
  const auto kept = [](view expected, view input) {
    check.template operator()<"{:*^3.1}">(expected, input);
  };
  // The code point is expected to be removed, leaving three fill characters.
  const auto dropped = [](view input) {
    check.template operator()<"{:*^3.1}">(SV("***"), input);
  };

  //*** 1-byte code points ***
  kept(SV("* *"), SV(" "));
  kept(SV("*~*"), SV("~"));

  //*** 2-byte code points ***
  kept(SV("*\u00a1*"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
  kept(SV("*\u07ff*"), SV("\u07ff")); // NKO TAMAN SIGN

  //*** 3-byte code points ***
  kept(SV("*\u0800*"), SV("\u0800")); // SAMARITAN LETTER ALAF
  kept(SV("*\ufffd*"), SV("\ufffd")); // REPLACEMENT CHARACTER

  // 2 column ranges
  dropped(SV("\u1100")); // HANGUL CHOSEONG KIYEOK
  dropped(SV("\u115f")); // HANGUL CHOSEONG FILLER

  dropped(SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
  dropped(SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET

  dropped(SV("\u2e80")); // CJK RADICAL REPEAT
  dropped(SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR

  dropped(SV("\u3040")); // U+3041 HIRAGANA LETTER SMALL A
  dropped(SV("\ua4cf")); // U+A4D0 LISU LETTER BA

  dropped(SV("\uac00")); // <Hangul Syllable, First>
  dropped(SV("\ud7a3")); // Hangul Syllable Hih

  dropped(SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
  dropped(SV("\ufaff")); // U+FB00 LATIN SMALL LIGATURE FF

  dropped(SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
  dropped(SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

  dropped(SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  dropped(SV("\ufe6f")); // U+FE70 ARABIC FATHATAN ISOLATED FORM

  dropped(SV("\uff00")); // U+FF01 FULLWIDTH EXCLAMATION MARK
  dropped(SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS

  dropped(SV("\uffe0")); // FULLWIDTH CENT SIGN
  dropped(SV("\uffe6")); // FULLWIDTH WON SIGN

  //*** 4-byte code points ***
  kept(SV("*\U00010000*"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
  kept(SV("*\U0010FFFF*"), SV("\U0010FFFF")); // Undefined Character

  // 2 column ranges
  dropped(SV("\U0001f300")); // CYCLONE
  dropped(SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
  dropped(SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
  dropped(SV("\U0001f9ff")); // NAZAR AMULET
  dropped(SV("\U00020000")); // <CJK Ideograph Extension B, First>
  dropped(SV("\U0002fffd")); // Undefined Character
  dropped(SV("\U00030000")); // <CJK Ideograph Extension G, First>
  dropped(SV("\U0003fffd")); // Undefined Character
}
174*857a78c0SMark de Wever 
// The examples used in the paper P1868 (Unicode width estimation for
// std::format).
//
// Each example is formatted three ways:
// - centered without a precision, to verify the estimated width,
// - centered with a precision of 1, where two column clusters are removed,
// - centered with a precision of 2, where every example fits.
template <class CharT>
static void test_P1868() {
  // Fill
  check.template operator()<"{:*^3}">(SV("*\u0041*"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
  check.template operator()<"{:*^3}">(SV("*\u00c1*"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
  check.template operator()<"{:*^3}">(
      SV("*\u0041\u0301*"),
      SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  check.template operator()<"{:*^3}">(SV("*\u0132*"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
  check.template operator()<"{:*^3}">(SV("*\u0394*"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }

  check.template operator()<"{:*^3}">(SV("*\u0429*"), SV("\u0429"));         // { CYRILLIC CAPITAL LETTER SHCHA }
  check.template operator()<"{:*^3}">(SV("*\u05d0*"), SV("\u05d0"));         // { HEBREW LETTER ALEF }
  check.template operator()<"{:*^3}">(SV("*\u0634*"), SV("\u0634"));         // { ARABIC LETTER SHEEN }
  check.template operator()<"{:*^4}">(SV("*\u3009*"), SV("\u3009"));         // { RIGHT ANGLE BRACKET }
  check.template operator()<"{:*^4}">(SV("*\u754c*"), SV("\u754c"));         // { CJK Unified Ideograph-754C }
  check.template operator()<"{:*^4}">(SV("*\U0001f921*"), SV("\U0001f921")); // { CLOWN FACE }
  check.template operator()<"{:*^4}">(
      SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
      SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }

  // Truncate to 1 column: 1 column grapheme clusters are kept together,
  // 2 column grapheme clusters are removed entirely.
  check.template operator()<"{:*^3.1}">(SV("*\u0041*"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
  check.template operator()<"{:*^3.1}">(SV("*\u00c1*"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
  check.template operator()<"{:*^3.1}">(
      SV("*\u0041\u0301*"),
      SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  check.template operator()<"{:*^3.1}">(SV("*\u0132*"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
  check.template operator()<"{:*^3.1}">(SV("*\u0394*"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }

  check.template operator()<"{:*^3.1}">(SV("*\u0429*"), SV("\u0429")); // { CYRILLIC CAPITAL LETTER SHCHA }
  check.template operator()<"{:*^3.1}">(SV("*\u05d0*"), SV("\u05d0")); // { HEBREW LETTER ALEF }
  check.template operator()<"{:*^3.1}">(SV("*\u0634*"), SV("\u0634")); // { ARABIC LETTER SHEEN }
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\u3009"));      // { RIGHT ANGLE BRACKET }
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\u754c"));      // { CJK Unified Ideograph-754C }
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\U0001f921"));  // { CLOWN FACE }
  check.template operator()<"{:*^3.1}">(
      SV("***"),
      SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }

  // Truncate to 2 column: 2 column grapheme clusters are kept together.
  check.template operator()<"{:*^3.2}">(SV("*\u0041*"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
  check.template operator()<"{:*^3.2}">(SV("*\u00c1*"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
  check.template operator()<"{:*^3.2}">(
      SV("*\u0041\u0301*"),
      SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  check.template operator()<"{:*^3.2}">(SV("*\u0132*"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
  check.template operator()<"{:*^3.2}">(SV("*\u0394*"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }

  check.template operator()<"{:*^3.2}">(SV("*\u0429*"), SV("\u0429"));         // { CYRILLIC CAPITAL LETTER SHCHA }
  check.template operator()<"{:*^3.2}">(SV("*\u05d0*"), SV("\u05d0"));         // { HEBREW LETTER ALEF }
  check.template operator()<"{:*^3.2}">(SV("*\u0634*"), SV("\u0634"));         // { ARABIC LETTER SHEEN }
  check.template operator()<"{:*^4.2}">(SV("*\u3009*"), SV("\u3009"));         // { RIGHT ANGLE BRACKET }
  check.template operator()<"{:*^4.2}">(SV("*\u754c*"), SV("\u754c"));         // { CJK Unified Ideograph-754C }
  check.template operator()<"{:*^4.2}">(SV("*\U0001f921*"), SV("\U0001f921")); // { CLOWN FACE }
  check.template operator()<"{:*^4.2}">(
      SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
      SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }
}
235*857a78c0SMark de Wever 
236*857a78c0SMark de Wever #ifdef _LIBCPP_VERSION
// Exercises the libc++ specific handling of malformed UTF sequences; the
// Standard leaves this behaviour unspecified.
//
// Every input is centered in a field whose width equals the number of code
// units in the input plus two, and the expected output has exactly one fill
// character on each side of the unmodified input.
template <class CharT>
static void test_malformed_code_point() {
  if constexpr (sizeof(CharT) == 1) {
    // The malformed code unit is the last one in the input.
    check.template operator()<"{:*^7}">(SV("*ZZZZ\x8f*"), SV("ZZZZ\x8f"));
    check.template operator()<"{:*^7}">(SV("*ZZZZ\xcf*"), SV("ZZZZ\xcf"));
    check.template operator()<"{:*^7}">(SV("*ZZZZ\xef*"), SV("ZZZZ\xef"));
    check.template operator()<"{:*^7}">(SV("*ZZZZ\xff*"), SV("ZZZZ\xff"));

    // The malformed code unit is in the middle and is not followed by a
    // continuation; one to four 'Z' characters follow it.
    check.template operator()<"{:*^8}">(SV("*ZZZZ\x8fZ*"), SV("ZZZZ\x8fZ"));
    check.template operator()<"{:*^8}">(SV("*ZZZZ\xcfZ*"), SV("ZZZZ\xcfZ"));
    check.template operator()<"{:*^8}">(SV("*ZZZZ\xefZ*"), SV("ZZZZ\xefZ"));
    check.template operator()<"{:*^8}">(SV("*ZZZZ\xffZ*"), SV("ZZZZ\xffZ"));

    check.template operator()<"{:*^9}">(SV("*ZZZZ\x8fZZ*"), SV("ZZZZ\x8fZZ"));
    check.template operator()<"{:*^9}">(SV("*ZZZZ\xcfZZ*"), SV("ZZZZ\xcfZZ"));
    check.template operator()<"{:*^9}">(SV("*ZZZZ\xefZZ*"), SV("ZZZZ\xefZZ"));
    check.template operator()<"{:*^9}">(SV("*ZZZZ\xffZZ*"), SV("ZZZZ\xffZZ"));

    check.template operator()<"{:*^10}">(SV("*ZZZZ\x8fZZZ*"), SV("ZZZZ\x8fZZZ"));
    check.template operator()<"{:*^10}">(SV("*ZZZZ\xcfZZZ*"), SV("ZZZZ\xcfZZZ"));
    check.template operator()<"{:*^10}">(SV("*ZZZZ\xefZZZ*"), SV("ZZZZ\xefZZZ"));
    check.template operator()<"{:*^10}">(SV("*ZZZZ\xffZZZ*"), SV("ZZZZ\xffZZZ"));

    check.template operator()<"{:*^11}">(SV("*ZZZZ\x8fZZZZ*"), SV("ZZZZ\x8fZZZZ"));
    check.template operator()<"{:*^11}">(SV("*ZZZZ\xcfZZZZ*"), SV("ZZZZ\xcfZZZZ"));
    check.template operator()<"{:*^11}">(SV("*ZZZZ\xefZZZZ*"), SV("ZZZZ\xefZZZZ"));
    check.template operator()<"{:*^11}">(SV("*ZZZZ\xffZZZZ*"), SV("ZZZZ\xffZZZZ"));

    // Premature end of a multi-unit sequence.
    check.template operator()<"{:*^8}">(SV("*ZZZZ\xef\xf5*"), SV("ZZZZ\xef\xf5"));
    check.template operator()<"{:*^12}">(SV("*ZZZZ\xef\xf5ZZZZ*"), SV("ZZZZ\xef\xf5ZZZZ"));
    check.template operator()<"{:*^9}">(SV("*ZZZZ\xff\xf5\xf5*"), SV("ZZZZ\xff\xf5\xf5"));
    check.template operator()<"{:*^13}">(SV("*ZZZZ\xff\xf5\xf5ZZZZ*"), SV("ZZZZ\xff\xf5\xf5ZZZZ"));

  } else if constexpr (sizeof(CharT) == 2) {
    // TODO FMT Add these tests.
  }
  // UTF-32 doesn't combine characters, thus no corruption tests.
}
280*857a78c0SMark de Wever #endif
281*857a78c0SMark de Wever 
282*857a78c0SMark de Wever template <class CharT>
test()283*857a78c0SMark de Wever static void test() {
284*857a78c0SMark de Wever   test_single_code_point_fill<CharT>();
285*857a78c0SMark de Wever   test_single_code_point_truncate<CharT>();
286*857a78c0SMark de Wever   test_P1868<CharT>();
287*857a78c0SMark de Wever 
288*857a78c0SMark de Wever #ifdef _LIBCPP_VERSION
289*857a78c0SMark de Wever   test_malformed_code_point<CharT>();
290*857a78c0SMark de Wever #endif
291*857a78c0SMark de Wever }
292*857a78c0SMark de Wever 
main(int,char **)293*857a78c0SMark de Wever int main(int, char**) {
294*857a78c0SMark de Wever   test<char>();
295*857a78c0SMark de Wever 
296*857a78c0SMark de Wever #ifndef TEST_HAS_NO_WIDE_CHARACTERS
297*857a78c0SMark de Wever   test<wchar_t>();
298*857a78c0SMark de Wever #endif
299*857a78c0SMark de Wever 
300*857a78c0SMark de Wever   return 0;
301*857a78c0SMark de Wever }
302