1 //===----------------------------------------------------------------------===//
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 //===----------------------------------------------------------------------===//
7
8 // UNSUPPORTED: c++03, c++11, c++14, c++17
9 // UNSUPPORTED: libcpp-has-no-incomplete-format
10
11 // This version runs the test when the platform has Unicode support.
12 // UNSUPPORTED: libcpp-has-no-unicode
13
14 // <format>
15
16 // Tests the Unicode width support of the standard format specifiers.
17 // It tests [format.string.std]/8 - 11:
// - Properly determining the estimated width of a Unicode string.
19 // - Properly truncating to the wanted maximum width.
20
// More specific extended grapheme cluster boundary rules are tested in
// test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp;
// that test is based on test data provided by the Unicode Consortium.
24
25 #include <format>
26 #include <cassert>
27 #include <vector>
28
29 #include "make_string.h"
30 #include "test_macros.h"
31 #include "string_literal.h"
32
33 #ifndef TEST_HAS_NO_LOCALIZATION
34 # include <iostream>
35 # include <type_traits>
36 #endif
37
38 #define SV(S) MAKE_STRING_VIEW(CharT, S)
39
// Formats `args` with the compile-time format string `fmt`, converted to the
// character type `CharT`, and asserts that the result equals `expected`.
//
// `CharT` is deduced from `expected`; `fmt` has to be supplied explicitly as a
// template argument of the call operator.
//
// When localization is available, a mismatch of `char` output is reported on
// std::cerr before the assertion fires, so a failure shows the format string
// together with the expected and the actual output.
//
// The call operator is deliberately not marked constexpr: it calls std::format
// and may write to std::cerr, so it can never be part of a constant expression.
auto check = []<string_literal fmt, class CharT, class... Args>(
                 std::basic_string_view<CharT> expected, const Args&... args) {
  std::basic_string<CharT> out = std::format(fmt.template sv<CharT>(), args...);
#ifndef TEST_HAS_NO_LOCALIZATION
  // Only narrow output is streamed to std::cerr. std::is_same_v is provided by
  // <type_traits>, which this file includes under the same guard as <iostream>.
  if constexpr (std::is_same_v<CharT, char>) {
    if (out != expected) {
      std::cerr << "\nFormat string   " << fmt.template sv<char>() << "\nExpected output " << expected
                << "\nActual output   " << out << '\n';
    }
  }
#endif
  assert(out == expected);
};
51
// Centres a single code point with the fill character '*' and verifies the
// padding. Code points with an estimated width of one column are centred in a
// field of 3 columns, code points with an estimated width of two columns in a
// field of 4 columns; every expected output therefore has exactly one '*' on
// each side. The "N-byte" headings group the inputs by the length of their
// UTF-8 encoding.
template <class CharT>
static void test_single_code_point_fill() {
  using view = std::basic_string_view<CharT>;

  // Field of 3 columns: used for inputs that occupy one column.
  const auto fill_to_3 = [](view expected, view input) { check.template operator()<"{:*^3}">(expected, input); };
  // Field of 4 columns: used for inputs that occupy two columns.
  const auto fill_to_4 = [](view expected, view input) { check.template operator()<"{:*^4}">(expected, input); };

  //*** 1-byte code points ***
  fill_to_3(SV("* *"), SV(" "));
  fill_to_3(SV("*~*"), SV("~"));

  //*** 2-byte code points ***
  fill_to_3(SV("*\u00a1*"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
  fill_to_3(SV("*\u07ff*"), SV("\u07ff")); // NKO TAMAN SIGN

  //*** 3-byte code points ***
  fill_to_3(SV("*\u0800*"), SV("\u0800")); // SAMARITAN LETTER ALAF
  fill_to_3(SV("*\ufffd*"), SV("\ufffd")); // REPLACEMENT CHARACTER

  // 2 column ranges: the first and the last code point of each range.
  fill_to_4(SV("*\u1100*"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
  fill_to_4(SV("*\u115f*"), SV("\u115f")); // HANGUL CHOSEONG FILLER

  fill_to_4(SV("*\u2329*"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
  fill_to_4(SV("*\u232a*"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET

  fill_to_4(SV("*\u2e80*"), SV("\u2e80")); // CJK RADICAL REPEAT
  fill_to_4(SV("*\u303e*"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR

  fill_to_4(SV("*\u3040*"), SV("\u3040")); // U+3041 HIRAGANA LETTER SMALL A
  fill_to_4(SV("*\ua4cf*"), SV("\ua4cf")); // U+A4D0 LISU LETTER BA

  fill_to_4(SV("*\uac00*"), SV("\uac00")); // <Hangul Syllable, First>
  fill_to_4(SV("*\ud7a3*"), SV("\ud7a3")); // Hangul Syllable Hih

  fill_to_4(SV("*\uf900*"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
  fill_to_4(SV("*\ufaff*"), SV("\ufaff")); // U+FB00 LATIN SMALL LIGATURE FF

  fill_to_4(SV("*\ufe10*"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
  fill_to_4(SV("*\ufe19*"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

  fill_to_4(SV("*\ufe30*"), SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  fill_to_4(SV("*\ufe6f*"), SV("\ufe6f")); // U+FE70 ARABIC FATHATAN ISOLATED FORM

  fill_to_4(SV("*\uff00*"), SV("\uff00")); // U+FF01 FULLWIDTH EXCLAMATION MARK
  fill_to_4(SV("*\uff60*"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS

  fill_to_4(SV("*\uffe0*"), SV("\uffe0")); // FULLWIDTH CENT SIGN
  fill_to_4(SV("*\uffe6*"), SV("\uffe6")); // FULLWIDTH WON SIGN

  //*** 4-byte code points ***
  fill_to_3(SV("*\U00010000*"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
  fill_to_3(SV("*\U0010FFFF*"), SV("\U0010FFFF")); // Undefined Character

  // 2 column ranges: the first and the last code point of each range.
  fill_to_4(SV("*\U0001f300*"), SV("\U0001f300")); // CYCLONE
  fill_to_4(SV("*\U0001f64f*"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
  fill_to_4(SV("*\U0001f900*"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
  fill_to_4(SV("*\U0001f9ff*"), SV("\U0001f9ff")); // NAZAR AMULET
  fill_to_4(SV("*\U00020000*"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
  fill_to_4(SV("*\U0002fffd*"), SV("\U0002fffd")); // Undefined Character
  fill_to_4(SV("*\U00030000*"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
  fill_to_4(SV("*\U0003fffd*"), SV("\U0003fffd")); // Undefined Character
}
112
// Truncates a single code point to a precision of one column and centres the
// result with '*' in a field of 3 columns:
// - a code point with an estimated width of one column still fits and is
//   written unchanged,
// - a code point with an estimated width of two columns does not fit and is
//   removed, thus the result consists of the fill character only.
// The "N-byte" headings group the inputs by the length of their UTF-8 encoding.
template <class CharT>
static void test_single_code_point_truncate() {
  using view = std::basic_string_view<CharT>;

  // Every check in this function uses the same format specification.
  const auto truncate_to_1 = [](view expected, view input) {
    check.template operator()<"{:*^3.1}">(expected, input);
  };

  //*** 1-byte code points ***
  truncate_to_1(SV("* *"), SV(" "));
  truncate_to_1(SV("*~*"), SV("~"));

  //*** 2-byte code points ***
  truncate_to_1(SV("*\u00a1*"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
  truncate_to_1(SV("*\u07ff*"), SV("\u07ff")); // NKO TAMAN SIGN

  //*** 3-byte code points ***
  truncate_to_1(SV("*\u0800*"), SV("\u0800")); // SAMARITAN LETTER ALAF
  truncate_to_1(SV("*\ufffd*"), SV("\ufffd")); // REPLACEMENT CHARACTER

  // 2 column ranges: nothing of the input survives the truncation.
  truncate_to_1(SV("***"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
  truncate_to_1(SV("***"), SV("\u115f")); // HANGUL CHOSEONG FILLER

  truncate_to_1(SV("***"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
  truncate_to_1(SV("***"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET

  truncate_to_1(SV("***"), SV("\u2e80")); // CJK RADICAL REPEAT
  truncate_to_1(SV("***"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR

  truncate_to_1(SV("***"), SV("\u3040")); // U+3041 HIRAGANA LETTER SMALL A
  truncate_to_1(SV("***"), SV("\ua4cf")); // U+A4D0 LISU LETTER BA

  truncate_to_1(SV("***"), SV("\uac00")); // <Hangul Syllable, First>
  truncate_to_1(SV("***"), SV("\ud7a3")); // Hangul Syllable Hih

  truncate_to_1(SV("***"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
  truncate_to_1(SV("***"), SV("\ufaff")); // U+FB00 LATIN SMALL LIGATURE FF

  truncate_to_1(SV("***"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
  truncate_to_1(SV("***"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

  truncate_to_1(SV("***"), SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  truncate_to_1(SV("***"), SV("\ufe6f")); // U+FE70 ARABIC FATHATAN ISOLATED FORM

  truncate_to_1(SV("***"), SV("\uff00")); // U+FF01 FULLWIDTH EXCLAMATION MARK
  truncate_to_1(SV("***"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS

  truncate_to_1(SV("***"), SV("\uffe0")); // FULLWIDTH CENT SIGN
  truncate_to_1(SV("***"), SV("\uffe6")); // FULLWIDTH WON SIGN

  //*** 4-byte code points ***
  truncate_to_1(SV("*\U00010000*"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
  truncate_to_1(SV("*\U0010FFFF*"), SV("\U0010FFFF")); // Undefined Character

  // 2 column ranges: nothing of the input survives the truncation.
  truncate_to_1(SV("***"), SV("\U0001f300")); // CYCLONE
  truncate_to_1(SV("***"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
  truncate_to_1(SV("***"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
  truncate_to_1(SV("***"), SV("\U0001f9ff")); // NAZAR AMULET
  truncate_to_1(SV("***"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
  truncate_to_1(SV("***"), SV("\U0002fffd")); // Undefined Character
  truncate_to_1(SV("***"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
  truncate_to_1(SV("***"), SV("\U0003fffd")); // Undefined Character
}
174
// The examples used in the paper P1868.
//
// The same twelve inputs are checked three times: padded only, truncated to
// one column, and truncated to two columns. The first eight inputs are expected
// to occupy one column, the last four two columns.
//
// NOTE(review): the Unicode name of U+1F921 appears to be CLOWN FACE (UNICORN
// FACE is U+1F984); the labels below are kept as they were in this test —
// confirm against the paper.
template <class CharT>
static void test_P1868() {
  using view = std::basic_string_view<CharT>;

  // One helper per format specification used below.
  const auto fill_3 = [](view expected, view input) { check.template operator()<"{:*^3}">(expected, input); };
  const auto fill_4 = [](view expected, view input) { check.template operator()<"{:*^4}">(expected, input); };
  const auto trunc_1_fill_3 = [](view expected, view input) {
    check.template operator()<"{:*^3.1}">(expected, input);
  };
  const auto trunc_2_fill_3 = [](view expected, view input) {
    check.template operator()<"{:*^3.2}">(expected, input);
  };
  const auto trunc_2_fill_4 = [](view expected, view input) {
    check.template operator()<"{:*^4.2}">(expected, input);
  };

  // Fill
  fill_3(SV("*\u0041*"), SV("\u0041"));             // { LATIN CAPITAL LETTER A }
  fill_3(SV("*\u00c1*"), SV("\u00c1"));             // { LATIN CAPITAL LETTER A WITH ACUTE }
  fill_3(SV("*\u0041\u0301*"), SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  fill_3(SV("*\u0132*"), SV("\u0132"));             // { LATIN CAPITAL LIGATURE IJ }
  fill_3(SV("*\u0394*"), SV("\u0394"));             // { GREEK CAPITAL LETTER DELTA }

  fill_3(SV("*\u0429*"), SV("\u0429"));         // { CYRILLIC CAPITAL LETTER SHCHA }
  fill_3(SV("*\u05d0*"), SV("\u05d0"));         // { HEBREW LETTER ALEF }
  fill_3(SV("*\u0634*"), SV("\u0634"));         // { ARABIC LETTER SHEEN }
  fill_4(SV("*\u3009*"), SV("\u3009"));         // { RIGHT-POINTING ANGLE BRACKET }
  fill_4(SV("*\u754c*"), SV("\u754c"));         // { CJK Unified Ideograph-754C }
  fill_4(SV("*\U0001f921*"), SV("\U0001f921")); // { UNICORN FACE }
  // { Family: Man, Woman, Girl, Boy }
  fill_4(SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
         SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466"));

  // Truncate to 1 column: 1 column grapheme clusters are kept together.
  trunc_1_fill_3(SV("*\u0041*"), SV("\u0041"));             // { LATIN CAPITAL LETTER A }
  trunc_1_fill_3(SV("*\u00c1*"), SV("\u00c1"));             // { LATIN CAPITAL LETTER A WITH ACUTE }
  trunc_1_fill_3(SV("*\u0041\u0301*"), SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  trunc_1_fill_3(SV("*\u0132*"), SV("\u0132"));             // { LATIN CAPITAL LIGATURE IJ }
  trunc_1_fill_3(SV("*\u0394*"), SV("\u0394"));             // { GREEK CAPITAL LETTER DELTA }

  trunc_1_fill_3(SV("*\u0429*"), SV("\u0429")); // { CYRILLIC CAPITAL LETTER SHCHA }
  trunc_1_fill_3(SV("*\u05d0*"), SV("\u05d0")); // { HEBREW LETTER ALEF }
  trunc_1_fill_3(SV("*\u0634*"), SV("\u0634")); // { ARABIC LETTER SHEEN }
  trunc_1_fill_3(SV("***"), SV("\u3009"));      // { RIGHT-POINTING ANGLE BRACKET }
  trunc_1_fill_3(SV("***"), SV("\u754c"));      // { CJK Unified Ideograph-754C }
  trunc_1_fill_3(SV("***"), SV("\U0001f921"));  // { UNICORN FACE }
  // { Family: Man, Woman, Girl, Boy }
  trunc_1_fill_3(SV("***"), SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466"));

  // Truncate to 2 column: 2 column grapheme clusters are kept together.
  trunc_2_fill_3(SV("*\u0041*"), SV("\u0041"));             // { LATIN CAPITAL LETTER A }
  trunc_2_fill_3(SV("*\u00c1*"), SV("\u00c1"));             // { LATIN CAPITAL LETTER A WITH ACUTE }
  trunc_2_fill_3(SV("*\u0041\u0301*"), SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  trunc_2_fill_3(SV("*\u0132*"), SV("\u0132"));             // { LATIN CAPITAL LIGATURE IJ }
  trunc_2_fill_3(SV("*\u0394*"), SV("\u0394"));             // { GREEK CAPITAL LETTER DELTA }

  trunc_2_fill_3(SV("*\u0429*"), SV("\u0429"));         // { CYRILLIC CAPITAL LETTER SHCHA }
  trunc_2_fill_3(SV("*\u05d0*"), SV("\u05d0"));         // { HEBREW LETTER ALEF }
  trunc_2_fill_3(SV("*\u0634*"), SV("\u0634"));         // { ARABIC LETTER SHEEN }
  trunc_2_fill_4(SV("*\u3009*"), SV("\u3009"));         // { RIGHT-POINTING ANGLE BRACKET }
  trunc_2_fill_4(SV("*\u754c*"), SV("\u754c"));         // { CJK Unified Ideograph-754C }
  trunc_2_fill_4(SV("*\U0001f921*"), SV("\U0001f921")); // { UNICORN FACE }
  // { Family: Man, Woman, Girl, Boy }
  trunc_2_fill_4(SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
                 SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466"));
}
235
236 #ifdef _LIBCPP_VERSION
// Tests the libc++ specific behaviour for malformed UTF-sequences. The
// Standard doesn't specify how to handle this.
//
// In every check below the field width equals the number of code units of the
// input plus two, and the expected output has exactly one fill character on
// each side. In other words: every code unit of the input, including the
// ill-formed ones, is expected to count as one column and to be copied to the
// output unchanged.
template <class CharT>
static void test_malformed_code_point() {
  // Only character types with a size of 1 are exercised.
  // NOTE(review): presumably these strings are treated as UTF-8 — confirm.
  if constexpr (sizeof(CharT) == 1) {
    // The ill-formed bytes used below, according to the UTF-8 encoding rules:
    // - 0x8f is a continuation byte, here without a preceding lead byte,
    // - 0xcf is the lead byte of a 2-byte sequence,
    // - 0xef is the lead byte of a 3-byte sequence,
    // - 0xff never occurs in well-formed UTF-8.

    // Malformed at end.
    check.template operator()<"{:*^7}">(SV("*ZZZZ\x8f*"), SV("ZZZZ\x8f"));
    check.template operator()<"{:*^7}">(SV("*ZZZZ\xcf*"), SV("ZZZZ\xcf"));
    check.template operator()<"{:*^7}">(SV("*ZZZZ\xef*"), SV("ZZZZ\xef"));
    check.template operator()<"{:*^7}">(SV("*ZZZZ\xff*"), SV("ZZZZ\xff"));

    // Malformed in middle, no continuation
    // The ill-formed byte is followed by one to four 'Z' characters.
    check.template operator()<"{:*^8}">(SV("*ZZZZ\x8fZ*"), SV("ZZZZ\x8fZ"));
    check.template operator()<"{:*^8}">(SV("*ZZZZ\xcfZ*"), SV("ZZZZ\xcfZ"));
    check.template operator()<"{:*^8}">(SV("*ZZZZ\xefZ*"), SV("ZZZZ\xefZ"));
    check.template operator()<"{:*^8}">(SV("*ZZZZ\xffZ*"), SV("ZZZZ\xffZ"));

    check.template operator()<"{:*^9}">(SV("*ZZZZ\x8fZZ*"), SV("ZZZZ\x8fZZ"));
    check.template operator()<"{:*^9}">(SV("*ZZZZ\xcfZZ*"), SV("ZZZZ\xcfZZ"));
    check.template operator()<"{:*^9}">(SV("*ZZZZ\xefZZ*"), SV("ZZZZ\xefZZ"));
    check.template operator()<"{:*^9}">(SV("*ZZZZ\xffZZ*"), SV("ZZZZ\xffZZ"));

    check.template operator()<"{:*^10}">(SV("*ZZZZ\x8fZZZ*"), SV("ZZZZ\x8fZZZ"));
    check.template operator()<"{:*^10}">(SV("*ZZZZ\xcfZZZ*"), SV("ZZZZ\xcfZZZ"));
    check.template operator()<"{:*^10}">(SV("*ZZZZ\xefZZZ*"), SV("ZZZZ\xefZZZ"));
    check.template operator()<"{:*^10}">(SV("*ZZZZ\xffZZZ*"), SV("ZZZZ\xffZZZ"));

    check.template operator()<"{:*^11}">(SV("*ZZZZ\x8fZZZZ*"), SV("ZZZZ\x8fZZZZ"));
    check.template operator()<"{:*^11}">(SV("*ZZZZ\xcfZZZZ*"), SV("ZZZZ\xcfZZZZ"));
    check.template operator()<"{:*^11}">(SV("*ZZZZ\xefZZZZ*"), SV("ZZZZ\xefZZZZ"));
    check.template operator()<"{:*^11}">(SV("*ZZZZ\xffZZZZ*"), SV("ZZZZ\xffZZZZ"));

    // Premature end.
    // The bytes following the first ill-formed byte are 0xf5, which lies
    // outside the continuation byte range 0x80-0xbf.
    check.template operator()<"{:*^8}">(SV("*ZZZZ\xef\xf5*"), SV("ZZZZ\xef\xf5"));
    check.template operator()<"{:*^12}">(SV("*ZZZZ\xef\xf5ZZZZ*"), SV("ZZZZ\xef\xf5ZZZZ"));
    check.template operator()<"{:*^9}">(SV("*ZZZZ\xff\xf5\xf5*"), SV("ZZZZ\xff\xf5\xf5"));
    check.template operator()<"{:*^13}">(SV("*ZZZZ\xff\xf5\xf5ZZZZ*"), SV("ZZZZ\xff\xf5\xf5ZZZZ"));

  } else if constexpr (sizeof(CharT) == 2) {
    // TODO FMT Add these tests.
  }
  // UTF-32 doesn't combine characters, thus no corruption tests.
}
280 #endif
281
282 template <class CharT>
test()283 static void test() {
284 test_single_code_point_fill<CharT>();
285 test_single_code_point_truncate<CharT>();
286 test_P1868<CharT>();
287
288 #ifdef _LIBCPP_VERSION
289 test_malformed_code_point<CharT>();
290 #endif
291 }
292
main(int,char **)293 int main(int, char**) {
294 test<char>();
295
296 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
297 test<wchar_t>();
298 #endif
299
300 return 0;
301 }
302