//===----------------------------------------------------------------------===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: libcpp-has-no-incomplete-format

// This version runs the test when the platform has Unicode support.
// UNSUPPORTED: libcpp-has-no-unicode

// <format>

// Tests the Unicode width support of the standard format specifiers.
// It tests [format.string.std]/8 - 11:
// - Properly determining the estimated width of a Unicode string.
// - Properly truncating to the wanted maximum width.

// More specific extended grapheme cluster boundary rules are tested in
// test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp
// which is based on test data provided by the Unicode Consortium.

#include <format>
#include <cassert>
#include <concepts>
#include <string>
#include <string_view>
#include <vector>

#include "make_string.h"
#include "test_macros.h"
#include "string_literal.h"

#ifndef TEST_HAS_NO_LOCALIZATION
#  include <iostream>
#  include <type_traits>
#endif

// Builds a string view of the character type under test from a narrow literal.
#define SV(S) MAKE_STRING_VIEW(CharT, S)

// Formats `args` with the compile-time format string `fmt` and asserts that
// the result equals `expected`.
//
// `fmt` is passed as an explicit template argument; `CharT` is deduced from
// `expected`. For `char` (and when localization is available) a mismatch is
// written to std::cerr before the assertion fires, so the failing format
// string and both outputs are visible in the test log.
auto check = []<string_literal fmt, class CharT, class... Args>(
                 std::basic_string_view<CharT> expected, const Args&... args) constexpr {
  std::basic_string<CharT> out = std::format(fmt.template sv<CharT>(), args...);
#ifndef TEST_HAS_NO_LOCALIZATION
  if constexpr (std::same_as<CharT, char>) {
    if (out != expected) {
      std::cerr << "\nFormat string " << fmt.template sv<char>() << "\nExpected output " << expected
                << "\nActual output " << out << '\n';
    }
  }
#endif
  assert(out == expected);
};

// Centers a single code point between '*' fill characters.
//
// Code points with an estimated width of one column use a field width of 3,
// code points with an estimated width of two columns use a field width of 4;
// in both cases exactly one fill character is expected on each side.
//
// The tested values are the first and last code point of each range. Where a
// comment starts with "U+XXXX" it names a different code point than the one
// being tested (the tested boundary value itself has no character name).
template <class CharT>
static void test_single_code_point_fill() {
  //*** 1-byte code points ***
  check.template operator()<"{:*^3}">(SV("* *"), SV(" "));
  check.template operator()<"{:*^3}">(SV("*~*"), SV("~"));

  //*** 2-byte code points ***
  check.template operator()<"{:*^3}">(SV("*\u00a1*"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
  check.template operator()<"{:*^3}">(SV("*\u07ff*"), SV("\u07ff")); // NKO TAMAN SIGN

  //*** 3-byte code points ***
  check.template operator()<"{:*^3}">(SV("*\u0800*"), SV("\u0800")); // SAMARITAN LETTER ALAF
  check.template operator()<"{:*^3}">(SV("*\ufffd*"), SV("\ufffd")); // REPLACEMENT CHARACTER

  // 2 column ranges
  check.template operator()<"{:*^4}">(SV("*\u1100*"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
  check.template operator()<"{:*^4}">(SV("*\u115f*"), SV("\u115f")); // HANGUL CHOSEONG FILLER

  check.template operator()<"{:*^4}">(SV("*\u2329*"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
  check.template operator()<"{:*^4}">(SV("*\u232a*"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET

  check.template operator()<"{:*^4}">(SV("*\u2e80*"), SV("\u2e80")); // CJK RADICAL REPEAT
  check.template operator()<"{:*^4}">(SV("*\u303e*"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR

  check.template operator()<"{:*^4}">(SV("*\u3040*"), SV("\u3040")); // U+3041 HIRAGANA LETTER SMALL A
  check.template operator()<"{:*^4}">(SV("*\ua4cf*"), SV("\ua4cf")); // U+A4D0 LISU LETTER BA

  check.template operator()<"{:*^4}">(SV("*\uac00*"), SV("\uac00")); // <Hangul Syllable, First>
  check.template operator()<"{:*^4}">(SV("*\ud7a3*"), SV("\ud7a3")); // Hangul Syllable Hih

  check.template operator()<"{:*^4}">(SV("*\uf900*"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
  check.template operator()<"{:*^4}">(SV("*\ufaff*"), SV("\ufaff")); // U+FB00 LATIN SMALL LIGATURE FF

  check.template operator()<"{:*^4}">(SV("*\ufe10*"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
  check.template operator()<"{:*^4}">(
      SV("*\ufe19*"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

  check.template operator()<"{:*^4}">(SV("*\ufe30*"), SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  check.template operator()<"{:*^4}">(SV("*\ufe6f*"), SV("\ufe6f")); // U+FE70 ARABIC FATHATAN ISOLATED FORM

  check.template operator()<"{:*^4}">(SV("*\uff00*"), SV("\uff00")); // U+FF01 FULLWIDTH EXCLAMATION MARK
  check.template operator()<"{:*^4}">(SV("*\uff60*"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS

  check.template operator()<"{:*^4}">(SV("*\uffe0*"), SV("\uffe0")); // FULLWIDTH CENT SIGN
  check.template operator()<"{:*^4}">(SV("*\uffe6*"), SV("\uffe6")); // FULLWIDTH WON SIGN

  //*** 4-byte code points ***
  check.template operator()<"{:*^3}">(SV("*\U00010000*"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
  check.template operator()<"{:*^3}">(SV("*\U0010FFFF*"), SV("\U0010FFFF")); // Undefined Character

  // 2 column ranges
  check.template operator()<"{:*^4}">(SV("*\U0001f300*"), SV("\U0001f300")); // CYCLONE
  check.template operator()<"{:*^4}">(SV("*\U0001f64f*"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
  check.template operator()<"{:*^4}">(SV("*\U0001f900*"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
  check.template operator()<"{:*^4}">(SV("*\U0001f9ff*"), SV("\U0001f9ff")); // NAZAR AMULET
  check.template operator()<"{:*^4}">(SV("*\U00020000*"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
  check.template operator()<"{:*^4}">(SV("*\U0002fffd*"), SV("\U0002fffd")); // Undefined Character
  check.template operator()<"{:*^4}">(SV("*\U00030000*"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
  check.template operator()<"{:*^4}">(SV("*\U0003fffd*"), SV("\U0003fffd")); // Undefined Character
}

// Same code points as test_single_code_point_fill, but with a precision of 1
// and a field width of 3.
//
// One column output is unaffected.
// Two column output is removed, thus the result is only the fill character.
template <class CharT>
static void test_single_code_point_truncate() {
  //*** 1-byte code points ***
  check.template operator()<"{:*^3.1}">(SV("* *"), SV(" "));
  check.template operator()<"{:*^3.1}">(SV("*~*"), SV("~"));

  //*** 2-byte code points ***
  check.template operator()<"{:*^3.1}">(SV("*\u00a1*"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
  check.template operator()<"{:*^3.1}">(SV("*\u07ff*"), SV("\u07ff")); // NKO TAMAN SIGN

  //*** 3-byte code points ***
  check.template operator()<"{:*^3.1}">(SV("*\u0800*"), SV("\u0800")); // SAMARITAN LETTER ALAF
  check.template operator()<"{:*^3.1}">(SV("*\ufffd*"), SV("\ufffd")); // REPLACEMENT CHARACTER

  // 2 column ranges
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\u115f")); // HANGUL CHOSEONG FILLER

  check.template operator()<"{:*^3.1}">(SV("***"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET

  check.template operator()<"{:*^3.1}">(SV("***"), SV("\u2e80")); // CJK RADICAL REPEAT
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR

  check.template operator()<"{:*^3.1}">(SV("***"), SV("\u3040")); // U+3041 HIRAGANA LETTER SMALL A
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\ua4cf")); // U+A4D0 LISU LETTER BA

  check.template operator()<"{:*^3.1}">(SV("***"), SV("\uac00")); // <Hangul Syllable, First>
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\ud7a3")); // Hangul Syllable Hih

  check.template operator()<"{:*^3.1}">(SV("***"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\ufaff")); // U+FB00 LATIN SMALL LIGATURE FF

  check.template operator()<"{:*^3.1}">(SV("***"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

  check.template operator()<"{:*^3.1}">(SV("***"), SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\ufe6f")); // U+FE70 ARABIC FATHATAN ISOLATED FORM

  check.template operator()<"{:*^3.1}">(SV("***"), SV("\uff00")); // U+FF01 FULLWIDTH EXCLAMATION MARK
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS

  check.template operator()<"{:*^3.1}">(SV("***"), SV("\uffe0")); // FULLWIDTH CENT SIGN
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\uffe6")); // FULLWIDTH WON SIGN

  //*** 4-byte code points ***
  check.template operator()<"{:*^3.1}">(SV("*\U00010000*"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
  check.template operator()<"{:*^3.1}">(SV("*\U0010FFFF*"), SV("\U0010FFFF")); // Undefined Character

  // 2 column ranges
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\U0001f300")); // CYCLONE
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\U0001f9ff")); // NAZAR AMULET
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\U0002fffd")); // Undefined Character
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\U0003fffd")); // Undefined Character
}

// The examples used in the paper P1868, exercised for fill and for truncation
// to one and two columns.
template <class CharT>
static void test_P1868() {
  // Fill
  check.template operator()<"{:*^3}">(SV("*\u0041*"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
  check.template operator()<"{:*^3}">(SV("*\u00c1*"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
  check.template operator()<"{:*^3}">(
      SV("*\u0041\u0301*"),
      SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  check.template operator()<"{:*^3}">(SV("*\u0132*"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
  check.template operator()<"{:*^3}">(SV("*\u0394*"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }

  check.template operator()<"{:*^3}">(SV("*\u0429*"), SV("\u0429"));         // { CYRILLIC CAPITAL LETTER SHCHA }
  check.template operator()<"{:*^3}">(SV("*\u05d0*"), SV("\u05d0"));         // { HEBREW LETTER ALEF }
  check.template operator()<"{:*^3}">(SV("*\u0634*"), SV("\u0634"));         // { ARABIC LETTER SHEEN }
  check.template operator()<"{:*^4}">(SV("*\u3009*"), SV("\u3009"));         // { RIGHT ANGLE BRACKET }
  check.template operator()<"{:*^4}">(SV("*\u754c*"), SV("\u754c"));         // { CJK Unified Ideograph-754C }
  check.template operator()<"{:*^4}">(SV("*\U0001f921*"), SV("\U0001f921")); // { CLOWN FACE }
  check.template operator()<"{:*^4}">(
      SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
      SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }

  // Truncate to 1 column: 1 column grapheme clusters are kept together.
  // 2 column grapheme clusters do not fit and are dropped entirely.
  check.template operator()<"{:*^3.1}">(SV("*\u0041*"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
  check.template operator()<"{:*^3.1}">(SV("*\u00c1*"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
  check.template operator()<"{:*^3.1}">(
      SV("*\u0041\u0301*"),
      SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  check.template operator()<"{:*^3.1}">(SV("*\u0132*"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
  check.template operator()<"{:*^3.1}">(SV("*\u0394*"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }

  check.template operator()<"{:*^3.1}">(SV("*\u0429*"), SV("\u0429")); // { CYRILLIC CAPITAL LETTER SHCHA }
  check.template operator()<"{:*^3.1}">(SV("*\u05d0*"), SV("\u05d0")); // { HEBREW LETTER ALEF }
  check.template operator()<"{:*^3.1}">(SV("*\u0634*"), SV("\u0634")); // { ARABIC LETTER SHEEN }
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\u3009"));      // { RIGHT ANGLE BRACKET }
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\u754c"));      // { CJK Unified Ideograph-754C }
  check.template operator()<"{:*^3.1}">(SV("***"), SV("\U0001f921"));  // { CLOWN FACE }
  check.template operator()<"{:*^3.1}">(
      SV("***"),
      SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }

  // Truncate to 2 column: 2 column grapheme clusters are kept together.
  check.template operator()<"{:*^3.2}">(SV("*\u0041*"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
  check.template operator()<"{:*^3.2}">(SV("*\u00c1*"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
  check.template operator()<"{:*^3.2}">(
      SV("*\u0041\u0301*"),
      SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
  check.template operator()<"{:*^3.2}">(SV("*\u0132*"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
  check.template operator()<"{:*^3.2}">(SV("*\u0394*"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }

  check.template operator()<"{:*^3.2}">(SV("*\u0429*"), SV("\u0429"));         // { CYRILLIC CAPITAL LETTER SHCHA }
  check.template operator()<"{:*^3.2}">(SV("*\u05d0*"), SV("\u05d0"));         // { HEBREW LETTER ALEF }
  check.template operator()<"{:*^3.2}">(SV("*\u0634*"), SV("\u0634"));         // { ARABIC LETTER SHEEN }
  check.template operator()<"{:*^4.2}">(SV("*\u3009*"), SV("\u3009"));         // { RIGHT ANGLE BRACKET }
  check.template operator()<"{:*^4.2}">(SV("*\u754c*"), SV("\u754c"));         // { CJK Unified Ideograph-754C }
  check.template operator()<"{:*^4.2}">(SV("*\U0001f921*"), SV("\U0001f921")); // { CLOWN FACE }
  check.template operator()<"{:*^4.2}">(
      SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
      SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }
}

#ifdef _LIBCPP_VERSION
// Tests the libc++ specific behaviour for malformed UTF-sequences. The
// Standard doesn't specify how to handle this.
//
// Every expectation below has exactly one fill character on each side of the
// unchanged input, with the field width equal to the number of code units in
// the input plus two.
template <class CharT>
static void test_malformed_code_point() {
  if constexpr (sizeof(CharT) == 1) {
    // Malformed at end.
    check.template operator()<"{:*^7}">(SV("*ZZZZ\x8f*"), SV("ZZZZ\x8f"));
    check.template operator()<"{:*^7}">(SV("*ZZZZ\xcf*"), SV("ZZZZ\xcf"));
    check.template operator()<"{:*^7}">(SV("*ZZZZ\xef*"), SV("ZZZZ\xef"));
    check.template operator()<"{:*^7}">(SV("*ZZZZ\xff*"), SV("ZZZZ\xff"));

    // Malformed in middle, no continuation
    check.template operator()<"{:*^8}">(SV("*ZZZZ\x8fZ*"), SV("ZZZZ\x8fZ"));
    check.template operator()<"{:*^8}">(SV("*ZZZZ\xcfZ*"), SV("ZZZZ\xcfZ"));
    check.template operator()<"{:*^8}">(SV("*ZZZZ\xefZ*"), SV("ZZZZ\xefZ"));
    check.template operator()<"{:*^8}">(SV("*ZZZZ\xffZ*"), SV("ZZZZ\xffZ"));

    check.template operator()<"{:*^9}">(SV("*ZZZZ\x8fZZ*"), SV("ZZZZ\x8fZZ"));
    check.template operator()<"{:*^9}">(SV("*ZZZZ\xcfZZ*"), SV("ZZZZ\xcfZZ"));
    check.template operator()<"{:*^9}">(SV("*ZZZZ\xefZZ*"), SV("ZZZZ\xefZZ"));
    check.template operator()<"{:*^9}">(SV("*ZZZZ\xffZZ*"), SV("ZZZZ\xffZZ"));

    check.template operator()<"{:*^10}">(SV("*ZZZZ\x8fZZZ*"), SV("ZZZZ\x8fZZZ"));
    check.template operator()<"{:*^10}">(SV("*ZZZZ\xcfZZZ*"), SV("ZZZZ\xcfZZZ"));
    check.template operator()<"{:*^10}">(SV("*ZZZZ\xefZZZ*"), SV("ZZZZ\xefZZZ"));
    check.template operator()<"{:*^10}">(SV("*ZZZZ\xffZZZ*"), SV("ZZZZ\xffZZZ"));

    check.template operator()<"{:*^11}">(SV("*ZZZZ\x8fZZZZ*"), SV("ZZZZ\x8fZZZZ"));
    check.template operator()<"{:*^11}">(SV("*ZZZZ\xcfZZZZ*"), SV("ZZZZ\xcfZZZZ"));
    check.template operator()<"{:*^11}">(SV("*ZZZZ\xefZZZZ*"), SV("ZZZZ\xefZZZZ"));
    check.template operator()<"{:*^11}">(SV("*ZZZZ\xffZZZZ*"), SV("ZZZZ\xffZZZZ"));

    // Premature end.
    check.template operator()<"{:*^8}">(SV("*ZZZZ\xef\xf5*"), SV("ZZZZ\xef\xf5"));
    check.template operator()<"{:*^12}">(SV("*ZZZZ\xef\xf5ZZZZ*"), SV("ZZZZ\xef\xf5ZZZZ"));
    check.template operator()<"{:*^9}">(SV("*ZZZZ\xff\xf5\xf5*"), SV("ZZZZ\xff\xf5\xf5"));
    check.template operator()<"{:*^13}">(SV("*ZZZZ\xff\xf5\xf5ZZZZ*"), SV("ZZZZ\xff\xf5\xf5ZZZZ"));

  } else if constexpr (sizeof(CharT) == 2) {
    // TODO FMT Add these tests.
  }
  // UTF-32 doesn't combine characters, thus no corruption tests.
}
#endif

// Runs every test group for one character type. The malformed-input group is
// only run against libc++, since its expectations are implementation specific.
template <class CharT>
static void test() {
  test_single_code_point_fill<CharT>();
  test_single_code_point_truncate<CharT>();
  test_P1868<CharT>();

#ifdef _LIBCPP_VERSION
  test_malformed_code_point<CharT>();
#endif
}

int main(int, char**) {
  test<char>();

#ifndef TEST_HAS_NO_WIDE_CHARACTERS
  test<wchar_t>();
#endif

  return 0;
}