1 // -*- C++ -*- 2 //===----------------------------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 12 13 /// \file Contains the std-format-spec parser. 14 /// 15 /// Most of the code can be reused in the chrono-format-spec. 16 /// This header has some support for the chrono-format-spec since it doesn't 17 /// affect the std-format-spec. 18 19 #include <__algorithm/find_if.h> 20 #include <__algorithm/min.h> 21 #include <__assert> 22 #include <__config> 23 #include <__debug> 24 #include <__format/format_arg.h> 25 #include <__format/format_error.h> 26 #include <__format/format_parse_context.h> 27 #include <__format/format_string.h> 28 #include <__variant/monostate.h> 29 #include <bit> 30 #include <concepts> 31 #include <cstdint> 32 #include <type_traits> 33 34 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 35 # pragma GCC system_header 36 #endif 37 38 _LIBCPP_PUSH_MACROS 39 #include <__undef_macros> 40 41 _LIBCPP_BEGIN_NAMESPACE_STD 42 43 #if _LIBCPP_STD_VER > 17 44 45 namespace __format_spec { 46 47 template <class _CharT> 48 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result< _CharT> 49 __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) { 50 // This function is a wrapper to call the real parser. But it does the 51 // validation for the pre-conditions and post-conditions. 
52 if (__begin == __end) 53 __throw_format_error("End of input while parsing format-spec arg-id"); 54 55 __format::__parse_number_result __r = 56 __format::__parse_arg_id(__begin, __end, __parse_ctx); 57 58 if (__r.__ptr == __end || *__r.__ptr != _CharT('}')) 59 __throw_format_error("Invalid arg-id"); 60 61 ++__r.__ptr; 62 return __r; 63 } 64 65 template <class _Context> 66 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t 67 __substitute_arg_id(basic_format_arg<_Context> __format_arg) { 68 return visit_format_arg( 69 [](auto __arg) -> uint32_t { 70 using _Type = decltype(__arg); 71 if constexpr (integral<_Type>) { 72 if constexpr (signed_integral<_Type>) { 73 if (__arg < 0) 74 __throw_format_error("A format-spec arg-id replacement shouldn't " 75 "have a negative value"); 76 } 77 78 using _CT = common_type_t<_Type, decltype(__format::__number_max)>; 79 if (static_cast<_CT>(__arg) > 80 static_cast<_CT>(__format::__number_max)) 81 __throw_format_error("A format-spec arg-id replacement exceeds " 82 "the maximum supported value"); 83 84 return __arg; 85 } else if constexpr (same_as<_Type, monostate>) 86 __throw_format_error("Argument index out of bounds"); 87 else 88 __throw_format_error("A format-spec arg-id replacement argument " 89 "isn't an integral type"); 90 }, 91 __format_arg); 92 } 93 94 /** Helper struct returned from @ref __get_string_alignment. */ 95 template <class _CharT> 96 struct _LIBCPP_TEMPLATE_VIS __string_alignment { 97 /** Points beyond the last character to write to the output. */ 98 const _CharT* __last; 99 /** 100 * The estimated number of columns in the output or 0. 101 * 102 * Only when the output needs to be aligned it's required to know the exact 103 * number of columns in the output. So if the formatted output has only a 104 * minimum width the exact size isn't important. It's only important to know 105 * the minimum has been reached. The minimum width is the width specified in 106 * the format-spec. 
107 * 108 * For example in this code @code std::format("{:10}", MyString); @endcode 109 * the width estimation can stop once the algorithm has determined the output 110 * width is 10 columns. 111 * 112 * So if: 113 * * @ref __align == @c true the @ref __size is the estimated number of 114 * columns required. 115 * * @ref __align == @c false the @ref __size is the estimated number of 116 * columns required or 0 when the estimation algorithm stopped prematurely. 117 */ 118 ptrdiff_t __size; 119 /** 120 * Does the output need to be aligned. 121 * 122 * When alignment is needed the output algorithm needs to add the proper 123 * padding. Else the output algorithm just needs to copy the input up to 124 * @ref __last. 125 */ 126 bool __align; 127 }; 128 129 #ifndef _LIBCPP_HAS_NO_UNICODE 130 namespace __detail { 131 132 /** 133 * Unicode column width estimates. 134 * 135 * Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32. 136 * Depending on format the relation between the number of code units stored and 137 * the number of output columns differs. The first relation is the number of 138 * code units forming a code point. (The text assumes the code units are 139 * unsigned.) 140 * - UTF-8 The number of code units is between one and four. The first 127 141 * Unicode code points match the ASCII character set. When the highest bit is 142 * set it means the code point has more than one code unit. 143 * - UTF-16: The number of code units is between 1 and 2. When the first 144 * code unit is in the range [0xd800,0xdfff) it means the code point uses two 145 * code units. 146 * - UTF-32: The number of code units is always one. 147 * 148 * The code point to the number of columns isn't well defined. The code uses the 149 * estimations defined in [format.string.std]/11. This list might change in the 150 * future. 151 * 152 * The algorithm of @ref __get_string_alignment uses two different scanners: 153 * - The simple scanner @ref __estimate_column_width_fast. 
This scanner assumes 154 * 1 code unit is 1 column. This scanner stops when it can't be sure the 155 * assumption is valid: 156 * - UTF-8 when the code point is encoded in more than 1 code unit. 157 * - UTF-16 and UTF-32 when the first multi-column code point is encountered. 158 * (The code unit's value is lower than 0xd800 so the 2 code unit encoding 159 * is irrelevant for this scanner.) 160 * Due to these assumptions the scanner is faster than the full scanner. It 161 * can process all text only containing ASCII. For UTF-16/32 it can process 162 * most (all?) European languages. (Note the set it can process might be 163 * reduced in the future, due to updates in the scanning rules.) 164 * - The full scanner @ref __estimate_column_width. This scanner, if needed, 165 * converts multiple code units into one code point then converts the code 166 * point to a column width. 167 * 168 * See also: 169 * - [format.string.general]/11 170 * - https://en.wikipedia.org/wiki/UTF-8#Encoding 171 * - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF 172 */ 173 174 /** 175 * The first 2 column code point. 176 * 177 * This is the point where the fast UTF-16/32 scanner needs to stop processing. 178 */ 179 inline constexpr uint32_t __two_column_code_point = 0x1100; 180 181 /** Helper concept for an UTF-8 character type. */ 182 template <class _CharT> 183 concept __utf8_character = same_as<_CharT, char> || same_as<_CharT, char8_t>; 184 185 /** Helper concept for an UTF-16 character type. */ 186 template <class _CharT> 187 concept __utf16_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) || same_as<_CharT, char16_t>; 188 189 /** Helper concept for an UTF-32 character type. */ 190 template <class _CharT> 191 concept __utf32_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) || same_as<_CharT, char32_t>; 192 193 /** Helper concept for an UTF-16 or UTF-32 character type. 
*/ 194 template <class _CharT> 195 concept __utf16_or_32_character = __utf16_character<_CharT> || __utf32_character<_CharT>; 196 197 /** 198 * Converts a code point to the column width. 199 * 200 * The estimations are conforming to [format.string.general]/11 201 * 202 * This version expects a value less than 0x1'0000, which is a 3-byte UTF-8 203 * character. 204 */ 205 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_3(uint32_t __c) noexcept { 206 _LIBCPP_ASSERT(__c < 0x10000, 207 "Use __column_width_4 or __column_width for larger values"); 208 209 // clang-format off 210 return 1 + (__c >= 0x1100 && (__c <= 0x115f || 211 (__c >= 0x2329 && (__c <= 0x232a || 212 (__c >= 0x2e80 && (__c <= 0x303e || 213 (__c >= 0x3040 && (__c <= 0xa4cf || 214 (__c >= 0xac00 && (__c <= 0xd7a3 || 215 (__c >= 0xf900 && (__c <= 0xfaff || 216 (__c >= 0xfe10 && (__c <= 0xfe19 || 217 (__c >= 0xfe30 && (__c <= 0xfe6f || 218 (__c >= 0xff00 && (__c <= 0xff60 || 219 (__c >= 0xffe0 && (__c <= 0xffe6 220 )))))))))))))))))))); 221 // clang-format on 222 } 223 224 /** 225 * @overload 226 * 227 * This version expects a value greater than or equal to 0x1'0000, which is a 228 * 4-byte UTF-8 character. 229 */ 230 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_4(uint32_t __c) noexcept { 231 _LIBCPP_ASSERT(__c >= 0x10000, 232 "Use __column_width_3 or __column_width for smaller values"); 233 234 // clang-format off 235 return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f || 236 (__c >= 0x1'f900 && (__c <= 0x1'f9ff || 237 (__c >= 0x2'0000 && (__c <= 0x2'fffd || 238 (__c >= 0x3'0000 && (__c <= 0x3'fffd 239 )))))))); 240 // clang-format on 241 } 242 243 /** 244 * @overload 245 * 246 * The general case, accepting all values. 
247 */ 248 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width(uint32_t __c) noexcept { 249 if (__c < 0x10000) 250 return __column_width_3(__c); 251 252 return __column_width_4(__c); 253 } 254 255 /** 256 * Estimate the column width for the UTF-8 sequence using the fast algorithm. 257 */ 258 template <__utf8_character _CharT> 259 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* 260 __estimate_column_width_fast(const _CharT* __first, 261 const _CharT* __last) noexcept { 262 return _VSTD::find_if(__first, __last, 263 [](unsigned char __c) { return __c & 0x80; }); 264 } 265 266 /** 267 * @overload 268 * 269 * The implementation for UTF-16/32. 270 */ 271 template <__utf16_or_32_character _CharT> 272 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* 273 __estimate_column_width_fast(const _CharT* __first, 274 const _CharT* __last) noexcept { 275 return _VSTD::find_if(__first, __last, 276 [](uint32_t __c) { return __c >= 0x1100; }); 277 } 278 279 template <class _CharT> 280 struct _LIBCPP_TEMPLATE_VIS __column_width_result { 281 /** The number of output columns. */ 282 size_t __width; 283 /** 284 * The last parsed element. 285 * 286 * This limits the original output to fit in the wanted number of columns. 287 */ 288 const _CharT* __ptr; 289 }; 290 291 /** 292 * Small helper to determine the width of malformed Unicode. 293 * 294 * @note This function's only needed for UTF-8. During scanning UTF-8 there 295 * are multiple place where it can be detected that the Unicode is malformed. 296 * UTF-16 only requires 1 test and UTF-32 requires no testing. 
297 */ 298 template <__utf8_character _CharT> 299 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> 300 __estimate_column_width_malformed(const _CharT* __first, const _CharT* __last, 301 size_t __maximum, size_t __result) noexcept { 302 size_t __size = __last - __first; 303 size_t __n = _VSTD::min(__size, __maximum); 304 return {__result + __n, __first + __n}; 305 } 306 307 /** 308 * Determines the number of output columns needed to render the input. 309 * 310 * @note When the scanner encounters malformed Unicode it acts as-if every code 311 * unit at the end of the input is one output column. It's expected the output 312 * terminal will replace these malformed code units with a one column 313 * replacement characters. 314 * 315 * @param __first Points to the first element of the input range. 316 * @param __last Points beyond the last element of the input range. 317 * @param __maximum The maximum number of output columns. The returned number 318 * of estimated output columns will not exceed this value. 319 */ 320 template <__utf8_character _CharT> 321 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> 322 __estimate_column_width(const _CharT* __first, const _CharT* __last, 323 size_t __maximum) noexcept { 324 size_t __result = 0; 325 326 while (__first != __last) { 327 // Based on the number of leading 1 bits the number of code units in the 328 // code point can be determined. See 329 // https://en.wikipedia.org/wiki/UTF-8#Encoding 330 switch (_VSTD::countl_one(static_cast<unsigned char>(*__first))) { 331 case 0: // 1-code unit encoding: all 1 column 332 ++__result; 333 ++__first; 334 break; 335 336 case 2: // 2-code unit encoding: all 1 column 337 // Malformed Unicode. 338 if (__last - __first < 2) [[unlikely]] 339 return __estimate_column_width_malformed(__first, __last, __maximum, 340 __result); 341 __first += 2; 342 ++__result; 343 break; 344 345 case 3: // 3-code unit encoding: either 1 or 2 columns 346 // Malformed Unicode. 
347 if (__last - __first < 3) [[unlikely]] 348 return __estimate_column_width_malformed(__first, __last, __maximum, 349 __result); 350 { 351 uint32_t __c = static_cast<unsigned char>(*__first++) & 0x0f; 352 __c <<= 6; 353 __c |= static_cast<unsigned char>(*__first++) & 0x3f; 354 __c <<= 6; 355 __c |= static_cast<unsigned char>(*__first++) & 0x3f; 356 __result += __column_width_3(__c); 357 if (__result > __maximum) 358 return {__result - 2, __first - 3}; 359 } 360 break; 361 case 4: // 4-code unit encoding: either 1 or 2 columns 362 // Malformed Unicode. 363 if (__last - __first < 4) [[unlikely]] 364 return __estimate_column_width_malformed(__first, __last, __maximum, 365 __result); 366 { 367 uint32_t __c = static_cast<unsigned char>(*__first++) & 0x07; 368 __c <<= 6; 369 __c |= static_cast<unsigned char>(*__first++) & 0x3f; 370 __c <<= 6; 371 __c |= static_cast<unsigned char>(*__first++) & 0x3f; 372 __c <<= 6; 373 __c |= static_cast<unsigned char>(*__first++) & 0x3f; 374 __result += __column_width_4(__c); 375 if (__result > __maximum) 376 return {__result - 2, __first - 4}; 377 } 378 break; 379 default: 380 // Malformed Unicode. 381 return __estimate_column_width_malformed(__first, __last, __maximum, 382 __result); 383 } 384 385 if (__result >= __maximum) 386 return {__result, __first}; 387 } 388 return {__result, __first}; 389 } 390 391 template <__utf16_character _CharT> 392 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> 393 __estimate_column_width(const _CharT* __first, const _CharT* __last, 394 size_t __maximum) noexcept { 395 size_t __result = 0; 396 397 while (__first != __last) { 398 uint32_t __c = *__first; 399 // Is the code unit part of a surrogate pair? See 400 // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF 401 if (__c >= 0xd800 && __c <= 0xDfff) { 402 // Malformed Unicode. 
403 if (__last - __first < 2) [[unlikely]] 404 return {__result + 1, __first + 1}; 405 406 __c -= 0xd800; 407 __c <<= 10; 408 __c += (*(__first + 1) - 0xdc00); 409 __c += 0x10000; 410 411 __result += __column_width_4(__c); 412 if (__result > __maximum) 413 return {__result - 2, __first}; 414 __first += 2; 415 } else { 416 __result += __column_width_3(__c); 417 if (__result > __maximum) 418 return {__result - 2, __first}; 419 ++__first; 420 } 421 422 if (__result >= __maximum) 423 return {__result, __first}; 424 } 425 426 return {__result, __first}; 427 } 428 429 template <__utf32_character _CharT> 430 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> 431 __estimate_column_width(const _CharT* __first, const _CharT* __last, 432 size_t __maximum) noexcept { 433 size_t __result = 0; 434 435 while (__first != __last) { 436 uint32_t __c = *__first; 437 __result += __column_width(__c); 438 439 if (__result > __maximum) 440 return {__result - 2, __first}; 441 442 ++__first; 443 if (__result >= __maximum) 444 return {__result, __first}; 445 } 446 447 return {__result, __first}; 448 } 449 450 } // namespace __detail 451 452 template <class _CharT> 453 _LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT> 454 __get_string_alignment(const _CharT* __first, const _CharT* __last, 455 ptrdiff_t __width, ptrdiff_t __precision) noexcept { 456 _LIBCPP_ASSERT(__width != 0 || __precision != -1, 457 "The function has no effect and shouldn't be used"); 458 459 // TODO FMT There might be more optimizations possible: 460 // If __precision == __format::__number_max and the encoding is: 461 // * UTF-8 : 4 * (__last - __first) >= __width 462 // * UTF-16 : 2 * (__last - __first) >= __width 463 // * UTF-32 : (__last - __first) >= __width 464 // In these cases it's certain the output is at least the requested width. 465 // It's unknown how often this happens in practice. For now the improvement 466 // isn't implemented. 
467 468 /* 469 * First assume there are no special Unicode code units in the input. 470 * - Apply the precision (this may reduce the size of the input). When 471 * __precison == -1 this step is omitted. 472 * - Scan for special code units in the input. 473 * If our assumption was correct the __pos will be at the end of the input. 474 */ 475 const ptrdiff_t __length = __last - __first; 476 const _CharT* __limit = 477 __first + 478 (__precision == -1 ? __length : _VSTD::min(__length, __precision)); 479 ptrdiff_t __size = __limit - __first; 480 const _CharT* __pos = 481 __detail::__estimate_column_width_fast(__first, __limit); 482 483 if (__pos == __limit) 484 return {__limit, __size, __size < __width}; 485 486 /* 487 * Our assumption was wrong, there are special Unicode code units. 488 * The range [__first, __pos) contains a set of code units with the 489 * following property: 490 * Every _CharT in the range will be rendered in 1 column. 491 * 492 * If there's no maximum width and the parsed size already exceeds the 493 * minimum required width. The real size isn't important. So bail out. 494 */ 495 if (__precision == -1 && (__pos - __first) >= __width) 496 return {__last, 0, false}; 497 498 /* If there's a __precision, truncate the output to that width. */ 499 ptrdiff_t __prefix = __pos - __first; 500 if (__precision != -1) { 501 _LIBCPP_ASSERT(__precision > __prefix, "Logic error."); 502 auto __lengh_info = __detail::__estimate_column_width( 503 __pos, __last, __precision - __prefix); 504 __size = __lengh_info.__width + __prefix; 505 return {__lengh_info.__ptr, __size, __size < __width}; 506 } 507 508 /* Else use __width to determine the number of required padding characters. */ 509 _LIBCPP_ASSERT(__width > __prefix, "Logic error."); 510 /* 511 * The column width is always one or two columns. For the precision the wanted 512 * column width is the maximum, for the width it's the minimum. 
Using the 513 * width estimation with its truncating behavior will result in the wrong 514 * result in the following case: 515 * - The last code unit processed requires two columns and exceeds the 516 * maximum column width. 517 * By increasing the __maximum by one avoids this issue. (It means it may 518 * pass one code point more than required to determine the proper result; 519 * that however isn't a problem for the algorithm.) 520 */ 521 size_t __maximum = 1 + __width - __prefix; 522 auto __lengh_info = 523 __detail::__estimate_column_width(__pos, __last, __maximum); 524 if (__lengh_info.__ptr != __last) { 525 // Consumed the width number of code units. The exact size of the string 526 // is unknown. We only know we don't need to align the output. 527 _LIBCPP_ASSERT(static_cast<ptrdiff_t>(__lengh_info.__width + __prefix) >= 528 __width, 529 "Logic error"); 530 return {__last, 0, false}; 531 } 532 533 __size = __lengh_info.__width + __prefix; 534 return {__last, __size, __size < __width}; 535 } 536 #else // _LIBCPP_HAS_NO_UNICODE 537 template <class _CharT> 538 _LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT> 539 __get_string_alignment(const _CharT* __first, const _CharT* __last, 540 ptrdiff_t __width, ptrdiff_t __precision) noexcept { 541 const ptrdiff_t __length = __last - __first; 542 const _CharT* __limit = 543 __first + 544 (__precision == -1 ? __length : _VSTD::min(__length, __precision)); 545 ptrdiff_t __size = __limit - __first; 546 return {__limit, __size, __size < __width}; 547 } 548 #endif // _LIBCPP_HAS_NO_UNICODE 549 550 /// These fields are a filter for which elements to parse. 551 /// 552 /// They default to false so when a new field is added it needs to be opted in 553 /// explicitly. 
struct __fields {
  // One bit per optional element of the std-format-spec. The parser only
  // attempts to parse an element when its bit is set.
  uint8_t __sign_ : 1 {false};
  uint8_t __alternate_form_ : 1 {false};
  uint8_t __zero_padding_ : 1 {false};
  uint8_t __precision_ : 1 {false};
  uint8_t __locale_specific_form_ : 1 {false};
  uint8_t __type_ : 1 {false};
};

// By not placing this constant in the formatter class it's not duplicated for
// char and wchar_t.
//
// Enables every element except the precision.
inline constexpr __fields __fields_integral{
    .__sign_ = true,
    .__alternate_form_ = true,
    .__zero_padding_ = true,
    .__locale_specific_form_ = true,
    .__type_ = true};
// Enables every element.
inline constexpr __fields __fields_floating_point{
    .__sign_ = true,
    .__alternate_form_ = true,
    .__zero_padding_ = true,
    .__precision_ = true,
    .__locale_specific_form_ = true,
    .__type_ = true};
// Enables only the precision and the type.
inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true};
// Enables only the type.
inline constexpr __fields __fields_pointer{.__type_ = true};

/// The alignment of the output within the requested width.
enum class _LIBCPP_ENUM_VIS __alignment : uint8_t {
  /// No alignment is set in the format string.
  __default,
  __left,
  __center,
  __right,
  /// Set by the parser when it parsed the zero-padding flag while no explicit
  /// alignment was set.
  __zero_padding
};

/// The sign option of the format-spec.
enum class _LIBCPP_ENUM_VIS __sign : uint8_t {
  /// No sign is set in the format string.
  ///
  /// The sign isn't allowed for certain format-types. By using this value
  /// it's possible to detect whether or not the user explicitly set the sign
  /// flag. For formatting purposes it behaves the same as \ref __minus.
  __default,
  __minus,
  __plus,
  __space
};

/// The display type of the format-spec.
///
/// The parser only records the type; whether the type is valid for the
/// argument being formatted is validated separately.
enum class _LIBCPP_ENUM_VIS __type : uint8_t {
  /// No type is set in the format string.
  __default,
  __string,
  __binary_lower_case,
  __binary_upper_case,
  __octal,
  __decimal,
  __hexadecimal_lower_case,
  __hexadecimal_upper_case,
  __pointer,
  __char,
  __hexfloat_lower_case,
  __hexfloat_upper_case,
  __scientific_lower_case,
  __scientific_upper_case,
  __fixed_lower_case,
  __fixed_upper_case,
  __general_lower_case,
  __general_upper_case
};

/// The std-format-spec part of the \ref __parsed_specifications.
struct __std {
  // Must remain the first member, see the union in __parsed_specifications.
  __alignment __alignment_ : 3;
  __sign __sign_ : 2;
  bool __alternate_form_ : 1;
  bool __locale_specific_form_ : 1;
  __type __type_;
};

/// The chrono-format-spec part of the \ref __parsed_specifications.
struct __chrono {
  // Must remain the first member, see the union in __parsed_specifications.
  __alignment __alignment_ : 3;
  bool __weekday_name_ : 1;
  bool __month_name_ : 1;
};

/// Contains the parsed formatting specifications.
///
/// This contains information for both the std-format-spec and the
/// chrono-format-spec. This results in some unused members for both
/// specifications. However these unused members don't increase the size
/// of the structure.
///
/// This struct doesn't cross ABI boundaries so its layout doesn't need to be
/// kept stable.
template <class _CharT>
struct __parsed_specifications {
  union {
    // The field __alignment_ is the first element in __std_ and __chrono_.
    // This allows the code to always inspect this value regardless of which
    // member of the union is the active member [class.union.general]/2.
    //
    // This is needed since the generic output routines handle the alignment of
    // the output.
    __alignment __alignment_ : 3;
    __std __std_;
    __chrono __chrono_;
  };

  /// The requested width.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __width_;

  /// The requested precision.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __precision_;

  /// The character used to pad the output to the requested width.
  _CharT __fill_;

  /// \returns whether a width is set; a width of 0 means no width.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }

  /// \returns whether a precision is set; a negative value means no precision.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
};

// Validate the struct is small and cheap to copy since the struct is passed by
// value in formatting functions.
static_assert(sizeof(__parsed_specifications<char>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
#  endif

/// The parser for the std-format-spec.
///
/// Note this class is a member of std::formatter specializations. It's
/// expected developers will create their own formatter specializations that
/// inherit from the std::formatter specializations. This means this class
/// must be ABI stable. To aid the stability the unused bits in the class are
/// set to zero. That way they can be repurposed if a future revision of the
/// Standards adds new fields to std-format-spec.
template <class _CharT>
class _LIBCPP_TEMPLATE_VIS __parser {
public:
  /// Parses the format-spec in the parse context.
  ///
  /// The elements are parsed in the order fill-and-align, sign, alternate
  /// form, zero-padding, width, precision, locale-specific form, type. The
  /// fill-and-align and the width are always parsed, the other elements are
  /// only parsed when enabled in \a __fields.
  ///
  /// \param __parse_ctx The context holding the format-spec to parse.
  /// \param __fields    The filter selecting the optional elements to parse.
  ///
  /// \returns a pointer to the first character that hasn't been consumed.
  _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(basic_format_parse_context<_CharT>& __parse_ctx, __fields __fields)
      -> decltype(__parse_ctx.begin()) {

    const _CharT* __begin = __parse_ctx.begin();
    const _CharT* __end = __parse_ctx.end();
    if (__begin == __end)
      return __begin;

    // Every parse function below inspects *__begin. Therefore parsing stops
    // as soon as an element consumed the last character of the input.
    if (__parse_fill_align(__begin, __end) && __begin == __end)
      return __begin;

    if (__fields.__sign_ && __parse_sign(__begin) && __begin == __end)
      return __begin;

    if (__fields.__alternate_form_ && __parse_alternate_form(__begin) && __begin == __end)
      return __begin;

    if (__fields.__zero_padding_ && __parse_zero_padding(__begin) && __begin == __end)
      return __begin;

    if (__parse_width(__begin, __end, __parse_ctx) && __begin == __end)
      return __begin;

    if (__fields.__precision_ && __parse_precision(__begin, __end, __parse_ctx) && __begin == __end)
      return __begin;

    if (__fields.__locale_specific_form_ && __parse_locale_specific_form(__begin) && __begin == __end)
      return __begin;

    if (__fields.__type_) {
      __parse_type(__begin);

      // When __type_ is false the calling parser is expected to do additional
      // parsing. In that case that parser should do the end of format string
      // validation.
      if (__begin != __end && *__begin != _CharT('}'))
        __throw_format_error("The format-spec should consume the input or end with a '}'");
    }

    return __begin;
  }

  /// \returns the `__parsed_specifications` with the resolved dynamic sizes.
  ///
  /// A width or precision that was given as an arg-id is replaced with the
  /// value of that argument in \a __ctx.
  _LIBCPP_HIDE_FROM_ABI
  __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
    return __parsed_specifications<_CharT>{
        .__std_ =
            __std{.__alignment_ = __alignment_,
                  .__sign_ = __sign_,
                  .__alternate_form_ = __alternate_form_,
                  .__locale_specific_form_ = __locale_specific_form_,
                  .__type_ = __type_},
        .__width_{__get_width(__ctx)},
        .__precision_{__get_precision(__ctx)},
        .__fill_{__fill_}};
  }

  // The bit-fields below, together with the reserved bits, determine the
  // layout of the class. The size is validated by the static_asserts after
  // the class.
  __alignment __alignment_ : 3 {__alignment::__default};
  __sign __sign_ : 2 {__sign::__default};
  bool __alternate_form_ : 1 {false};
  bool __locale_specific_form_ : 1 {false};
  bool __reserved_0_ : 1 {false};
  __type __type_{__type::__default};

  // These two flags are used for formatting chrono. Since the struct has
  // padding space left it's added to this structure.
  bool __weekday_name_ : 1 {false};
  bool __month_name_ : 1 {false};

  uint8_t __reserved_1_ : 6 {0};
  uint8_t __reserved_2_ : 6 {0};
  // These two flags are only used internally and not part of the
  // __parsed_specifications. Therefore put them at the end.
  bool __width_as_arg_ : 1 {false};
  bool __precision_as_arg_ : 1 {false};

  /// The requested width, either the value or the arg-id.
  int32_t __width_{0};

  /// The requested precision, either the value or the arg-id.
  int32_t __precision_{-1};

  // LWG 3576 will probably change this to always accept a Unicode code point
  // To avoid changing the size with that change align the field so when it
  // becomes 32-bit its alignment will remain the same. That also means the
  // size will remain the same. (D2572 addresses the solution for LWG 3576.)
  _CharT __fill_{_CharT(' ')};

private:
  /// Stores the alignment when \a __c is one of `<`, `^`, or `>`.
  ///
  /// \returns whether \a __c is an alignment character.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
    switch (__c) {
    case _CharT('<'):
      __alignment_ = __alignment::__left;
      return true;

    case _CharT('^'):
      __alignment_ = __alignment::__center;
      return true;

    case _CharT('>'):
      __alignment_ = __alignment::__right;
      return true;
    }
    return false;
  }

  /// Parses the optional fill character followed by the alignment.
  ///
  /// \pre The input isn't empty.
  ///
  /// \returns whether the element was found; when found \a __begin is
  /// advanced beyond the element.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(const _CharT*& __begin, const _CharT* __end) {
    _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause "
                                     "undefined behavior by evaluating data not in the input");
    // First test for a fill character followed by an alignment. This requires
    // at least two characters in the input.
    if (__begin + 1 != __end) {
      if (__parse_alignment(*(__begin + 1))) {
        if (*__begin == _CharT('{') || *__begin == _CharT('}'))
          __throw_format_error("The format-spec fill field contains an invalid character");

        __fill_ = *__begin;
        __begin += 2;
        return true;
      }
    }

    // Else test for an alignment without a fill character.
    if (!__parse_alignment(*__begin))
      return false;

    ++__begin;
    return true;
  }

  /// Parses the sign option `-`, `+`, or ` `.
  ///
  /// \returns whether the element was found; when found \a __begin is
  /// advanced beyond the element.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(const _CharT*& __begin) {
    switch (*__begin) {
    case _CharT('-'):
      __sign_ = __sign::__minus;
      break;
    case _CharT('+'):
      __sign_ = __sign::__plus;
      break;
    case _CharT(' '):
      __sign_ = __sign::__space;
      break;
    default:
      return false;
    }
    ++__begin;
    return true;
  }

  /// Parses the alternate form flag `#`.
  ///
  /// \returns whether the element was found; when found \a __begin is
  /// advanced beyond the element.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(const _CharT*& __begin) {
    if (*__begin != _CharT('#'))
      return false;

    __alternate_form_ = true;
    ++__begin;
    return true;
  }

  /// Parses the zero-padding flag `0`.
  ///
  /// \returns whether the element was found; when found \a __begin is
  /// advanced beyond the element.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(const _CharT*& __begin) {
    if (*__begin != _CharT('0'))
      return false;

    // An explicitly set alignment takes precedence. The flag is consumed
    // either way.
    if (__alignment_ == __alignment::__default)
      __alignment_ = __alignment::__zero_padding;
    ++__begin;
    return true;
  }

  /// Parses the width, either a positive number or a nested arg-id.
  ///
  /// In the latter case the arg-id is stored in `__width_` and
  /// `__width_as_arg_` is set.
  ///
  /// \returns whether the element was found; when found \a __begin is
  /// advanced beyond the element.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(const _CharT*& __begin, const _CharT* __end, auto& __parse_ctx) {
    if (*__begin == _CharT('0'))
      __throw_format_error("A format-spec width field shouldn't have a leading zero");

    if (*__begin == _CharT('{')) {
      // The pre-increment skips the '{' before parsing the arg-id.
      __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
      __width_as_arg_ = true;
      __width_ = __r.__value;
      __begin = __r.__ptr;
      return true;
    }

    if (*__begin < _CharT('0') || *__begin > _CharT('9'))
      return false;

    __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
    __width_ = __r.__value;
    _LIBCPP_ASSERT(__width_ != 0, "A zero value isn't allowed and should be impossible, "
                                  "due to validations in this function");
    __begin = __r.__ptr;
    return true;
  }

  /// Parses the precision, a `.` followed by a number or a nested arg-id.
  ///
  /// In the latter case the arg-id is stored in `__precision_` and
  /// `__precision_as_arg_` is set.
  ///
  /// \returns whether the element was found; when found \a __begin is
  /// advanced beyond the element.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(const _CharT*& __begin, const _CharT* __end,
                                                         auto& __parse_ctx) {
    if (*__begin != _CharT('.'))
      return false;

    ++__begin;
    if (__begin == __end)
      __throw_format_error("End of input while parsing format-spec precision");

    if (*__begin == _CharT('{')) {
      // The pre-increment skips the '{' before parsing the arg-id.
      __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
      __precision_as_arg_ = true;
      __precision_ = __arg_id.__value;
      __begin = __arg_id.__ptr;
      return true;
    }

    if (*__begin < _CharT('0') || *__begin > _CharT('9'))
      __throw_format_error("The format-spec precision field doesn't contain a value or arg-id");

    __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
    __precision_ = __r.__value;
    __precision_as_arg_ = false;
    __begin = __r.__ptr;
    return true;
  }

  /// Parses the locale-specific form flag `L`.
  ///
  /// \returns whether the element was found; when found \a __begin is
  /// advanced beyond the element.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(const _CharT*& __begin) {
    if (*__begin != _CharT('L'))
      return false;

    __locale_specific_form_ = true;
    ++__begin;
    return true;
  }

  /// Parses the display type.
  ///
  /// When the character at \a __begin isn't a type character nothing is
  /// stored and \a __begin isn't advanced.
  _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(const _CharT*& __begin) {
    // Determines the type. It does not validate whether the selected type is
    // valid. Most formatters have optional fields that are only allowed for
    // certain types. These parsers need to do validation after the type has
    // been parsed. So it's easier to implement the validation for all types in
    // the specific parse function.
    switch (*__begin) {
    case 'A':
      __type_ = __type::__hexfloat_upper_case;
      break;
    case 'B':
      __type_ = __type::__binary_upper_case;
      break;
    case 'E':
      __type_ = __type::__scientific_upper_case;
      break;
    case 'F':
      __type_ = __type::__fixed_upper_case;
      break;
    case 'G':
      __type_ = __type::__general_upper_case;
      break;
    case 'X':
      __type_ = __type::__hexadecimal_upper_case;
      break;
    case 'a':
      __type_ = __type::__hexfloat_lower_case;
      break;
    case 'b':
      __type_ = __type::__binary_lower_case;
      break;
    case 'c':
      __type_ = __type::__char;
      break;
    case 'd':
      __type_ = __type::__decimal;
      break;
    case 'e':
      __type_ = __type::__scientific_lower_case;
      break;
    case 'f':
      __type_ = __type::__fixed_lower_case;
      break;
    case 'g':
      __type_ = __type::__general_lower_case;
      break;
    case 'o':
      __type_ = __type::__octal;
      break;
    case 'p':
      __type_ = __type::__pointer;
      break;
    case 's':
      __type_ = __type::__string;
      break;
    case 'x':
      __type_ = __type::__hexadecimal_lower_case;
      break;
    default:
      return;
    }
    ++__begin;
  }

  /// \returns the width; when the width was given as an arg-id, the value of
  /// that argument in \a __ctx. An argument with the value 0 is rejected.
  _LIBCPP_HIDE_FROM_ABI
  int32_t __get_width(auto& __ctx) const {
    if (!__width_as_arg_)
      return __width_;

    int32_t __result = __format_spec::__substitute_arg_id(__ctx.arg(__width_));
    if (__result == 0)
      __throw_format_error("A format-spec width field replacement should have a positive value");
    return __result;
  }

  /// \returns the precision; when the precision was given as an arg-id, the
  /// value of that argument in \a __ctx.
  _LIBCPP_HIDE_FROM_ABI
  int32_t __get_precision(auto& __ctx) const {
    if (!__precision_as_arg_)
      return __precision_;

    return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
  }
};

// Validates whether the reserved bitfields don't change the size.
static_assert(sizeof(__parser<char>) == 16);
#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static_assert(sizeof(__parser<wchar_t>) == 16);
#  endif

/// Validates the display type of a string argument.
///
/// Only the default type and the string type are accepted, all other types
/// result in a format error.
_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) {
  switch (__type) {
  case __format_spec::__type::__default:
  case __format_spec::__type::__string:
    break;

  default:
    std::__throw_format_error("The format-spec type has a type not supported for a string argument");
  }
}

/// Validates the parsed fields of a bool that is formatted as a string.
///
/// The sign, the alternate form, and the zero-padding aren't allowed. When no
/// alignment was set the alignment becomes left.
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser) {
  if (__parser.__sign_ != __sign::__default)
    std::__throw_format_error("A sign field isn't allowed in this format-spec");

  if (__parser.__alternate_form_)
    std::__throw_format_error("An alternate form field isn't allowed in this format-spec");

  if (__parser.__alignment_ == __alignment::__zero_padding)
    std::__throw_format_error("A zero-padding field isn't allowed in this format-spec");

  if (__parser.__alignment_ == __alignment::__default)
    __parser.__alignment_ = __alignment::__left;
}

/// Validates the parsed fields of a character; this applies the same rules as
/// for a bool that is formatted as a string.
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser) {
  __format_spec::__process_display_type_bool_string(__parser);
}

template <class _CharT>
_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser) {
  switch (__parser.__type_) {
  case __format_spec::__type::__default:
  case __format_spec::__type::__string:
__format_spec::__process_display_type_bool_string(__parser); 1049 break; 1050 1051 case __format_spec::__type::__binary_lower_case: 1052 case __format_spec::__type::__binary_upper_case: 1053 case __format_spec::__type::__octal: 1054 case __format_spec::__type::__decimal: 1055 case __format_spec::__type::__hexadecimal_lower_case: 1056 case __format_spec::__type::__hexadecimal_upper_case: 1057 break; 1058 1059 default: 1060 std::__throw_format_error("The format-spec type has a type not supported for a bool argument"); 1061 } 1062 } 1063 1064 template <class _CharT> 1065 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser) { 1066 switch (__parser.__type_) { 1067 case __format_spec::__type::__default: 1068 case __format_spec::__type::__char: 1069 __format_spec::__process_display_type_char(__parser); 1070 break; 1071 1072 case __format_spec::__type::__binary_lower_case: 1073 case __format_spec::__type::__binary_upper_case: 1074 case __format_spec::__type::__octal: 1075 case __format_spec::__type::__decimal: 1076 case __format_spec::__type::__hexadecimal_lower_case: 1077 case __format_spec::__type::__hexadecimal_upper_case: 1078 break; 1079 1080 default: 1081 std::__throw_format_error("The format-spec type has a type not supported for a char argument"); 1082 } 1083 } 1084 1085 template <class _CharT> 1086 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser) { 1087 switch (__parser.__type_) { 1088 case __format_spec::__type::__default: 1089 case __format_spec::__type::__binary_lower_case: 1090 case __format_spec::__type::__binary_upper_case: 1091 case __format_spec::__type::__octal: 1092 case __format_spec::__type::__decimal: 1093 case __format_spec::__type::__hexadecimal_lower_case: 1094 case __format_spec::__type::__hexadecimal_upper_case: 1095 break; 1096 1097 case __format_spec::__type::__char: 1098 __format_spec::__process_display_type_char(__parser); 1099 break; 1100 1101 default: 1102 
std::__throw_format_error("The format-spec type has a type not supported for an integer argument"); 1103 } 1104 } 1105 1106 template <class _CharT> 1107 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser) { 1108 switch (__parser.__type_) { 1109 case __format_spec::__type::__default: 1110 // When no precision specified then it keeps default since that 1111 // formatting differs from the other types. 1112 if (__parser.__precision_as_arg_ || __parser.__precision_ != -1) 1113 __parser.__type_ = __format_spec::__type::__general_lower_case; 1114 break; 1115 case __format_spec::__type::__hexfloat_lower_case: 1116 case __format_spec::__type::__hexfloat_upper_case: 1117 // Precision specific behavior will be handled later. 1118 break; 1119 case __format_spec::__type::__scientific_lower_case: 1120 case __format_spec::__type::__scientific_upper_case: 1121 case __format_spec::__type::__fixed_lower_case: 1122 case __format_spec::__type::__fixed_upper_case: 1123 case __format_spec::__type::__general_lower_case: 1124 case __format_spec::__type::__general_upper_case: 1125 if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1) 1126 // Set the default precision for the call to to_chars. 
1127 __parser.__precision_ = 6; 1128 break; 1129 1130 default: 1131 std::__throw_format_error("The format-spec type has a type not supported for a floating-point argument"); 1132 } 1133 } 1134 1135 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type) { 1136 switch (__type) { 1137 case __format_spec::__type::__default: 1138 case __format_spec::__type::__pointer: 1139 break; 1140 1141 default: 1142 std::__throw_format_error("The format-spec type has a type not supported for a pointer argument"); 1143 } 1144 } 1145 1146 } // namespace __format_spec 1147 1148 #endif //_LIBCPP_STD_VER > 17 1149 1150 _LIBCPP_END_NAMESPACE_STD 1151 1152 _LIBCPP_POP_MACROS 1153 1154 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 1155