1 // -*- C++ -*- 2 //===----------------------------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 12 13 /// \file Contains the std-format-spec parser. 14 /// 15 /// Most of the code can be reused in the chrono-format-spec. 16 /// This header has some support for the chrono-format-spec since it doesn't 17 /// affect the std-format-spec. 18 19 #include <__algorithm/find_if.h> 20 #include <__algorithm/min.h> 21 #include <__assert> 22 #include <__config> 23 #include <__debug> 24 #include <__format/format_arg.h> 25 #include <__format/format_error.h> 26 #include <__format/format_parse_context.h> 27 #include <__format/format_string.h> 28 #include <__variant/monostate.h> 29 #include <bit> 30 #include <concepts> 31 #include <cstdint> 32 #include <type_traits> 33 34 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 35 # pragma GCC system_header 36 #endif 37 38 _LIBCPP_PUSH_MACROS 39 #include <__undef_macros> 40 41 _LIBCPP_BEGIN_NAMESPACE_STD 42 43 #if _LIBCPP_STD_VER > 17 44 45 namespace __format_spec { 46 47 /** 48 * Contains the flags for the std-format-spec. 49 * 50 * Some format-options can only be used for specific C++ types and may depend on 51 * the selected format-type. 52 * * The C++type filtering can be done using the proper policies for 53 * @ref __parser_std. 54 * * The format-type filtering needs to be done post parsing in the parser 55 * derived from @ref __parser_std. 56 */ 57 _LIBCPP_PACKED_BYTE_FOR_AIX 58 class _LIBCPP_TYPE_VIS _Flags { 59 public: 60 enum class _LIBCPP_ENUM_VIS _Alignment : uint8_t { 61 /** 62 * No alignment is set in the format string. 
63 * 64 * Zero-padding is ignored when an alignment is selected. 65 * The default alignment depends on the selected format-type. 66 */ 67 __default, 68 __left, 69 __center, 70 __right 71 }; 72 enum class _LIBCPP_ENUM_VIS _Sign : uint8_t { 73 /** 74 * No sign is set in the format string. 75 * 76 * The sign isn't allowed for certain format-types. By using this value 77 * it's possible to detect whether or not the user explicitly set the sign 78 * flag. For formatting purposes it behaves the same as @ref __minus. 79 */ 80 __default, 81 __minus, 82 __plus, 83 __space 84 }; 85 86 _Alignment __alignment : 2 {_Alignment::__default}; 87 _Sign __sign : 2 {_Sign::__default}; 88 uint8_t __alternate_form : 1 {false}; 89 uint8_t __zero_padding : 1 {false}; 90 uint8_t __locale_specific_form : 1 {false}; 91 92 enum class _LIBCPP_ENUM_VIS _Type : uint8_t { 93 __default, 94 __string, 95 __binary_lower_case, 96 __binary_upper_case, 97 __octal, 98 __decimal, 99 __hexadecimal_lower_case, 100 __hexadecimal_upper_case, 101 __pointer, 102 __char, 103 __float_hexadecimal_lower_case, 104 __float_hexadecimal_upper_case, 105 __scientific_lower_case, 106 __scientific_upper_case, 107 __fixed_lower_case, 108 __fixed_upper_case, 109 __general_lower_case, 110 __general_upper_case 111 }; 112 113 _Type __type{_Type::__default}; 114 }; 115 _LIBCPP_PACKED_BYTE_FOR_AIX_END 116 117 namespace __detail { 118 template <class _CharT> 119 _LIBCPP_HIDE_FROM_ABI constexpr bool 120 __parse_alignment(_CharT __c, _Flags& __flags) noexcept { 121 switch (__c) { 122 case _CharT('<'): 123 __flags.__alignment = _Flags::_Alignment::__left; 124 return true; 125 126 case _CharT('^'): 127 __flags.__alignment = _Flags::_Alignment::__center; 128 return true; 129 130 case _CharT('>'): 131 __flags.__alignment = _Flags::_Alignment::__right; 132 return true; 133 } 134 return false; 135 } 136 } // namespace __detail 137 138 template <class _CharT> 139 class _LIBCPP_TEMPLATE_VIS __parser_fill_align { 140 public: 141 // TODO FMT 
The standard doesn't specify this character is a Unicode 142 // character. Validate what fmt and MSVC have implemented. 143 _CharT __fill{_CharT(' ')}; 144 145 protected: 146 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* 147 __parse(const _CharT* __begin, const _CharT* __end, _Flags& __flags) { 148 _LIBCPP_ASSERT(__begin != __end, 149 "When called with an empty input the function will cause " 150 "undefined behavior by evaluating data not in the input"); 151 if (__begin + 1 != __end) { 152 if (__detail::__parse_alignment(*(__begin + 1), __flags)) { 153 if (*__begin == _CharT('{') || *__begin == _CharT('}')) 154 __throw_format_error( 155 "The format-spec fill field contains an invalid character"); 156 __fill = *__begin; 157 return __begin + 2; 158 } 159 } 160 161 if (__detail::__parse_alignment(*__begin, __flags)) 162 return __begin + 1; 163 164 return __begin; 165 } 166 }; 167 168 template <class _CharT> 169 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* 170 __parse_sign(const _CharT* __begin, _Flags& __flags) noexcept { 171 switch (*__begin) { 172 case _CharT('-'): 173 __flags.__sign = _Flags::_Sign::__minus; 174 break; 175 case _CharT('+'): 176 __flags.__sign = _Flags::_Sign::__plus; 177 break; 178 case _CharT(' '): 179 __flags.__sign = _Flags::_Sign::__space; 180 break; 181 default: 182 return __begin; 183 } 184 return __begin + 1; 185 } 186 187 template <class _CharT> 188 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* 189 __parse_alternate_form(const _CharT* __begin, _Flags& __flags) noexcept { 190 if (*__begin == _CharT('#')) { 191 __flags.__alternate_form = true; 192 ++__begin; 193 } 194 195 return __begin; 196 } 197 198 template <class _CharT> 199 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* 200 __parse_zero_padding(const _CharT* __begin, _Flags& __flags) noexcept { 201 if (*__begin == _CharT('0')) { 202 __flags.__zero_padding = true; 203 ++__begin; 204 } 205 206 return __begin; 207 } 208 209 template <class _CharT> 210 _LIBCPP_HIDE_FROM_ABI constexpr 
__format::__parse_number_result< _CharT> 211 __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) { 212 // This function is a wrapper to call the real parser. But it does the 213 // validation for the pre-conditions and post-conditions. 214 if (__begin == __end) 215 __throw_format_error("End of input while parsing format-spec arg-id"); 216 217 __format::__parse_number_result __r = 218 __format::__parse_arg_id(__begin, __end, __parse_ctx); 219 220 if (__r.__ptr == __end || *__r.__ptr != _CharT('}')) 221 __throw_format_error("Invalid arg-id"); 222 223 ++__r.__ptr; 224 return __r; 225 } 226 227 template <class _Context> 228 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t 229 __substitute_arg_id(basic_format_arg<_Context> _Arg) { 230 return visit_format_arg( 231 [](auto __arg) -> uint32_t { 232 using _Type = decltype(__arg); 233 if constexpr (integral<_Type>) { 234 if constexpr (signed_integral<_Type>) { 235 if (__arg < 0) 236 __throw_format_error("A format-spec arg-id replacement shouldn't " 237 "have a negative value"); 238 } 239 240 using _CT = common_type_t<_Type, decltype(__format::__number_max)>; 241 if (static_cast<_CT>(__arg) > 242 static_cast<_CT>(__format::__number_max)) 243 __throw_format_error("A format-spec arg-id replacement exceeds " 244 "the maximum supported value"); 245 246 return __arg; 247 } else if constexpr (same_as<_Type, monostate>) 248 __throw_format_error("Argument index out of bounds"); 249 else 250 __throw_format_error("A format-spec arg-id replacement argument " 251 "isn't an integral type"); 252 }, 253 _Arg); 254 } 255 256 class _LIBCPP_TYPE_VIS __parser_width { 257 public: 258 /** Contains a width or an arg-id. */ 259 uint32_t __width : 31 {0}; 260 /** Determines whether the value stored is a width or an arg-id. */ 261 uint32_t __width_as_arg : 1 {0}; 262 263 /** 264 * Does the supplied width field contain an arg-id? 265 * 266 * If @c true the formatter needs to call @ref __substitute_width_arg_id. 
267 */ 268 constexpr bool __width_needs_substitution() const noexcept { return __width_as_arg; } 269 270 protected: 271 /** 272 * Does the supplied std-format-spec contain a width field? 273 * 274 * When the field isn't present there's no padding required. This can be used 275 * to optimize the formatting. 276 */ 277 constexpr bool __has_width_field() const noexcept { return __width_as_arg || __width; } 278 279 template <class _CharT> 280 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* 281 __parse(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) { 282 if (*__begin == _CharT('0')) 283 __throw_format_error( 284 "A format-spec width field shouldn't have a leading zero"); 285 286 if (*__begin == _CharT('{')) { 287 __format::__parse_number_result __r = 288 __parse_arg_id(++__begin, __end, __parse_ctx); 289 __width = __r.__value; 290 __width_as_arg = 1; 291 return __r.__ptr; 292 } 293 294 if (*__begin < _CharT('0') || *__begin > _CharT('9')) 295 return __begin; 296 297 __format::__parse_number_result __r = 298 __format::__parse_number(__begin, __end); 299 __width = __r.__value; 300 _LIBCPP_ASSERT(__width != 0, 301 "A zero value isn't allowed and should be impossible, " 302 "due to validations in this function"); 303 return __r.__ptr; 304 } 305 306 _LIBCPP_HIDE_FROM_ABI constexpr void __substitute_width_arg_id(auto __arg) { 307 _LIBCPP_ASSERT(__width_as_arg == 1, 308 "Substitute width called when no substitution is required"); 309 310 // The clearing of the flag isn't required but looks better when debugging 311 // the code. 312 __width_as_arg = 0; 313 __width = __substitute_arg_id(__arg); 314 if (__width == 0) 315 __throw_format_error( 316 "A format-spec width field replacement should have a positive value"); 317 } 318 }; 319 320 class _LIBCPP_TYPE_VIS __parser_precision { 321 public: 322 /** Contains a precision or an arg-id. 
*/ 323 uint32_t __precision : 31 {__format::__number_max}; 324 /** 325 * Determines whether the value stored is a precision or an arg-id. 326 * 327 * @note Since @ref __precision == @ref __format::__number_max is a valid 328 * value, the default value contains an arg-id of INT32_MAX. (This number of 329 * arguments isn't supported by compilers.) This is used to detect whether 330 * the std-format-spec contains a precision field. 331 */ 332 uint32_t __precision_as_arg : 1 {1}; 333 334 /** 335 * Does the supplied precision field contain an arg-id? 336 * 337 * If @c true the formatter needs to call @ref __substitute_precision_arg_id. 338 */ 339 constexpr bool __precision_needs_substitution() const noexcept { 340 return __precision_as_arg && __precision != __format::__number_max; 341 } 342 343 protected: 344 /** 345 * Does the supplied std-format-spec contain a precision field? 346 * 347 * When the field isn't present there's no truncating required. This can be 348 * used to optimize the formatting. 349 */ 350 constexpr bool __has_precision_field() const noexcept { 351 352 return __precision_as_arg == 0 || // Contains a value? 353 __precision != __format::__number_max; // The arg-id is valid? 
354 } 355 356 template <class _CharT> 357 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* 358 __parse(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) { 359 if (*__begin != _CharT('.')) 360 return __begin; 361 362 ++__begin; 363 if (__begin == __end) 364 __throw_format_error("End of input while parsing format-spec precision"); 365 366 if (*__begin == _CharT('{')) { 367 __format::__parse_number_result __arg_id = 368 __parse_arg_id(++__begin, __end, __parse_ctx); 369 _LIBCPP_ASSERT(__arg_id.__value != __format::__number_max, 370 "Unsupported number of arguments, since this number of " 371 "arguments is used a special value"); 372 __precision = __arg_id.__value; 373 return __arg_id.__ptr; 374 } 375 376 if (*__begin < _CharT('0') || *__begin > _CharT('9')) 377 __throw_format_error( 378 "The format-spec precision field doesn't contain a value or arg-id"); 379 380 __format::__parse_number_result __r = 381 __format::__parse_number(__begin, __end); 382 __precision = __r.__value; 383 __precision_as_arg = 0; 384 return __r.__ptr; 385 } 386 387 _LIBCPP_HIDE_FROM_ABI constexpr void __substitute_precision_arg_id( 388 auto __arg) { 389 _LIBCPP_ASSERT( 390 __precision_as_arg == 1 && __precision != __format::__number_max, 391 "Substitute precision called when no substitution is required"); 392 393 // The clearing of the flag isn't required but looks better when debugging 394 // the code. 395 __precision_as_arg = 0; 396 __precision = __substitute_arg_id(__arg); 397 } 398 }; 399 400 template <class _CharT> 401 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* 402 __parse_locale_specific_form(const _CharT* __begin, _Flags& __flags) noexcept { 403 if (*__begin == _CharT('L')) { 404 __flags.__locale_specific_form = true; 405 ++__begin; 406 } 407 408 return __begin; 409 } 410 411 template <class _CharT> 412 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* 413 __parse_type(const _CharT* __begin, _Flags& __flags) { 414 415 // Determines the type. 
It does not validate whether the selected type is 416 // valid. Most formatters have optional fields that are only allowed for 417 // certain types. These parsers need to do validation after the type has 418 // been parsed. So its easier to implement the validation for all types in 419 // the specific parse function. 420 switch (*__begin) { 421 case 'A': 422 __flags.__type = _Flags::_Type::__float_hexadecimal_upper_case; 423 break; 424 case 'B': 425 __flags.__type = _Flags::_Type::__binary_upper_case; 426 break; 427 case 'E': 428 __flags.__type = _Flags::_Type::__scientific_upper_case; 429 break; 430 case 'F': 431 __flags.__type = _Flags::_Type::__fixed_upper_case; 432 break; 433 case 'G': 434 __flags.__type = _Flags::_Type::__general_upper_case; 435 break; 436 case 'X': 437 __flags.__type = _Flags::_Type::__hexadecimal_upper_case; 438 break; 439 case 'a': 440 __flags.__type = _Flags::_Type::__float_hexadecimal_lower_case; 441 break; 442 case 'b': 443 __flags.__type = _Flags::_Type::__binary_lower_case; 444 break; 445 case 'c': 446 __flags.__type = _Flags::_Type::__char; 447 break; 448 case 'd': 449 __flags.__type = _Flags::_Type::__decimal; 450 break; 451 case 'e': 452 __flags.__type = _Flags::_Type::__scientific_lower_case; 453 break; 454 case 'f': 455 __flags.__type = _Flags::_Type::__fixed_lower_case; 456 break; 457 case 'g': 458 __flags.__type = _Flags::_Type::__general_lower_case; 459 break; 460 case 'o': 461 __flags.__type = _Flags::_Type::__octal; 462 break; 463 case 'p': 464 __flags.__type = _Flags::_Type::__pointer; 465 break; 466 case 's': 467 __flags.__type = _Flags::_Type::__string; 468 break; 469 case 'x': 470 __flags.__type = _Flags::_Type::__hexadecimal_lower_case; 471 break; 472 default: 473 return __begin; 474 } 475 return ++__begin; 476 } 477 478 /** 479 * Process the parsed alignment and zero-padding state of arithmetic types. 
480 * 481 * [format.string.std]/13 482 * If the 0 character and an align option both appear, the 0 character is 483 * ignored. 484 * 485 * For the formatter a @ref __default alignment means zero-padding. 486 */ 487 _LIBCPP_HIDE_FROM_ABI constexpr void __process_arithmetic_alignment(_Flags& __flags) { 488 __flags.__zero_padding &= __flags.__alignment == _Flags::_Alignment::__default; 489 if (!__flags.__zero_padding && __flags.__alignment == _Flags::_Alignment::__default) 490 __flags.__alignment = _Flags::_Alignment::__right; 491 } 492 493 /** 494 * The parser for the std-format-spec. 495 * 496 * [format.string.std]/1 specifies the std-format-spec: 497 * fill-and-align sign # 0 width precision L type 498 * 499 * All these fields are optional. Whether these fields can be used depend on: 500 * - The type supplied to the format string. 501 * E.g. A string never uses the sign field so the field may not be set. 502 * This constrain is validated by the parsers in this file. 503 * - The supplied value for the optional type field. 504 * E.g. A int formatted as decimal uses the sign field. 505 * When formatted as a char the sign field may no longer be set. 506 * This constrain isn't validated by the parsers in this file. 507 * 508 * The base classes are ordered to minimize the amount of padding. 509 * 510 * This implements the parser for the string types. 511 */ 512 template <class _CharT> 513 class _LIBCPP_TEMPLATE_VIS __parser_string 514 : public __parser_width, // provides __width(|as_arg) 515 public __parser_precision, // provides __precision(|as_arg) 516 public __parser_fill_align<_CharT>, // provides __fill and uses __flags 517 public _Flags // provides __flags 518 { 519 public: 520 using char_type = _CharT; 521 522 _LIBCPP_HIDE_FROM_ABI constexpr __parser_string() { 523 this->__alignment = _Flags::_Alignment::__left; 524 } 525 526 /** 527 * The low-level std-format-spec parse function. 528 * 529 * @pre __begin points at the beginning of the std-format-spec. 
This means 530 * directly after the ':'. 531 * @pre The std-format-spec parses the entire input, or the first unmatched 532 * character is a '}'. 533 * 534 * @returns The iterator pointing at the last parsed character. 535 */ 536 _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx) 537 -> decltype(__parse_ctx.begin()) { 538 auto __it = __parse(__parse_ctx); 539 __process_display_type(); 540 return __it; 541 } 542 543 private: 544 /** 545 * Parses the std-format-spec. 546 * 547 * @throws __throw_format_error When @a __parse_ctx contains an ill-formed 548 * std-format-spec. 549 * 550 * @returns An iterator to the end of input or point at the closing '}'. 551 */ 552 _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx) 553 -> decltype(__parse_ctx.begin()) { 554 555 auto __begin = __parse_ctx.begin(); 556 auto __end = __parse_ctx.end(); 557 if (__begin == __end) 558 return __begin; 559 560 __begin = __parser_fill_align<_CharT>::__parse(__begin, __end, 561 static_cast<_Flags&>(*this)); 562 if (__begin == __end) 563 return __begin; 564 565 __begin = __parser_width::__parse(__begin, __end, __parse_ctx); 566 if (__begin == __end) 567 return __begin; 568 569 __begin = __parser_precision::__parse(__begin, __end, __parse_ctx); 570 if (__begin == __end) 571 return __begin; 572 573 __begin = __parse_type(__begin, static_cast<_Flags&>(*this)); 574 575 if (__begin != __end && *__begin != _CharT('}')) 576 __throw_format_error( 577 "The format-spec should consume the input or end with a '}'"); 578 579 return __begin; 580 } 581 582 /** Processes the parsed std-format-spec based on the parsed display type. 
*/ 583 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() { 584 switch (this->__type) { 585 case _Flags::_Type::__default: 586 case _Flags::_Type::__string: 587 break; 588 589 default: 590 __throw_format_error("The format-spec type has a type not supported for " 591 "a string argument"); 592 } 593 } 594 }; 595 596 /** 597 * The parser for the std-format-spec. 598 * 599 * This implements the parser for the integral types. This includes the 600 * character type and boolean type. 601 * 602 * See @ref __parser_string. 603 */ 604 template <class _CharT> 605 class _LIBCPP_TEMPLATE_VIS __parser_integral 606 : public __parser_width, // provides __width(|as_arg) 607 public __parser_fill_align<_CharT>, // provides __fill and uses __flags 608 public _Flags // provides __flags 609 { 610 public: 611 using char_type = _CharT; 612 613 protected: 614 /** 615 * The low-level std-format-spec parse function. 616 * 617 * @pre __begin points at the beginning of the std-format-spec. This means 618 * directly after the ':'. 619 * @pre The std-format-spec parses the entire input, or the first unmatched 620 * character is a '}'. 621 * 622 * @returns The iterator pointing at the last parsed character. 
623 */ 624 _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx) 625 -> decltype(__parse_ctx.begin()) { 626 auto __begin = __parse_ctx.begin(); 627 auto __end = __parse_ctx.end(); 628 if (__begin == __end) 629 return __begin; 630 631 __begin = __parser_fill_align<_CharT>::__parse(__begin, __end, 632 static_cast<_Flags&>(*this)); 633 if (__begin == __end) 634 return __begin; 635 636 __begin = __parse_sign(__begin, static_cast<_Flags&>(*this)); 637 if (__begin == __end) 638 return __begin; 639 640 __begin = __parse_alternate_form(__begin, static_cast<_Flags&>(*this)); 641 if (__begin == __end) 642 return __begin; 643 644 __begin = __parse_zero_padding(__begin, static_cast<_Flags&>(*this)); 645 if (__begin == __end) 646 return __begin; 647 648 __begin = __parser_width::__parse(__begin, __end, __parse_ctx); 649 if (__begin == __end) 650 return __begin; 651 652 __begin = 653 __parse_locale_specific_form(__begin, static_cast<_Flags&>(*this)); 654 if (__begin == __end) 655 return __begin; 656 657 __begin = __parse_type(__begin, static_cast<_Flags&>(*this)); 658 659 if (__begin != __end && *__begin != _CharT('}')) 660 __throw_format_error( 661 "The format-spec should consume the input or end with a '}'"); 662 663 return __begin; 664 } 665 666 /** Handles the post-parsing updates for the integer types. */ 667 _LIBCPP_HIDE_FROM_ABI constexpr void __handle_integer() noexcept { 668 __process_arithmetic_alignment(static_cast<_Flags&>(*this)); 669 } 670 671 /** 672 * Handles the post-parsing updates for the character types. 673 * 674 * Sets the alignment and validates the format flags set for a character type. 675 * 676 * At the moment the validation for a character and a Boolean behave the 677 * same, but this may change in the future. 678 * Specifically at the moment the locale-specific form is allowed for the 679 * char output type, but it has no effect on the output. 
680 */ 681 _LIBCPP_HIDE_FROM_ABI constexpr void __handle_char() { __handle_bool(); } 682 683 /** 684 * Handles the post-parsing updates for the Boolean types. 685 * 686 * Sets the alignment and validates the format flags set for a Boolean type. 687 */ 688 _LIBCPP_HIDE_FROM_ABI constexpr void __handle_bool() { 689 if (this->__sign != _Flags::_Sign::__default) 690 __throw_format_error("A sign field isn't allowed in this format-spec"); 691 692 if (this->__alternate_form) 693 __throw_format_error( 694 "An alternate form field isn't allowed in this format-spec"); 695 696 if (this->__zero_padding) 697 __throw_format_error( 698 "A zero-padding field isn't allowed in this format-spec"); 699 700 if (this->__alignment == _Flags::_Alignment::__default) 701 this->__alignment = _Flags::_Alignment::__left; 702 } 703 }; 704 705 /** 706 * The parser for the std-format-spec. 707 * 708 * This implements the parser for the floating-point types. 709 * 710 * See @ref __parser_string. 711 */ 712 template <class _CharT> 713 class _LIBCPP_TEMPLATE_VIS __parser_floating_point 714 : public __parser_width, // provides __width(|as_arg) 715 public __parser_precision, // provides __precision(|as_arg) 716 public __parser_fill_align<_CharT>, // provides __fill and uses __flags 717 public _Flags // provides __flags 718 { 719 public: 720 using char_type = _CharT; 721 722 /** 723 * The low-level std-format-spec parse function. 724 * 725 * @pre __begin points at the beginning of the std-format-spec. This means 726 * directly after the ':'. 727 * @pre The std-format-spec parses the entire input, or the first unmatched 728 * character is a '}'. 729 * 730 * @returns The iterator pointing at the last parsed character. 
731 */ 732 _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx) 733 -> decltype(__parse_ctx.begin()) { 734 auto __it = __parse(__parse_ctx); 735 __process_arithmetic_alignment(static_cast<_Flags&>(*this)); 736 __process_display_type(); 737 return __it; 738 } 739 protected: 740 /** 741 * The low-level std-format-spec parse function. 742 * 743 * @pre __begin points at the beginning of the std-format-spec. This means 744 * directly after the ':'. 745 * @pre The std-format-spec parses the entire input, or the first unmatched 746 * character is a '}'. 747 * 748 * @returns The iterator pointing at the last parsed character. 749 */ 750 _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx) 751 -> decltype(__parse_ctx.begin()) { 752 auto __begin = __parse_ctx.begin(); 753 auto __end = __parse_ctx.end(); 754 if (__begin == __end) 755 return __begin; 756 757 __begin = __parser_fill_align<_CharT>::__parse(__begin, __end, 758 static_cast<_Flags&>(*this)); 759 if (__begin == __end) 760 return __begin; 761 762 __begin = __parse_sign(__begin, static_cast<_Flags&>(*this)); 763 if (__begin == __end) 764 return __begin; 765 766 __begin = __parse_alternate_form(__begin, static_cast<_Flags&>(*this)); 767 if (__begin == __end) 768 return __begin; 769 770 __begin = __parse_zero_padding(__begin, static_cast<_Flags&>(*this)); 771 if (__begin == __end) 772 return __begin; 773 774 __begin = __parser_width::__parse(__begin, __end, __parse_ctx); 775 if (__begin == __end) 776 return __begin; 777 778 __begin = __parser_precision::__parse(__begin, __end, __parse_ctx); 779 if (__begin == __end) 780 return __begin; 781 782 __begin = 783 __parse_locale_specific_form(__begin, static_cast<_Flags&>(*this)); 784 if (__begin == __end) 785 return __begin; 786 787 __begin = __parse_type(__begin, static_cast<_Flags&>(*this)); 788 789 if (__begin != __end && *__begin != _CharT('}')) 790 __throw_format_error( 791 "The format-spec should consume the input or end with a '}'"); 792 793 return 
__begin; 794 } 795 796 /** Processes the parsed std-format-spec based on the parsed display type. */ 797 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() { 798 switch (this->__type) { 799 case _Flags::_Type::__default: 800 // When no precision specified then it keeps default since that 801 // formatting differs from the other types. 802 if (this->__has_precision_field()) 803 this->__type = _Flags::_Type::__general_lower_case; 804 break; 805 case _Flags::_Type::__float_hexadecimal_lower_case: 806 case _Flags::_Type::__float_hexadecimal_upper_case: 807 // Precision specific behavior will be handled later. 808 break; 809 case _Flags::_Type::__scientific_lower_case: 810 case _Flags::_Type::__scientific_upper_case: 811 case _Flags::_Type::__fixed_lower_case: 812 case _Flags::_Type::__fixed_upper_case: 813 case _Flags::_Type::__general_lower_case: 814 case _Flags::_Type::__general_upper_case: 815 if (!this->__has_precision_field()) { 816 // Set the default precision for the call to to_chars. 817 this->__precision = 6; 818 this->__precision_as_arg = false; 819 } 820 break; 821 822 default: 823 __throw_format_error("The format-spec type has a type not supported for " 824 "a floating-point argument"); 825 } 826 } 827 }; 828 829 /** 830 * The parser for the std-format-spec. 831 * 832 * This implements the parser for the pointer types. 833 * 834 * See @ref __parser_string. 835 */ 836 template <class _CharT> 837 class _LIBCPP_TEMPLATE_VIS __parser_pointer : public __parser_width, // provides __width(|as_arg) 838 public __parser_fill_align<_CharT>, // provides __fill and uses __flags 839 public _Flags // provides __flags 840 { 841 public: 842 using char_type = _CharT; 843 844 _LIBCPP_HIDE_FROM_ABI constexpr __parser_pointer() { 845 // Implements LWG3612 Inconsistent pointer alignment in std::format. 846 // The issue's current status is "Tentatively Ready" and libc++ status is 847 // still experimental. 
848 // 849 // TODO FMT Validate this with the final resolution of LWG3612. 850 this->__alignment = _Flags::_Alignment::__right; 851 } 852 853 /** 854 * The low-level std-format-spec parse function. 855 * 856 * @pre __begin points at the beginning of the std-format-spec. This means 857 * directly after the ':'. 858 * @pre The std-format-spec parses the entire input, or the first unmatched 859 * character is a '}'. 860 * 861 * @returns The iterator pointing at the last parsed character. 862 */ 863 _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx) -> decltype(__parse_ctx.begin()) { 864 auto __it = __parse(__parse_ctx); 865 __process_display_type(); 866 return __it; 867 } 868 869 protected: 870 /** 871 * The low-level std-format-spec parse function. 872 * 873 * @pre __begin points at the beginning of the std-format-spec. This means 874 * directly after the ':'. 875 * @pre The std-format-spec parses the entire input, or the first unmatched 876 * character is a '}'. 877 * 878 * @returns The iterator pointing at the last parsed character. 879 */ 880 _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx) -> decltype(__parse_ctx.begin()) { 881 auto __begin = __parse_ctx.begin(); 882 auto __end = __parse_ctx.end(); 883 if (__begin == __end) 884 return __begin; 885 886 __begin = __parser_fill_align<_CharT>::__parse(__begin, __end, static_cast<_Flags&>(*this)); 887 if (__begin == __end) 888 return __begin; 889 890 // An integer presentation type isn't defined in the Standard. 891 // Since a pointer is formatted as an integer it can be argued it's an 892 // integer presentation type. However there are two LWG-issues asserting it 893 // isn't an integer presentation type: 894 // - LWG3612 Inconsistent pointer alignment in std::format 895 // - LWG3644 std::format does not define "integer presentation type" 896 // 897 // There's a paper to make additional clarifications on the status of 898 // formatting pointers and proposes additional fields to be valid. 
That 899 // paper hasn't been reviewed by the Committee yet. 900 // - P2510 Formatting pointers 901 // 902 // The current implementation assumes formatting pointers isn't covered by 903 // "integer presentation type". 904 // TODO FMT Apply the LWG-issues/papers after approval/rejection by the Committee. 905 906 __begin = __parser_width::__parse(__begin, __end, __parse_ctx); 907 if (__begin == __end) 908 return __begin; 909 910 __begin = __parse_type(__begin, static_cast<_Flags&>(*this)); 911 912 if (__begin != __end && *__begin != _CharT('}')) 913 __throw_format_error("The format-spec should consume the input or end with a '}'"); 914 915 return __begin; 916 } 917 918 /** Processes the parsed std-format-spec based on the parsed display type. */ 919 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() { 920 switch (this->__type) { 921 case _Flags::_Type::__default: 922 this->__type = _Flags::_Type::__pointer; 923 break; 924 case _Flags::_Type::__pointer: 925 break; 926 default: 927 __throw_format_error("The format-spec type has a type not supported for a pointer argument"); 928 } 929 } 930 }; 931 932 /** Helper struct returned from @ref __get_string_alignment. */ 933 template <class _CharT> 934 struct _LIBCPP_TEMPLATE_VIS __string_alignment { 935 /** Points beyond the last character to write to the output. */ 936 const _CharT* __last; 937 /** 938 * The estimated number of columns in the output or 0. 939 * 940 * Only when the output needs to be aligned it's required to know the exact 941 * number of columns in the output. So if the formatted output has only a 942 * minimum width the exact size isn't important. It's only important to know 943 * the minimum has been reached. The minimum width is the width specified in 944 * the format-spec. 945 * 946 * For example in this code @code std::format("{:10}", MyString); @endcode 947 * the width estimation can stop once the algorithm has determined the output 948 * width is 10 columns. 
949 * 950 * So if: 951 * * @ref __align == @c true the @ref __size is the estimated number of 952 * columns required. 953 * * @ref __align == @c false the @ref __size is the estimated number of 954 * columns required or 0 when the estimation algorithm stopped prematurely. 955 */ 956 ptrdiff_t __size; 957 /** 958 * Does the output need to be aligned. 959 * 960 * When alignment is needed the output algorithm needs to add the proper 961 * padding. Else the output algorithm just needs to copy the input up to 962 * @ref __last. 963 */ 964 bool __align; 965 }; 966 967 #ifndef _LIBCPP_HAS_NO_UNICODE 968 namespace __detail { 969 970 /** 971 * Unicode column width estimates. 972 * 973 * Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32. 974 * Depending on format the relation between the number of code units stored and 975 * the number of output columns differs. The first relation is the number of 976 * code units forming a code point. (The text assumes the code units are 977 * unsigned.) 978 * - UTF-8 The number of code units is between one and four. The first 127 979 * Unicode code points match the ASCII character set. When the highest bit is 980 * set it means the code point has more than one code unit. 981 * - UTF-16: The number of code units is between 1 and 2. When the first 982 * code unit is in the range [0xd800,0xdfff) it means the code point uses two 983 * code units. 984 * - UTF-32: The number of code units is always one. 985 * 986 * The code point to the number of columns isn't well defined. The code uses the 987 * estimations defined in [format.string.std]/11. This list might change in the 988 * future. 989 * 990 * The algorithm of @ref __get_string_alignment uses two different scanners: 991 * - The simple scanner @ref __estimate_column_width_fast. This scanner assumes 992 * 1 code unit is 1 column. This scanner stops when it can't be sure the 993 * assumption is valid: 994 * - UTF-8 when the code point is encoded in more than 1 code unit. 
*   - UTF-16 and UTF-32 when the first multi-column code point is encountered.
 *     (The code unit's value is lower than 0xd800 so the 2 code unit encoding
 *     is irrelevant for this scanner.)
 *   Due to these assumptions the scanner is faster than the full scanner. It
 *   can process all text only containing ASCII. For UTF-16/32 it can process
 *   most (all?) European languages. (Note the set it can process might be
 *   reduced in the future, due to updates in the scanning rules.)
 * - The full scanner @ref __estimate_column_width. This scanner, if needed,
 *   converts multiple code units into one code point then converts the code
 *   point to a column width.
 *
 * See also:
 * - [format.string.general]/11
 * - https://en.wikipedia.org/wiki/UTF-8#Encoding
 * - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
 */

/**
 * The first code point that is rendered in two columns.
 *
 * This is the point where the fast UTF-16/32 scanner needs to stop processing.
 */
inline constexpr uint32_t __two_column_code_point = 0x1100;

/** Helper concept for a UTF-8 character type: @c char or @c char8_t. */
template <class _CharT>
concept __utf8_character = same_as<_CharT, char> || same_as<_CharT, char8_t>;

/**
 * Helper concept for a UTF-16 character type.
 *
 * Matches @c char16_t and @c wchar_t on platforms where @c wchar_t is 2 bytes.
 */
template <class _CharT>
concept __utf16_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) || same_as<_CharT, char16_t>;

/**
 * Helper concept for a UTF-32 character type.
 *
 * Matches @c char32_t and @c wchar_t on platforms where @c wchar_t is 4 bytes.
 */
template <class _CharT>
concept __utf32_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) || same_as<_CharT, char32_t>;

/** Helper concept for a UTF-16 or UTF-32 character type. */
template <class _CharT>
concept __utf16_or_32_character = __utf16_character<_CharT> || __utf32_character<_CharT>;

/**
 * Converts a code point to the column width.
1037 * 1038 * The estimations are conforming to [format.string.general]/11 1039 * 1040 * This version expects a value less than 0x1'0000, which is a 3-byte UTF-8 1041 * character. 1042 */ 1043 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_3(uint32_t __c) noexcept { 1044 _LIBCPP_ASSERT(__c < 0x10000, 1045 "Use __column_width_4 or __column_width for larger values"); 1046 1047 // clang-format off 1048 return 1 + (__c >= 0x1100 && (__c <= 0x115f || 1049 (__c >= 0x2329 && (__c <= 0x232a || 1050 (__c >= 0x2e80 && (__c <= 0x303e || 1051 (__c >= 0x3040 && (__c <= 0xa4cf || 1052 (__c >= 0xac00 && (__c <= 0xd7a3 || 1053 (__c >= 0xf900 && (__c <= 0xfaff || 1054 (__c >= 0xfe10 && (__c <= 0xfe19 || 1055 (__c >= 0xfe30 && (__c <= 0xfe6f || 1056 (__c >= 0xff00 && (__c <= 0xff60 || 1057 (__c >= 0xffe0 && (__c <= 0xffe6 1058 )))))))))))))))))))); 1059 // clang-format on 1060 } 1061 1062 /** 1063 * @overload 1064 * 1065 * This version expects a value greater than or equal to 0x1'0000, which is a 1066 * 4-byte UTF-8 character. 1067 */ 1068 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_4(uint32_t __c) noexcept { 1069 _LIBCPP_ASSERT(__c >= 0x10000, 1070 "Use __column_width_3 or __column_width for smaller values"); 1071 1072 // clang-format off 1073 return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f || 1074 (__c >= 0x1'f900 && (__c <= 0x1'f9ff || 1075 (__c >= 0x2'0000 && (__c <= 0x2'fffd || 1076 (__c >= 0x3'0000 && (__c <= 0x3'fffd 1077 )))))))); 1078 // clang-format on 1079 } 1080 1081 /** 1082 * @overload 1083 * 1084 * The general case, accepting all values. 1085 */ 1086 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width(uint32_t __c) noexcept { 1087 if (__c < 0x10000) 1088 return __column_width_3(__c); 1089 1090 return __column_width_4(__c); 1091 } 1092 1093 /** 1094 * Estimate the column width for the UTF-8 sequence using the fast algorithm. 
1095 */ 1096 template <__utf8_character _CharT> 1097 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* 1098 __estimate_column_width_fast(const _CharT* __first, 1099 const _CharT* __last) noexcept { 1100 return _VSTD::find_if(__first, __last, 1101 [](unsigned char __c) { return __c & 0x80; }); 1102 } 1103 1104 /** 1105 * @overload 1106 * 1107 * The implementation for UTF-16/32. 1108 */ 1109 template <__utf16_or_32_character _CharT> 1110 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* 1111 __estimate_column_width_fast(const _CharT* __first, 1112 const _CharT* __last) noexcept { 1113 return _VSTD::find_if(__first, __last, 1114 [](uint32_t __c) { return __c >= 0x1100; }); 1115 } 1116 1117 template <class _CharT> 1118 struct _LIBCPP_TEMPLATE_VIS __column_width_result { 1119 /** The number of output columns. */ 1120 size_t __width; 1121 /** 1122 * The last parsed element. 1123 * 1124 * This limits the original output to fit in the wanted number of columns. 1125 */ 1126 const _CharT* __ptr; 1127 }; 1128 1129 /** 1130 * Small helper to determine the width of malformed Unicode. 1131 * 1132 * @note This function's only needed for UTF-8. During scanning UTF-8 there 1133 * are multiple place where it can be detected that the Unicode is malformed. 1134 * UTF-16 only requires 1 test and UTF-32 requires no testing. 1135 */ 1136 template <__utf8_character _CharT> 1137 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> 1138 __estimate_column_width_malformed(const _CharT* __first, const _CharT* __last, 1139 size_t __maximum, size_t __result) noexcept { 1140 size_t __size = __last - __first; 1141 size_t __n = _VSTD::min(__size, __maximum); 1142 return {__result + __n, __first + __n}; 1143 } 1144 1145 /** 1146 * Determines the number of output columns needed to render the input. 1147 * 1148 * @note When the scanner encounters malformed Unicode it acts as-if every code 1149 * unit at the end of the input is one output column. 
It's expected the output 1150 * terminal will replace these malformed code units with a one column 1151 * replacement characters. 1152 * 1153 * @param __first Points to the first element of the input range. 1154 * @param __last Points beyond the last element of the input range. 1155 * @param __maximum The maximum number of output columns. The returned number 1156 * of estimated output columns will not exceed this value. 1157 */ 1158 template <__utf8_character _CharT> 1159 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> 1160 __estimate_column_width(const _CharT* __first, const _CharT* __last, 1161 size_t __maximum) noexcept { 1162 size_t __result = 0; 1163 1164 while (__first != __last) { 1165 // Based on the number of leading 1 bits the number of code units in the 1166 // code point can be determined. See 1167 // https://en.wikipedia.org/wiki/UTF-8#Encoding 1168 switch (_VSTD::countl_one(static_cast<unsigned char>(*__first))) { 1169 case 0: // 1-code unit encoding: all 1 column 1170 ++__result; 1171 ++__first; 1172 break; 1173 1174 case 2: // 2-code unit encoding: all 1 column 1175 // Malformed Unicode. 1176 if (__last - __first < 2) [[unlikely]] 1177 return __estimate_column_width_malformed(__first, __last, __maximum, 1178 __result); 1179 __first += 2; 1180 ++__result; 1181 break; 1182 1183 case 3: // 3-code unit encoding: either 1 or 2 columns 1184 // Malformed Unicode. 1185 if (__last - __first < 3) [[unlikely]] 1186 return __estimate_column_width_malformed(__first, __last, __maximum, 1187 __result); 1188 { 1189 uint32_t __c = static_cast<unsigned char>(*__first++) & 0x0f; 1190 __c <<= 6; 1191 __c |= static_cast<unsigned char>(*__first++) & 0x3f; 1192 __c <<= 6; 1193 __c |= static_cast<unsigned char>(*__first++) & 0x3f; 1194 __result += __column_width_3(__c); 1195 if (__result > __maximum) 1196 return {__result - 2, __first - 3}; 1197 } 1198 break; 1199 case 4: // 4-code unit encoding: either 1 or 2 columns 1200 // Malformed Unicode. 
1201 if (__last - __first < 4) [[unlikely]] 1202 return __estimate_column_width_malformed(__first, __last, __maximum, 1203 __result); 1204 { 1205 uint32_t __c = static_cast<unsigned char>(*__first++) & 0x07; 1206 __c <<= 6; 1207 __c |= static_cast<unsigned char>(*__first++) & 0x3f; 1208 __c <<= 6; 1209 __c |= static_cast<unsigned char>(*__first++) & 0x3f; 1210 __c <<= 6; 1211 __c |= static_cast<unsigned char>(*__first++) & 0x3f; 1212 __result += __column_width_4(__c); 1213 if (__result > __maximum) 1214 return {__result - 2, __first - 4}; 1215 } 1216 break; 1217 default: 1218 // Malformed Unicode. 1219 return __estimate_column_width_malformed(__first, __last, __maximum, 1220 __result); 1221 } 1222 1223 if (__result >= __maximum) 1224 return {__result, __first}; 1225 } 1226 return {__result, __first}; 1227 } 1228 1229 template <__utf16_character _CharT> 1230 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> 1231 __estimate_column_width(const _CharT* __first, const _CharT* __last, 1232 size_t __maximum) noexcept { 1233 size_t __result = 0; 1234 1235 while (__first != __last) { 1236 uint32_t __c = *__first; 1237 // Is the code unit part of a surrogate pair? See 1238 // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF 1239 if (__c >= 0xd800 && __c <= 0xDfff) { 1240 // Malformed Unicode. 
1241 if (__last - __first < 2) [[unlikely]] 1242 return {__result + 1, __first + 1}; 1243 1244 __c -= 0xd800; 1245 __c <<= 10; 1246 __c += (*(__first + 1) - 0xdc00); 1247 __c += 0x10000; 1248 1249 __result += __column_width_4(__c); 1250 if (__result > __maximum) 1251 return {__result - 2, __first}; 1252 __first += 2; 1253 } else { 1254 __result += __column_width_3(__c); 1255 if (__result > __maximum) 1256 return {__result - 2, __first}; 1257 ++__first; 1258 } 1259 1260 if (__result >= __maximum) 1261 return {__result, __first}; 1262 } 1263 1264 return {__result, __first}; 1265 } 1266 1267 template <__utf32_character _CharT> 1268 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> 1269 __estimate_column_width(const _CharT* __first, const _CharT* __last, 1270 size_t __maximum) noexcept { 1271 size_t __result = 0; 1272 1273 while (__first != __last) { 1274 uint32_t __c = *__first; 1275 __result += __column_width(__c); 1276 1277 if (__result > __maximum) 1278 return {__result - 2, __first}; 1279 1280 ++__first; 1281 if (__result >= __maximum) 1282 return {__result, __first}; 1283 } 1284 1285 return {__result, __first}; 1286 } 1287 1288 } // namespace __detail 1289 1290 template <class _CharT> 1291 _LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT> 1292 __get_string_alignment(const _CharT* __first, const _CharT* __last, 1293 ptrdiff_t __width, ptrdiff_t __precision) noexcept { 1294 _LIBCPP_ASSERT(__width != 0 || __precision != -1, 1295 "The function has no effect and shouldn't be used"); 1296 1297 // TODO FMT There might be more optimizations possible: 1298 // If __precision == __format::__number_max and the encoding is: 1299 // * UTF-8 : 4 * (__last - __first) >= __width 1300 // * UTF-16 : 2 * (__last - __first) >= __width 1301 // * UTF-32 : (__last - __first) >= __width 1302 // In these cases it's certain the output is at least the requested width. 1303 // It's unknown how often this happens in practice. 
For now the improvement 1304 // isn't implemented. 1305 1306 /* 1307 * First assume there are no special Unicode code units in the input. 1308 * - Apply the precision (this may reduce the size of the input). When 1309 * __precison == -1 this step is omitted. 1310 * - Scan for special code units in the input. 1311 * If our assumption was correct the __pos will be at the end of the input. 1312 */ 1313 const ptrdiff_t __length = __last - __first; 1314 const _CharT* __limit = 1315 __first + 1316 (__precision == -1 ? __length : _VSTD::min(__length, __precision)); 1317 ptrdiff_t __size = __limit - __first; 1318 const _CharT* __pos = 1319 __detail::__estimate_column_width_fast(__first, __limit); 1320 1321 if (__pos == __limit) 1322 return {__limit, __size, __size < __width}; 1323 1324 /* 1325 * Our assumption was wrong, there are special Unicode code units. 1326 * The range [__first, __pos) contains a set of code units with the 1327 * following property: 1328 * Every _CharT in the range will be rendered in 1 column. 1329 * 1330 * If there's no maximum width and the parsed size already exceeds the 1331 * minimum required width. The real size isn't important. So bail out. 1332 */ 1333 if (__precision == -1 && (__pos - __first) >= __width) 1334 return {__last, 0, false}; 1335 1336 /* If there's a __precision, truncate the output to that width. */ 1337 ptrdiff_t __prefix = __pos - __first; 1338 if (__precision != -1) { 1339 _LIBCPP_ASSERT(__precision > __prefix, "Logic error."); 1340 auto __lengh_info = __detail::__estimate_column_width( 1341 __pos, __last, __precision - __prefix); 1342 __size = __lengh_info.__width + __prefix; 1343 return {__lengh_info.__ptr, __size, __size < __width}; 1344 } 1345 1346 /* Else use __width to determine the number of required padding characters. */ 1347 _LIBCPP_ASSERT(__width > __prefix, "Logic error."); 1348 /* 1349 * The column width is always one or two columns. 
For the precision the wanted 1350 * column width is the maximum, for the width it's the minimum. Using the 1351 * width estimation with its truncating behavior will result in the wrong 1352 * result in the following case: 1353 * - The last code unit processed requires two columns and exceeds the 1354 * maximum column width. 1355 * By increasing the __maximum by one avoids this issue. (It means it may 1356 * pass one code point more than required to determine the proper result; 1357 * that however isn't a problem for the algorithm.) 1358 */ 1359 size_t __maximum = 1 + __width - __prefix; 1360 auto __lengh_info = 1361 __detail::__estimate_column_width(__pos, __last, __maximum); 1362 if (__lengh_info.__ptr != __last) { 1363 // Consumed the width number of code units. The exact size of the string 1364 // is unknown. We only know we don't need to align the output. 1365 _LIBCPP_ASSERT(static_cast<ptrdiff_t>(__lengh_info.__width + __prefix) >= 1366 __width, 1367 "Logic error"); 1368 return {__last, 0, false}; 1369 } 1370 1371 __size = __lengh_info.__width + __prefix; 1372 return {__last, __size, __size < __width}; 1373 } 1374 #else // _LIBCPP_HAS_NO_UNICODE 1375 template <class _CharT> 1376 _LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT> 1377 __get_string_alignment(const _CharT* __first, const _CharT* __last, 1378 ptrdiff_t __width, ptrdiff_t __precision) noexcept { 1379 const ptrdiff_t __length = __last - __first; 1380 const _CharT* __limit = 1381 __first + 1382 (__precision == -1 ? __length : _VSTD::min(__length, __precision)); 1383 ptrdiff_t __size = __limit - __first; 1384 return {__limit, __size, __size < __width}; 1385 } 1386 #endif // _LIBCPP_HAS_NO_UNICODE 1387 1388 /// These fields are a filter for which elements to parse. 1389 /// 1390 /// They default to false so when a new field is added it needs to be opted in 1391 /// explicitly. 
struct __fields {
  // Each flag enables parsing of the std-format-spec field with the same name.
  uint8_t __sign_ : 1 {false};
  uint8_t __alternate_form_ : 1 {false};
  uint8_t __zero_padding_ : 1 {false};
  uint8_t __precision_ : 1 {false};
  uint8_t __locale_specific_form_ : 1 {false};
  uint8_t __type_ : 1 {false};
};

// By not placing this constant in the formatter class it's not duplicated for
// char and wchar_t.
inline constexpr __fields __fields_integral{
    .__sign_                 = true,
    .__alternate_form_       = true,
    .__zero_padding_         = true,
    .__locale_specific_form_ = true,
    .__type_                 = true};
inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true};
inline constexpr __fields __fields_pointer{.__type_ = true};

enum class _LIBCPP_ENUM_VIS __alignment : uint8_t {
  /// No alignment is set in the format string.
  __default,
  __left,
  __center,
  __right,
  /// Selected by the parser when the zero-padding flag is present and no
  /// explicit alignment has been set.
  __zero_padding
};

enum class _LIBCPP_ENUM_VIS __sign : uint8_t {
  /// No sign is set in the format string.
  ///
  /// The sign isn't allowed for certain format-types. By using this value
  /// it's possible to detect whether or not the user explicitly set the sign
  /// flag. For formatting purposes it behaves the same as \ref __minus.
  __default,
  __minus,
  __plus,
  __space
};

enum class _LIBCPP_ENUM_VIS __type : uint8_t {
  /// No type is set in the format string.
  __default,
  __string,
  __binary_lower_case,
  __binary_upper_case,
  __octal,
  __decimal,
  __hexadecimal_lower_case,
  __hexadecimal_upper_case,
  __pointer,
  __char,
  __hexfloat_lower_case,
  __hexfloat_upper_case,
  __scientific_lower_case,
  __scientific_upper_case,
  __fixed_lower_case,
  __fixed_upper_case,
  __general_lower_case,
  __general_upper_case
};

/// The std-format-spec specific part of \ref __parsed_specifications.
struct __std {
  __alignment __alignment_ : 3;
  __sign __sign_ : 2;
  bool __alternate_form_ : 1;
  bool __locale_specific_form_ : 1;
  __type __type_;
};

/// The chrono-format-spec specific part of \ref __parsed_specifications.
struct __chrono {
  __alignment __alignment_ : 3;
  bool __weekday_name_ : 1;
  bool __month_name_ : 1;
};

/// Contains the parsed formatting specifications.
///
/// This contains information for both the std-format-spec and the
/// chrono-format-spec. This results in some unused members for both
/// specifications. However these unused members don't increase the size
/// of the structure.
///
/// This struct doesn't cross ABI boundaries so its layout doesn't need to be
/// kept stable.
template <class _CharT>
struct __parsed_specifications {
  union {
    // The field __alignment_ is the first element in __std_ and __chrono_.
    // This allows the code to always inspect this value regardless of which
    // member of the union is the active member [class.union.general]/2.
    //
    // This is needed since the generic output routines handle the alignment of
    // the output.
    __alignment __alignment_ : 3;
    __std __std_;
    __chrono __chrono_;
  };

  /// The requested width.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __width_;

  /// The requested precision.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __precision_;

  _CharT __fill_;

  /// \returns whether a width is set, that is whether \ref __width_ is positive.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }

  /// \returns whether a precision is set, that is whether \ref __precision_
  /// isn't negative.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
};

// Validate the struct is small and cheap to copy since the struct is passed by
// value in formatting functions.
static_assert(sizeof(__parsed_specifications<char>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
#  endif

/// The parser for the std-format-spec.
///
/// Note this class is a member of std::formatter specializations. It's
/// expected developers will create their own formatter specializations that
/// inherit from the std::formatter specializations. This means this class
/// must be ABI stable. To aid the stability the unused bits in the class are
/// set to zero. That way they can be repurposed if a future revision of the
/// Standards adds new fields to std-format-spec.
template <class _CharT>
class _LIBCPP_TEMPLATE_VIS __parser {
public:
  /// Parses the format-spec in \a __parse_ctx.
  ///
  /// The fill-and-align and width fields are always parsed, the other fields
  /// are only parsed when they are enabled in \a __fields. The fields are
  /// parsed in the order they appear in the std-format-spec. Parsing stops as
  /// soon as a field has consumed the last character of the input.
  ///
  /// \returns the position in the input where the parsing stopped.
  _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(basic_format_parse_context<_CharT>& __parse_ctx, __fields __fields)
      -> decltype(__parse_ctx.begin()) {

    const _CharT* __begin = __parse_ctx.begin();
    const _CharT* __end = __parse_ctx.end();
    if (__begin == __end)
      return __begin;

    // Every parse function requires a non-empty input. Therefore after every
    // field that has been parsed it's tested whether the input is exhausted.
    if (__parse_fill_align(__begin, __end) && __begin == __end)
      return __begin;

    if (__fields.__sign_ && __parse_sign(__begin) && __begin == __end)
      return __begin;

    if (__fields.__alternate_form_ && __parse_alternate_form(__begin) && __begin == __end)
      return __begin;

    if (__fields.__zero_padding_ && __parse_zero_padding(__begin) && __begin == __end)
      return __begin;

    if (__parse_width(__begin, __end, __parse_ctx) && __begin == __end)
      return __begin;

    if (__fields.__precision_ && __parse_precision(__begin, __end, __parse_ctx) && __begin == __end)
      return __begin;

    if (__fields.__locale_specific_form_ && __parse_locale_specific_form(__begin) && __begin == __end)
      return __begin;

    if (__fields.__type_) {
      __parse_type(__begin);

      // When __type_ is false the calling parser is expected to do additional
      // parsing. In that case that parser should do the end of format string
      // validation.
      if (__begin != __end && *__begin != _CharT('}'))
        __throw_format_error("The format-spec should consume the input or end with a '}'");
    }

    return __begin;
  }

  /// \returns the `__parsed_specifications` with the resolved dynamic sizes.
_LIBCPP_HIDE_FROM_ABI
  __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
    // The width and precision are resolved using __ctx; when they were given
    // as an arg-id the value of that argument is used.
    return __parsed_specifications<_CharT>{
        .__std_ =
            __std{.__alignment_            = __alignment_,
                  .__sign_                 = __sign_,
                  .__alternate_form_       = __alternate_form_,
                  .__locale_specific_form_ = __locale_specific_form_,
                  .__type_                 = __type_},
        .__width_{__get_width(__ctx)},
        .__precision_{__get_precision(__ctx)},
        .__fill_{__fill_}};
  }

  // The first byte; __reserved_0_ fills its unused bit.
  __alignment __alignment_ : 3 {__alignment::__default};
  __sign __sign_ : 2 {__sign::__default};
  bool __alternate_form_ : 1 {false};
  bool __locale_specific_form_ : 1 {false};
  bool __reserved_0_ : 1 {false};
  __type __type_{__type::__default};

  // These two flags are used for formatting chrono. Since the struct has
  // padding space left it's added to this structure.
  bool __weekday_name_ : 1 {false};
  bool __month_name_ : 1 {false};

  // Unused bits, initialized to zero so they can be repurposed later.
  uint8_t __reserved_1_ : 6 {0};
  uint8_t __reserved_2_ : 6 {0};
  // These two flags are only used internally and not part of the
  // __parsed_specifications. Therefore put them at the end.
  bool __width_as_arg_ : 1 {false};
  bool __precision_as_arg_ : 1 {false};

  /// The requested width, either the value or the arg-id.
  ///
  /// The value 0 means no width has been set.
  int32_t __width_{0};

  /// The requested precision, either the value or the arg-id.
  ///
  /// The value -1 means no precision has been set.
  int32_t __precision_{-1};

  // LWG 3576 will probably change this to always accept a Unicode code point.
  // To avoid changing the size with that change align the field so when it
  // becomes 32-bit its alignment will remain the same. That also means the
  // size will remain the same. (D2572 addresses the solution for LWG 3576.)
1616 _CharT __fill_{_CharT(' ')}; 1617 1618 private: 1619 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) { 1620 switch (__c) { 1621 case _CharT('<'): 1622 __alignment_ = __alignment::__left; 1623 return true; 1624 1625 case _CharT('^'): 1626 __alignment_ = __alignment::__center; 1627 return true; 1628 1629 case _CharT('>'): 1630 __alignment_ = __alignment::__right; 1631 return true; 1632 } 1633 return false; 1634 } 1635 1636 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(const _CharT*& __begin, const _CharT* __end) { 1637 _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause " 1638 "undefined behavior by evaluating data not in the input"); 1639 if (__begin + 1 != __end) { 1640 if (__parse_alignment(*(__begin + 1))) { 1641 if (*__begin == _CharT('{') || *__begin == _CharT('}')) 1642 __throw_format_error("The format-spec fill field contains an invalid character"); 1643 1644 __fill_ = *__begin; 1645 __begin += 2; 1646 return true; 1647 } 1648 } 1649 1650 if (!__parse_alignment(*__begin)) 1651 return false; 1652 1653 ++__begin; 1654 return true; 1655 } 1656 1657 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(const _CharT*& __begin) { 1658 switch (*__begin) { 1659 case _CharT('-'): 1660 __sign_ = __sign::__minus; 1661 break; 1662 case _CharT('+'): 1663 __sign_ = __sign::__plus; 1664 break; 1665 case _CharT(' '): 1666 __sign_ = __sign::__space; 1667 break; 1668 default: 1669 return false; 1670 } 1671 ++__begin; 1672 return true; 1673 } 1674 1675 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(const _CharT*& __begin) { 1676 if (*__begin != _CharT('#')) 1677 return false; 1678 1679 __alternate_form_ = true; 1680 ++__begin; 1681 return true; 1682 } 1683 1684 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(const _CharT*& __begin) { 1685 if (*__begin != _CharT('0')) 1686 return false; 1687 1688 if (__alignment_ == __alignment::__default) 1689 __alignment_ = __alignment::__zero_padding; 
1690 ++__begin; 1691 return true; 1692 } 1693 1694 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(const _CharT*& __begin, const _CharT* __end, auto& __parse_ctx) { 1695 if (*__begin == _CharT('0')) 1696 __throw_format_error("A format-spec width field shouldn't have a leading zero"); 1697 1698 if (*__begin == _CharT('{')) { 1699 __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx); 1700 __width_as_arg_ = true; 1701 __width_ = __r.__value; 1702 __begin = __r.__ptr; 1703 return true; 1704 } 1705 1706 if (*__begin < _CharT('0') || *__begin > _CharT('9')) 1707 return false; 1708 1709 __format::__parse_number_result __r = __format::__parse_number(__begin, __end); 1710 __width_ = __r.__value; 1711 _LIBCPP_ASSERT(__width_ != 0, "A zero value isn't allowed and should be impossible, " 1712 "due to validations in this function"); 1713 __begin = __r.__ptr; 1714 return true; 1715 } 1716 1717 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(const _CharT*& __begin, const _CharT* __end, 1718 auto& __parse_ctx) { 1719 if (*__begin != _CharT('.')) 1720 return false; 1721 1722 ++__begin; 1723 if (__begin == __end) 1724 __throw_format_error("End of input while parsing format-spec precision"); 1725 1726 if (*__begin == _CharT('{')) { 1727 __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx); 1728 __precision_as_arg_ = true; 1729 __precision_ = __arg_id.__value; 1730 __begin = __arg_id.__ptr; 1731 return true; 1732 } 1733 1734 if (*__begin < _CharT('0') || *__begin > _CharT('9')) 1735 __throw_format_error("The format-spec precision field doesn't contain a value or arg-id"); 1736 1737 __format::__parse_number_result __r = __format::__parse_number(__begin, __end); 1738 __precision_ = __r.__value; 1739 __precision_as_arg_ = false; 1740 __begin = __r.__ptr; 1741 return true; 1742 } 1743 1744 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(const _CharT*& __begin) { 
1745 if (*__begin != _CharT('L')) 1746 return false; 1747 1748 __locale_specific_form_ = true; 1749 ++__begin; 1750 return true; 1751 } 1752 1753 _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(const _CharT*& __begin) { 1754 // Determines the type. It does not validate whether the selected type is 1755 // valid. Most formatters have optional fields that are only allowed for 1756 // certain types. These parsers need to do validation after the type has 1757 // been parsed. So its easier to implement the validation for all types in 1758 // the specific parse function. 1759 switch (*__begin) { 1760 case 'A': 1761 __type_ = __type::__hexfloat_upper_case; 1762 break; 1763 case 'B': 1764 __type_ = __type::__binary_upper_case; 1765 break; 1766 case 'E': 1767 __type_ = __type::__scientific_upper_case; 1768 break; 1769 case 'F': 1770 __type_ = __type::__fixed_upper_case; 1771 break; 1772 case 'G': 1773 __type_ = __type::__general_upper_case; 1774 break; 1775 case 'X': 1776 __type_ = __type::__hexadecimal_upper_case; 1777 break; 1778 case 'a': 1779 __type_ = __type::__hexfloat_lower_case; 1780 break; 1781 case 'b': 1782 __type_ = __type::__binary_lower_case; 1783 break; 1784 case 'c': 1785 __type_ = __type::__char; 1786 break; 1787 case 'd': 1788 __type_ = __type::__decimal; 1789 break; 1790 case 'e': 1791 __type_ = __type::__scientific_lower_case; 1792 break; 1793 case 'f': 1794 __type_ = __type::__fixed_lower_case; 1795 break; 1796 case 'g': 1797 __type_ = __type::__general_lower_case; 1798 break; 1799 case 'o': 1800 __type_ = __type::__octal; 1801 break; 1802 case 'p': 1803 __type_ = __type::__pointer; 1804 break; 1805 case 's': 1806 __type_ = __type::__string; 1807 break; 1808 case 'x': 1809 __type_ = __type::__hexadecimal_lower_case; 1810 break; 1811 default: 1812 return; 1813 } 1814 ++__begin; 1815 } 1816 1817 _LIBCPP_HIDE_FROM_ABI 1818 int32_t __get_width(auto& __ctx) const { 1819 if (!__width_as_arg_) 1820 return __width_; 1821 1822 int32_t __result = 
__format_spec::__substitute_arg_id(__ctx.arg(__width_)); 1823 if (__result == 0) 1824 __throw_format_error("A format-spec width field replacement should have a positive value"); 1825 return __result; 1826 } 1827 1828 _LIBCPP_HIDE_FROM_ABI 1829 int32_t __get_precision(auto& __ctx) const { 1830 if (!__precision_as_arg_) 1831 return __precision_; 1832 1833 return __format_spec::__substitute_arg_id(__ctx.arg(__precision_)); 1834 } 1835 }; 1836 1837 // Validates whether the reserved bitfields don't change the size. 1838 static_assert(sizeof(__parser<char>) == 16); 1839 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS 1840 static_assert(sizeof(__parser<wchar_t>) == 16); 1841 # endif 1842 1843 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) { 1844 switch (__type) { 1845 case __format_spec::__type::__default: 1846 case __format_spec::__type::__string: 1847 break; 1848 1849 default: 1850 std::__throw_format_error("The format-spec type has a type not supported for a string argument"); 1851 } 1852 } 1853 1854 template <class _CharT> 1855 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser) { 1856 if (__parser.__sign_ != __sign::__default) 1857 std::__throw_format_error("A sign field isn't allowed in this format-spec"); 1858 1859 if (__parser.__alternate_form_) 1860 std::__throw_format_error("An alternate form field isn't allowed in this format-spec"); 1861 1862 if (__parser.__alignment_ == __alignment::__zero_padding) 1863 std::__throw_format_error("A zero-padding field isn't allowed in this format-spec"); 1864 1865 if (__parser.__alignment_ == __alignment::__default) 1866 __parser.__alignment_ = __alignment::__left; 1867 } 1868 1869 template <class _CharT> 1870 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser) { 1871 __format_spec::__process_display_type_bool_string(__parser); 1872 } 1873 1874 template <class _CharT> 1875 _LIBCPP_HIDE_FROM_ABI 
constexpr void __process_display_type_integer(__parser<_CharT>& __parser) { 1876 if (__parser.__alignment_ == __alignment::__default) 1877 __parser.__alignment_ = __alignment::__right; 1878 } 1879 1880 template <class _CharT> 1881 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser) { 1882 switch (__parser.__type_) { 1883 case __format_spec::__type::__default: 1884 __parser.__type_ = __format_spec::__type::__string; 1885 [[fallthrough]]; 1886 case __format_spec::__type::__string: 1887 __format_spec::__process_display_type_bool_string(__parser); 1888 break; 1889 1890 case __format_spec::__type::__binary_lower_case: 1891 case __format_spec::__type::__binary_upper_case: 1892 case __format_spec::__type::__octal: 1893 case __format_spec::__type::__decimal: 1894 case __format_spec::__type::__hexadecimal_lower_case: 1895 case __format_spec::__type::__hexadecimal_upper_case: 1896 __process_display_type_integer(__parser); 1897 break; 1898 1899 default: 1900 std::__throw_format_error("The format-spec type has a type not supported for a bool argument"); 1901 } 1902 } 1903 1904 template <class _CharT> 1905 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser) { 1906 switch (__parser.__type_) { 1907 case __format_spec::__type::__default: 1908 __parser.__type_ = __format_spec::__type::__char; 1909 [[fallthrough]]; 1910 case __format_spec::__type::__char: 1911 __format_spec::__process_display_type_char(__parser); 1912 break; 1913 1914 case __format_spec::__type::__binary_lower_case: 1915 case __format_spec::__type::__binary_upper_case: 1916 case __format_spec::__type::__octal: 1917 case __format_spec::__type::__decimal: 1918 case __format_spec::__type::__hexadecimal_lower_case: 1919 case __format_spec::__type::__hexadecimal_upper_case: 1920 __format_spec::__process_display_type_integer(__parser); 1921 break; 1922 1923 default: 1924 std::__throw_format_error("The format-spec type has a type not supported for a char 
argument"); 1925 } 1926 } 1927 1928 template <class _CharT> 1929 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser) { 1930 switch (__parser.__type_) { 1931 case __format_spec::__type::__default: 1932 __parser.__type_ = __format_spec::__type::__decimal; 1933 [[fallthrough]]; 1934 case __format_spec::__type::__binary_lower_case: 1935 case __format_spec::__type::__binary_upper_case: 1936 case __format_spec::__type::__octal: 1937 case __format_spec::__type::__decimal: 1938 case __format_spec::__type::__hexadecimal_lower_case: 1939 case __format_spec::__type::__hexadecimal_upper_case: 1940 __format_spec::__process_display_type_integer(__parser); 1941 break; 1942 1943 case __format_spec::__type::__char: 1944 __format_spec::__process_display_type_char(__parser); 1945 break; 1946 1947 default: 1948 std::__throw_format_error("The format-spec type has a type not supported for an integer argument"); 1949 } 1950 } 1951 1952 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type) { 1953 switch (__type) { 1954 case __format_spec::__type::__default: 1955 case __format_spec::__type::__pointer: 1956 break; 1957 1958 default: 1959 std::__throw_format_error("The format-spec type has a type not supported for a pointer argument"); 1960 } 1961 } 1962 1963 } // namespace __format_spec 1964 1965 #endif //_LIBCPP_STD_VER > 17 1966 1967 _LIBCPP_END_NAMESPACE_STD 1968 1969 _LIBCPP_POP_MACROS 1970 1971 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 1972