1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
12 
13 /// \file Contains the std-format-spec parser.
14 ///
15 /// Most of the code can be reused in the chrono-format-spec.
16 /// This header has some support for the chrono-format-spec since it doesn't
17 /// affect the std-format-spec.
18 
19 #include <__algorithm/find_if.h>
20 #include <__algorithm/min.h>
21 #include <__assert>
22 #include <__config>
23 #include <__debug>
24 #include <__format/format_arg.h>
25 #include <__format/format_error.h>
26 #include <__format/format_parse_context.h>
27 #include <__format/format_string.h>
28 #include <__variant/monostate.h>
29 #include <bit>
30 #include <concepts>
31 #include <cstdint>
32 #include <type_traits>
33 
34 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
35 #  pragma GCC system_header
36 #endif
37 
38 _LIBCPP_PUSH_MACROS
39 #include <__undef_macros>
40 
41 _LIBCPP_BEGIN_NAMESPACE_STD
42 
43 #if _LIBCPP_STD_VER > 17
44 
45 namespace __format_spec {
46 
47 /**
48  * Contains the flags for the std-format-spec.
49  *
50  * Some format-options can only be used for specific C++ types and may depend on
51  * the selected format-type.
52  * * The C++type filtering can be done using the proper policies for
53  *   @ref __parser_std.
54  * * The format-type filtering needs to be done post parsing in the parser
55  *   derived from @ref __parser_std.
56  */
57 _LIBCPP_PACKED_BYTE_FOR_AIX
58 class _LIBCPP_TYPE_VIS _Flags {
59 public:
60   enum class _LIBCPP_ENUM_VIS _Alignment : uint8_t {
61     /**
62      * No alignment is set in the format string.
63      *
64      * Zero-padding is ignored when an alignment is selected.
65      * The default alignment depends on the selected format-type.
66      */
67     __default,
68     __left,
69     __center,
70     __right
71   };
72   enum class _LIBCPP_ENUM_VIS _Sign : uint8_t {
73     /**
74      * No sign is set in the format string.
75      *
76      * The sign isn't allowed for certain format-types. By using this value
77      * it's possible to detect whether or not the user explicitly set the sign
78      * flag. For formatting purposes it behaves the same as @ref __minus.
79      */
80     __default,
81     __minus,
82     __plus,
83     __space
84   };
85 
86   _Alignment __alignment : 2 {_Alignment::__default};
87   _Sign __sign : 2 {_Sign::__default};
88   uint8_t __alternate_form : 1 {false};
89   uint8_t __zero_padding : 1 {false};
90   uint8_t __locale_specific_form : 1 {false};
91 
92   enum class _LIBCPP_ENUM_VIS _Type : uint8_t {
93     __default,
94     __string,
95     __binary_lower_case,
96     __binary_upper_case,
97     __octal,
98     __decimal,
99     __hexadecimal_lower_case,
100     __hexadecimal_upper_case,
101     __pointer,
102     __char,
103     __float_hexadecimal_lower_case,
104     __float_hexadecimal_upper_case,
105     __scientific_lower_case,
106     __scientific_upper_case,
107     __fixed_lower_case,
108     __fixed_upper_case,
109     __general_lower_case,
110     __general_upper_case
111   };
112 
113   _Type __type{_Type::__default};
114 };
115 _LIBCPP_PACKED_BYTE_FOR_AIX_END
116 
117 namespace __detail {
118 template <class _CharT>
119 _LIBCPP_HIDE_FROM_ABI constexpr bool
120 __parse_alignment(_CharT __c, _Flags& __flags) noexcept {
121   switch (__c) {
122   case _CharT('<'):
123     __flags.__alignment = _Flags::_Alignment::__left;
124     return true;
125 
126   case _CharT('^'):
127     __flags.__alignment = _Flags::_Alignment::__center;
128     return true;
129 
130   case _CharT('>'):
131     __flags.__alignment = _Flags::_Alignment::__right;
132     return true;
133   }
134   return false;
135 }
136 } // namespace __detail
137 
138 template <class _CharT>
139 class _LIBCPP_TEMPLATE_VIS __parser_fill_align {
140 public:
141   // TODO FMT The standard doesn't specify this character is a Unicode
142   // character. Validate what fmt and MSVC have implemented.
143   _CharT __fill{_CharT(' ')};
144 
145 protected:
146   _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
147   __parse(const _CharT* __begin, const _CharT* __end, _Flags& __flags) {
148     _LIBCPP_ASSERT(__begin != __end,
149                    "When called with an empty input the function will cause "
150                    "undefined behavior by evaluating data not in the input");
151     if (__begin + 1 != __end) {
152       if (__detail::__parse_alignment(*(__begin + 1), __flags)) {
153         if (*__begin == _CharT('{') || *__begin == _CharT('}'))
154           __throw_format_error(
155               "The format-spec fill field contains an invalid character");
156         __fill = *__begin;
157         return __begin + 2;
158       }
159     }
160 
161     if (__detail::__parse_alignment(*__begin, __flags))
162       return __begin + 1;
163 
164     return __begin;
165   }
166 };
167 
168 template <class _CharT>
169 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
170 __parse_sign(const _CharT* __begin, _Flags& __flags) noexcept {
171   switch (*__begin) {
172   case _CharT('-'):
173     __flags.__sign = _Flags::_Sign::__minus;
174     break;
175   case _CharT('+'):
176     __flags.__sign = _Flags::_Sign::__plus;
177     break;
178   case _CharT(' '):
179     __flags.__sign = _Flags::_Sign::__space;
180     break;
181   default:
182     return __begin;
183   }
184   return __begin + 1;
185 }
186 
187 template <class _CharT>
188 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
189 __parse_alternate_form(const _CharT* __begin, _Flags& __flags) noexcept {
190   if (*__begin == _CharT('#')) {
191     __flags.__alternate_form = true;
192     ++__begin;
193   }
194 
195   return __begin;
196 }
197 
198 template <class _CharT>
199 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
200 __parse_zero_padding(const _CharT* __begin, _Flags& __flags) noexcept {
201   if (*__begin == _CharT('0')) {
202     __flags.__zero_padding = true;
203     ++__begin;
204   }
205 
206   return __begin;
207 }
208 
209 template <class _CharT>
210 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result< _CharT>
211 __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
212   // This function is a wrapper to call the real parser. But it does the
213   // validation for the pre-conditions and post-conditions.
214   if (__begin == __end)
215     __throw_format_error("End of input while parsing format-spec arg-id");
216 
217   __format::__parse_number_result __r =
218       __format::__parse_arg_id(__begin, __end, __parse_ctx);
219 
220   if (__r.__ptr == __end || *__r.__ptr != _CharT('}'))
221     __throw_format_error("Invalid arg-id");
222 
223   ++__r.__ptr;
224   return __r;
225 }
226 
227 template <class _Context>
228 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t
229 __substitute_arg_id(basic_format_arg<_Context> _Arg) {
230   return visit_format_arg(
231       [](auto __arg) -> uint32_t {
232         using _Type = decltype(__arg);
233         if constexpr (integral<_Type>) {
234           if constexpr (signed_integral<_Type>) {
235             if (__arg < 0)
236               __throw_format_error("A format-spec arg-id replacement shouldn't "
237                                    "have a negative value");
238           }
239 
240           using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
241           if (static_cast<_CT>(__arg) >
242               static_cast<_CT>(__format::__number_max))
243             __throw_format_error("A format-spec arg-id replacement exceeds "
244                                  "the maximum supported value");
245 
246           return __arg;
247         } else if constexpr (same_as<_Type, monostate>)
248           __throw_format_error("Argument index out of bounds");
249         else
250           __throw_format_error("A format-spec arg-id replacement argument "
251                                "isn't an integral type");
252       },
253       _Arg);
254 }
255 
256 class _LIBCPP_TYPE_VIS __parser_width {
257 public:
258   /** Contains a width or an arg-id. */
259   uint32_t __width : 31 {0};
260   /** Determines whether the value stored is a width or an arg-id. */
261   uint32_t __width_as_arg : 1 {0};
262 
263   /**
264    * Does the supplied width field contain an arg-id?
265    *
266    * If @c true the formatter needs to call @ref __substitute_width_arg_id.
267    */
268   constexpr bool __width_needs_substitution() const noexcept { return __width_as_arg; }
269 
270 protected:
271   /**
272    * Does the supplied std-format-spec contain a width field?
273    *
274    * When the field isn't present there's no padding required. This can be used
275    * to optimize the formatting.
276    */
277   constexpr bool __has_width_field() const noexcept { return __width_as_arg || __width; }
278 
279   template <class _CharT>
280   _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
281   __parse(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
282     if (*__begin == _CharT('0'))
283       __throw_format_error(
284           "A format-spec width field shouldn't have a leading zero");
285 
286     if (*__begin == _CharT('{')) {
287       __format::__parse_number_result __r =
288           __parse_arg_id(++__begin, __end, __parse_ctx);
289       __width = __r.__value;
290       __width_as_arg = 1;
291       return __r.__ptr;
292     }
293 
294     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
295       return __begin;
296 
297     __format::__parse_number_result __r =
298         __format::__parse_number(__begin, __end);
299     __width = __r.__value;
300     _LIBCPP_ASSERT(__width != 0,
301                    "A zero value isn't allowed and should be impossible, "
302                    "due to validations in this function");
303     return __r.__ptr;
304   }
305 
306   _LIBCPP_HIDE_FROM_ABI constexpr void __substitute_width_arg_id(auto __arg) {
307     _LIBCPP_ASSERT(__width_as_arg == 1,
308                    "Substitute width called when no substitution is required");
309 
310     // The clearing of the flag isn't required but looks better when debugging
311     // the code.
312     __width_as_arg = 0;
313     __width = __substitute_arg_id(__arg);
314     if (__width == 0)
315       __throw_format_error(
316           "A format-spec width field replacement should have a positive value");
317   }
318 };
319 
320 class _LIBCPP_TYPE_VIS __parser_precision {
321 public:
322   /** Contains a precision or an arg-id. */
323   uint32_t __precision : 31 {__format::__number_max};
324   /**
325    * Determines whether the value stored is a precision or an arg-id.
326    *
327    * @note Since @ref __precision == @ref __format::__number_max is a valid
328    * value, the default value contains an arg-id of INT32_MAX. (This number of
329    * arguments isn't supported by compilers.)  This is used to detect whether
330    * the std-format-spec contains a precision field.
331    */
332   uint32_t __precision_as_arg : 1 {1};
333 
334   /**
335    * Does the supplied precision field contain an arg-id?
336    *
337    * If @c true the formatter needs to call @ref __substitute_precision_arg_id.
338    */
339   constexpr bool __precision_needs_substitution() const noexcept {
340     return __precision_as_arg && __precision != __format::__number_max;
341   }
342 
343 protected:
344   /**
345    * Does the supplied std-format-spec contain a precision field?
346    *
347    * When the field isn't present there's no truncating required. This can be
348    * used to optimize the formatting.
349    */
350   constexpr bool __has_precision_field() const noexcept {
351 
352     return __precision_as_arg == 0 ||             // Contains a value?
353            __precision != __format::__number_max; // The arg-id is valid?
354   }
355 
356   template <class _CharT>
357   _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
358   __parse(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
359     if (*__begin != _CharT('.'))
360       return __begin;
361 
362     ++__begin;
363     if (__begin == __end)
364       __throw_format_error("End of input while parsing format-spec precision");
365 
366     if (*__begin == _CharT('{')) {
367       __format::__parse_number_result __arg_id =
368           __parse_arg_id(++__begin, __end, __parse_ctx);
369       _LIBCPP_ASSERT(__arg_id.__value != __format::__number_max,
370                      "Unsupported number of arguments, since this number of "
371                      "arguments is used a special value");
372       __precision = __arg_id.__value;
373       return __arg_id.__ptr;
374     }
375 
376     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
377       __throw_format_error(
378           "The format-spec precision field doesn't contain a value or arg-id");
379 
380     __format::__parse_number_result __r =
381         __format::__parse_number(__begin, __end);
382     __precision = __r.__value;
383     __precision_as_arg = 0;
384     return __r.__ptr;
385   }
386 
387   _LIBCPP_HIDE_FROM_ABI constexpr void __substitute_precision_arg_id(
388       auto __arg) {
389     _LIBCPP_ASSERT(
390         __precision_as_arg == 1 && __precision != __format::__number_max,
391         "Substitute precision called when no substitution is required");
392 
393     // The clearing of the flag isn't required but looks better when debugging
394     // the code.
395     __precision_as_arg = 0;
396     __precision = __substitute_arg_id(__arg);
397   }
398 };
399 
400 template <class _CharT>
401 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
402 __parse_locale_specific_form(const _CharT* __begin, _Flags& __flags) noexcept {
403   if (*__begin == _CharT('L')) {
404     __flags.__locale_specific_form = true;
405     ++__begin;
406   }
407 
408   return __begin;
409 }
410 
411 template <class _CharT>
412 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
413 __parse_type(const _CharT* __begin, _Flags& __flags) {
414 
415   // Determines the type. It does not validate whether the selected type is
416   // valid. Most formatters have optional fields that are only allowed for
417   // certain types. These parsers need to do validation after the type has
418   // been parsed. So its easier to implement the validation for all types in
419   // the specific parse function.
420   switch (*__begin) {
421   case 'A':
422     __flags.__type = _Flags::_Type::__float_hexadecimal_upper_case;
423     break;
424   case 'B':
425     __flags.__type = _Flags::_Type::__binary_upper_case;
426     break;
427   case 'E':
428     __flags.__type = _Flags::_Type::__scientific_upper_case;
429     break;
430   case 'F':
431     __flags.__type = _Flags::_Type::__fixed_upper_case;
432     break;
433   case 'G':
434     __flags.__type = _Flags::_Type::__general_upper_case;
435     break;
436   case 'X':
437     __flags.__type = _Flags::_Type::__hexadecimal_upper_case;
438     break;
439   case 'a':
440     __flags.__type = _Flags::_Type::__float_hexadecimal_lower_case;
441     break;
442   case 'b':
443     __flags.__type = _Flags::_Type::__binary_lower_case;
444     break;
445   case 'c':
446     __flags.__type = _Flags::_Type::__char;
447     break;
448   case 'd':
449     __flags.__type = _Flags::_Type::__decimal;
450     break;
451   case 'e':
452     __flags.__type = _Flags::_Type::__scientific_lower_case;
453     break;
454   case 'f':
455     __flags.__type = _Flags::_Type::__fixed_lower_case;
456     break;
457   case 'g':
458     __flags.__type = _Flags::_Type::__general_lower_case;
459     break;
460   case 'o':
461     __flags.__type = _Flags::_Type::__octal;
462     break;
463   case 'p':
464     __flags.__type = _Flags::_Type::__pointer;
465     break;
466   case 's':
467     __flags.__type = _Flags::_Type::__string;
468     break;
469   case 'x':
470     __flags.__type = _Flags::_Type::__hexadecimal_lower_case;
471     break;
472   default:
473     return __begin;
474   }
475   return ++__begin;
476 }
477 
478 /**
479  * Process the parsed alignment and zero-padding state of arithmetic types.
480  *
481  * [format.string.std]/13
482  *   If the 0 character and an align option both appear, the 0 character is
483  *   ignored.
484  *
485  * For the formatter a @ref __default alignment means zero-padding.
486  */
487 _LIBCPP_HIDE_FROM_ABI constexpr void __process_arithmetic_alignment(_Flags& __flags) {
488   __flags.__zero_padding &= __flags.__alignment == _Flags::_Alignment::__default;
489   if (!__flags.__zero_padding && __flags.__alignment == _Flags::_Alignment::__default)
490     __flags.__alignment = _Flags::_Alignment::__right;
491 }
492 
493 /**
494  * The parser for the std-format-spec.
495  *
496  * [format.string.std]/1 specifies the std-format-spec:
497  *   fill-and-align sign # 0 width precision L type
498  *
499  * All these fields are optional. Whether these fields can be used depend on:
500  * - The type supplied to the format string.
501  *   E.g. A string never uses the sign field so the field may not be set.
502  *   This constrain is validated by the parsers in this file.
503  * - The supplied value for the optional type field.
504  *   E.g. A int formatted as decimal uses the sign field.
505  *   When formatted as a char the sign field may no longer be set.
506  *   This constrain isn't validated by the parsers in this file.
507  *
508  * The base classes are ordered to minimize the amount of padding.
509  *
510  * This implements the parser for the string types.
511  */
512 template <class _CharT>
513 class _LIBCPP_TEMPLATE_VIS __parser_string
514     : public __parser_width,              // provides __width(|as_arg)
515       public __parser_precision,          // provides __precision(|as_arg)
516       public __parser_fill_align<_CharT>, // provides __fill and uses __flags
517       public _Flags                       // provides __flags
518 {
519 public:
520   using char_type = _CharT;
521 
522   _LIBCPP_HIDE_FROM_ABI constexpr __parser_string() {
523     this->__alignment = _Flags::_Alignment::__left;
524   }
525 
526   /**
527    * The low-level std-format-spec parse function.
528    *
529    * @pre __begin points at the beginning of the std-format-spec. This means
530    * directly after the ':'.
531    * @pre The std-format-spec parses the entire input, or the first unmatched
532    * character is a '}'.
533    *
534    * @returns The iterator pointing at the last parsed character.
535    */
536   _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx)
537       -> decltype(__parse_ctx.begin()) {
538     auto __it = __parse(__parse_ctx);
539     __process_display_type();
540     return __it;
541   }
542 
543 private:
544   /**
545    * Parses the std-format-spec.
546    *
547    * @throws __throw_format_error When @a __parse_ctx contains an ill-formed
548    *                               std-format-spec.
549    *
550    * @returns An iterator to the end of input or point at the closing '}'.
551    */
552   _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx)
553       -> decltype(__parse_ctx.begin()) {
554 
555     auto __begin = __parse_ctx.begin();
556     auto __end = __parse_ctx.end();
557     if (__begin == __end)
558       return __begin;
559 
560     __begin = __parser_fill_align<_CharT>::__parse(__begin, __end,
561                                                    static_cast<_Flags&>(*this));
562     if (__begin == __end)
563       return __begin;
564 
565     __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
566     if (__begin == __end)
567       return __begin;
568 
569     __begin = __parser_precision::__parse(__begin, __end, __parse_ctx);
570     if (__begin == __end)
571       return __begin;
572 
573     __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
574 
575     if (__begin != __end && *__begin != _CharT('}'))
576       __throw_format_error(
577           "The format-spec should consume the input or end with a '}'");
578 
579     return __begin;
580   }
581 
582   /** Processes the parsed std-format-spec based on the parsed display type. */
583   _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() {
584     switch (this->__type) {
585     case _Flags::_Type::__default:
586     case _Flags::_Type::__string:
587       break;
588 
589     default:
590       __throw_format_error("The format-spec type has a type not supported for "
591                            "a string argument");
592     }
593   }
594 };
595 
596 /**
597  * The parser for the std-format-spec.
598  *
599  * This implements the parser for the integral types. This includes the
600  * character type and boolean type.
601  *
602  * See @ref __parser_string.
603  */
604 template <class _CharT>
605 class _LIBCPP_TEMPLATE_VIS __parser_integral
606     : public __parser_width,              // provides __width(|as_arg)
607       public __parser_fill_align<_CharT>, // provides __fill and uses __flags
608       public _Flags                       // provides __flags
609 {
610 public:
611   using char_type = _CharT;
612 
613 protected:
614   /**
615    * The low-level std-format-spec parse function.
616    *
617    * @pre __begin points at the beginning of the std-format-spec. This means
618    * directly after the ':'.
619    * @pre The std-format-spec parses the entire input, or the first unmatched
620    * character is a '}'.
621    *
622    * @returns The iterator pointing at the last parsed character.
623    */
624   _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx)
625       -> decltype(__parse_ctx.begin()) {
626     auto __begin = __parse_ctx.begin();
627     auto __end = __parse_ctx.end();
628     if (__begin == __end)
629       return __begin;
630 
631     __begin = __parser_fill_align<_CharT>::__parse(__begin, __end,
632                                                    static_cast<_Flags&>(*this));
633     if (__begin == __end)
634       return __begin;
635 
636     __begin = __parse_sign(__begin, static_cast<_Flags&>(*this));
637     if (__begin == __end)
638       return __begin;
639 
640     __begin = __parse_alternate_form(__begin, static_cast<_Flags&>(*this));
641     if (__begin == __end)
642       return __begin;
643 
644     __begin = __parse_zero_padding(__begin, static_cast<_Flags&>(*this));
645     if (__begin == __end)
646       return __begin;
647 
648     __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
649     if (__begin == __end)
650       return __begin;
651 
652     __begin =
653         __parse_locale_specific_form(__begin, static_cast<_Flags&>(*this));
654     if (__begin == __end)
655       return __begin;
656 
657     __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
658 
659     if (__begin != __end && *__begin != _CharT('}'))
660       __throw_format_error(
661           "The format-spec should consume the input or end with a '}'");
662 
663     return __begin;
664   }
665 
666   /** Handles the post-parsing updates for the integer types. */
667   _LIBCPP_HIDE_FROM_ABI constexpr void __handle_integer() noexcept {
668     __process_arithmetic_alignment(static_cast<_Flags&>(*this));
669   }
670 
671   /**
672    * Handles the post-parsing updates for the character types.
673    *
674    * Sets the alignment and validates the format flags set for a character type.
675    *
676    * At the moment the validation for a character and a Boolean behave the
677    * same, but this may change in the future.
678    * Specifically at the moment the locale-specific form is allowed for the
679    * char output type, but it has no effect on the output.
680    */
681   _LIBCPP_HIDE_FROM_ABI constexpr void __handle_char() { __handle_bool(); }
682 
683   /**
684    * Handles the post-parsing updates for the Boolean types.
685    *
686    * Sets the alignment and validates the format flags set for a Boolean type.
687    */
688   _LIBCPP_HIDE_FROM_ABI constexpr void __handle_bool() {
689     if (this->__sign != _Flags::_Sign::__default)
690       __throw_format_error("A sign field isn't allowed in this format-spec");
691 
692     if (this->__alternate_form)
693       __throw_format_error(
694           "An alternate form field isn't allowed in this format-spec");
695 
696     if (this->__zero_padding)
697       __throw_format_error(
698           "A zero-padding field isn't allowed in this format-spec");
699 
700     if (this->__alignment == _Flags::_Alignment::__default)
701       this->__alignment = _Flags::_Alignment::__left;
702   }
703 };
704 
705 /**
706  * The parser for the std-format-spec.
707  *
708  * This implements the parser for the floating-point types.
709  *
710  * See @ref __parser_string.
711  */
712 template <class _CharT>
713 class _LIBCPP_TEMPLATE_VIS __parser_floating_point
714     : public __parser_width,              // provides __width(|as_arg)
715       public __parser_precision,          // provides __precision(|as_arg)
716       public __parser_fill_align<_CharT>, // provides __fill and uses __flags
717       public _Flags                       // provides __flags
718 {
719 public:
720   using char_type = _CharT;
721 
722   /**
723    * The low-level std-format-spec parse function.
724    *
725    * @pre __begin points at the beginning of the std-format-spec. This means
726    * directly after the ':'.
727    * @pre The std-format-spec parses the entire input, or the first unmatched
728    * character is a '}'.
729    *
730    * @returns The iterator pointing at the last parsed character.
731    */
732   _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx)
733       -> decltype(__parse_ctx.begin()) {
734     auto __it = __parse(__parse_ctx);
735     __process_arithmetic_alignment(static_cast<_Flags&>(*this));
736     __process_display_type();
737     return __it;
738   }
739 protected:
740   /**
741    * The low-level std-format-spec parse function.
742    *
743    * @pre __begin points at the beginning of the std-format-spec. This means
744    * directly after the ':'.
745    * @pre The std-format-spec parses the entire input, or the first unmatched
746    * character is a '}'.
747    *
748    * @returns The iterator pointing at the last parsed character.
749    */
750   _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx)
751       -> decltype(__parse_ctx.begin()) {
752     auto __begin = __parse_ctx.begin();
753     auto __end = __parse_ctx.end();
754     if (__begin == __end)
755       return __begin;
756 
757     __begin = __parser_fill_align<_CharT>::__parse(__begin, __end,
758                                                    static_cast<_Flags&>(*this));
759     if (__begin == __end)
760       return __begin;
761 
762     __begin = __parse_sign(__begin, static_cast<_Flags&>(*this));
763     if (__begin == __end)
764       return __begin;
765 
766     __begin = __parse_alternate_form(__begin, static_cast<_Flags&>(*this));
767     if (__begin == __end)
768       return __begin;
769 
770     __begin = __parse_zero_padding(__begin, static_cast<_Flags&>(*this));
771     if (__begin == __end)
772       return __begin;
773 
774     __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
775     if (__begin == __end)
776       return __begin;
777 
778     __begin = __parser_precision::__parse(__begin, __end, __parse_ctx);
779     if (__begin == __end)
780       return __begin;
781 
782     __begin =
783         __parse_locale_specific_form(__begin, static_cast<_Flags&>(*this));
784     if (__begin == __end)
785       return __begin;
786 
787     __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
788 
789     if (__begin != __end && *__begin != _CharT('}'))
790       __throw_format_error(
791           "The format-spec should consume the input or end with a '}'");
792 
793     return __begin;
794   }
795 
796   /** Processes the parsed std-format-spec based on the parsed display type. */
797   _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() {
798     switch (this->__type) {
799     case _Flags::_Type::__default:
800       // When no precision specified then it keeps default since that
801       // formatting differs from the other types.
802       if (this->__has_precision_field())
803         this->__type = _Flags::_Type::__general_lower_case;
804       break;
805     case _Flags::_Type::__float_hexadecimal_lower_case:
806     case _Flags::_Type::__float_hexadecimal_upper_case:
807       // Precision specific behavior will be handled later.
808       break;
809     case _Flags::_Type::__scientific_lower_case:
810     case _Flags::_Type::__scientific_upper_case:
811     case _Flags::_Type::__fixed_lower_case:
812     case _Flags::_Type::__fixed_upper_case:
813     case _Flags::_Type::__general_lower_case:
814     case _Flags::_Type::__general_upper_case:
815       if (!this->__has_precision_field()) {
816         // Set the default precision for the call to to_chars.
817         this->__precision = 6;
818         this->__precision_as_arg = false;
819       }
820       break;
821 
822     default:
823       __throw_format_error("The format-spec type has a type not supported for "
824                            "a floating-point argument");
825     }
826   }
827 };
828 
829 /**
830  * The parser for the std-format-spec.
831  *
832  * This implements the parser for the pointer types.
833  *
834  * See @ref __parser_string.
835  */
836 template <class _CharT>
837 class _LIBCPP_TEMPLATE_VIS __parser_pointer : public __parser_width,              // provides __width(|as_arg)
838                                               public __parser_fill_align<_CharT>, // provides __fill and uses __flags
839                                               public _Flags                       // provides __flags
840 {
841 public:
842   using char_type = _CharT;
843 
844   _LIBCPP_HIDE_FROM_ABI constexpr __parser_pointer() {
845     // Implements LWG3612 Inconsistent pointer alignment in std::format.
846     // The issue's current status is "Tentatively Ready" and libc++ status is
847     // still experimental.
848     //
849     // TODO FMT Validate this with the final resolution of LWG3612.
850     this->__alignment = _Flags::_Alignment::__right;
851   }
852 
853   /**
854    * The low-level std-format-spec parse function.
855    *
856    * @pre __begin points at the beginning of the std-format-spec. This means
857    * directly after the ':'.
858    * @pre The std-format-spec parses the entire input, or the first unmatched
859    * character is a '}'.
860    *
861    * @returns The iterator pointing at the last parsed character.
862    */
863   _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx) -> decltype(__parse_ctx.begin()) {
864     auto __it = __parse(__parse_ctx);
865     __process_display_type();
866     return __it;
867   }
868 
869 protected:
870   /**
871    * The low-level std-format-spec parse function.
872    *
873    * @pre __begin points at the beginning of the std-format-spec. This means
874    * directly after the ':'.
875    * @pre The std-format-spec parses the entire input, or the first unmatched
876    * character is a '}'.
877    *
878    * @returns The iterator pointing at the last parsed character.
879    */
880   _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx) -> decltype(__parse_ctx.begin()) {
881     auto __begin = __parse_ctx.begin();
882     auto __end = __parse_ctx.end();
883     if (__begin == __end)
884       return __begin;
885 
886     __begin = __parser_fill_align<_CharT>::__parse(__begin, __end, static_cast<_Flags&>(*this));
887     if (__begin == __end)
888       return __begin;
889 
890     // An integer presentation type isn't defined in the Standard.
891     // Since a pointer is formatted as an integer it can be argued it's an
892     // integer presentation type. However there are two LWG-issues asserting it
893     // isn't an integer presentation type:
894     // - LWG3612 Inconsistent pointer alignment in std::format
895     // - LWG3644 std::format does not define "integer presentation type"
896     //
897     // There's a paper to make additional clarifications on the status of
898     // formatting pointers and proposes additional fields to be valid. That
899     // paper hasn't been reviewed by the Committee yet.
900     // - P2510 Formatting pointers
901     //
902     // The current implementation assumes formatting pointers isn't covered by
903     // "integer presentation type".
904     // TODO FMT Apply the LWG-issues/papers after approval/rejection by the Committee.
905 
906     __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
907     if (__begin == __end)
908       return __begin;
909 
910     __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
911 
912     if (__begin != __end && *__begin != _CharT('}'))
913       __throw_format_error("The format-spec should consume the input or end with a '}'");
914 
915     return __begin;
916   }
917 
918   /** Processes the parsed std-format-spec based on the parsed display type. */
919   _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() {
920     switch (this->__type) {
921     case _Flags::_Type::__default:
922       this->__type = _Flags::_Type::__pointer;
923       break;
924     case _Flags::_Type::__pointer:
925       break;
926     default:
927       __throw_format_error("The format-spec type has a type not supported for a pointer argument");
928     }
929   }
930 };
931 
/**
 * Helper struct returned from @ref __get_string_alignment.
 *
 * It describes which part of the input string should be written and whether
 * that output needs padding.
 */
template <class _CharT>
struct _LIBCPP_TEMPLATE_VIS __string_alignment {
  /** Points beyond the last character to write to the output. */
  const _CharT* __last;
  /**
   * The estimated number of columns in the output or 0.
   *
   * Only when the output needs to be aligned it's required to know the exact
   * number of columns in the output. So if the formatted output has only a
   * minimum width the exact size isn't important. It's only important to know
   * the minimum has been reached. The minimum width is the width specified in
   * the format-spec.
   *
   * For example in this code @code std::format("{:10}", MyString); @endcode
   * the width estimation can stop once the algorithm has determined the output
   * width is 10 columns.
   *
   * So if:
   * * @ref __align is @c true, @ref __size is the estimated number of columns
   *   required.
   * * @ref __align is @c false, @ref __size is the estimated number of columns
   *   required, or 0 when the estimation algorithm stopped prematurely.
   */
  ptrdiff_t __size;
  /**
   * Does the output need to be aligned.
   *
   * When alignment is needed the output algorithm needs to add the proper
   * padding. Else the output algorithm just needs to copy the input up to
   * @ref __last.
   */
  bool __align;
};
966 
967 #ifndef _LIBCPP_HAS_NO_UNICODE
968 namespace __detail {
969 
970 /**
971  * Unicode column width estimates.
972  *
973  * Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
974  * Depending on format the relation between the number of code units stored and
975  * the number of output columns differs. The first relation is the number of
976  * code units forming a code point. (The text assumes the code units are
977  * unsigned.)
 * - UTF-8: The number of code units is between one and four. The first 128
 *   Unicode code points match the ASCII character set. When the highest bit is
 *   set it means the code point has more than one code unit.
 * - UTF-16: The number of code units is between one and two. When the first
 *   code unit is in the range [0xd800,0xdfff] it means the code point uses two
 *   code units.
984  * - UTF-32: The number of code units is always one.
985  *
986  * The code point to the number of columns isn't well defined. The code uses the
987  * estimations defined in [format.string.std]/11. This list might change in the
988  * future.
989  *
990  * The algorithm of @ref __get_string_alignment uses two different scanners:
991  * - The simple scanner @ref __estimate_column_width_fast. This scanner assumes
992  *   1 code unit is 1 column. This scanner stops when it can't be sure the
993  *   assumption is valid:
994  *   - UTF-8 when the code point is encoded in more than 1 code unit.
995  *   - UTF-16 and UTF-32 when the first multi-column code point is encountered.
996  *     (The code unit's value is lower than 0xd800 so the 2 code unit encoding
997  *     is irrelevant for this scanner.)
998  *   Due to these assumptions the scanner is faster than the full scanner. It
999  *   can process all text only containing ASCII. For UTF-16/32 it can process
1000  *   most (all?) European languages. (Note the set it can process might be
1001  *   reduced in the future, due to updates in the scanning rules.)
1002  * - The full scanner @ref __estimate_column_width. This scanner, if needed,
1003  *   converts multiple code units into one code point then converts the code
1004  *   point to a column width.
1005  *
1006  * See also:
 * - [format.string.std]/11
1008  * - https://en.wikipedia.org/wiki/UTF-8#Encoding
1009  * - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
1010  */
1011 
1012 /**
1013  * The first 2 column code point.
1014  *
1015  * This is the point where the fast UTF-16/32 scanner needs to stop processing.
1016  */
1017 inline constexpr uint32_t __two_column_code_point = 0x1100;
1018 
/**
 * Helper concept for an UTF-8 character type.
 *
 * Satisfied by @c char and @c char8_t.
 */
template <class _CharT>
concept __utf8_character = same_as<_CharT, char> || same_as<_CharT, char8_t>;

/**
 * Helper concept for an UTF-16 character type.
 *
 * Satisfied by @c char16_t, and by @c wchar_t when it is 2 bytes wide.
 */
template <class _CharT>
concept __utf16_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) || same_as<_CharT, char16_t>;

/**
 * Helper concept for an UTF-32 character type.
 *
 * Satisfied by @c char32_t, and by @c wchar_t when it is 4 bytes wide.
 */
template <class _CharT>
concept __utf32_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) || same_as<_CharT, char32_t>;

/** Helper concept for an UTF-16 or UTF-32 character type. */
template <class _CharT>
concept __utf16_or_32_character = __utf16_character<_CharT> || __utf32_character<_CharT>;
1034 
1035 /**
1036  * Converts a code point to the column width.
1037  *
1038  * The estimations are conforming to [format.string.general]/11
1039  *
1040  * This version expects a value less than 0x1'0000, which is a 3-byte UTF-8
1041  * character.
1042  */
1043 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_3(uint32_t __c) noexcept {
1044   _LIBCPP_ASSERT(__c < 0x10000,
1045                  "Use __column_width_4 or __column_width for larger values");
1046 
1047   // clang-format off
1048   return 1 + (__c >= 0x1100 && (__c <= 0x115f ||
1049              (__c >= 0x2329 && (__c <= 0x232a ||
1050              (__c >= 0x2e80 && (__c <= 0x303e ||
1051              (__c >= 0x3040 && (__c <= 0xa4cf ||
1052              (__c >= 0xac00 && (__c <= 0xd7a3 ||
1053              (__c >= 0xf900 && (__c <= 0xfaff ||
1054              (__c >= 0xfe10 && (__c <= 0xfe19 ||
1055              (__c >= 0xfe30 && (__c <= 0xfe6f ||
1056              (__c >= 0xff00 && (__c <= 0xff60 ||
1057              (__c >= 0xffe0 && (__c <= 0xffe6
1058              ))))))))))))))))))));
1059   // clang-format on
1060 }
1061 
1062 /**
1063  * @overload
1064  *
1065  * This version expects a value greater than or equal to 0x1'0000, which is a
1066  * 4-byte UTF-8 character.
1067  */
1068 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_4(uint32_t __c) noexcept {
1069   _LIBCPP_ASSERT(__c >= 0x10000,
1070                  "Use __column_width_3 or __column_width for smaller values");
1071 
1072   // clang-format off
1073   return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f ||
1074              (__c >= 0x1'f900 && (__c <= 0x1'f9ff ||
1075              (__c >= 0x2'0000 && (__c <= 0x2'fffd ||
1076              (__c >= 0x3'0000 && (__c <= 0x3'fffd
1077              ))))))));
1078   // clang-format on
1079 }
1080 
1081 /**
1082  * @overload
1083  *
1084  * The general case, accepting all values.
1085  */
1086 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width(uint32_t __c) noexcept {
1087   if (__c < 0x10000)
1088     return __column_width_3(__c);
1089 
1090   return __column_width_4(__c);
1091 }
1092 
1093 /**
1094  * Estimate the column width for the UTF-8 sequence using the fast algorithm.
1095  */
1096 template <__utf8_character _CharT>
1097 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
1098 __estimate_column_width_fast(const _CharT* __first,
1099                              const _CharT* __last) noexcept {
1100   return _VSTD::find_if(__first, __last,
1101                         [](unsigned char __c) { return __c & 0x80; });
1102 }
1103 
1104 /**
1105  * @overload
1106  *
1107  * The implementation for UTF-16/32.
1108  */
1109 template <__utf16_or_32_character _CharT>
1110 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
1111 __estimate_column_width_fast(const _CharT* __first,
1112                              const _CharT* __last) noexcept {
1113   return _VSTD::find_if(__first, __last,
1114                         [](uint32_t __c) { return __c >= 0x1100; });
1115 }
1116 
/** The result of the full scanner @ref __estimate_column_width. */
template <class _CharT>
struct _LIBCPP_TEMPLATE_VIS __column_width_result {
  /** The number of output columns. */
  size_t __width;
  /**
   * Points to the position where the scanner stopped.
   *
   * This limits the original output to fit in the wanted number of columns.
   */
  const _CharT* __ptr;
};
1128 
1129 /**
1130  * Small helper to determine the width of malformed Unicode.
1131  *
1132  * @note This function's only needed for UTF-8. During scanning UTF-8 there
1133  * are multiple place where it can be detected that the Unicode is malformed.
1134  * UTF-16 only requires 1 test and UTF-32 requires no testing.
1135  */
1136 template <__utf8_character _CharT>
1137 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
1138 __estimate_column_width_malformed(const _CharT* __first, const _CharT* __last,
1139                                   size_t __maximum, size_t __result) noexcept {
1140   size_t __size = __last - __first;
1141   size_t __n = _VSTD::min(__size, __maximum);
1142   return {__result + __n, __first + __n};
1143 }
1144 
1145 /**
1146  * Determines the number of output columns needed to render the input.
1147  *
1148  * @note When the scanner encounters malformed Unicode it acts as-if every code
1149  * unit at the end of the input is one output column. It's expected the output
1150  * terminal will replace these malformed code units with a one column
1151  * replacement characters.
1152  *
1153  * @param __first   Points to the first element of the input range.
1154  * @param __last    Points beyond the last element of the input range.
1155  * @param __maximum The maximum number of output columns. The returned number
1156  *                  of estimated output columns will not exceed this value.
1157  */
1158 template <__utf8_character _CharT>
1159 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
1160 __estimate_column_width(const _CharT* __first, const _CharT* __last,
1161                         size_t __maximum) noexcept {
1162   size_t __result = 0;
1163 
1164   while (__first != __last) {
1165     // Based on the number of leading 1 bits the number of code units in the
1166     // code point can be determined. See
1167     // https://en.wikipedia.org/wiki/UTF-8#Encoding
1168     switch (_VSTD::countl_one(static_cast<unsigned char>(*__first))) {
1169     case 0: // 1-code unit encoding: all 1 column
1170       ++__result;
1171       ++__first;
1172       break;
1173 
1174     case 2: // 2-code unit encoding: all 1 column
1175       // Malformed Unicode.
1176       if (__last - __first < 2) [[unlikely]]
1177         return __estimate_column_width_malformed(__first, __last, __maximum,
1178                                                  __result);
1179       __first += 2;
1180       ++__result;
1181       break;
1182 
1183     case 3: // 3-code unit encoding: either 1 or 2 columns
1184       // Malformed Unicode.
1185       if (__last - __first < 3) [[unlikely]]
1186         return __estimate_column_width_malformed(__first, __last, __maximum,
1187                                                  __result);
1188       {
1189         uint32_t __c = static_cast<unsigned char>(*__first++) & 0x0f;
1190         __c <<= 6;
1191         __c |= static_cast<unsigned char>(*__first++) & 0x3f;
1192         __c <<= 6;
1193         __c |= static_cast<unsigned char>(*__first++) & 0x3f;
1194         __result += __column_width_3(__c);
1195         if (__result > __maximum)
1196           return {__result - 2, __first - 3};
1197       }
1198       break;
1199     case 4: // 4-code unit encoding: either 1 or 2 columns
1200       // Malformed Unicode.
1201       if (__last - __first < 4) [[unlikely]]
1202         return __estimate_column_width_malformed(__first, __last, __maximum,
1203                                                  __result);
1204       {
1205         uint32_t __c = static_cast<unsigned char>(*__first++) & 0x07;
1206         __c <<= 6;
1207         __c |= static_cast<unsigned char>(*__first++) & 0x3f;
1208         __c <<= 6;
1209         __c |= static_cast<unsigned char>(*__first++) & 0x3f;
1210         __c <<= 6;
1211         __c |= static_cast<unsigned char>(*__first++) & 0x3f;
1212         __result += __column_width_4(__c);
1213         if (__result > __maximum)
1214           return {__result - 2, __first - 4};
1215       }
1216       break;
1217     default:
1218       // Malformed Unicode.
1219       return __estimate_column_width_malformed(__first, __last, __maximum,
1220                                                __result);
1221     }
1222 
1223     if (__result >= __maximum)
1224       return {__result, __first};
1225   }
1226   return {__result, __first};
1227 }
1228 
1229 template <__utf16_character _CharT>
1230 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
1231 __estimate_column_width(const _CharT* __first, const _CharT* __last,
1232                         size_t __maximum) noexcept {
1233   size_t __result = 0;
1234 
1235   while (__first != __last) {
1236     uint32_t __c = *__first;
1237     // Is the code unit part of a surrogate pair? See
1238     // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
1239     if (__c >= 0xd800 && __c <= 0xDfff) {
1240       // Malformed Unicode.
1241       if (__last - __first < 2) [[unlikely]]
1242         return {__result + 1, __first + 1};
1243 
1244       __c -= 0xd800;
1245       __c <<= 10;
1246       __c += (*(__first + 1) - 0xdc00);
1247       __c += 0x10000;
1248 
1249       __result += __column_width_4(__c);
1250       if (__result > __maximum)
1251         return {__result - 2, __first};
1252       __first += 2;
1253     } else {
1254       __result += __column_width_3(__c);
1255       if (__result > __maximum)
1256         return {__result - 2, __first};
1257       ++__first;
1258     }
1259 
1260     if (__result >= __maximum)
1261       return {__result, __first};
1262   }
1263 
1264   return {__result, __first};
1265 }
1266 
1267 template <__utf32_character _CharT>
1268 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
1269 __estimate_column_width(const _CharT* __first, const _CharT* __last,
1270                         size_t __maximum) noexcept {
1271   size_t __result = 0;
1272 
1273   while (__first != __last) {
1274     uint32_t __c = *__first;
1275     __result += __column_width(__c);
1276 
1277     if (__result > __maximum)
1278       return {__result - 2, __first};
1279 
1280     ++__first;
1281     if (__result >= __maximum)
1282       return {__result, __first};
1283   }
1284 
1285   return {__result, __first};
1286 }
1287 
1288 } // namespace __detail
1289 
1290 template <class _CharT>
1291 _LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT>
1292 __get_string_alignment(const _CharT* __first, const _CharT* __last,
1293                        ptrdiff_t __width, ptrdiff_t __precision) noexcept {
1294   _LIBCPP_ASSERT(__width != 0 || __precision != -1,
1295                  "The function has no effect and shouldn't be used");
1296 
1297   // TODO FMT There might be more optimizations possible:
1298   // If __precision == __format::__number_max and the encoding is:
1299   // * UTF-8  : 4 * (__last - __first) >= __width
1300   // * UTF-16 : 2 * (__last - __first) >= __width
1301   // * UTF-32 : (__last - __first) >= __width
1302   // In these cases it's certain the output is at least the requested width.
1303   // It's unknown how often this happens in practice. For now the improvement
1304   // isn't implemented.
1305 
1306   /*
1307    * First assume there are no special Unicode code units in the input.
1308    * - Apply the precision (this may reduce the size of the input). When
1309    *   __precison == -1 this step is omitted.
1310    * - Scan for special code units in the input.
1311    * If our assumption was correct the __pos will be at the end of the input.
1312    */
1313   const ptrdiff_t __length = __last - __first;
1314   const _CharT* __limit =
1315       __first +
1316       (__precision == -1 ? __length : _VSTD::min(__length, __precision));
1317   ptrdiff_t __size = __limit - __first;
1318   const _CharT* __pos =
1319       __detail::__estimate_column_width_fast(__first, __limit);
1320 
1321   if (__pos == __limit)
1322     return {__limit, __size, __size < __width};
1323 
1324   /*
1325    * Our assumption was wrong, there are special Unicode code units.
1326    * The range [__first, __pos) contains a set of code units with the
1327    * following property:
1328    *      Every _CharT in the range will be rendered in 1 column.
1329    *
1330    * If there's no maximum width and the parsed size already exceeds the
1331    *   minimum required width. The real size isn't important. So bail out.
1332    */
1333   if (__precision == -1 && (__pos - __first) >= __width)
1334     return {__last, 0, false};
1335 
1336   /* If there's a __precision, truncate the output to that width. */
1337   ptrdiff_t __prefix = __pos - __first;
1338   if (__precision != -1) {
1339     _LIBCPP_ASSERT(__precision > __prefix, "Logic error.");
1340     auto __lengh_info = __detail::__estimate_column_width(
1341         __pos, __last, __precision - __prefix);
1342     __size = __lengh_info.__width + __prefix;
1343     return {__lengh_info.__ptr, __size, __size < __width};
1344   }
1345 
1346   /* Else use __width to determine the number of required padding characters. */
1347   _LIBCPP_ASSERT(__width > __prefix, "Logic error.");
1348   /*
1349    * The column width is always one or two columns. For the precision the wanted
1350    * column width is the maximum, for the width it's the minimum. Using the
1351    * width estimation with its truncating behavior will result in the wrong
1352    * result in the following case:
1353    * - The last code unit processed requires two columns and exceeds the
1354    *   maximum column width.
1355    * By increasing the __maximum by one avoids this issue. (It means it may
1356    * pass one code point more than required to determine the proper result;
1357    * that however isn't a problem for the algorithm.)
1358    */
1359   size_t __maximum = 1 + __width - __prefix;
1360   auto __lengh_info =
1361       __detail::__estimate_column_width(__pos, __last, __maximum);
1362   if (__lengh_info.__ptr != __last) {
1363     // Consumed the width number of code units. The exact size of the string
1364     // is unknown. We only know we don't need to align the output.
1365     _LIBCPP_ASSERT(static_cast<ptrdiff_t>(__lengh_info.__width + __prefix) >=
1366                        __width,
1367                    "Logic error");
1368     return {__last, 0, false};
1369   }
1370 
1371   __size = __lengh_info.__width + __prefix;
1372   return {__last, __size, __size < __width};
1373 }
1374 #else  // _LIBCPP_HAS_NO_UNICODE
1375 template <class _CharT>
1376 _LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT>
1377 __get_string_alignment(const _CharT* __first, const _CharT* __last,
1378                        ptrdiff_t __width, ptrdiff_t __precision) noexcept {
1379   const ptrdiff_t __length = __last - __first;
1380   const _CharT* __limit =
1381       __first +
1382       (__precision == -1 ? __length : _VSTD::min(__length, __precision));
1383   ptrdiff_t __size = __limit - __first;
1384   return {__limit, __size, __size < __width};
1385 }
1386 #endif // _LIBCPP_HAS_NO_UNICODE
1387 
/// These fields are a filter for which elements to parse.
///
/// They default to false so when a new field is added it needs to be opted in
/// explicitly.
///
/// \ref __parser::__parse only attempts to parse an element when its flag is
/// set. The fill-align and the width have no flag; they are always parsed.
struct __fields {
  uint8_t __sign_ : 1 {false};
  uint8_t __alternate_form_ : 1 {false};
  uint8_t __zero_padding_ : 1 {false};
  uint8_t __precision_ : 1 {false};
  uint8_t __locale_specific_form_ : 1 {false};
  // When set the parser also validates that the format-spec is fully consumed
  // or ends with a '}'.
  uint8_t __type_ : 1 {false};
};
1400 
// The fields enabled for strings: only the precision and the type, in
// addition to the fill-align and width that are always parsed.
//
// By not placing this constant in the formatter class it's not duplicated for
// char and wchar_t.
inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true};
1404 
/// The alignment options of the format-spec.
enum class _LIBCPP_ENUM_VIS __alignment : uint8_t {
  /// No alignment is set in the format string.
  __default,
  /// Set when the format string contains '<'.
  __left,
  /// Set when the format string contains '^'.
  __center,
  /// Set when the format string contains '>'.
  __right,
  /// Set when the zero-padding '0' is parsed and no alignment was set.
  __zero_padding
};
1413 
/// The sign options of the format-spec.
enum class _LIBCPP_ENUM_VIS __sign : uint8_t {
  /// No sign is set in the format string.
  ///
  /// The sign isn't allowed for certain format-types. By using this value
  /// it's possible to detect whether or not the user explicitly set the sign
  /// flag. For formatting purposes it behaves the same as \ref __minus.
  __default,
  /// Set when the format string contains '-'.
  __minus,
  /// Set when the format string contains '+'.
  __plus,
  /// Set when the format string contains ' '.
  __space
};
1425 
/// The display types of the format-spec.
///
/// \ref __default is the initial value of \ref __parser::__type_, thus the
/// value when the format string doesn't contain a type.
enum class _LIBCPP_ENUM_VIS __type : uint8_t {
  __default,
  __string,
  __binary_lower_case,
  __binary_upper_case,
  __octal,
  __decimal,
  __hexadecimal_lower_case,
  __hexadecimal_upper_case,
  __pointer,
  __char,
  __hexfloat_lower_case,
  __hexfloat_upper_case,
  __scientific_lower_case,
  __scientific_upper_case,
  __fixed_lower_case,
  __fixed_upper_case,
  __general_lower_case,
  __general_upper_case
};
1446 
/// The std-format-spec specific part of \ref __parsed_specifications.
///
/// \ref __alignment_ needs to remain the first member, since
/// \ref __parsed_specifications inspects it regardless of the active member
/// of its union.
struct __std {
  __alignment __alignment_ : 3;
  __sign __sign_ : 2;
  bool __alternate_form_ : 1;
  bool __locale_specific_form_ : 1;
  __type __type_;
};
1454 
/// The chrono-format-spec specific part of \ref __parsed_specifications.
///
/// \ref __alignment_ needs to remain the first member, since
/// \ref __parsed_specifications inspects it regardless of the active member
/// of its union.
struct __chrono {
  __alignment __alignment_ : 3;
  bool __weekday_name_ : 1;
  bool __month_name_ : 1;
};
1460 
/// Contains the parsed formatting specifications.
///
/// This contains information for both the std-format-spec and the
/// chrono-format-spec. This results in some unused members for both
/// specifications. However these unused members don't increase the size
/// of the structure.
///
/// This struct doesn't cross ABI boundaries so its layout doesn't need to be
/// kept stable.
template <class _CharT>
struct __parsed_specifications {
  union {
    // The field __alignment_ is the first element in __std_ and __chrono_.
    // This allows the code to always inspect this value regardless of which
    // member of the union is the active member [class.union.general]/2.
    //
    // This is needed since the generic output routines handle the alignment of
    // the output.
    __alignment __alignment_ : 3;
    __std __std_;
    __chrono __chrono_;
  };

  /// The requested width.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __width_;

  /// The requested precision.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __precision_;

  /// The character used for padding the output.
  _CharT __fill_;

  /// \returns whether a width is set; this is the case for a positive width.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }

  /// \returns whether a precision is set; this is the case for a non-negative
  /// precision.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
};
1502 
// Validate the struct is small and cheap to copy since the struct is passed by
// value in formatting functions. The wchar_t instantiation is only validated
// when wide characters are available.
static_assert(sizeof(__parsed_specifications<char>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
#  endif
1511 
1512 /// The parser for the std-format-spec.
1513 ///
1514 /// Note this class is a member of std::formatter specializations. It's
1515 /// expected developers will create their own formatter specializations that
1516 /// inherit from the std::formatter specializations. This means this class
1517 /// must be ABI stable. To aid the stability the unused bits in the class are
/// set to zero. That way they can be repurposed if a future revision of the
/// Standard adds new fields to the std-format-spec.
1520 template <class _CharT>
1521 class _LIBCPP_TEMPLATE_VIS __parser {
1522 public:
1523   _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(basic_format_parse_context<_CharT>& __parse_ctx, __fields __fields)
1524       -> decltype(__parse_ctx.begin()) {
1525 
1526     const _CharT* __begin = __parse_ctx.begin();
1527     const _CharT* __end = __parse_ctx.end();
1528     if (__begin == __end)
1529       return __begin;
1530 
1531     if (__parse_fill_align(__begin, __end) && __begin == __end)
1532       return __begin;
1533 
1534     if (__fields.__sign_ && __parse_sign(__begin) && __begin == __end)
1535       return __begin;
1536 
1537     if (__fields.__alternate_form_ && __parse_alternate_form(__begin) && __begin == __end)
1538       return __begin;
1539 
1540     if (__fields.__zero_padding_ && __parse_zero_padding(__begin) && __begin == __end)
1541       return __begin;
1542 
1543     if (__parse_width(__begin, __end, __parse_ctx) && __begin == __end)
1544       return __begin;
1545 
1546     if (__fields.__precision_ && __parse_precision(__begin, __end, __parse_ctx) && __begin == __end)
1547       return __begin;
1548 
1549     if (__fields.__locale_specific_form_ && __parse_locale_specific_form(__begin) && __begin == __end)
1550       return __begin;
1551 
1552     if (__fields.__type_) {
1553       __parse_type(__begin);
1554 
1555       // When __type_ is false the calling parser is expected to do additional
1556       // parsing. In that case that parser should do the end of format string
1557       // validation.
1558       if (__begin != __end && *__begin != _CharT('}'))
1559         __throw_format_error("The format-spec should consume the input or end with a '}'");
1560     }
1561 
1562     return __begin;
1563   }
1564 
1565   /// \returns the `__parsed_specifications` with the resolved dynamic sizes..
1566   _LIBCPP_HIDE_FROM_ABI
1567   __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
1568     return __parsed_specifications<_CharT>{
1569         .__std_ =
1570             __std{.__alignment_            = __alignment_,
1571                   .__sign_                 = __sign_,
1572                   .__alternate_form_       = __alternate_form_,
1573                   .__locale_specific_form_ = __locale_specific_form_,
1574                   .__type_                 = __type_},
1575         .__width_{__get_width(__ctx)},
1576         .__precision_{__get_precision(__ctx)},
1577         .__fill_{__fill_}};
1578   }
1579 
  // The parsed std-format-spec flags. All of them start at their "not set"
  // value.
  __alignment __alignment_ : 3 {__alignment::__default};
  __sign __sign_ : 2 {__sign::__default};
  bool __alternate_form_ : 1 {false};
  bool __locale_specific_form_ : 1 {false};
  // Unused bit, explicitly set to zero so it can be repurposed.
  bool __reserved_0_ : 1 {false};
  __type __type_{__type::__default};

  // These two flags are used for formatting chrono. Since the struct has
  // padding space left it's added to this structure.
  bool __weekday_name_ : 1 {false};
  bool __month_name_ : 1 {false};

  // Unused bits, explicitly set to zero so they can be repurposed.
  uint8_t __reserved_1_ : 6 {0};
  uint8_t __reserved_2_ : 6 {0};
  // These two flags are only used internally and not part of the
  // __parsed_specifications. Therefore put them at the end.
  //
  // When set the matching value below holds an arg-id instead of the value.
  bool __width_as_arg_ : 1 {false};
  bool __precision_as_arg_ : 1 {false};

  /// The requested width, either the value or the arg-id.
  int32_t __width_{0};

  /// The requested precision, either the value or the arg-id.
  int32_t __precision_{-1};

  // LWG 3576 will probably change this to always accept a Unicode code point.
  // To avoid changing the size with that change align the field so when it
  // becomes 32-bit its alignment will remain the same. That also means the
  // size will remain the same. (D2572 addresses the solution for LWG 3576.)
  _CharT __fill_{_CharT(' ')};
1610 
1611 private:
1612   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
1613     switch (__c) {
1614     case _CharT('<'):
1615       __alignment_ = __alignment::__left;
1616       return true;
1617 
1618     case _CharT('^'):
1619       __alignment_ = __alignment::__center;
1620       return true;
1621 
1622     case _CharT('>'):
1623       __alignment_ = __alignment::__right;
1624       return true;
1625     }
1626     return false;
1627   }
1628 
1629   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(const _CharT*& __begin, const _CharT* __end) {
1630     _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause "
1631                                      "undefined behavior by evaluating data not in the input");
1632     if (__begin + 1 != __end) {
1633       if (__parse_alignment(*(__begin + 1))) {
1634         if (*__begin == _CharT('{') || *__begin == _CharT('}'))
1635           __throw_format_error("The format-spec fill field contains an invalid character");
1636 
1637         __fill_ = *__begin;
1638         __begin += 2;
1639         return true;
1640       }
1641     }
1642 
1643     if (!__parse_alignment(*__begin))
1644       return false;
1645 
1646     ++__begin;
1647     return true;
1648   }
1649 
1650   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(const _CharT*& __begin) {
1651     switch (*__begin) {
1652     case _CharT('-'):
1653       __sign_ = __sign::__minus;
1654       break;
1655     case _CharT('+'):
1656       __sign_ = __sign::__plus;
1657       break;
1658     case _CharT(' '):
1659       __sign_ = __sign::__space;
1660       break;
1661     default:
1662       return false;
1663     }
1664     ++__begin;
1665     return true;
1666   }
1667 
1668   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(const _CharT*& __begin) {
1669     if (*__begin != _CharT('#'))
1670       return false;
1671 
1672     __alternate_form_ = true;
1673     ++__begin;
1674     return true;
1675   }
1676 
1677   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(const _CharT*& __begin) {
1678     if (*__begin != _CharT('0'))
1679       return false;
1680 
1681     if (__alignment_ == __alignment::__default)
1682       __alignment_ = __alignment::__zero_padding;
1683     ++__begin;
1684     return true;
1685   }
1686 
1687   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(const _CharT*& __begin, const _CharT* __end, auto& __parse_ctx) {
1688     if (*__begin == _CharT('0'))
1689       __throw_format_error("A format-spec width field shouldn't have a leading zero");
1690 
1691     if (*__begin == _CharT('{')) {
1692       __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
1693       __width_as_arg_ = true;
1694       __width_ = __r.__value;
1695       __begin = __r.__ptr;
1696       return true;
1697     }
1698 
1699     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
1700       return false;
1701 
1702     __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
1703     __width_ = __r.__value;
1704     _LIBCPP_ASSERT(__width_ != 0, "A zero value isn't allowed and should be impossible, "
1705                                   "due to validations in this function");
1706     __begin = __r.__ptr;
1707     return true;
1708   }
1709 
1710   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(const _CharT*& __begin, const _CharT* __end,
1711                                                          auto& __parse_ctx) {
1712     if (*__begin != _CharT('.'))
1713       return false;
1714 
1715     ++__begin;
1716     if (__begin == __end)
1717       __throw_format_error("End of input while parsing format-spec precision");
1718 
1719     if (*__begin == _CharT('{')) {
1720       __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
1721       __precision_as_arg_ = true;
1722       __precision_ = __arg_id.__value;
1723       __begin = __arg_id.__ptr;
1724       return true;
1725     }
1726 
1727     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
1728       __throw_format_error("The format-spec precision field doesn't contain a value or arg-id");
1729 
1730     __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
1731     __precision_ = __r.__value;
1732     __precision_as_arg_ = false;
1733     __begin = __r.__ptr;
1734     return true;
1735   }
1736 
1737   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(const _CharT*& __begin) {
1738     if (*__begin != _CharT('L'))
1739       return false;
1740 
1741     __locale_specific_form_ = true;
1742     ++__begin;
1743     return true;
1744   }
1745 
1746   _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(const _CharT*& __begin) {
1747     // Determines the type. It does not validate whether the selected type is
1748     // valid. Most formatters have optional fields that are only allowed for
1749     // certain types. These parsers need to do validation after the type has
1750     // been parsed. So its easier to implement the validation for all types in
1751     // the specific parse function.
1752     switch (*__begin) {
1753     case 'A':
1754       __type_ = __type::__hexfloat_upper_case;
1755       break;
1756     case 'B':
1757       __type_ = __type::__binary_upper_case;
1758       break;
1759     case 'E':
1760       __type_ = __type::__scientific_upper_case;
1761       break;
1762     case 'F':
1763       __type_ = __type::__fixed_upper_case;
1764       break;
1765     case 'G':
1766       __type_ = __type::__general_upper_case;
1767       break;
1768     case 'X':
1769       __type_ = __type::__hexadecimal_upper_case;
1770       break;
1771     case 'a':
1772       __type_ = __type::__hexfloat_lower_case;
1773       break;
1774     case 'b':
1775       __type_ = __type::__binary_lower_case;
1776       break;
1777     case 'c':
1778       __type_ = __type::__char;
1779       break;
1780     case 'd':
1781       __type_ = __type::__decimal;
1782       break;
1783     case 'e':
1784       __type_ = __type::__scientific_lower_case;
1785       break;
1786     case 'f':
1787       __type_ = __type::__fixed_lower_case;
1788       break;
1789     case 'g':
1790       __type_ = __type::__general_lower_case;
1791       break;
1792     case 'o':
1793       __type_ = __type::__octal;
1794       break;
1795     case 'p':
1796       __type_ = __type::__pointer;
1797       break;
1798     case 's':
1799       __type_ = __type::__string;
1800       break;
1801     case 'x':
1802       __type_ = __type::__hexadecimal_lower_case;
1803       break;
1804     default:
1805       return;
1806     }
1807     ++__begin;
1808   }
1809 
1810   _LIBCPP_HIDE_FROM_ABI
1811   int32_t __get_width(auto& __ctx) const {
1812     if (!__width_as_arg_)
1813       return __width_;
1814 
1815     int32_t __result = __format_spec::__substitute_arg_id(__ctx.arg(__width_));
1816     if (__result == 0)
1817       __throw_format_error("A format-spec width field replacement should have a positive value");
1818     return __result;
1819   }
1820 
1821   _LIBCPP_HIDE_FROM_ABI
1822   int32_t __get_precision(auto& __ctx) const {
1823     if (!__precision_as_arg_)
1824       return __precision_;
1825 
1826     return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
1827   }
1828 };
1829 
// Validates that the reserved bit-fields don't change the size of the parser.
1831 static_assert(sizeof(__parser<char>) == 16);
1832 #  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
1833 static_assert(sizeof(__parser<wchar_t>) == 16);
1834 #  endif
1835 
1836 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) {
1837   switch (__type) {
1838   case __format_spec::__type::__default:
1839   case __format_spec::__type::__string:
1840     break;
1841 
1842   default:
1843     __throw_format_error("The format-spec type has a type not supported for "
1844                          "a string argument");
1845   }
1846 }
1847 
1848 } // namespace __format_spec
1849 
1850 #endif //_LIBCPP_STD_VER > 17
1851 
1852 _LIBCPP_END_NAMESPACE_STD
1853 
1854 _LIBCPP_POP_MACROS
1855 
1856 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
1857