1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
12 
13 /// \file Contains the std-format-spec parser.
14 ///
15 /// Most of the code can be reused in the chrono-format-spec.
16 /// This header has some support for the chrono-format-spec since it doesn't
17 /// affect the std-format-spec.
18 
19 #include <__algorithm/copy_n.h>
20 #include <__algorithm/min.h>
21 #include <__assert>
22 #include <__concepts/arithmetic.h>
23 #include <__concepts/same_as.h>
24 #include <__config>
25 #include <__format/format_arg.h>
26 #include <__format/format_error.h>
27 #include <__format/format_parse_context.h>
28 #include <__format/format_string.h>
29 #include <__format/unicode.h>
30 #include <__format/width_estimation_table.h>
31 #include <__iterator/concepts.h>
32 #include <__iterator/iterator_traits.h> // iter_value_t
33 #include <__memory/addressof.h>
34 #include <__type_traits/common_type.h>
35 #include <__type_traits/is_constant_evaluated.h>
36 #include <__type_traits/is_trivially_copyable.h>
37 #include <__variant/monostate.h>
38 #include <cstdint>
39 #include <string>
40 #include <string_view>
41 
42 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
43 #  pragma GCC system_header
44 #endif
45 
46 _LIBCPP_PUSH_MACROS
47 #include <__undef_macros>
48 
49 _LIBCPP_BEGIN_NAMESPACE_STD
50 
51 #if _LIBCPP_STD_VER >= 20
52 
53 namespace __format_spec {
54 
55 _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI inline void
__throw_invalid_option_format_error(const char * __id,const char * __option)56 __throw_invalid_option_format_error(const char* __id, const char* __option) {
57   std::__throw_format_error(
58       (string("The format specifier for ") + __id + " does not allow the " + __option + " option").c_str());
59 }
60 
__throw_invalid_type_format_error(const char * __id)61 _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI inline void __throw_invalid_type_format_error(const char* __id) {
62   std::__throw_format_error(
63       (string("The type option contains an invalid value for ") + __id + " formatting argument").c_str());
64 }
65 
66 template <contiguous_iterator _Iterator, class _ParseContext>
67 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result<_Iterator>
__parse_arg_id(_Iterator __begin,_Iterator __end,_ParseContext & __ctx)68 __parse_arg_id(_Iterator __begin, _Iterator __end, _ParseContext& __ctx) {
69   using _CharT = iter_value_t<_Iterator>;
70   // This function is a wrapper to call the real parser. But it does the
71   // validation for the pre-conditions and post-conditions.
72   if (__begin == __end)
73     std::__throw_format_error("End of input while parsing an argument index");
74 
75   __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __ctx);
76 
77   if (__r.__last == __end || *__r.__last != _CharT('}'))
78     std::__throw_format_error("The argument index is invalid");
79 
80   ++__r.__last;
81   return __r;
82 }
83 
84 template <class _Context>
__substitute_arg_id(basic_format_arg<_Context> __format_arg)85 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t __substitute_arg_id(basic_format_arg<_Context> __format_arg) {
86   // [format.string.std]/8
87   //   If the corresponding formatting argument is not of integral type...
88   // This wording allows char and bool too. LWG-3720 changes the wording to
89   //    If the corresponding formatting argument is not of standard signed or
90   //    unsigned integer type,
91   // This means the 128-bit will not be valid anymore.
92   // TODO FMT Verify this resolution is accepted and add a test to verify
93   //          128-bit integrals fail and switch to visit_format_arg.
94   return std::__visit_format_arg(
95       [](auto __arg) -> uint32_t {
96         using _Type = decltype(__arg);
97         if constexpr (same_as<_Type, monostate>)
98           std::__throw_format_error("The argument index value is too large for the number of arguments supplied");
99 
100         // [format.string.std]/8
101         // If { arg-idopt } is used in a width or precision, the value of the
102         // corresponding formatting argument is used in its place. If the
103         // corresponding formatting argument is not of standard signed or unsigned
104         // integer type, or its value is negative for precision or non-positive for
105         // width, an exception of type format_error is thrown.
106         //
107         // When an integral is used in a format function, it is stored as one of
108         // the types checked below. Other integral types are promoted. For example,
109         // a signed char is stored as an int.
110         if constexpr (same_as<_Type, int> || same_as<_Type, unsigned int> || //
111                       same_as<_Type, long long> || same_as<_Type, unsigned long long>) {
112           if constexpr (signed_integral<_Type>) {
113             if (__arg < 0)
114               std::__throw_format_error("An argument index may not have a negative value");
115           }
116 
117           using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
118           if (static_cast<_CT>(__arg) > static_cast<_CT>(__format::__number_max))
119             std::__throw_format_error("The value of the argument index exceeds its maximum value");
120 
121           return __arg;
122         } else
123           std::__throw_format_error("Replacement argument isn't a standard signed or unsigned integer type");
124       },
125       __format_arg);
126 }
127 
/// These fields are a filter for which elements to parse.
///
/// They default to false so when a new field is added it needs to be opted in
/// explicitly.
///
/// The flags are consumed by __parser::__parse and __parser::__validate. The
/// fill-and-align and the width are not listed since they are always parsed.
// TODO FMT Use an ABI tag for this struct.
struct __fields {
  // Accept the sign option: '-', '+', or ' '.
  uint16_t __sign_                 : 1 {false};
  // Accept the alternate form option: '#'.
  uint16_t __alternate_form_       : 1 {false};
  // Accept the zero-padding option: '0'.
  uint16_t __zero_padding_         : 1 {false};
  // Accept the precision option: '.' followed by a value or an arg-id.
  uint16_t __precision_            : 1 {false};
  // Accept the locale-specific form option: 'L'.
  uint16_t __locale_specific_form_ : 1 {false};
  // Parse the type option.
  uint16_t __type_                 : 1 {false};
  // Determines the valid values for fill.
  //
  // Originally the fill could be any character except { and }. Range-based
  // formatters use the colon to mark the beginning of the
  // underlying-format-spec. To avoid parsing ambiguities these formatter
  // specializations prohibit the use of the colon as a fill character.
  uint16_t __use_range_fill_ : 1 {false};
  // Accept the 'n' option, which sets __parser::__clear_brackets_.
  uint16_t __clear_brackets_ : 1 {false};
  // When set __parser::__parse requires that the parsed fields are followed by
  // the end of the input or a '}'. When not set the parser returns directly
  // after the last parsed field without inspecting the remaining input.
  uint16_t __consume_all_    : 1 {false};
};
150 
151 // By not placing this constant in the formatter class it's not duplicated for
152 // char and wchar_t.
153 inline constexpr __fields __fields_bool{.__locale_specific_form_ = true, .__type_ = true, .__consume_all_ = true};
154 inline constexpr __fields __fields_integral{
155     .__sign_                 = true,
156     .__alternate_form_       = true,
157     .__zero_padding_         = true,
158     .__locale_specific_form_ = true,
159     .__type_                 = true,
160     .__consume_all_          = true};
161 inline constexpr __fields __fields_floating_point{
162     .__sign_                 = true,
163     .__alternate_form_       = true,
164     .__zero_padding_         = true,
165     .__precision_            = true,
166     .__locale_specific_form_ = true,
167     .__type_                 = true,
168     .__consume_all_          = true};
169 inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true, .__consume_all_ = true};
170 inline constexpr __fields __fields_pointer{.__zero_padding_ = true, .__type_ = true, .__consume_all_ = true};
171 
172 #  if _LIBCPP_STD_VER >= 23
173 inline constexpr __fields __fields_tuple{.__use_range_fill_ = true, .__clear_brackets_ = true};
174 inline constexpr __fields __fields_range{.__use_range_fill_ = true, .__clear_brackets_ = true};
175 inline constexpr __fields __fields_fill_align_width{};
176 #  endif
177 
/// The alignment of the output.
///
/// The value is stored in bit-fields with a width of 3 bits, therefore the
/// enumerator values are spelled out and need to remain smaller than 8.
enum class __alignment : uint8_t {
  /// No alignment is set in the format string.
  __default = 0,
  /// Selected by '<'.
  __left = 1,
  /// Selected by '^'.
  __center = 2,
  /// Selected by '>'.
  __right = 3,
  /// Selected by the zero-padding option when no alignment is set.
  __zero_padding = 4
};
186 
/// The sign option.
///
/// The value is stored in bit-fields with a width of 2 bits, therefore the
/// enumerator values are spelled out and need to remain smaller than 4.
enum class __sign : uint8_t {
  /// No sign is set in the format string.
  ///
  /// The sign isn't allowed for certain format-types. By using this value
  /// it's possible to detect whether or not the user explicitly set the sign
  /// flag. For formatting purposes it behaves the same as \ref __minus.
  __default = 0,
  /// Selected by '-'.
  __minus = 1,
  /// Selected by '+'.
  __plus = 2,
  /// Selected by ' '.
  __space = 3
};
198 
/// The display type selected by the type option.
///
/// The enumerator value is used as the bit position in the type masks created
/// by __create_type_mask. That function rejects values larger than 31, so the
/// values are spelled out to make the available room visible.
enum class __type : uint8_t {
  /// No type is set in the format string.
  __default                = 0,
  __string                 = 1,
  __binary_lower_case      = 2,
  __binary_upper_case      = 3,
  __octal                  = 4,
  __decimal                = 5,
  __hexadecimal_lower_case = 6,
  __hexadecimal_upper_case = 7,
  __pointer_lower_case     = 8,
  __pointer_upper_case     = 9,
  __char                   = 10,
  __hexfloat_lower_case    = 11,
  __hexfloat_upper_case    = 12,
  __scientific_lower_case  = 13,
  __scientific_upper_case  = 14,
  __fixed_lower_case       = 15,
  __fixed_upper_case       = 16,
  __general_lower_case     = 17,
  __general_upper_case     = 18,
  __debug                  = 19
};
221 
__create_type_mask(__type __t)222 _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __create_type_mask(__type __t) {
223   uint32_t __shift = static_cast<uint32_t>(__t);
224   if (__shift == 0)
225     return 1;
226 
227   if (__shift > 31)
228     std::__throw_format_error("The type does not fit in the mask");
229 
230   return 1 << __shift;
231 }
232 
233 inline constexpr uint32_t __type_mask_integer =
234     __create_type_mask(__type::__binary_lower_case) |      //
235     __create_type_mask(__type::__binary_upper_case) |      //
236     __create_type_mask(__type::__decimal) |                //
237     __create_type_mask(__type::__octal) |                  //
238     __create_type_mask(__type::__hexadecimal_lower_case) | //
239     __create_type_mask(__type::__hexadecimal_upper_case);
240 
// The options of the std-format-spec that are stored in the union of
// __parsed_specifications.
//
// __alignment_ needs to remain the first member, __parsed_specifications
// inspects it without knowing which member of its union is active.
struct __std {
  __alignment __alignment_     : 3;
  __sign __sign_               : 2;
  bool __alternate_form_       : 1;
  bool __locale_specific_form_ : 1;
  // The display type, this member is not a bit-field.
  __type __type_;
};
248 
// The options of the chrono-format-spec that are stored in the union of
// __parsed_specifications.
//
// __alignment_ needs to remain the first member, __parsed_specifications
// inspects it without knowing which member of its union is active.
struct __chrono {
  __alignment __alignment_     : 3;
  bool __locale_specific_form_ : 1;
  // The flags below are copies of the equally named flags in __parser.
  // NOTE(review): presumably they record which chrono fields the format string
  // uses; they are not set in this part of the file — confirm against the
  // chrono parser.
  bool __hour_                 : 1;
  bool __weekday_name_         : 1;
  bool __weekday_              : 1;
  bool __day_of_year_          : 1;
  bool __week_of_year_         : 1;
  bool __month_name_           : 1;
};
259 
// The fill UCS scalar value.
//
// This is always an array, with 1, 2, or 4 elements.
// The size of the data structure is always 32-bits.
//
// The number of elements follows from the size of the character type: four
// char elements, or 4 / sizeof(wchar_t) wchar_t elements. Only the first
// element is explicitly initialized, to a space; the remaining elements are
// zero-initialized.
template <class _CharT>
struct __code_point;

template <>
struct __code_point<char> {
  char __data[4] = {' '};
};

#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
struct __code_point<wchar_t> {
  wchar_t __data[4 / sizeof(wchar_t)] = {L' '};
};
#  endif
278 
/// Contains the parsed formatting specifications.
///
/// This contains information for both the std-format-spec and the
/// chrono-format-spec. This results in some unused members for both
/// specifications. However these unused members don't increase the size
/// of the structure.
///
/// This struct doesn't cross ABI boundaries so its layout doesn't need to be
/// kept stable.
template <class _CharT>
struct __parsed_specifications {
  union {
    // The field __alignment_ is the first element in __std_ and __chrono_.
    // This allows the code to always inspect this value regardless of which
    // member of the union is the active member [class.union.general]/2.
    //
    // This is needed since the generic output routines handle the alignment of
    // the output.
    __alignment __alignment_ : 3;
    __std __std_;
    __chrono __chrono_;
  };

  /// The requested width.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __width_;

  /// The requested precision.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __precision_;

  /// The code units of the fill character.
  __code_point<_CharT> __fill_;

  /// \returns whether a width is requested, which is the case for a positive
  /// value.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }

  /// \returns whether a precision is requested, which is the case for a
  /// non-negative value.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
};
320 
321 // Validate the struct is small and cheap to copy since the struct is passed by
322 // value in formatting functions.
323 static_assert(sizeof(__parsed_specifications<char>) == 16);
324 static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
325 #  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
326 static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
327 static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
328 #  endif
329 
330 /// The parser for the std-format-spec.
331 ///
332 /// Note this class is a member of std::formatter specializations. It's
333 /// expected developers will create their own formatter specializations that
334 /// inherit from the std::formatter specializations. This means this class
335 /// must be ABI stable. To aid the stability the unused bits in the class are
/// set to zero. That way they can be repurposed if a future revision of the
/// Standard adds new fields to std-format-spec.
338 template <class _CharT>
339 class _LIBCPP_TEMPLATE_VIS __parser {
340 public:
  // Parses the format specification.
  //
  // Depending on whether the parsing is done at compile-time or at run-time
  // the method differs slightly.
  // - At run-time the parser only parses a field when it is in the __fields.
  //   Accepting all fields and then validating the valid ones has a
  //   performance impact. This is faster but gives slightly worse error
  //   messages.
  // - At compile-time when a field is not accepted the parser will still
  //   parse it and give an error when it's present. This gives a more
  //   accurate error.
  // The idea is that most times the format functions are used instead of the
  // vformat functions. In that case the error will be detected during
  // compilation and there is no need to pay for the run-time overhead.
354   template <class _ParseContext>
355   _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator __parse(_ParseContext& __ctx, __fields __fields) {
356     auto __begin = __ctx.begin();
357     auto __end   = __ctx.end();
358     if (__begin == __end)
359       return __begin;
360 
361     if (__parse_fill_align(__begin, __end, __fields.__use_range_fill_) && __begin == __end)
362       return __begin;
363 
364     if (__fields.__sign_) {
365       if (__parse_sign(__begin) && __begin == __end)
366         return __begin;
367     } else if (std::is_constant_evaluated() && __parse_sign(__begin)) {
368       std::__throw_format_error("The format specification does not allow the sign option");
369     }
370 
371     if (__fields.__alternate_form_) {
372       if (__parse_alternate_form(__begin) && __begin == __end)
373         return __begin;
374     } else if (std::is_constant_evaluated() && __parse_alternate_form(__begin)) {
375       std::__throw_format_error("The format specifier does not allow the alternate form option");
376     }
377 
378     if (__fields.__zero_padding_) {
379       if (__parse_zero_padding(__begin) && __begin == __end)
380         return __begin;
381     } else if (std::is_constant_evaluated() && __parse_zero_padding(__begin)) {
382       std::__throw_format_error("The format specifier does not allow the zero-padding option");
383     }
384 
385     if (__parse_width(__begin, __end, __ctx) && __begin == __end)
386       return __begin;
387 
388     if (__fields.__precision_) {
389       if (__parse_precision(__begin, __end, __ctx) && __begin == __end)
390         return __begin;
391     } else if (std::is_constant_evaluated() && __parse_precision(__begin, __end, __ctx)) {
392       std::__throw_format_error("The format specifier does not allow the precision option");
393     }
394 
395     if (__fields.__locale_specific_form_) {
396       if (__parse_locale_specific_form(__begin) && __begin == __end)
397         return __begin;
398     } else if (std::is_constant_evaluated() && __parse_locale_specific_form(__begin)) {
399       std::__throw_format_error("The format specifier does not allow the locale-specific form option");
400     }
401 
402     if (__fields.__clear_brackets_) {
403       if (__parse_clear_brackets(__begin) && __begin == __end)
404         return __begin;
405     } else if (std::is_constant_evaluated() && __parse_clear_brackets(__begin)) {
406       std::__throw_format_error("The format specifier does not allow the n option");
407     }
408 
409     if (__fields.__type_)
410       __parse_type(__begin);
411 
412     if (!__fields.__consume_all_)
413       return __begin;
414 
415     if (__begin != __end && *__begin != _CharT('}'))
416       std::__throw_format_error("The format specifier should consume the input or end with a '}'");
417 
418     return __begin;
419   }
420 
  // Validates the parsed data.
  //
  // The valid fields in the parser may depend on the display type
  // selected. But the type is the last optional field, so by the time
  // it's known an option can't be used, it already has been parsed.
  // This does the validation again.
  //
  // For example an integral may have a sign, zero-padding, or alternate
  // form when the type option is not 'c'. So the generic approach is:
  //
  // typename _ParseContext::iterator __result = __parser_.__parse(__ctx, __format_spec::__fields_integral);
  // if (__parser_.__type_ == __format_spec::__type::__char) {
  //   __parser_.__validate(__format_spec::__fields_bool, "an integer");
  //   ... // more char adjustments
  // } else {
  //   ... // validate an integral type.
  // }
438   //
439   // For some types all valid options need a second validation run, like
440   // boolean types.
441   //
442   // Depending on whether the validation is done at compile-time or
443   // run-time the error differs
444   // - run-time the exception is thrown and contains the type of field
445   //   being validated.
446   // - at compile-time the line with `std::__throw_format_error` is shown
447   //   in the output. In that case it's important for the error to be on one
448   //   line.
449   // Note future versions of C++ may allow better compile-time error
450   // reporting.
451   _LIBCPP_HIDE_FROM_ABI constexpr void
452   __validate(__fields __fields, const char* __id, uint32_t __type_mask = -1) const {
453     if (!__fields.__sign_ && __sign_ != __sign::__default) {
454       if (std::is_constant_evaluated())
455         std::__throw_format_error("The format specifier does not allow the sign option");
456       else
457         __format_spec::__throw_invalid_option_format_error(__id, "sign");
458     }
459 
460     if (!__fields.__alternate_form_ && __alternate_form_) {
461       if (std::is_constant_evaluated())
462         std::__throw_format_error("The format specifier does not allow the alternate form option");
463       else
464         __format_spec::__throw_invalid_option_format_error(__id, "alternate form");
465     }
466 
467     if (!__fields.__zero_padding_ && __alignment_ == __alignment::__zero_padding) {
468       if (std::is_constant_evaluated())
469         std::__throw_format_error("The format specifier does not allow the zero-padding option");
470       else
471         __format_spec::__throw_invalid_option_format_error(__id, "zero-padding");
472     }
473 
474     if (!__fields.__precision_ && __precision_ != -1) { // Works both when the precision has a value or an arg-id.
475       if (std::is_constant_evaluated())
476         std::__throw_format_error("The format specifier does not allow the precision option");
477       else
478         __format_spec::__throw_invalid_option_format_error(__id, "precision");
479     }
480 
481     if (!__fields.__locale_specific_form_ && __locale_specific_form_) {
482       if (std::is_constant_evaluated())
483         std::__throw_format_error("The format specifier does not allow the locale-specific form option");
484       else
485         __format_spec::__throw_invalid_option_format_error(__id, "locale-specific form");
486     }
487 
488     if ((__create_type_mask(__type_) & __type_mask) == 0) {
489       if (std::is_constant_evaluated())
490         std::__throw_format_error("The format specifier uses an invalid value for the type option");
491       else
492         __format_spec::__throw_invalid_type_format_error(__id);
493     }
494   }
495 
  /// \returns the `__parsed_specifications` with the resolved dynamic sizes.
497   _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
498     return __parsed_specifications<_CharT>{
499         .__std_ = __std{.__alignment_            = __alignment_,
500                         .__sign_                 = __sign_,
501                         .__alternate_form_       = __alternate_form_,
502                         .__locale_specific_form_ = __locale_specific_form_,
503                         .__type_                 = __type_},
504         .__width_{__get_width(__ctx)},
505         .__precision_{__get_precision(__ctx)},
506         .__fill_{__fill_}};
507   }
508 
509   _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_chrono_specifications(auto& __ctx) const {
510     return __parsed_specifications<_CharT>{
511         .__chrono_ =
512             __chrono{.__alignment_            = __alignment_,
513                      .__locale_specific_form_ = __locale_specific_form_,
514                      .__hour_                 = __hour_,
515                      .__weekday_name_         = __weekday_name_,
516                      .__weekday_              = __weekday_,
517                      .__day_of_year_          = __day_of_year_,
518                      .__week_of_year_         = __week_of_year_,
519                      .__month_name_           = __month_name_},
520         .__width_{__get_width(__ctx)},
521         .__precision_{__get_precision(__ctx)},
522         .__fill_{__fill_}};
523   }
524 
  // The parsed options. The defaults are the values used when the option is
  // not present in the format-spec.
  //
  // __alignment_ is either set by the fill-and-align option or, when that
  // isn't used, by the zero-padding option.
  __alignment __alignment_     : 3 {__alignment::__default};
  __sign __sign_               : 2 {__sign::__default};
  bool __alternate_form_       : 1 {false};
  bool __locale_specific_form_ : 1 {false};
  // Set by the 'n' option.
  bool __clear_brackets_       : 1 {false};
  __type __type_{__type::__default};

  // These flags are only used for formatting chrono. Since the struct has
  // padding space left it's added to this structure.
  bool __hour_ : 1 {false};

  bool __weekday_name_ : 1 {false};
  bool __weekday_      : 1 {false};

  bool __day_of_year_  : 1 {false};
  bool __week_of_year_ : 1 {false};

  bool __month_name_ : 1 {false};

  // Unused bits, explicitly set to zero.
  uint8_t __reserved_0_ : 2 {0};
  uint8_t __reserved_1_ : 6 {0};
  // These two flags are only used internally and not part of the
  // __parsed_specifications. Therefore put them at the end.
  //
  // When a flag is set the matching __width_ or __precision_ contains an
  // arg-id instead of a value.
  bool __width_as_arg_     : 1 {false};
  bool __precision_as_arg_ : 1 {false};

  /// The requested width, either the value or the arg-id.
  int32_t __width_{0};

  /// The requested precision, either the value or the arg-id.
  ///
  /// The value -1 means no precision is set, see __validate.
  int32_t __precision_{-1};

  /// The code units of the fill character, by default a space.
  __code_point<_CharT> __fill_{};
558 
559 private:
560   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
561     switch (__c) {
562     case _CharT('<'):
563       __alignment_ = __alignment::__left;
564       return true;
565 
566     case _CharT('^'):
567       __alignment_ = __alignment::__center;
568       return true;
569 
570     case _CharT('>'):
571       __alignment_ = __alignment::__right;
572       return true;
573     }
574     return false;
575   }
576 
577   _LIBCPP_HIDE_FROM_ABI constexpr void __validate_fill_character(_CharT __fill, bool __use_range_fill) {
578     // The forbidden fill characters all code points formed from a single code unit, thus the
579     // check can be omitted when more code units are used.
580     if (__use_range_fill && (__fill == _CharT('{') || __fill == _CharT('}') || __fill == _CharT(':')))
581       std::__throw_format_error("The fill option contains an invalid value");
582     else if (__fill == _CharT('{') || __fill == _CharT('}'))
583       std::__throw_format_error("The fill option contains an invalid value");
584   }
585 
586 #  ifndef _LIBCPP_HAS_NO_UNICODE
587   // range-fill and tuple-fill are identical
588   template <contiguous_iterator _Iterator>
589     requires same_as<_CharT, char>
590 #    ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
591           || (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2)
592 #    endif
593   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) {
594     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
595         __begin != __end,
596         "when called with an empty input the function will cause "
597         "undefined behavior by evaluating data not in the input");
598     __unicode::__code_point_view<_CharT> __view{__begin, __end};
599     __unicode::__consume_result __consumed = __view.__consume();
600     if (__consumed.__status != __unicode::__consume_result::__ok)
601       std::__throw_format_error("The format specifier contains malformed Unicode characters");
602 
603     if (__view.__position() < __end && __parse_alignment(*__view.__position())) {
604       ptrdiff_t __code_units = __view.__position() - __begin;
605       if (__code_units == 1)
606         // The forbidden fill characters all are code points encoded
607         // in one code unit, thus the check can be omitted when more
608         // code units are used.
609         __validate_fill_character(*__begin, __use_range_fill);
610 
611       std::copy_n(__begin, __code_units, std::addressof(__fill_.__data[0]));
612       __begin += __code_units + 1;
613       return true;
614     }
615 
616     if (!__parse_alignment(*__begin))
617       return false;
618 
619     ++__begin;
620     return true;
621   }
622 
623 #    ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
624   template <contiguous_iterator _Iterator>
625     requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4)
626   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) {
627     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
628         __begin != __end,
629         "when called with an empty input the function will cause "
630         "undefined behavior by evaluating data not in the input");
631     if (__begin + 1 != __end && __parse_alignment(*(__begin + 1))) {
632       if (!__unicode::__is_scalar_value(*__begin))
633         std::__throw_format_error("The fill option contains an invalid value");
634 
635       __validate_fill_character(*__begin, __use_range_fill);
636 
637       __fill_.__data[0] = *__begin;
638       __begin += 2;
639       return true;
640     }
641 
642     if (!__parse_alignment(*__begin))
643       return false;
644 
645     ++__begin;
646     return true;
647   }
648 
649 #    endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
650 
651 #  else // _LIBCPP_HAS_NO_UNICODE
652   // range-fill and tuple-fill are identical
653   template <contiguous_iterator _Iterator>
654   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) {
655     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
656         __begin != __end,
657         "when called with an empty input the function will cause "
658         "undefined behavior by evaluating data not in the input");
659     if (__begin + 1 != __end) {
660       if (__parse_alignment(*(__begin + 1))) {
661         __validate_fill_character(*__begin, __use_range_fill);
662 
663         __fill_.__data[0] = *__begin;
664         __begin += 2;
665         return true;
666       }
667     }
668 
669     if (!__parse_alignment(*__begin))
670       return false;
671 
672     ++__begin;
673     return true;
674   }
675 
676 #  endif // _LIBCPP_HAS_NO_UNICODE
677 
678   template <contiguous_iterator _Iterator>
679   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(_Iterator& __begin) {
680     switch (*__begin) {
681     case _CharT('-'):
682       __sign_ = __sign::__minus;
683       break;
684     case _CharT('+'):
685       __sign_ = __sign::__plus;
686       break;
687     case _CharT(' '):
688       __sign_ = __sign::__space;
689       break;
690     default:
691       return false;
692     }
693     ++__begin;
694     return true;
695   }
696 
697   template <contiguous_iterator _Iterator>
698   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(_Iterator& __begin) {
699     if (*__begin != _CharT('#'))
700       return false;
701 
702     __alternate_form_ = true;
703     ++__begin;
704     return true;
705   }
706 
707   template <contiguous_iterator _Iterator>
708   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(_Iterator& __begin) {
709     if (*__begin != _CharT('0'))
710       return false;
711 
712     if (__alignment_ == __alignment::__default)
713       __alignment_ = __alignment::__zero_padding;
714     ++__begin;
715     return true;
716   }
717 
718   template <contiguous_iterator _Iterator>
719   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(_Iterator& __begin, _Iterator __end, auto& __ctx) {
720     if (*__begin == _CharT('0'))
721       std::__throw_format_error("The width option should not have a leading zero");
722 
723     if (*__begin == _CharT('{')) {
724       __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __ctx);
725       __width_as_arg_                     = true;
726       __width_                            = __r.__value;
727       __begin                             = __r.__last;
728       return true;
729     }
730 
731     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
732       return false;
733 
734     __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
735     __width_                            = __r.__value;
736     _LIBCPP_ASSERT_INTERNAL(__width_ != 0,
737                             "A zero value isn't allowed and should be impossible, "
738                             "due to validations in this function");
739     __begin = __r.__last;
740     return true;
741   }
742 
743   template <contiguous_iterator _Iterator>
744   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(_Iterator& __begin, _Iterator __end, auto& __ctx) {
745     if (*__begin != _CharT('.'))
746       return false;
747 
748     ++__begin;
749     if (__begin == __end)
750       std::__throw_format_error("End of input while parsing format specifier precision");
751 
752     if (*__begin == _CharT('{')) {
753       __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __ctx);
754       __precision_as_arg_                      = true;
755       __precision_                             = __arg_id.__value;
756       __begin                                  = __arg_id.__last;
757       return true;
758     }
759 
760     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
761       std::__throw_format_error("The precision option does not contain a value or an argument index");
762 
763     __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
764     __precision_                        = __r.__value;
765     __precision_as_arg_                 = false;
766     __begin                             = __r.__last;
767     return true;
768   }
769 
770   template <contiguous_iterator _Iterator>
771   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(_Iterator& __begin) {
772     if (*__begin != _CharT('L'))
773       return false;
774 
775     __locale_specific_form_ = true;
776     ++__begin;
777     return true;
778   }
779 
780   template <contiguous_iterator _Iterator>
781   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_clear_brackets(_Iterator& __begin) {
782     if (*__begin != _CharT('n'))
783       return false;
784 
785     __clear_brackets_ = true;
786     ++__begin;
787     return true;
788   }
789 
790   template <contiguous_iterator _Iterator>
791   _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(_Iterator& __begin) {
792     // Determines the type. It does not validate whether the selected type is
793     // valid. Most formatters have optional fields that are only allowed for
794     // certain types. These parsers need to do validation after the type has
795     // been parsed. So its easier to implement the validation for all types in
796     // the specific parse function.
797     switch (*__begin) {
798     case 'A':
799       __type_ = __type::__hexfloat_upper_case;
800       break;
801     case 'B':
802       __type_ = __type::__binary_upper_case;
803       break;
804     case 'E':
805       __type_ = __type::__scientific_upper_case;
806       break;
807     case 'F':
808       __type_ = __type::__fixed_upper_case;
809       break;
810     case 'G':
811       __type_ = __type::__general_upper_case;
812       break;
813     case 'X':
814       __type_ = __type::__hexadecimal_upper_case;
815       break;
816     case 'a':
817       __type_ = __type::__hexfloat_lower_case;
818       break;
819     case 'b':
820       __type_ = __type::__binary_lower_case;
821       break;
822     case 'c':
823       __type_ = __type::__char;
824       break;
825     case 'd':
826       __type_ = __type::__decimal;
827       break;
828     case 'e':
829       __type_ = __type::__scientific_lower_case;
830       break;
831     case 'f':
832       __type_ = __type::__fixed_lower_case;
833       break;
834     case 'g':
835       __type_ = __type::__general_lower_case;
836       break;
837     case 'o':
838       __type_ = __type::__octal;
839       break;
840     case 'p':
841       __type_ = __type::__pointer_lower_case;
842       break;
843     case 'P':
844       __type_ = __type::__pointer_upper_case;
845       break;
846     case 's':
847       __type_ = __type::__string;
848       break;
849     case 'x':
850       __type_ = __type::__hexadecimal_lower_case;
851       break;
852 #  if _LIBCPP_STD_VER >= 23
853     case '?':
854       __type_ = __type::__debug;
855       break;
856 #  endif
857     default:
858       return;
859     }
860     ++__begin;
861   }
862 
863   _LIBCPP_HIDE_FROM_ABI int32_t __get_width(auto& __ctx) const {
864     if (!__width_as_arg_)
865       return __width_;
866 
867     return __format_spec::__substitute_arg_id(__ctx.arg(__width_));
868   }
869 
870   _LIBCPP_HIDE_FROM_ABI int32_t __get_precision(auto& __ctx) const {
871     if (!__precision_as_arg_)
872       return __precision_;
873 
874     return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
875   }
876 };
877 
// Verifies that the reserved bitfields don't change the size of the parser:
// every instantiation checked here is required to be exactly 16 bytes.
static_assert(sizeof(__parser<char>) == 16);
#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static_assert(sizeof(__parser<wchar_t>) == 16);
#  endif
883 
884 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) {
885   switch (__type) {
886   case __format_spec::__type::__default:
887   case __format_spec::__type::__string:
888   case __format_spec::__type::__debug:
889     break;
890 
891   default:
892     std::__throw_format_error("The type option contains an invalid value for a string formatting argument");
893   }
894 }
895 
896 template <class _CharT>
897 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser, const char* __id) {
898   __parser.__validate(__format_spec::__fields_bool, __id);
899   if (__parser.__alignment_ == __alignment::__default)
900     __parser.__alignment_ = __alignment::__left;
901 }
902 
/// Finishes parsing for an argument that is displayed as a character.
///
/// This forwards to \c __process_display_type_bool_string, so the same field
/// validation and default alignment are applied.
///
/// \param __parser The parser holding the parsed format-spec.
/// \param __id     Passed on unchanged to the forwarded call.
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser, const char* __id) {
  __format_spec::__process_display_type_bool_string(__parser, __id);
}
907 
908 template <class _CharT>
909 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser, const char* __id) {
910   switch (__parser.__type_) {
911   case __format_spec::__type::__default:
912   case __format_spec::__type::__string:
913     __format_spec::__process_display_type_bool_string(__parser, __id);
914     break;
915 
916   case __format_spec::__type::__binary_lower_case:
917   case __format_spec::__type::__binary_upper_case:
918   case __format_spec::__type::__octal:
919   case __format_spec::__type::__decimal:
920   case __format_spec::__type::__hexadecimal_lower_case:
921   case __format_spec::__type::__hexadecimal_upper_case:
922     break;
923 
924   default:
925     __format_spec::__throw_invalid_type_format_error(__id);
926   }
927 }
928 
929 template <class _CharT>
930 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser, const char* __id) {
931   switch (__parser.__type_) {
932   case __format_spec::__type::__default:
933   case __format_spec::__type::__char:
934   case __format_spec::__type::__debug:
935     __format_spec::__process_display_type_char(__parser, __id);
936     break;
937 
938   case __format_spec::__type::__binary_lower_case:
939   case __format_spec::__type::__binary_upper_case:
940   case __format_spec::__type::__octal:
941   case __format_spec::__type::__decimal:
942   case __format_spec::__type::__hexadecimal_lower_case:
943   case __format_spec::__type::__hexadecimal_upper_case:
944     break;
945 
946   default:
947     __format_spec::__throw_invalid_type_format_error(__id);
948   }
949 }
950 
951 template <class _CharT>
952 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser, const char* __id) {
953   switch (__parser.__type_) {
954   case __format_spec::__type::__default:
955   case __format_spec::__type::__binary_lower_case:
956   case __format_spec::__type::__binary_upper_case:
957   case __format_spec::__type::__octal:
958   case __format_spec::__type::__decimal:
959   case __format_spec::__type::__hexadecimal_lower_case:
960   case __format_spec::__type::__hexadecimal_upper_case:
961     break;
962 
963   case __format_spec::__type::__char:
964     __format_spec::__process_display_type_char(__parser, __id);
965     break;
966 
967   default:
968     __format_spec::__throw_invalid_type_format_error(__id);
969   }
970 }
971 
972 template <class _CharT>
973 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser, const char* __id) {
974   switch (__parser.__type_) {
975   case __format_spec::__type::__default:
976   case __format_spec::__type::__hexfloat_lower_case:
977   case __format_spec::__type::__hexfloat_upper_case:
978     // Precision specific behavior will be handled later.
979     break;
980   case __format_spec::__type::__scientific_lower_case:
981   case __format_spec::__type::__scientific_upper_case:
982   case __format_spec::__type::__fixed_lower_case:
983   case __format_spec::__type::__fixed_upper_case:
984   case __format_spec::__type::__general_lower_case:
985   case __format_spec::__type::__general_upper_case:
986     if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1)
987       // Set the default precision for the call to to_chars.
988       __parser.__precision_ = 6;
989     break;
990 
991   default:
992     __format_spec::__throw_invalid_type_format_error(__id);
993   }
994 }
995 
996 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type, const char* __id) {
997   switch (__type) {
998   case __format_spec::__type::__default:
999   case __format_spec::__type::__pointer_lower_case:
1000   case __format_spec::__type::__pointer_upper_case:
1001     break;
1002 
1003   default:
1004     __format_spec::__throw_invalid_type_format_error(__id);
1005   }
1006 }
1007 
/// The result of a column width estimation.
///
/// \tparam _Iterator The contiguous iterator type of the estimated input.
template <contiguous_iterator _Iterator>
struct __column_width_result {
  /// The number of output columns.
  size_t __width_;
  /// One beyond the last code unit used in the estimation.
  ///
  /// This limits the original output to fit in the wanted number of columns.
  _Iterator __last_;
};

/// Deduction guide: lets \c _Iterator be deduced when the result is
/// initialized from a width and an iterator.
template <contiguous_iterator _Iterator>
__column_width_result(size_t, _Iterator) -> __column_width_result<_Iterator>;
1020 
/// Selects how a column width estimation is rounded.
///
/// Since a single element can have a column width of two, it's possible that
/// the requested column width can't be achieved exactly. Depending on the
/// intended usage the policy can be selected:
/// - When used as a precision the maximum width may not be exceeded and the
///   result should be "rounded down" to the previous boundary.
/// - When used as a width we're done once the minimum is reached, but
///   exceeding it is not an issue. Rounding down is an issue since that would
///   result in writing fill characters. Therefore the result needs to be
///   "rounded up".
enum class __column_width_rounding { __down, __up };
1031 
1032 #  ifndef _LIBCPP_HAS_NO_UNICODE
1033 
1034 namespace __detail {
1035 template <contiguous_iterator _Iterator>
1036 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width_grapheme_clustering(
1037     _Iterator __first, _Iterator __last, size_t __maximum, __column_width_rounding __rounding) noexcept {
1038   using _CharT = iter_value_t<_Iterator>;
1039   __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last};
1040 
1041   __column_width_result<_Iterator> __result{0, __first};
1042   while (__result.__last_ != __last && __result.__width_ <= __maximum) {
1043     typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume();
1044     int __width = __width_estimation_table::__estimated_width(__cluster.__code_point_);
1045 
1046     // When the next entry would exceed the maximum width the previous width
1047     // might be returned. For example when a width of 100 is requested the
1048     // returned width might be 99, since the next code point has an estimated
1049     // column width of 2. This depends on the rounding flag.
1050     // When the maximum is exceeded the loop will abort the next iteration.
1051     if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum)
1052       return __result;
1053 
1054     __result.__width_ += __width;
1055     __result.__last_ = __cluster.__last_;
1056   }
1057 
1058   return __result;
1059 }
1060 
1061 } // namespace __detail
1062 
1063 // Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
1064 // Depending on format the relation between the number of code units stored and
1065 // the number of output columns differs. The first relation is the number of
1066 // code units forming a code point. (The text assumes the code units are
1067 // unsigned.)
// - UTF-8: The number of code units is between one and four. The first 128
//   Unicode code points match the ASCII character set. When the highest bit is
//   set it means the code point has more than one code unit.
// - UTF-16: The number of code units is between one and two. When the first
//   code unit is a high surrogate, in the range [0xd800,0xdbff], it means the
//   code point uses two code units.
1074 // - UTF-32: The number of code units is always one.
1075 //
1076 // The code point to the number of columns is specified in
1077 // [format.string.std]/11. This list might change in the future.
1078 //
// Another thing to be taken into account is Grapheme clustering. This means
// that in some cases multiple code points are combined into one element in
// the output. For example:
1082 // - an ASCII character with a combined diacritical mark
1083 // - an emoji with a skin tone modifier
1084 // - a group of combined people emoji to create a family
1085 // - a combination of flag emoji
1086 //
1087 // See also:
1088 // - [format.string.general]/11
1089 // - https://en.wikipedia.org/wiki/UTF-8#Encoding
1090 // - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
1091 
/// Returns whether \p __c is in the ASCII range, i.e. not larger than 0x7f.
_LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c <= 0x7f; }
1093 
/// Determines the number of output columns needed to render the input.
///
/// \note When the scanner encounters malformed Unicode it acts as-if every
/// code unit is a one column code point. Typically a terminal uses the same
/// strategy and replaces every malformed code unit with a one column
/// replacement character.
///
/// \param __str      The input whose column width is estimated.
/// \param __maximum  The maximum number of output columns. With \c __down the
///                   returned number of estimated output columns will not
///                   exceed this value; with \c __up it may.
/// \param __rounding Selects the rounding method.
///                   \c __down result.__width_ <= __maximum
///                   \c __up result.__width_ <= __maximum + 1
///
/// \returns The estimated width and the position one beyond the last code
///          unit used in the estimation.
///
/// NOTE(review): with \c __up the clustering helper keeps consuming while the
/// width equals \p __maximum, so a two column cluster at that point appears
/// to give \p __maximum + 2. Confirm whether the bound above needs adjusting.
template <class _CharT, class _Iterator = typename basic_string_view<_CharT>::const_iterator>
_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width(
    basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept {
  // The width estimation is done in two steps:
  // - Quickly process the ASCII part. ASCII has the following properties
  //   - One code unit is one code point
  //   - Every code point has an estimated width of one
  // - When needed it will use a Unicode Grapheme clustering algorithm to find
  //   the proper place for truncation.

  // Nothing to estimate; this also guarantees *__str.begin() is valid and
  // __maximum can be decremented below.
  if (__str.empty() || __maximum == 0)
    return {0, __str.begin()};

  // ASCII has one caveat; when an ASCII character is followed by a non-ASCII
  // character they might be part of an extended grapheme cluster. For example:
  //   an ASCII letter and a COMBINING ACUTE ACCENT
  // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we
  // need to scan one code unit beyond the requested precision. When this code
  // unit is non-ASCII we omit the current code unit and let the Grapheme
  // clustering algorithm do its work.
  auto __it = __str.begin();
  if (__format_spec::__is_ascii(*__it)) {
    do {
      --__maximum;
      ++__it;
      // The entire input is ASCII and fits in the requested columns.
      if (__it == __str.end())
        return {__str.size(), __str.end()};

      if (__maximum == 0) {
        // All requested columns are used. When the next code unit is ASCII
        // too it can't extend the last cluster, so the ASCII prefix is the
        // result.
        if (__format_spec::__is_ascii(*__it))
          return {static_cast<size_t>(__it - __str.begin()), __it};

        break;
      }
    } while (__format_spec::__is_ascii(*__it));
    // The next code unit is non-ASCII. Hand the last ASCII code unit, and the
    // column reserved for it, back so the clustering algorithm processes it
    // together with what follows.
    --__it;
    ++__maximum;
  }

  // The number of ASCII code units, and thus columns, already accounted for.
  ptrdiff_t __ascii_size = __it - __str.begin();
  __column_width_result __result =
      __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding);

  __result.__width_ += __ascii_size;
  return __result;
}
1154 #  else // !defined(_LIBCPP_HAS_NO_UNICODE)
1155 template <class _CharT>
1156 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<typename basic_string_view<_CharT>::const_iterator>
1157 __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept {
1158   // When Unicode isn't supported assume ASCII and every code unit is one code
1159   // point. In ASCII the estimated column width is always one. Thus there's no
1160   // need for rounding.
1161   size_t __width_ = std::min(__str.size(), __maximum);
1162   return {__width_, __str.begin() + __width_};
1163 }
1164 
1165 #  endif // !defined(_LIBCPP_HAS_NO_UNICODE)
1166 
1167 } // namespace __format_spec
1168 
1169 #endif //_LIBCPP_STD_VER >= 20
1170 
1171 _LIBCPP_END_NAMESPACE_STD
1172 
1173 _LIBCPP_POP_MACROS
1174 
1175 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
1176