1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
12 
13 /// \file Contains the std-format-spec parser.
14 ///
15 /// Most of the code can be reused in the chrono-format-spec.
16 /// This header has some support for the chrono-format-spec since it doesn't
17 /// affect the std-format-spec.
18 
19 #include <__algorithm/find_if.h>
20 #include <__algorithm/min.h>
21 #include <__assert>
22 #include <__config>
23 #include <__debug>
24 #include <__format/format_arg.h>
25 #include <__format/format_error.h>
26 #include <__format/format_parse_context.h>
27 #include <__format/format_string.h>
28 #include <__variant/monostate.h>
29 #include <bit>
30 #include <concepts>
31 #include <cstdint>
32 #include <type_traits>
33 
34 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
35 #  pragma GCC system_header
36 #endif
37 
38 _LIBCPP_PUSH_MACROS
39 #include <__undef_macros>
40 
41 _LIBCPP_BEGIN_NAMESPACE_STD
42 
43 #if _LIBCPP_STD_VER > 17
44 
45 namespace __format_spec {
46 
47 template <class _CharT>
48 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result< _CharT>
49 __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
50   // This function is a wrapper to call the real parser. But it does the
51   // validation for the pre-conditions and post-conditions.
52   if (__begin == __end)
53     __throw_format_error("End of input while parsing format-spec arg-id");
54 
55   __format::__parse_number_result __r =
56       __format::__parse_arg_id(__begin, __end, __parse_ctx);
57 
58   if (__r.__ptr == __end || *__r.__ptr != _CharT('}'))
59     __throw_format_error("Invalid arg-id");
60 
61   ++__r.__ptr;
62   return __r;
63 }
64 
65 template <class _Context>
66 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t
67 __substitute_arg_id(basic_format_arg<_Context> __format_arg) {
68   return visit_format_arg(
69       [](auto __arg) -> uint32_t {
70         using _Type = decltype(__arg);
71         if constexpr (integral<_Type>) {
72           if constexpr (signed_integral<_Type>) {
73             if (__arg < 0)
74               __throw_format_error("A format-spec arg-id replacement shouldn't "
75                                    "have a negative value");
76           }
77 
78           using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
79           if (static_cast<_CT>(__arg) >
80               static_cast<_CT>(__format::__number_max))
81             __throw_format_error("A format-spec arg-id replacement exceeds "
82                                  "the maximum supported value");
83 
84           return __arg;
85         } else if constexpr (same_as<_Type, monostate>)
86           __throw_format_error("Argument index out of bounds");
87         else
88           __throw_format_error("A format-spec arg-id replacement argument "
89                                "isn't an integral type");
90       },
91       __format_arg);
92 }
93 
/**
 * Helper struct returned from @ref __get_string_alignment.
 *
 * It describes which part of the input needs to be written to the output and
 * whether the output algorithm needs to add padding.
 */
template <class _CharT>
struct _LIBCPP_TEMPLATE_VIS __string_alignment {
  /** Points beyond the last character to write to the output. */
  const _CharT* __last;
  /**
   * The estimated number of columns in the output or 0.
   *
   * Only when the output needs to be aligned it's required to know the exact
   * number of columns in the output. So if the formatted output has only a
   * minimum width the exact size isn't important. It's only important to know
   * the minimum has been reached. The minimum width is the width specified in
   * the format-spec.
   *
   * For example in this code @code std::format("{:10}", MyString); @endcode
   * the width estimation can stop once the algorithm has determined the output
   * width is 10 columns.
   *
   * So if:
   * * @ref __align == @c true the @ref __size is the estimated number of
   *   columns required.
   * * @ref __align == @c false the @ref __size is the estimated number of
   *   columns required or 0 when the estimation algorithm stopped prematurely.
   */
  ptrdiff_t __size;
  /**
   * Does the output need to be aligned.
   *
   * When alignment is needed the output algorithm needs to add the proper
   * padding. Else the output algorithm just needs to copy the input up to
   * @ref __last.
   */
  bool __align;
};
128 
129 #ifndef _LIBCPP_HAS_NO_UNICODE
130 namespace __detail {
131 
132 /**
133  * Unicode column width estimates.
134  *
135  * Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
136  * Depending on format the relation between the number of code units stored and
137  * the number of output columns differs. The first relation is the number of
138  * code units forming a code point. (The text assumes the code units are
139  * unsigned.)
 * - UTF-8: The number of code units is between one and four. The first 128
 *   Unicode code points match the ASCII character set. When the highest bit is
 *   set it means the code point has more than one code unit.
 * - UTF-16: The number of code units is between 1 and 2. When the first
 *   code unit is in the range [0xd800, 0xdfff] it means the code point uses
 *   two code units.
146  * - UTF-32: The number of code units is always one.
147  *
148  * The code point to the number of columns isn't well defined. The code uses the
149  * estimations defined in [format.string.std]/11. This list might change in the
150  * future.
151  *
152  * The algorithm of @ref __get_string_alignment uses two different scanners:
153  * - The simple scanner @ref __estimate_column_width_fast. This scanner assumes
154  *   1 code unit is 1 column. This scanner stops when it can't be sure the
155  *   assumption is valid:
156  *   - UTF-8 when the code point is encoded in more than 1 code unit.
157  *   - UTF-16 and UTF-32 when the first multi-column code point is encountered.
158  *     (The code unit's value is lower than 0xd800 so the 2 code unit encoding
159  *     is irrelevant for this scanner.)
160  *   Due to these assumptions the scanner is faster than the full scanner. It
161  *   can process all text only containing ASCII. For UTF-16/32 it can process
162  *   most (all?) European languages. (Note the set it can process might be
163  *   reduced in the future, due to updates in the scanning rules.)
164  * - The full scanner @ref __estimate_column_width. This scanner, if needed,
165  *   converts multiple code units into one code point then converts the code
166  *   point to a column width.
167  *
 * See also:
 * - [format.string.std]/11
170  * - https://en.wikipedia.org/wiki/UTF-8#Encoding
171  * - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
172  */
173 
/**
 * The first 2 column code point.
 *
 * This is the point where the fast UTF-16/32 scanner needs to stop processing.
 * The value equals the lower bound of the first two-column range tested in
 * @ref __column_width_3.
 */
inline constexpr uint32_t __two_column_code_point = 0x1100;
180 
/**
 * Helper concept for an UTF-8 character type.
 *
 * Satisfied by @c char and @c char8_t.
 */
template <class _CharT>
concept __utf8_character = same_as<_CharT, char> || same_as<_CharT, char8_t>;

/**
 * Helper concept for an UTF-16 character type.
 *
 * Satisfied by @c char16_t and by @c wchar_t when it is two bytes wide.
 */
template <class _CharT>
concept __utf16_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) || same_as<_CharT, char16_t>;

/**
 * Helper concept for an UTF-32 character type.
 *
 * Satisfied by @c char32_t and by @c wchar_t when it is four bytes wide.
 */
template <class _CharT>
concept __utf32_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) || same_as<_CharT, char32_t>;

/**
 * Helper concept for an UTF-16 or UTF-32 character type.
 *
 * Used to constrain code that treats both encodings the same way.
 */
template <class _CharT>
concept __utf16_or_32_character = __utf16_character<_CharT> || __utf32_character<_CharT>;
196 
197 /**
198  * Converts a code point to the column width.
199  *
200  * The estimations are conforming to [format.string.general]/11
201  *
202  * This version expects a value less than 0x1'0000, which is a 3-byte UTF-8
203  * character.
204  */
205 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_3(uint32_t __c) noexcept {
206   _LIBCPP_ASSERT(__c < 0x10000,
207                  "Use __column_width_4 or __column_width for larger values");
208 
209   // clang-format off
210   return 1 + (__c >= 0x1100 && (__c <= 0x115f ||
211              (__c >= 0x2329 && (__c <= 0x232a ||
212              (__c >= 0x2e80 && (__c <= 0x303e ||
213              (__c >= 0x3040 && (__c <= 0xa4cf ||
214              (__c >= 0xac00 && (__c <= 0xd7a3 ||
215              (__c >= 0xf900 && (__c <= 0xfaff ||
216              (__c >= 0xfe10 && (__c <= 0xfe19 ||
217              (__c >= 0xfe30 && (__c <= 0xfe6f ||
218              (__c >= 0xff00 && (__c <= 0xff60 ||
219              (__c >= 0xffe0 && (__c <= 0xffe6
220              ))))))))))))))))))));
221   // clang-format on
222 }
223 
224 /**
225  * @overload
226  *
227  * This version expects a value greater than or equal to 0x1'0000, which is a
228  * 4-byte UTF-8 character.
229  */
230 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_4(uint32_t __c) noexcept {
231   _LIBCPP_ASSERT(__c >= 0x10000,
232                  "Use __column_width_3 or __column_width for smaller values");
233 
234   // clang-format off
235   return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f ||
236              (__c >= 0x1'f900 && (__c <= 0x1'f9ff ||
237              (__c >= 0x2'0000 && (__c <= 0x2'fffd ||
238              (__c >= 0x3'0000 && (__c <= 0x3'fffd
239              ))))))));
240   // clang-format on
241 }
242 
243 /**
244  * @overload
245  *
246  * The general case, accepting all values.
247  */
248 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width(uint32_t __c) noexcept {
249   if (__c < 0x10000)
250     return __column_width_3(__c);
251 
252   return __column_width_4(__c);
253 }
254 
255 /**
256  * Estimate the column width for the UTF-8 sequence using the fast algorithm.
257  */
258 template <__utf8_character _CharT>
259 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
260 __estimate_column_width_fast(const _CharT* __first,
261                              const _CharT* __last) noexcept {
262   return _VSTD::find_if(__first, __last,
263                         [](unsigned char __c) { return __c & 0x80; });
264 }
265 
266 /**
267  * @overload
268  *
269  * The implementation for UTF-16/32.
270  */
271 template <__utf16_or_32_character _CharT>
272 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
273 __estimate_column_width_fast(const _CharT* __first,
274                              const _CharT* __last) noexcept {
275   return _VSTD::find_if(__first, __last,
276                         [](uint32_t __c) { return __c >= 0x1100; });
277 }
278 
/** The result of the full column width scanners. */
template <class _CharT>
struct _LIBCPP_TEMPLATE_VIS __column_width_result {
  /** The number of output columns. */
  size_t __width;
  /**
   * The last parsed element.
   *
   * This limits the original output to fit in the wanted number of columns.
   */
  const _CharT* __ptr;
};
290 
291 /**
292  * Small helper to determine the width of malformed Unicode.
293  *
294  * @note This function's only needed for UTF-8. During scanning UTF-8 there
295  * are multiple place where it can be detected that the Unicode is malformed.
296  * UTF-16 only requires 1 test and UTF-32 requires no testing.
297  */
298 template <__utf8_character _CharT>
299 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
300 __estimate_column_width_malformed(const _CharT* __first, const _CharT* __last,
301                                   size_t __maximum, size_t __result) noexcept {
302   size_t __size = __last - __first;
303   size_t __n = _VSTD::min(__size, __maximum);
304   return {__result + __n, __first + __n};
305 }
306 
/**
 * Determines the number of output columns needed to render the input.
 *
 * The input is processed one code point at a time. The number of leading one
 * bits in the first code unit selects how many code units form the code
 * point.
 *
 * @note When the scanner encounters malformed Unicode it acts as-if every code
 * unit at the end of the input is one output column. It's expected the output
 * terminal will replace these malformed code units with a one column
 * replacement characters.
 *
 * @param __first   Points to the first element of the input range.
 * @param __last    Points beyond the last element of the input range.
 * @param __maximum The maximum number of output columns. The returned number
 *                  of estimated output columns will not exceed this value.
 *
 * @returns The estimated number of columns and a pointer beyond the last code
 *          unit that is part of the estimate.
 */
template <__utf8_character _CharT>
_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
__estimate_column_width(const _CharT* __first, const _CharT* __last,
                        size_t __maximum) noexcept {
  size_t __result = 0;

  while (__first != __last) {
    // Based on the number of leading 1 bits the number of code units in the
    // code point can be determined. See
    // https://en.wikipedia.org/wiki/UTF-8#Encoding
    switch (_VSTD::countl_one(static_cast<unsigned char>(*__first))) {
    case 0: // 1-code unit encoding: all 1 column
      ++__result;
      ++__first;
      break;

    case 2: // 2-code unit encoding: all 1 column
      // Malformed Unicode.
      if (__last - __first < 2) [[unlikely]]
        return __estimate_column_width_malformed(__first, __last, __maximum,
                                                 __result);
      __first += 2;
      ++__result;
      break;

    case 3: // 3-code unit encoding: either 1 or 2 columns
      // Malformed Unicode.
      if (__last - __first < 3) [[unlikely]]
        return __estimate_column_width_malformed(__first, __last, __maximum,
                                                 __result);
      {
        // Combine the payload bits: 4 bits of the first code unit and 6 bits
        // of each of the two following code units.
        uint32_t __c = static_cast<unsigned char>(*__first++) & 0x0f;
        __c <<= 6;
        __c |= static_cast<unsigned char>(*__first++) & 0x3f;
        __c <<= 6;
        __c |= static_cast<unsigned char>(*__first++) & 0x3f;
        __result += __column_width_3(__c);
        // The code point doesn't fit: remove its two columns from the result
        // and point back at its first code unit.
        if (__result > __maximum)
          return {__result - 2, __first - 3};
      }
      break;
    case 4: // 4-code unit encoding: either 1 or 2 columns
      // Malformed Unicode.
      if (__last - __first < 4) [[unlikely]]
        return __estimate_column_width_malformed(__first, __last, __maximum,
                                                 __result);
      {
        // Combine the payload bits: 3 bits of the first code unit and 6 bits
        // of each of the three following code units.
        uint32_t __c = static_cast<unsigned char>(*__first++) & 0x07;
        __c <<= 6;
        __c |= static_cast<unsigned char>(*__first++) & 0x3f;
        __c <<= 6;
        __c |= static_cast<unsigned char>(*__first++) & 0x3f;
        __c <<= 6;
        __c |= static_cast<unsigned char>(*__first++) & 0x3f;
        __result += __column_width_4(__c);
        // The code point doesn't fit: remove its two columns from the result
        // and point back at its first code unit.
        if (__result > __maximum)
          return {__result - 2, __first - 4};
      }
      break;
    default:
      // Malformed Unicode.
      // This covers a first code unit with 1 or with more than 4 leading
      // one bits.
      return __estimate_column_width_malformed(__first, __last, __maximum,
                                               __result);
    }

    // Stop as soon as the wanted number of columns has been reached.
    if (__result >= __maximum)
      return {__result, __first};
  }
  return {__result, __first};
}
390 
391 template <__utf16_character _CharT>
392 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
393 __estimate_column_width(const _CharT* __first, const _CharT* __last,
394                         size_t __maximum) noexcept {
395   size_t __result = 0;
396 
397   while (__first != __last) {
398     uint32_t __c = *__first;
399     // Is the code unit part of a surrogate pair? See
400     // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
401     if (__c >= 0xd800 && __c <= 0xDfff) {
402       // Malformed Unicode.
403       if (__last - __first < 2) [[unlikely]]
404         return {__result + 1, __first + 1};
405 
406       __c -= 0xd800;
407       __c <<= 10;
408       __c += (*(__first + 1) - 0xdc00);
409       __c += 0x10000;
410 
411       __result += __column_width_4(__c);
412       if (__result > __maximum)
413         return {__result - 2, __first};
414       __first += 2;
415     } else {
416       __result += __column_width_3(__c);
417       if (__result > __maximum)
418         return {__result - 2, __first};
419       ++__first;
420     }
421 
422     if (__result >= __maximum)
423       return {__result, __first};
424   }
425 
426   return {__result, __first};
427 }
428 
429 template <__utf32_character _CharT>
430 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
431 __estimate_column_width(const _CharT* __first, const _CharT* __last,
432                         size_t __maximum) noexcept {
433   size_t __result = 0;
434 
435   while (__first != __last) {
436     uint32_t __c = *__first;
437     __result += __column_width(__c);
438 
439     if (__result > __maximum)
440       return {__result - 2, __first};
441 
442     ++__first;
443     if (__result >= __maximum)
444       return {__result, __first};
445   }
446 
447   return {__result, __first};
448 }
449 
450 } // namespace __detail
451 
452 template <class _CharT>
453 _LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT>
454 __get_string_alignment(const _CharT* __first, const _CharT* __last,
455                        ptrdiff_t __width, ptrdiff_t __precision) noexcept {
456   _LIBCPP_ASSERT(__width != 0 || __precision != -1,
457                  "The function has no effect and shouldn't be used");
458 
459   // TODO FMT There might be more optimizations possible:
460   // If __precision == __format::__number_max and the encoding is:
461   // * UTF-8  : 4 * (__last - __first) >= __width
462   // * UTF-16 : 2 * (__last - __first) >= __width
463   // * UTF-32 : (__last - __first) >= __width
464   // In these cases it's certain the output is at least the requested width.
465   // It's unknown how often this happens in practice. For now the improvement
466   // isn't implemented.
467 
468   /*
469    * First assume there are no special Unicode code units in the input.
470    * - Apply the precision (this may reduce the size of the input). When
471    *   __precison == -1 this step is omitted.
472    * - Scan for special code units in the input.
473    * If our assumption was correct the __pos will be at the end of the input.
474    */
475   const ptrdiff_t __length = __last - __first;
476   const _CharT* __limit =
477       __first +
478       (__precision == -1 ? __length : _VSTD::min(__length, __precision));
479   ptrdiff_t __size = __limit - __first;
480   const _CharT* __pos =
481       __detail::__estimate_column_width_fast(__first, __limit);
482 
483   if (__pos == __limit)
484     return {__limit, __size, __size < __width};
485 
486   /*
487    * Our assumption was wrong, there are special Unicode code units.
488    * The range [__first, __pos) contains a set of code units with the
489    * following property:
490    *      Every _CharT in the range will be rendered in 1 column.
491    *
492    * If there's no maximum width and the parsed size already exceeds the
493    *   minimum required width. The real size isn't important. So bail out.
494    */
495   if (__precision == -1 && (__pos - __first) >= __width)
496     return {__last, 0, false};
497 
498   /* If there's a __precision, truncate the output to that width. */
499   ptrdiff_t __prefix = __pos - __first;
500   if (__precision != -1) {
501     _LIBCPP_ASSERT(__precision > __prefix, "Logic error.");
502     auto __lengh_info = __detail::__estimate_column_width(
503         __pos, __last, __precision - __prefix);
504     __size = __lengh_info.__width + __prefix;
505     return {__lengh_info.__ptr, __size, __size < __width};
506   }
507 
508   /* Else use __width to determine the number of required padding characters. */
509   _LIBCPP_ASSERT(__width > __prefix, "Logic error.");
510   /*
511    * The column width is always one or two columns. For the precision the wanted
512    * column width is the maximum, for the width it's the minimum. Using the
513    * width estimation with its truncating behavior will result in the wrong
514    * result in the following case:
515    * - The last code unit processed requires two columns and exceeds the
516    *   maximum column width.
517    * By increasing the __maximum by one avoids this issue. (It means it may
518    * pass one code point more than required to determine the proper result;
519    * that however isn't a problem for the algorithm.)
520    */
521   size_t __maximum = 1 + __width - __prefix;
522   auto __lengh_info =
523       __detail::__estimate_column_width(__pos, __last, __maximum);
524   if (__lengh_info.__ptr != __last) {
525     // Consumed the width number of code units. The exact size of the string
526     // is unknown. We only know we don't need to align the output.
527     _LIBCPP_ASSERT(static_cast<ptrdiff_t>(__lengh_info.__width + __prefix) >=
528                        __width,
529                    "Logic error");
530     return {__last, 0, false};
531   }
532 
533   __size = __lengh_info.__width + __prefix;
534   return {__last, __size, __size < __width};
535 }
536 #else  // _LIBCPP_HAS_NO_UNICODE
537 template <class _CharT>
538 _LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT>
539 __get_string_alignment(const _CharT* __first, const _CharT* __last,
540                        ptrdiff_t __width, ptrdiff_t __precision) noexcept {
541   const ptrdiff_t __length = __last - __first;
542   const _CharT* __limit =
543       __first +
544       (__precision == -1 ? __length : _VSTD::min(__length, __precision));
545   ptrdiff_t __size = __limit - __first;
546   return {__limit, __size, __size < __width};
547 }
548 #endif // _LIBCPP_HAS_NO_UNICODE
549 
/// These fields are a filter for which elements to parse.
///
/// They default to false so when a new field is added it needs to be opted in
/// explicitly.
///
/// The fill-and-align and the width have no flag; \ref __parser::__parse
/// attempts to parse them unconditionally.
struct __fields {
  uint8_t __sign_ : 1 {false};
  uint8_t __alternate_form_ : 1 {false};
  uint8_t __zero_padding_ : 1 {false};
  uint8_t __precision_ : 1 {false};
  uint8_t __locale_specific_form_ : 1 {false};
  uint8_t __type_ : 1 {false};
};
562 
// By not placing this constant in the formatter class it's not duplicated for
// char and wchar_t.

// The set of fields parsed for integral types: everything except the
// precision.
inline constexpr __fields __fields_integral{
    .__sign_                 = true,
    .__alternate_form_       = true,
    .__zero_padding_         = true,
    .__locale_specific_form_ = true,
    .__type_                 = true};
// The set of fields parsed for floating-point types: all fields.
inline constexpr __fields __fields_floating_point{
    .__sign_                 = true,
    .__alternate_form_       = true,
    .__zero_padding_         = true,
    .__precision_            = true,
    .__locale_specific_form_ = true,
    .__type_                 = true};
// The set of fields parsed for string types: only the precision and the type.
inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true};
// The set of fields parsed for pointer types: only the type.
inline constexpr __fields __fields_pointer{.__type_ = true};
580 
/// The alignment of the output.
///
/// The enumerators need to fit in the 3-bit bit-fields used to store them.
enum class _LIBCPP_ENUM_VIS __alignment : uint8_t {
  /// No alignment is set in the format string.
  __default,
  __left,
  __center,
  __right,
  /// Set by the parser when the zero-padding flag is found and no explicit
  /// alignment has been set.
  __zero_padding
};
589 
/// The sign option of the format-spec.
///
/// The enumerators need to fit in the 2-bit bit-fields used to store them.
enum class _LIBCPP_ENUM_VIS __sign : uint8_t {
  /// No sign is set in the format string.
  ///
  /// The sign isn't allowed for certain format-types. By using this value
  /// it's possible to detect whether or not the user explicitly set the sign
  /// flag. For formatting purposes it behaves the same as \ref __minus.
  __default,
  /// Set when the format-spec contains a '-'.
  __minus,
  /// Set when the format-spec contains a '+'.
  __plus,
  /// Set when the format-spec contains a ' '.
  __space
};
601 
/// The display type of the format-spec.
///
/// The parser initializes its type to \ref __default.
///
/// NOTE(review): the enumerators presumably map one-to-one to the type
/// characters of the std-format-spec; the mapping is done by a function
/// that isn't visible here -- confirm.
enum class _LIBCPP_ENUM_VIS __type : uint8_t {
  __default,
  __string,
  __binary_lower_case,
  __binary_upper_case,
  __octal,
  __decimal,
  __hexadecimal_lower_case,
  __hexadecimal_upper_case,
  __pointer,
  __char,
  __hexfloat_lower_case,
  __hexfloat_upper_case,
  __scientific_lower_case,
  __scientific_upper_case,
  __fixed_lower_case,
  __fixed_upper_case,
  __general_lower_case,
  __general_upper_case
};
622 
/// The std-format-spec specific part of the \ref __parsed_specifications.
///
/// \ref __alignment_ needs to remain the first member, it's inspected through
/// the union in \ref __parsed_specifications.
struct __std {
  __alignment __alignment_ : 3;
  __sign __sign_ : 2;
  bool __alternate_form_ : 1;
  bool __locale_specific_form_ : 1;
  __type __type_;
};
630 
/// The chrono-format-spec specific part of the \ref __parsed_specifications.
///
/// \ref __alignment_ needs to remain the first member, it's inspected through
/// the union in \ref __parsed_specifications.
struct __chrono {
  __alignment __alignment_ : 3;
  bool __weekday_name_ : 1;
  bool __month_name_ : 1;
};
636 
/// Contains the parsed formatting specifications.
///
/// This contains information for both the std-format-spec and the
/// chrono-format-spec. This results in some unused members for both
/// specifications. However these unused members don't increase the size
/// of the structure.
///
/// This struct doesn't cross ABI boundaries so its layout doesn't need to be
/// kept stable.
template <class _CharT>
struct __parsed_specifications {
  union {
    // The field __alignment_ is the first element in __std_ and __chrono_.
    // This allows the code to always inspect this value regardless of which
    // member of the union is the active member [class.union.general]/2.
    //
    // This is needed since the generic output routines handle the alignment of
    // the output.
    __alignment __alignment_ : 3;
    __std __std_;
    __chrono __chrono_;
  };

  /// The requested width.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __width_;

  /// The requested precision.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __precision_;

  /// The character used to pad the output.
  _CharT __fill_;

  /// \returns whether a width has been set, which is the case for a value
  /// greater than zero.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }

  /// \returns whether a precision has been set, which is the case for a
  /// non-negative value.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
};
678 
// Validate the struct is small and cheap to copy since the struct is passed by
// value in formatting functions.
//
// The wchar_t variant is only validated when wide characters are available.
static_assert(sizeof(__parsed_specifications<char>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
#  endif
687 
688 /// The parser for the std-format-spec.
689 ///
690 /// Note this class is a member of std::formatter specializations. It's
691 /// expected developers will create their own formatter specializations that
692 /// inherit from the std::formatter specializations. This means this class
693 /// must be ABI stable. To aid the stability the unused bits in the class are
694 /// set to zero. That way they can be repurposed if a future revision of the
695 /// Standards adds new fields to std-format-spec.
696 template <class _CharT>
697 class _LIBCPP_TEMPLATE_VIS __parser {
698 public:
699   _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(basic_format_parse_context<_CharT>& __parse_ctx, __fields __fields)
700       -> decltype(__parse_ctx.begin()) {
701 
702     const _CharT* __begin = __parse_ctx.begin();
703     const _CharT* __end = __parse_ctx.end();
704     if (__begin == __end)
705       return __begin;
706 
707     if (__parse_fill_align(__begin, __end) && __begin == __end)
708       return __begin;
709 
710     if (__fields.__sign_ && __parse_sign(__begin) && __begin == __end)
711       return __begin;
712 
713     if (__fields.__alternate_form_ && __parse_alternate_form(__begin) && __begin == __end)
714       return __begin;
715 
716     if (__fields.__zero_padding_ && __parse_zero_padding(__begin) && __begin == __end)
717       return __begin;
718 
719     if (__parse_width(__begin, __end, __parse_ctx) && __begin == __end)
720       return __begin;
721 
722     if (__fields.__precision_ && __parse_precision(__begin, __end, __parse_ctx) && __begin == __end)
723       return __begin;
724 
725     if (__fields.__locale_specific_form_ && __parse_locale_specific_form(__begin) && __begin == __end)
726       return __begin;
727 
728     if (__fields.__type_) {
729       __parse_type(__begin);
730 
731       // When __type_ is false the calling parser is expected to do additional
732       // parsing. In that case that parser should do the end of format string
733       // validation.
734       if (__begin != __end && *__begin != _CharT('}'))
735         __throw_format_error("The format-spec should consume the input or end with a '}'");
736     }
737 
738     return __begin;
739   }
740 
741   /// \returns the `__parsed_specifications` with the resolved dynamic sizes..
742   _LIBCPP_HIDE_FROM_ABI
743   __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
744     return __parsed_specifications<_CharT>{
745         .__std_ =
746             __std{.__alignment_            = __alignment_,
747                   .__sign_                 = __sign_,
748                   .__alternate_form_       = __alternate_form_,
749                   .__locale_specific_form_ = __locale_specific_form_,
750                   .__type_                 = __type_},
751         .__width_{__get_width(__ctx)},
752         .__precision_{__get_precision(__ctx)},
753         .__fill_{__fill_}};
754   }
755 
  // The parsed flags. Every bit-field has a default member initializer so
  // a newly created parser describes an empty format-spec.
  __alignment __alignment_ : 3 {__alignment::__default};
  __sign __sign_ : 2 {__sign::__default};
  bool __alternate_form_ : 1 {false};
  bool __locale_specific_form_ : 1 {false};
  // Unused bit, kept zero.
  bool __reserved_0_ : 1 {false};
  __type __type_{__type::__default};

  // These two flags are used for formatting chrono. Since the struct has
  // padding space left it's added to this structure.
  bool __weekday_name_ : 1 {false};
  bool __month_name_ : 1 {false};

  // Unused bits, kept zero.
  uint8_t __reserved_1_ : 6 {0};
  uint8_t __reserved_2_ : 6 {0};
  // These two flags are only used internally and not part of the
  // __parsed_specifications. Therefore put them at the end.
  bool __width_as_arg_ : 1 {false};
  bool __precision_as_arg_ : 1 {false};

  /// The requested width, either the value or the arg-id.
  int32_t __width_{0};

  /// The requested precision, either the value or the arg-id.
  ///
  /// The default -1 means no precision has been set.
  int32_t __precision_{-1};

  // LWG 3576 will probably change this to always accept a Unicode code point
  // To avoid changing the size with that change align the field so when it
  // becomes 32-bit its alignment will remain the same. That also means the
  // size will remain the same. (D2572 addresses the solution for LWG 3576.)
  _CharT __fill_{_CharT(' ')};
786 
787 private:
788   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
789     switch (__c) {
790     case _CharT('<'):
791       __alignment_ = __alignment::__left;
792       return true;
793 
794     case _CharT('^'):
795       __alignment_ = __alignment::__center;
796       return true;
797 
798     case _CharT('>'):
799       __alignment_ = __alignment::__right;
800       return true;
801     }
802     return false;
803   }
804 
805   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(const _CharT*& __begin, const _CharT* __end) {
806     _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause "
807                                      "undefined behavior by evaluating data not in the input");
808     if (__begin + 1 != __end) {
809       if (__parse_alignment(*(__begin + 1))) {
810         if (*__begin == _CharT('{') || *__begin == _CharT('}'))
811           __throw_format_error("The format-spec fill field contains an invalid character");
812 
813         __fill_ = *__begin;
814         __begin += 2;
815         return true;
816       }
817     }
818 
819     if (!__parse_alignment(*__begin))
820       return false;
821 
822     ++__begin;
823     return true;
824   }
825 
826   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(const _CharT*& __begin) {
827     switch (*__begin) {
828     case _CharT('-'):
829       __sign_ = __sign::__minus;
830       break;
831     case _CharT('+'):
832       __sign_ = __sign::__plus;
833       break;
834     case _CharT(' '):
835       __sign_ = __sign::__space;
836       break;
837     default:
838       return false;
839     }
840     ++__begin;
841     return true;
842   }
843 
844   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(const _CharT*& __begin) {
845     if (*__begin != _CharT('#'))
846       return false;
847 
848     __alternate_form_ = true;
849     ++__begin;
850     return true;
851   }
852 
853   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(const _CharT*& __begin) {
854     if (*__begin != _CharT('0'))
855       return false;
856 
857     if (__alignment_ == __alignment::__default)
858       __alignment_ = __alignment::__zero_padding;
859     ++__begin;
860     return true;
861   }
862 
863   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(const _CharT*& __begin, const _CharT* __end, auto& __parse_ctx) {
864     if (*__begin == _CharT('0'))
865       __throw_format_error("A format-spec width field shouldn't have a leading zero");
866 
867     if (*__begin == _CharT('{')) {
868       __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
869       __width_as_arg_ = true;
870       __width_ = __r.__value;
871       __begin = __r.__ptr;
872       return true;
873     }
874 
875     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
876       return false;
877 
878     __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
879     __width_ = __r.__value;
880     _LIBCPP_ASSERT(__width_ != 0, "A zero value isn't allowed and should be impossible, "
881                                   "due to validations in this function");
882     __begin = __r.__ptr;
883     return true;
884   }
885 
886   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(const _CharT*& __begin, const _CharT* __end,
887                                                          auto& __parse_ctx) {
888     if (*__begin != _CharT('.'))
889       return false;
890 
891     ++__begin;
892     if (__begin == __end)
893       __throw_format_error("End of input while parsing format-spec precision");
894 
895     if (*__begin == _CharT('{')) {
896       __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
897       __precision_as_arg_ = true;
898       __precision_ = __arg_id.__value;
899       __begin = __arg_id.__ptr;
900       return true;
901     }
902 
903     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
904       __throw_format_error("The format-spec precision field doesn't contain a value or arg-id");
905 
906     __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
907     __precision_ = __r.__value;
908     __precision_as_arg_ = false;
909     __begin = __r.__ptr;
910     return true;
911   }
912 
913   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(const _CharT*& __begin) {
914     if (*__begin != _CharT('L'))
915       return false;
916 
917     __locale_specific_form_ = true;
918     ++__begin;
919     return true;
920   }
921 
922   _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(const _CharT*& __begin) {
923     // Determines the type. It does not validate whether the selected type is
924     // valid. Most formatters have optional fields that are only allowed for
925     // certain types. These parsers need to do validation after the type has
926     // been parsed. So its easier to implement the validation for all types in
927     // the specific parse function.
928     switch (*__begin) {
929     case 'A':
930       __type_ = __type::__hexfloat_upper_case;
931       break;
932     case 'B':
933       __type_ = __type::__binary_upper_case;
934       break;
935     case 'E':
936       __type_ = __type::__scientific_upper_case;
937       break;
938     case 'F':
939       __type_ = __type::__fixed_upper_case;
940       break;
941     case 'G':
942       __type_ = __type::__general_upper_case;
943       break;
944     case 'X':
945       __type_ = __type::__hexadecimal_upper_case;
946       break;
947     case 'a':
948       __type_ = __type::__hexfloat_lower_case;
949       break;
950     case 'b':
951       __type_ = __type::__binary_lower_case;
952       break;
953     case 'c':
954       __type_ = __type::__char;
955       break;
956     case 'd':
957       __type_ = __type::__decimal;
958       break;
959     case 'e':
960       __type_ = __type::__scientific_lower_case;
961       break;
962     case 'f':
963       __type_ = __type::__fixed_lower_case;
964       break;
965     case 'g':
966       __type_ = __type::__general_lower_case;
967       break;
968     case 'o':
969       __type_ = __type::__octal;
970       break;
971     case 'p':
972       __type_ = __type::__pointer;
973       break;
974     case 's':
975       __type_ = __type::__string;
976       break;
977     case 'x':
978       __type_ = __type::__hexadecimal_lower_case;
979       break;
980     default:
981       return;
982     }
983     ++__begin;
984   }
985 
986   _LIBCPP_HIDE_FROM_ABI
987   int32_t __get_width(auto& __ctx) const {
988     if (!__width_as_arg_)
989       return __width_;
990 
991     int32_t __result = __format_spec::__substitute_arg_id(__ctx.arg(__width_));
992     if (__result == 0)
993       __throw_format_error("A format-spec width field replacement should have a positive value");
994     return __result;
995   }
996 
997   _LIBCPP_HIDE_FROM_ABI
998   int32_t __get_precision(auto& __ctx) const {
999     if (!__precision_as_arg_)
1000       return __precision_;
1001 
1002     return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
1003   }
1004 };
1005 
// Validates that the parser, including its reserved bit-fields, keeps its
// expected size of 16 bytes. The wide character specialization is only
// checked when wide characters are available.
static_assert(sizeof(__parser<char>) == 16);
#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static_assert(sizeof(__parser<wchar_t>) == 16);
#  endif
1011 
1012 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) {
1013   switch (__type) {
1014   case __format_spec::__type::__default:
1015   case __format_spec::__type::__string:
1016     break;
1017 
1018   default:
1019     std::__throw_format_error("The format-spec type has a type not supported for a string argument");
1020   }
1021 }
1022 
1023 template <class _CharT>
1024 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser) {
1025   if (__parser.__sign_ != __sign::__default)
1026     std::__throw_format_error("A sign field isn't allowed in this format-spec");
1027 
1028   if (__parser.__alternate_form_)
1029     std::__throw_format_error("An alternate form field isn't allowed in this format-spec");
1030 
1031   if (__parser.__alignment_ == __alignment::__zero_padding)
1032     std::__throw_format_error("A zero-padding field isn't allowed in this format-spec");
1033 
1034   if (__parser.__alignment_ == __alignment::__default)
1035     __parser.__alignment_ = __alignment::__left;
1036 }
1037 
1038 template <class _CharT>
1039 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser) {
1040   __format_spec::__process_display_type_bool_string(__parser);
1041 }
1042 
1043 template <class _CharT>
1044 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser) {
1045   switch (__parser.__type_) {
1046   case __format_spec::__type::__default:
1047   case __format_spec::__type::__string:
1048     __format_spec::__process_display_type_bool_string(__parser);
1049     break;
1050 
1051   case __format_spec::__type::__binary_lower_case:
1052   case __format_spec::__type::__binary_upper_case:
1053   case __format_spec::__type::__octal:
1054   case __format_spec::__type::__decimal:
1055   case __format_spec::__type::__hexadecimal_lower_case:
1056   case __format_spec::__type::__hexadecimal_upper_case:
1057     break;
1058 
1059   default:
1060     std::__throw_format_error("The format-spec type has a type not supported for a bool argument");
1061   }
1062 }
1063 
1064 template <class _CharT>
1065 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser) {
1066   switch (__parser.__type_) {
1067   case __format_spec::__type::__default:
1068   case __format_spec::__type::__char:
1069     __format_spec::__process_display_type_char(__parser);
1070     break;
1071 
1072   case __format_spec::__type::__binary_lower_case:
1073   case __format_spec::__type::__binary_upper_case:
1074   case __format_spec::__type::__octal:
1075   case __format_spec::__type::__decimal:
1076   case __format_spec::__type::__hexadecimal_lower_case:
1077   case __format_spec::__type::__hexadecimal_upper_case:
1078     break;
1079 
1080   default:
1081     std::__throw_format_error("The format-spec type has a type not supported for a char argument");
1082   }
1083 }
1084 
1085 template <class _CharT>
1086 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser) {
1087   switch (__parser.__type_) {
1088   case __format_spec::__type::__default:
1089   case __format_spec::__type::__binary_lower_case:
1090   case __format_spec::__type::__binary_upper_case:
1091   case __format_spec::__type::__octal:
1092   case __format_spec::__type::__decimal:
1093   case __format_spec::__type::__hexadecimal_lower_case:
1094   case __format_spec::__type::__hexadecimal_upper_case:
1095     break;
1096 
1097   case __format_spec::__type::__char:
1098     __format_spec::__process_display_type_char(__parser);
1099     break;
1100 
1101   default:
1102     std::__throw_format_error("The format-spec type has a type not supported for an integer argument");
1103   }
1104 }
1105 
1106 template <class _CharT>
1107 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser) {
1108   switch (__parser.__type_) {
1109   case __format_spec::__type::__default:
1110     // When no precision specified then it keeps default since that
1111     // formatting differs from the other types.
1112     if (__parser.__precision_as_arg_ || __parser.__precision_ != -1)
1113       __parser.__type_ = __format_spec::__type::__general_lower_case;
1114     break;
1115   case __format_spec::__type::__hexfloat_lower_case:
1116   case __format_spec::__type::__hexfloat_upper_case:
1117     // Precision specific behavior will be handled later.
1118     break;
1119   case __format_spec::__type::__scientific_lower_case:
1120   case __format_spec::__type::__scientific_upper_case:
1121   case __format_spec::__type::__fixed_lower_case:
1122   case __format_spec::__type::__fixed_upper_case:
1123   case __format_spec::__type::__general_lower_case:
1124   case __format_spec::__type::__general_upper_case:
1125     if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1)
1126       // Set the default precision for the call to to_chars.
1127       __parser.__precision_ = 6;
1128     break;
1129 
1130   default:
1131     std::__throw_format_error("The format-spec type has a type not supported for a floating-point argument");
1132   }
1133 }
1134 
1135 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type) {
1136   switch (__type) {
1137   case __format_spec::__type::__default:
1138   case __format_spec::__type::__pointer:
1139     break;
1140 
1141   default:
1142     std::__throw_format_error("The format-spec type has a type not supported for a pointer argument");
1143   }
1144 }
1145 
1146 } // namespace __format_spec
1147 
1148 #endif //_LIBCPP_STD_VER > 17
1149 
1150 _LIBCPP_END_NAMESPACE_STD
1151 
1152 _LIBCPP_POP_MACROS
1153 
1154 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
1155