1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9
10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
12
13 /// \file Contains the std-format-spec parser.
14 ///
15 /// Most of the code can be reused in the chrono-format-spec.
16 /// This header has some support for the chrono-format-spec since it doesn't
17 /// affect the std-format-spec.
18
19 #include <__algorithm/find_if.h>
20 #include <__algorithm/min.h>
21 #include <__assert>
22 #include <__config>
23 #include <__debug>
24 #include <__format/format_arg.h>
25 #include <__format/format_error.h>
26 #include <__format/format_parse_context.h>
27 #include <__format/format_string.h>
28 #include <__format/unicode.h>
29 #include <__variant/monostate.h>
30 #include <bit>
31 #include <concepts>
32 #include <cstdint>
33 #include <string_view>
34 #include <type_traits>
35
36 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
37 # pragma GCC system_header
38 #endif
39
40 _LIBCPP_PUSH_MACROS
41 #include <__undef_macros>
42
43 _LIBCPP_BEGIN_NAMESPACE_STD
44
45 #if _LIBCPP_STD_VER > 17
46
47 namespace __format_spec {
48
49 template <class _CharT>
50 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result< _CharT>
__parse_arg_id(const _CharT * __begin,const _CharT * __end,auto & __parse_ctx)51 __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
52 // This function is a wrapper to call the real parser. But it does the
53 // validation for the pre-conditions and post-conditions.
54 if (__begin == __end)
55 __throw_format_error("End of input while parsing format-spec arg-id");
56
57 __format::__parse_number_result __r =
58 __format::__parse_arg_id(__begin, __end, __parse_ctx);
59
60 if (__r.__ptr == __end || *__r.__ptr != _CharT('}'))
61 __throw_format_error("Invalid arg-id");
62
63 ++__r.__ptr;
64 return __r;
65 }
66
67 template <class _Context>
68 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t
__substitute_arg_id(basic_format_arg<_Context> __format_arg)69 __substitute_arg_id(basic_format_arg<_Context> __format_arg) {
70 return visit_format_arg(
71 [](auto __arg) -> uint32_t {
72 using _Type = decltype(__arg);
73 if constexpr (integral<_Type>) {
74 if constexpr (signed_integral<_Type>) {
75 if (__arg < 0)
76 __throw_format_error("A format-spec arg-id replacement shouldn't "
77 "have a negative value");
78 }
79
80 using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
81 if (static_cast<_CT>(__arg) >
82 static_cast<_CT>(__format::__number_max))
83 __throw_format_error("A format-spec arg-id replacement exceeds "
84 "the maximum supported value");
85
86 return __arg;
87 } else if constexpr (same_as<_Type, monostate>)
88 __throw_format_error("Argument index out of bounds");
89 else
90 __throw_format_error("A format-spec arg-id replacement argument "
91 "isn't an integral type");
92 },
93 __format_arg);
94 }
95
/// Selects which optional elements of the std-format-spec are parsed.
///
/// Every flag is disabled by default. This means a newly added field isn't
/// parsed until a user of this struct opts in explicitly.
struct __fields {
  uint8_t __sign_                 : 1 = false;
  uint8_t __alternate_form_       : 1 = false;
  uint8_t __zero_padding_         : 1 = false;
  uint8_t __precision_            : 1 = false;
  uint8_t __locale_specific_form_ : 1 = false;
  uint8_t __type_                 : 1 = false;
};
108
109 // By not placing this constant in the formatter class it's not duplicated for
110 // char and wchar_t.
111 inline constexpr __fields __fields_integral{
112 .__sign_ = true,
113 .__alternate_form_ = true,
114 .__zero_padding_ = true,
115 .__locale_specific_form_ = true,
116 .__type_ = true};
117 inline constexpr __fields __fields_floating_point{
118 .__sign_ = true,
119 .__alternate_form_ = true,
120 .__zero_padding_ = true,
121 .__precision_ = true,
122 .__locale_specific_form_ = true,
123 .__type_ = true};
124 inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true};
125 inline constexpr __fields __fields_pointer{.__type_ = true};
126
127 enum class _LIBCPP_ENUM_VIS __alignment : uint8_t {
128 /// No alignment is set in the format string.
129 __default,
130 __left,
131 __center,
132 __right,
133 __zero_padding
134 };
135
136 enum class _LIBCPP_ENUM_VIS __sign : uint8_t {
137 /// No sign is set in the format string.
138 ///
139 /// The sign isn't allowed for certain format-types. By using this value
140 /// it's possible to detect whether or not the user explicitly set the sign
141 /// flag. For formatting purposes it behaves the same as \ref __minus.
142 __default,
143 __minus,
144 __plus,
145 __space
146 };
147
148 enum class _LIBCPP_ENUM_VIS __type : uint8_t {
149 __default,
150 __string,
151 __binary_lower_case,
152 __binary_upper_case,
153 __octal,
154 __decimal,
155 __hexadecimal_lower_case,
156 __hexadecimal_upper_case,
157 __pointer,
158 __char,
159 __hexfloat_lower_case,
160 __hexfloat_upper_case,
161 __scientific_lower_case,
162 __scientific_upper_case,
163 __fixed_lower_case,
164 __fixed_upper_case,
165 __general_lower_case,
166 __general_upper_case
167 };
168
169 struct __std {
170 __alignment __alignment_ : 3;
171 __sign __sign_ : 2;
172 bool __alternate_form_ : 1;
173 bool __locale_specific_form_ : 1;
174 __type __type_;
175 };
176
177 struct __chrono {
178 __alignment __alignment_ : 3;
179 bool __weekday_name_ : 1;
180 bool __month_name_ : 1;
181 };
182
183 /// Contains the parsed formatting specifications.
184 ///
185 /// This contains information for both the std-format-spec and the
186 /// chrono-format-spec. This results in some unused members for both
187 /// specifications. However these unused members don't increase the size
188 /// of the structure.
189 ///
190 /// This struct doesn't cross ABI boundaries so its layout doesn't need to be
191 /// kept stable.
192 template <class _CharT>
193 struct __parsed_specifications {
194 union {
195 // The field __alignment_ is the first element in __std_ and __chrono_.
196 // This allows the code to always inspect this value regards which member
197 // of the union is the active member [class.union.general]/2.
198 //
199 // This is needed since the generic output routines handle the alignment of
200 // the output.
201 __alignment __alignment_ : 3;
202 __std __std_;
203 __chrono __chrono_;
204 };
205
206 /// The requested width.
207 ///
208 /// When the format-spec used an arg-id for this field it has already been
209 /// replaced with the value of that arg-id.
210 int32_t __width_;
211
212 /// The requested precision.
213 ///
214 /// When the format-spec used an arg-id for this field it has already been
215 /// replaced with the value of that arg-id.
216 int32_t __precision_;
217
218 _CharT __fill_;
219
__has_width__parsed_specifications220 _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }
221
__has_precision__parsed_specifications222 _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
223 };
224
225 // Validate the struct is small and cheap to copy since the struct is passed by
226 // value in formatting functions.
227 static_assert(sizeof(__parsed_specifications<char>) == 16);
228 static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
229 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
230 static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
231 static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
232 # endif
233
234 /// The parser for the std-format-spec.
235 ///
236 /// Note this class is a member of std::formatter specializations. It's
237 /// expected developers will create their own formatter specializations that
238 /// inherit from the std::formatter specializations. This means this class
239 /// must be ABI stable. To aid the stability the unused bits in the class are
240 /// set to zero. That way they can be repurposed if a future revision of the
241 /// Standards adds new fields to std-format-spec.
242 template <class _CharT>
243 class _LIBCPP_TEMPLATE_VIS __parser {
244 public:
245 _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(basic_format_parse_context<_CharT>& __parse_ctx, __fields __fields)
246 -> decltype(__parse_ctx.begin()) {
247
248 const _CharT* __begin = __parse_ctx.begin();
249 const _CharT* __end = __parse_ctx.end();
250 if (__begin == __end)
251 return __begin;
252
253 if (__parse_fill_align(__begin, __end) && __begin == __end)
254 return __begin;
255
256 if (__fields.__sign_ && __parse_sign(__begin) && __begin == __end)
257 return __begin;
258
259 if (__fields.__alternate_form_ && __parse_alternate_form(__begin) && __begin == __end)
260 return __begin;
261
262 if (__fields.__zero_padding_ && __parse_zero_padding(__begin) && __begin == __end)
263 return __begin;
264
265 if (__parse_width(__begin, __end, __parse_ctx) && __begin == __end)
266 return __begin;
267
268 if (__fields.__precision_ && __parse_precision(__begin, __end, __parse_ctx) && __begin == __end)
269 return __begin;
270
271 if (__fields.__locale_specific_form_ && __parse_locale_specific_form(__begin) && __begin == __end)
272 return __begin;
273
274 if (__fields.__type_) {
275 __parse_type(__begin);
276
277 // When __type_ is false the calling parser is expected to do additional
278 // parsing. In that case that parser should do the end of format string
279 // validation.
280 if (__begin != __end && *__begin != _CharT('}'))
281 __throw_format_error("The format-spec should consume the input or end with a '}'");
282 }
283
284 return __begin;
285 }
286
287 /// \returns the `__parsed_specifications` with the resolved dynamic sizes..
288 _LIBCPP_HIDE_FROM_ABI
__get_parsed_std_specifications(auto & __ctx)289 __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
290 return __parsed_specifications<_CharT>{
291 .__std_ =
292 __std{.__alignment_ = __alignment_,
293 .__sign_ = __sign_,
294 .__alternate_form_ = __alternate_form_,
295 .__locale_specific_form_ = __locale_specific_form_,
296 .__type_ = __type_},
297 .__width_{__get_width(__ctx)},
298 .__precision_{__get_precision(__ctx)},
299 .__fill_{__fill_}};
300 }
301
302 __alignment __alignment_ : 3 {__alignment::__default};
303 __sign __sign_ : 2 {__sign::__default};
304 bool __alternate_form_ : 1 {false};
305 bool __locale_specific_form_ : 1 {false};
306 bool __reserved_0_ : 1 {false};
307 __type __type_{__type::__default};
308
309 // These two flags are used for formatting chrono. Since the struct has
310 // padding space left it's added to this structure.
311 bool __weekday_name_ : 1 {false};
312 bool __month_name_ : 1 {false};
313
314 uint8_t __reserved_1_ : 6 {0};
315 uint8_t __reserved_2_ : 6 {0};
316 // These two flags are only used internally and not part of the
317 // __parsed_specifications. Therefore put them at the end.
318 bool __width_as_arg_ : 1 {false};
319 bool __precision_as_arg_ : 1 {false};
320
321 /// The requested width, either the value or the arg-id.
322 int32_t __width_{0};
323
324 /// The requested precision, either the value or the arg-id.
325 int32_t __precision_{-1};
326
327 // LWG 3576 will probably change this to always accept a Unicode code point
328 // To avoid changing the size with that change align the field so when it
329 // becomes 32-bit its alignment will remain the same. That also means the
330 // size will remain the same. (D2572 addresses the solution for LWG 3576.)
331 _CharT __fill_{_CharT(' ')};
332
333 private:
__parse_alignment(_CharT __c)334 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
335 switch (__c) {
336 case _CharT('<'):
337 __alignment_ = __alignment::__left;
338 return true;
339
340 case _CharT('^'):
341 __alignment_ = __alignment::__center;
342 return true;
343
344 case _CharT('>'):
345 __alignment_ = __alignment::__right;
346 return true;
347 }
348 return false;
349 }
350
__parse_fill_align(const _CharT * & __begin,const _CharT * __end)351 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(const _CharT*& __begin, const _CharT* __end) {
352 _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause "
353 "undefined behavior by evaluating data not in the input");
354 if (__begin + 1 != __end) {
355 if (__parse_alignment(*(__begin + 1))) {
356 if (*__begin == _CharT('{') || *__begin == _CharT('}'))
357 __throw_format_error("The format-spec fill field contains an invalid character");
358
359 __fill_ = *__begin;
360 __begin += 2;
361 return true;
362 }
363 }
364
365 if (!__parse_alignment(*__begin))
366 return false;
367
368 ++__begin;
369 return true;
370 }
371
__parse_sign(const _CharT * & __begin)372 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(const _CharT*& __begin) {
373 switch (*__begin) {
374 case _CharT('-'):
375 __sign_ = __sign::__minus;
376 break;
377 case _CharT('+'):
378 __sign_ = __sign::__plus;
379 break;
380 case _CharT(' '):
381 __sign_ = __sign::__space;
382 break;
383 default:
384 return false;
385 }
386 ++__begin;
387 return true;
388 }
389
__parse_alternate_form(const _CharT * & __begin)390 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(const _CharT*& __begin) {
391 if (*__begin != _CharT('#'))
392 return false;
393
394 __alternate_form_ = true;
395 ++__begin;
396 return true;
397 }
398
__parse_zero_padding(const _CharT * & __begin)399 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(const _CharT*& __begin) {
400 if (*__begin != _CharT('0'))
401 return false;
402
403 if (__alignment_ == __alignment::__default)
404 __alignment_ = __alignment::__zero_padding;
405 ++__begin;
406 return true;
407 }
408
__parse_width(const _CharT * & __begin,const _CharT * __end,auto & __parse_ctx)409 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(const _CharT*& __begin, const _CharT* __end, auto& __parse_ctx) {
410 if (*__begin == _CharT('0'))
411 __throw_format_error("A format-spec width field shouldn't have a leading zero");
412
413 if (*__begin == _CharT('{')) {
414 __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
415 __width_as_arg_ = true;
416 __width_ = __r.__value;
417 __begin = __r.__ptr;
418 return true;
419 }
420
421 if (*__begin < _CharT('0') || *__begin > _CharT('9'))
422 return false;
423
424 __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
425 __width_ = __r.__value;
426 _LIBCPP_ASSERT(__width_ != 0, "A zero value isn't allowed and should be impossible, "
427 "due to validations in this function");
428 __begin = __r.__ptr;
429 return true;
430 }
431
__parse_precision(const _CharT * & __begin,const _CharT * __end,auto & __parse_ctx)432 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(const _CharT*& __begin, const _CharT* __end,
433 auto& __parse_ctx) {
434 if (*__begin != _CharT('.'))
435 return false;
436
437 ++__begin;
438 if (__begin == __end)
439 __throw_format_error("End of input while parsing format-spec precision");
440
441 if (*__begin == _CharT('{')) {
442 __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
443 __precision_as_arg_ = true;
444 __precision_ = __arg_id.__value;
445 __begin = __arg_id.__ptr;
446 return true;
447 }
448
449 if (*__begin < _CharT('0') || *__begin > _CharT('9'))
450 __throw_format_error("The format-spec precision field doesn't contain a value or arg-id");
451
452 __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
453 __precision_ = __r.__value;
454 __precision_as_arg_ = false;
455 __begin = __r.__ptr;
456 return true;
457 }
458
__parse_locale_specific_form(const _CharT * & __begin)459 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(const _CharT*& __begin) {
460 if (*__begin != _CharT('L'))
461 return false;
462
463 __locale_specific_form_ = true;
464 ++__begin;
465 return true;
466 }
467
__parse_type(const _CharT * & __begin)468 _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(const _CharT*& __begin) {
469 // Determines the type. It does not validate whether the selected type is
470 // valid. Most formatters have optional fields that are only allowed for
471 // certain types. These parsers need to do validation after the type has
472 // been parsed. So its easier to implement the validation for all types in
473 // the specific parse function.
474 switch (*__begin) {
475 case 'A':
476 __type_ = __type::__hexfloat_upper_case;
477 break;
478 case 'B':
479 __type_ = __type::__binary_upper_case;
480 break;
481 case 'E':
482 __type_ = __type::__scientific_upper_case;
483 break;
484 case 'F':
485 __type_ = __type::__fixed_upper_case;
486 break;
487 case 'G':
488 __type_ = __type::__general_upper_case;
489 break;
490 case 'X':
491 __type_ = __type::__hexadecimal_upper_case;
492 break;
493 case 'a':
494 __type_ = __type::__hexfloat_lower_case;
495 break;
496 case 'b':
497 __type_ = __type::__binary_lower_case;
498 break;
499 case 'c':
500 __type_ = __type::__char;
501 break;
502 case 'd':
503 __type_ = __type::__decimal;
504 break;
505 case 'e':
506 __type_ = __type::__scientific_lower_case;
507 break;
508 case 'f':
509 __type_ = __type::__fixed_lower_case;
510 break;
511 case 'g':
512 __type_ = __type::__general_lower_case;
513 break;
514 case 'o':
515 __type_ = __type::__octal;
516 break;
517 case 'p':
518 __type_ = __type::__pointer;
519 break;
520 case 's':
521 __type_ = __type::__string;
522 break;
523 case 'x':
524 __type_ = __type::__hexadecimal_lower_case;
525 break;
526 default:
527 return;
528 }
529 ++__begin;
530 }
531
532 _LIBCPP_HIDE_FROM_ABI
__get_width(auto & __ctx)533 int32_t __get_width(auto& __ctx) const {
534 if (!__width_as_arg_)
535 return __width_;
536
537 int32_t __result = __format_spec::__substitute_arg_id(__ctx.arg(__width_));
538 if (__result == 0)
539 __throw_format_error("A format-spec width field replacement should have a positive value");
540 return __result;
541 }
542
543 _LIBCPP_HIDE_FROM_ABI
__get_precision(auto & __ctx)544 int32_t __get_precision(auto& __ctx) const {
545 if (!__precision_as_arg_)
546 return __precision_;
547
548 return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
549 }
550 };
551
552 // Validates whether the reserved bitfields don't change the size.
553 static_assert(sizeof(__parser<char>) == 16);
554 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
555 static_assert(sizeof(__parser<wchar_t>) == 16);
556 # endif
557
__process_display_type_string(__format_spec::__type __type)558 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) {
559 switch (__type) {
560 case __format_spec::__type::__default:
561 case __format_spec::__type::__string:
562 break;
563
564 default:
565 std::__throw_format_error("The format-spec type has a type not supported for a string argument");
566 }
567 }
568
569 template <class _CharT>
__process_display_type_bool_string(__parser<_CharT> & __parser)570 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser) {
571 if (__parser.__sign_ != __sign::__default)
572 std::__throw_format_error("A sign field isn't allowed in this format-spec");
573
574 if (__parser.__alternate_form_)
575 std::__throw_format_error("An alternate form field isn't allowed in this format-spec");
576
577 if (__parser.__alignment_ == __alignment::__zero_padding)
578 std::__throw_format_error("A zero-padding field isn't allowed in this format-spec");
579
580 if (__parser.__alignment_ == __alignment::__default)
581 __parser.__alignment_ = __alignment::__left;
582 }
583
584 template <class _CharT>
__process_display_type_char(__parser<_CharT> & __parser)585 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser) {
586 __format_spec::__process_display_type_bool_string(__parser);
587 }
588
589 template <class _CharT>
__process_parsed_bool(__parser<_CharT> & __parser)590 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser) {
591 switch (__parser.__type_) {
592 case __format_spec::__type::__default:
593 case __format_spec::__type::__string:
594 __format_spec::__process_display_type_bool_string(__parser);
595 break;
596
597 case __format_spec::__type::__binary_lower_case:
598 case __format_spec::__type::__binary_upper_case:
599 case __format_spec::__type::__octal:
600 case __format_spec::__type::__decimal:
601 case __format_spec::__type::__hexadecimal_lower_case:
602 case __format_spec::__type::__hexadecimal_upper_case:
603 break;
604
605 default:
606 std::__throw_format_error("The format-spec type has a type not supported for a bool argument");
607 }
608 }
609
610 template <class _CharT>
__process_parsed_char(__parser<_CharT> & __parser)611 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser) {
612 switch (__parser.__type_) {
613 case __format_spec::__type::__default:
614 case __format_spec::__type::__char:
615 __format_spec::__process_display_type_char(__parser);
616 break;
617
618 case __format_spec::__type::__binary_lower_case:
619 case __format_spec::__type::__binary_upper_case:
620 case __format_spec::__type::__octal:
621 case __format_spec::__type::__decimal:
622 case __format_spec::__type::__hexadecimal_lower_case:
623 case __format_spec::__type::__hexadecimal_upper_case:
624 break;
625
626 default:
627 std::__throw_format_error("The format-spec type has a type not supported for a char argument");
628 }
629 }
630
631 template <class _CharT>
__process_parsed_integer(__parser<_CharT> & __parser)632 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser) {
633 switch (__parser.__type_) {
634 case __format_spec::__type::__default:
635 case __format_spec::__type::__binary_lower_case:
636 case __format_spec::__type::__binary_upper_case:
637 case __format_spec::__type::__octal:
638 case __format_spec::__type::__decimal:
639 case __format_spec::__type::__hexadecimal_lower_case:
640 case __format_spec::__type::__hexadecimal_upper_case:
641 break;
642
643 case __format_spec::__type::__char:
644 __format_spec::__process_display_type_char(__parser);
645 break;
646
647 default:
648 std::__throw_format_error("The format-spec type has a type not supported for an integer argument");
649 }
650 }
651
652 template <class _CharT>
__process_parsed_floating_point(__parser<_CharT> & __parser)653 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser) {
654 switch (__parser.__type_) {
655 case __format_spec::__type::__default:
656 // When no precision specified then it keeps default since that
657 // formatting differs from the other types.
658 if (__parser.__precision_as_arg_ || __parser.__precision_ != -1)
659 __parser.__type_ = __format_spec::__type::__general_lower_case;
660 break;
661 case __format_spec::__type::__hexfloat_lower_case:
662 case __format_spec::__type::__hexfloat_upper_case:
663 // Precision specific behavior will be handled later.
664 break;
665 case __format_spec::__type::__scientific_lower_case:
666 case __format_spec::__type::__scientific_upper_case:
667 case __format_spec::__type::__fixed_lower_case:
668 case __format_spec::__type::__fixed_upper_case:
669 case __format_spec::__type::__general_lower_case:
670 case __format_spec::__type::__general_upper_case:
671 if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1)
672 // Set the default precision for the call to to_chars.
673 __parser.__precision_ = 6;
674 break;
675
676 default:
677 std::__throw_format_error("The format-spec type has a type not supported for a floating-point argument");
678 }
679 }
680
__process_display_type_pointer(__format_spec::__type __type)681 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type) {
682 switch (__type) {
683 case __format_spec::__type::__default:
684 case __format_spec::__type::__pointer:
685 break;
686
687 default:
688 std::__throw_format_error("The format-spec type has a type not supported for a pointer argument");
689 }
690 }
691
/// The result of estimating the column width of a string.
template <class _CharT>
struct __column_width_result {
  /// The estimated number of output columns.
  size_t __width_;

  /// Points one beyond the last code unit used in the estimation.
  ///
  /// This is used to limit the original output so it fits in the wanted
  /// number of columns.
  const _CharT* __last_;
};
701
/// The rounding policy of the column width estimation.
///
/// A code point can have a column width of two, so it's possible the requested
/// column width can't be achieved exactly. The intended usage determines the
/// policy to select.
enum class __column_width_rounding {
  /// Used for a precision: the maximum width may not be exceeded, so the
  /// result is "rounded down" to the previous boundary.
  __down,
  /// Used for a width: the estimation is done once the minimum is reached and
  /// exceeding it isn't an issue. Rounding down would be an issue since that
  /// results in writing fill characters, so the result is "rounded up".
  __up
};
712
713 # ifndef _LIBCPP_HAS_NO_UNICODE
714
715 namespace __detail {
716
/// Converts a code point to its estimated column width.
///
/// The estimations are conforming to [format.string.general]/11
///
/// This version expects a value less than 0x1'0000, which is a 3-byte UTF-8
/// character.
///
/// \returns 2 when \a __c is in one of the listed ranges, otherwise 1.
_LIBCPP_HIDE_FROM_ABI constexpr int __column_width_3(uint32_t __c) noexcept {
  _LIBCPP_ASSERT(__c < 0x10000, "Use __column_width_4 or __column_width for larger values");

  // The ranges are disjoint and listed in ascending order.
  // clang-format off
  const bool __is_wide =
      (__c >= 0x1100 && __c <= 0x115f) ||
      (__c >= 0x2329 && __c <= 0x232a) ||
      (__c >= 0x2e80 && __c <= 0x303e) ||
      (__c >= 0x3040 && __c <= 0xa4cf) ||
      (__c >= 0xac00 && __c <= 0xd7a3) ||
      (__c >= 0xf900 && __c <= 0xfaff) ||
      (__c >= 0xfe10 && __c <= 0xfe19) ||
      (__c >= 0xfe30 && __c <= 0xfe6f) ||
      (__c >= 0xff00 && __c <= 0xff60) ||
      (__c >= 0xffe0 && __c <= 0xffe6);
  // clang-format on

  return __is_wide ? 2 : 1;
}
740
/// @overload
///
/// This version expects a value greater than or equal to 0x1'0000, which is a
/// 4-byte UTF-8 character.
///
/// \returns 2 when \a __c is in one of the listed ranges, otherwise 1.
_LIBCPP_HIDE_FROM_ABI constexpr int __column_width_4(uint32_t __c) noexcept {
  _LIBCPP_ASSERT(__c >= 0x10000, "Use __column_width_3 or __column_width for smaller values");

  // The ranges are disjoint and listed in ascending order.
  // clang-format off
  const bool __is_wide =
      (__c >= 0x1'f300 && __c <= 0x1'f64f) ||
      (__c >= 0x1'f900 && __c <= 0x1'f9ff) ||
      (__c >= 0x2'0000 && __c <= 0x2'fffd) ||
      (__c >= 0x3'0000 && __c <= 0x3'fffd);
  // clang-format on

  return __is_wide ? 2 : 1;
}
756
757 /// @overload
758 ///
759 /// The general case, accepting all values.
__column_width(uint32_t __c)760 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width(uint32_t __c) noexcept {
761 if (__c < 0x10000)
762 return __detail::__column_width_3(__c);
763
764 return __detail::__column_width_4(__c);
765 }
766
767 template <class _CharT>
__estimate_column_width_grapheme_clustering(const _CharT * __first,const _CharT * __last,size_t __maximum,__column_width_rounding __rounding)768 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> __estimate_column_width_grapheme_clustering(
769 const _CharT* __first, const _CharT* __last, size_t __maximum, __column_width_rounding __rounding) noexcept {
770 __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last};
771
772 __column_width_result<_CharT> __result{0, __first};
773 while (__result.__last_ != __last && __result.__width_ <= __maximum) {
774 typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume();
775 int __width = __detail::__column_width(__cluster.__code_point_);
776
777 // When the next entry would exceed the maximum width the previous width
778 // might be returned. For example when a width of 100 is requested the
779 // returned width might be 99, since the next code point has an estimated
780 // column width of 2. This depends on the rounding flag.
781 // When the maximum is exceeded the loop will abort the next iteration.
782 if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum)
783 return __result;
784
785 __result.__width_ += __width;
786 __result.__last_ = __cluster.__last_;
787 }
788
789 return __result;
790 }
791
792 } // namespace __detail
793
794 // Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
795 // Depending on format the relation between the number of code units stored and
796 // the number of output columns differs. The first relation is the number of
797 // code units forming a code point. (The text assumes the code units are
798 // unsigned.)
// - UTF-8: The number of code units is between one and four. The first 127
//   Unicode code points match the ASCII character set. When the highest bit is
//   set it means the code point has more than one code unit.
// - UTF-16: The number of code units is between one and two. When the first
//   code unit is in the range [0xd800,0xdbff] (a leading surrogate) it means
//   the code point uses two code units.
805 // - UTF-32: The number of code units is always one.
806 //
807 // The code point to the number of columns is specified in
808 // [format.string.std]/11. This list might change in the future.
809 //
// Another thing to be taken into account is Grapheme clustering. This means
// that in some cases multiple code points are combined into one element in
// the output. For example:
813 // - an ASCII character with a combined diacritical mark
814 // - an emoji with a skin tone modifier
815 // - a group of combined people emoji to create a family
816 // - a combination of flag emoji
817 //
818 // See also:
819 // - [format.string.general]/11
820 // - https://en.wikipedia.org/wiki/UTF-8#Encoding
821 // - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
822
/// \returns Whether \a __c is in the ASCII range, thus at most 0x7f.
_LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) {
  return __c <= 0x7f;
}
824
825 /// Determines the number of output columns needed to render the input.
826 ///
827 /// \note When the scanner encounters malformed Unicode it acts as-if every
828 /// code unit is a one column code point. Typically a terminal uses the same
829 /// strategy and replaces every malformed code unit with a one column
830 /// replacement character.
831 ///
832 /// \param __first Points to the first element of the input range.
833 /// \param __last Points beyond the last element of the input range.
834 /// \param __maximum The maximum number of output columns. The returned number
835 /// of estimated output columns will not exceed this value.
836 /// \param __rounding Selects the rounding method.
837 /// \c __down result.__width_ <= __maximum
838 /// \c __up result.__width_ <= __maximum + 1
839 template <class _CharT>
__estimate_column_width(basic_string_view<_CharT> __str,size_t __maximum,__column_width_rounding __rounding)840 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> __estimate_column_width(
841 basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept {
842 // The width estimation is done in two steps:
843 // - Quickly process for the ASCII part. ASCII has the following properties
844 // - One code unit is one code point
845 // - Every code point has an estimated width of one
846 // - When needed it will a Unicode Grapheme clustering algorithm to find
847 // the proper place for truncation.
848
849 if (__str.empty() || __maximum == 0)
850 return {0, __str.begin()};
851
852 // ASCII has one caveat; when an ASCII character is followed by a non-ASCII
853 // character they might be part of an extended grapheme cluster. For example:
854 // an ASCII letter and a COMBINING ACUTE ACCENT
855 // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we
856 // need to scan one code unit beyond the requested precision. When this code
857 // unit is non-ASCII we omit the current code unit and let the Grapheme
858 // clustering algorithm do its work.
859 const _CharT* __it = __str.begin();
860 if (__is_ascii(*__it)) {
861 do {
862 --__maximum;
863 ++__it;
864 if (__it == __str.end())
865 return {__str.size(), __str.end()};
866
867 if (__maximum == 0) {
868 if (__is_ascii(*__it))
869 return {static_cast<size_t>(__it - __str.begin()), __it};
870
871 break;
872 }
873 } while (__is_ascii(*__it));
874 --__it;
875 ++__maximum;
876 }
877
878 ptrdiff_t __ascii_size = __it - __str.begin();
879 __column_width_result __result =
880 __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding);
881
882 __result.__width_ += __ascii_size;
883 return __result;
884 }
885 # else // !defined(_LIBCPP_HAS_NO_UNICODE)
886 template <class _CharT>
887 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
__estimate_column_width(basic_string_view<_CharT> __str,size_t __maximum,__column_width_rounding)888 __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept {
889 // When Unicode isn't supported assume ASCII and every code unit is one code
890 // point. In ASCII the estimated column width is always one. Thus there's no
891 // need for rounding.
892 size_t __width_ = _VSTD::min(__str.size(), __maximum);
893 return {__width_, __str.begin() + __width_};
894 }
895
896 # endif // !defined(_LIBCPP_HAS_NO_UNICODE)
897
898 } // namespace __format_spec
899
900 #endif //_LIBCPP_STD_VER > 17
901
902 _LIBCPP_END_NAMESPACE_STD
903
904 _LIBCPP_POP_MACROS
905
906 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
907