1 //===-- Format string parser implementation for printf ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "parser.h"
10 
11 #include "src/__support/arg_list.h"
12 
13 #include "src/__support/CPP/Bit.h"
14 #include "src/__support/ctype_utils.h"
15 #include "src/__support/str_to_integer.h"
16 
17 namespace __llvm_libc {
18 namespace printf_core {
19 
// Index mode is currently disabled unconditionally by defining this macro
// in-file, so the #else branch below is the one that is active.
#define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag.

// GET_ARG_VAL_SIMPLEST(arg_type, index) expands to the call used to fetch an
// argument of type arg_type:
//  - index mode enabled:  get_arg_value<arg_type>(index).
//  - index mode disabled: get_next_arg_value<arg_type>(). The second macro
//    argument is dropped from the expansion, so an expression passed there
//    (e.g. parse_index(&cur_pos)) is never evaluated.
#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
#define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index)
#else
#define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>()
#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
27 
28 FormatSection Parser::get_next_section() {
29   FormatSection section;
30   section.raw_string = str + cur_pos;
31   size_t starting_pos = cur_pos;
32   if (str[cur_pos] == '%') {
33     // format section
34     section.has_conv = true;
35 
36     ++cur_pos;
37     [[maybe_unused]] size_t conv_index = 0;
38 
39     section.flags = parse_flags(&cur_pos);
40 
41     // handle width
42     section.min_width = 0;
43     if (str[cur_pos] == '*') {
44       ++cur_pos;
45 
46       section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
47     } else if (internal::isdigit(str[cur_pos])) {
48       char *int_end;
49       section.min_width =
50           internal::strtointeger<int>(str + cur_pos, &int_end, 10);
51       cur_pos = int_end - str;
52     }
53     if (section.min_width < 0) {
54       section.min_width = -section.min_width;
55       section.flags =
56           static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED);
57     }
58 
59     // handle precision
60     section.precision = -1; // negative precisions are ignored.
61     if (str[cur_pos] == '.') {
62       ++cur_pos;
63       section.precision = 0; // if there's a . but no specified precision, the
64                              // precision is implicitly 0.
65       if (str[cur_pos] == '*') {
66         ++cur_pos;
67 
68         section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
69 
70       } else if (internal::isdigit(str[cur_pos])) {
71         char *int_end;
72         section.precision =
73             internal::strtointeger<int>(str + cur_pos, &int_end, 10);
74         cur_pos = int_end - str;
75       }
76     }
77 
78     LengthModifier lm = parse_length_modifier(&cur_pos);
79 
80     section.length_modifier = lm;
81     section.conv_name = str[cur_pos];
82     switch (str[cur_pos]) {
83     case ('%'):
84       break;
85     case ('c'):
86       section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
87       break;
88     case ('d'):
89     case ('i'):
90     case ('o'):
91     case ('x'):
92     case ('X'):
93     case ('u'):
94       switch (lm) {
95       case (LengthModifier::hh):
96       case (LengthModifier::h):
97       case (LengthModifier::none):
98         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
99         break;
100       case (LengthModifier::l):
101         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index);
102         break;
103       case (LengthModifier::ll):
104       case (LengthModifier::L): // This isn't in the standard, but is in other
105                                 // libc implementations.
106         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index);
107         break;
108       case (LengthModifier::j):
109         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index);
110         break;
111       case (LengthModifier::z):
112         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index);
113         break;
114       case (LengthModifier::t):
115         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index);
116         break;
117       }
118       break;
119     case ('f'):
120     case ('F'):
121     case ('e'):
122     case ('E'):
123     case ('a'):
124     case ('A'):
125     case ('g'):
126     case ('G'):
127       if (lm != LengthModifier::L)
128         section.conv_val_raw =
129             bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index));
130       else
131         section.conv_val_raw = bit_cast<__uint128_t>(
132             GET_ARG_VAL_SIMPLEST(long double, conv_index));
133       break;
134     case ('n'):
135     case ('p'):
136     case ('s'):
137       section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index);
138       break;
139     default:
140       // if the conversion is undefined, change this to a raw section.
141       section.has_conv = false;
142       break;
143     }
144     ++cur_pos;
145   } else {
146     // raw section
147     section.has_conv = false;
148     while (str[cur_pos] != '%' && str[cur_pos] != '\0')
149       ++cur_pos;
150   }
151   section.raw_len = cur_pos - starting_pos;
152   return section;
153 }
154 
155 FormatFlags Parser::parse_flags(size_t *local_pos) {
156   bool found_flag = true;
157   FormatFlags flags = FormatFlags(0);
158   while (found_flag) {
159     switch (str[*local_pos]) {
160     case '-':
161       flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED);
162       break;
163     case '+':
164       flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN);
165       break;
166     case ' ':
167       flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX);
168       break;
169     case '#':
170       flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM);
171       break;
172     case '0':
173       flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES);
174       break;
175     default:
176       found_flag = false;
177     }
178     if (found_flag)
179       ++*local_pos;
180   }
181   return flags;
182 }
183 
184 LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
185   switch (str[*local_pos]) {
186   case ('l'):
187     if (str[*local_pos + 1] == 'l') {
188       *local_pos += 2;
189       return LengthModifier::ll;
190     } else {
191       ++*local_pos;
192       return LengthModifier::l;
193     }
194   case ('h'):
195     if (str[cur_pos + 1] == 'h') {
196       *local_pos += 2;
197       return LengthModifier::hh;
198     } else {
199       ++*local_pos;
200       return LengthModifier::h;
201     }
202   case ('L'):
203     ++*local_pos;
204     return LengthModifier::L;
205   case ('j'):
206     ++*local_pos;
207     return LengthModifier::j;
208   case ('z'):
209     ++*local_pos;
210     return LengthModifier::z;
211   case ('t'):
212     ++*local_pos;
213     return LengthModifier::t;
214   default:
215     return LengthModifier::none;
216   }
217 }
218 
219 } // namespace printf_core
220 } // namespace __llvm_libc
221