1 //===-- Format string parser implementation for printf ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 // #define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag.
10
11 #include "parser.h"
12
13 #include "src/__support/arg_list.h"
14
15 #include "src/__support/CPP/Bit.h"
16 #include "src/__support/FPUtil/FPBits.h"
17 #include "src/__support/ctype_utils.h"
18 #include "src/__support/str_to_integer.h"
19
20 namespace __llvm_libc {
21 namespace printf_core {
22
// GET_ARG_VAL_SIMPLEST(type, idx) expands to the Parser call that fetches one
// argument of the given type.
//  - With index mode disabled it expands to get_next_arg_value<type>() and the
//    second macro argument is dropped from the expansion, so any expression
//    passed there is never compiled or evaluated.
//  - Otherwise it expands to get_arg_value<type>(idx).
#ifdef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
#define GET_ARG_VAL_SIMPLEST(type, unused) get_next_arg_value<type>()
#else
#define GET_ARG_VAL_SIMPLEST(type, idx) get_arg_value<type>(idx)
#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
28
get_next_section()29 FormatSection Parser::get_next_section() {
30 FormatSection section;
31 section.raw_string = str + cur_pos;
32 size_t starting_pos = cur_pos;
33 if (str[cur_pos] == '%') {
34 // format section
35 section.has_conv = true;
36
37 ++cur_pos;
38 [[maybe_unused]] size_t conv_index = 0;
39
40 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
41 conv_index = parse_index(&cur_pos);
42 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
43
44 section.flags = parse_flags(&cur_pos);
45
46 // handle width
47 section.min_width = 0;
48 if (str[cur_pos] == '*') {
49 ++cur_pos;
50
51 section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
52 } else if (internal::isdigit(str[cur_pos])) {
53 char *int_end;
54 section.min_width =
55 internal::strtointeger<int>(str + cur_pos, &int_end, 10);
56 cur_pos = int_end - str;
57 }
58 if (section.min_width < 0) {
59 section.min_width = -section.min_width;
60 section.flags =
61 static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED);
62 }
63
64 // handle precision
65 section.precision = -1; // negative precisions are ignored.
66 if (str[cur_pos] == '.') {
67 ++cur_pos;
68 section.precision = 0; // if there's a . but no specified precision, the
69 // precision is implicitly 0.
70 if (str[cur_pos] == '*') {
71 ++cur_pos;
72
73 section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
74
75 } else if (internal::isdigit(str[cur_pos])) {
76 char *int_end;
77 section.precision =
78 internal::strtointeger<int>(str + cur_pos, &int_end, 10);
79 cur_pos = int_end - str;
80 }
81 }
82
83 LengthModifier lm = parse_length_modifier(&cur_pos);
84
85 section.length_modifier = lm;
86 section.conv_name = str[cur_pos];
87 switch (str[cur_pos]) {
88 case ('%'):
89 break;
90 case ('c'):
91 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
92 break;
93 case ('d'):
94 case ('i'):
95 case ('o'):
96 case ('x'):
97 case ('X'):
98 case ('u'):
99 switch (lm) {
100 case (LengthModifier::hh):
101 case (LengthModifier::h):
102 case (LengthModifier::none):
103 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
104 break;
105 case (LengthModifier::l):
106 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index);
107 break;
108 case (LengthModifier::ll):
109 case (LengthModifier::L): // This isn't in the standard, but is in other
110 // libc implementations.
111 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index);
112 break;
113 case (LengthModifier::j):
114 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index);
115 break;
116 case (LengthModifier::z):
117 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index);
118 break;
119 case (LengthModifier::t):
120 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index);
121 break;
122 }
123 break;
124 #ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT
125 case ('f'):
126 case ('F'):
127 case ('e'):
128 case ('E'):
129 case ('a'):
130 case ('A'):
131 case ('g'):
132 case ('G'):
133 if (lm != LengthModifier::L)
134 section.conv_val_raw =
135 bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index));
136 else
137 section.conv_val_raw = bit_cast<fputil::FPBits<long double>::UIntType>(
138 GET_ARG_VAL_SIMPLEST(long double, conv_index));
139 break;
140 #endif // LLVM_LIBC_PRINTF_DISABLE_FLOAT
141 #ifndef LLVM_LIBC_PRINTF_DISABLE_WRITE_INT
142 case ('n'):
143 #endif // LLVM_LIBC_PRINTF_DISABLE_WRITE_INT
144 case ('p'):
145 case ('s'):
146 section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index);
147 break;
148 default:
149 // if the conversion is undefined, change this to a raw section.
150 section.has_conv = false;
151 break;
152 }
153 ++cur_pos;
154 } else {
155 // raw section
156 section.has_conv = false;
157 while (str[cur_pos] != '%' && str[cur_pos] != '\0')
158 ++cur_pos;
159 }
160 section.raw_len = cur_pos - starting_pos;
161 return section;
162 }
163
parse_flags(size_t * local_pos)164 FormatFlags Parser::parse_flags(size_t *local_pos) {
165 bool found_flag = true;
166 FormatFlags flags = FormatFlags(0);
167 while (found_flag) {
168 switch (str[*local_pos]) {
169 case '-':
170 flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED);
171 break;
172 case '+':
173 flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN);
174 break;
175 case ' ':
176 flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX);
177 break;
178 case '#':
179 flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM);
180 break;
181 case '0':
182 flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES);
183 break;
184 default:
185 found_flag = false;
186 }
187 if (found_flag)
188 ++*local_pos;
189 }
190 return flags;
191 }
192
parse_length_modifier(size_t * local_pos)193 LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
194 switch (str[*local_pos]) {
195 case ('l'):
196 if (str[*local_pos + 1] == 'l') {
197 *local_pos += 2;
198 return LengthModifier::ll;
199 } else {
200 ++*local_pos;
201 return LengthModifier::l;
202 }
203 case ('h'):
204 if (str[*local_pos + 1] == 'h') {
205 *local_pos += 2;
206 return LengthModifier::hh;
207 } else {
208 ++*local_pos;
209 return LengthModifier::h;
210 }
211 case ('L'):
212 ++*local_pos;
213 return LengthModifier::L;
214 case ('j'):
215 ++*local_pos;
216 return LengthModifier::j;
217 case ('z'):
218 ++*local_pos;
219 return LengthModifier::z;
220 case ('t'):
221 ++*local_pos;
222 return LengthModifier::t;
223 default:
224 return LengthModifier::none;
225 }
226 }
227
228 //----------------------------------------------------
229 // INDEX MODE ONLY FUNCTIONS AFTER HERE:
230 //----------------------------------------------------
231
232 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
233
parse_index(size_t * local_pos)234 size_t Parser::parse_index(size_t *local_pos) {
235 if (internal::isdigit(str[*local_pos])) {
236 char *int_end;
237 size_t index =
238 internal::strtointeger<size_t>(str + *local_pos, &int_end, 10);
239 if (int_end[0] != '$')
240 return 0;
241 *local_pos = 1 + int_end - str;
242 return index;
243 }
244 return 0;
245 }
246
get_type_desc(size_t index)247 Parser::TypeDesc Parser::get_type_desc(size_t index) {
248 // index mode is assumed, and the indicies start at 1, so an index
249 // of 0 is invalid.
250 size_t local_pos = 0;
251
252 while (str[local_pos]) {
253 if (str[local_pos] == '%') {
254 ++local_pos;
255
256 size_t conv_index = parse_index(&local_pos);
257
258 // the flags aren't relevant for this situation, but I need to skip past
259 // them so they're parsed but the result is discarded.
260 parse_flags(&local_pos);
261
262 // handle width
263 if (str[local_pos] == '*') {
264 ++local_pos;
265
266 size_t width_index = parse_index(&local_pos);
267 set_type_desc(width_index, TYPE_DESC<int>);
268 if (width_index == index)
269 return TYPE_DESC<int>;
270
271 } else if (internal::isdigit(str[local_pos])) {
272 while (internal::isdigit(str[local_pos]))
273 ++local_pos;
274 }
275
276 // handle precision
277 if (str[local_pos] == '.') {
278 ++local_pos;
279 if (str[local_pos] == '*') {
280 ++local_pos;
281
282 size_t precision_index = parse_index(&local_pos);
283 set_type_desc(precision_index, TYPE_DESC<int>);
284 if (precision_index == index)
285 return TYPE_DESC<int>;
286
287 } else if (internal::isdigit(str[local_pos])) {
288 while (internal::isdigit(str[local_pos]))
289 ++local_pos;
290 }
291 }
292
293 LengthModifier lm = parse_length_modifier(&local_pos);
294
295 // if we don't have an index for this conversion, then its position is
296 // unknown and all this information is irrelevant. The rest of this logic
297 // has been for skipping past this conversion properly to avoid
298 // weirdness with %%.
299 if (conv_index == 0) {
300 ++local_pos;
301 continue;
302 }
303
304 TypeDesc conv_size = TYPE_DESC<void>;
305 switch (str[local_pos]) {
306 case ('%'):
307 conv_size = TYPE_DESC<void>;
308 break;
309 case ('c'):
310 conv_size = TYPE_DESC<int>;
311 break;
312 case ('d'):
313 case ('i'):
314 case ('o'):
315 case ('x'):
316 case ('X'):
317 case ('u'):
318 switch (lm) {
319 case (LengthModifier::hh):
320 case (LengthModifier::h):
321 case (LengthModifier::none):
322 conv_size = TYPE_DESC<int>;
323 break;
324 case (LengthModifier::l):
325 conv_size = TYPE_DESC<long>;
326 break;
327 case (LengthModifier::ll):
328 case (LengthModifier::L): // This isn't in the standard, but is in other
329 // libc implementations.
330 conv_size = TYPE_DESC<long long>;
331 break;
332 case (LengthModifier::j):
333 conv_size = TYPE_DESC<intmax_t>;
334 break;
335 case (LengthModifier::z):
336 conv_size = TYPE_DESC<size_t>;
337 break;
338 case (LengthModifier::t):
339 conv_size = TYPE_DESC<ptrdiff_t>;
340 break;
341 }
342 break;
343 #ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT
344 case ('f'):
345 case ('F'):
346 case ('e'):
347 case ('E'):
348 case ('a'):
349 case ('A'):
350 case ('g'):
351 case ('G'):
352 if (lm != LengthModifier::L)
353 conv_size = TYPE_DESC<double>;
354 else
355 conv_size = TYPE_DESC<long double>;
356 break;
357 #endif // LLVM_LIBC_PRINTF_DISABLE_FLOAT
358 #ifndef LLVM_LIBC_PRINTF_DISABLE_WRITE_INT
359 case ('n'):
360 #endif // LLVM_LIBC_PRINTF_DISABLE_WRITE_INT
361 case ('p'):
362 case ('s'):
363 conv_size = TYPE_DESC<void *>;
364 break;
365 default:
366 conv_size = TYPE_DESC<int>;
367 break;
368 }
369
370 set_type_desc(conv_index, conv_size);
371 if (conv_index == index)
372 return conv_size;
373 }
374 ++local_pos;
375 }
376
377 // If there is no size for the requested index, then just guess that it's an
378 // int.
379 return TYPE_DESC<int>;
380 }
381
args_to_index(size_t index)382 void Parser::args_to_index(size_t index) {
383 if (args_index > index) {
384 args_index = 1;
385 args_cur = args_start;
386 }
387
388 while (args_index < index) {
389 Parser::TypeDesc cur_type_desc = TYPE_DESC<void>;
390 if (args_index <= DESC_ARR_LEN)
391 cur_type_desc = desc_arr[args_index - 1];
392
393 if (cur_type_desc == TYPE_DESC<void>)
394 cur_type_desc = get_type_desc(args_index);
395
396 if (cur_type_desc == TYPE_DESC<uint32_t>)
397 args_cur.next_var<uint32_t>();
398 else if (cur_type_desc == TYPE_DESC<uint64_t>)
399 args_cur.next_var<uint64_t>();
400 #ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT
401 // Floating point numbers are stored separately from the other arguments.
402 else if (cur_type_desc == TYPE_DESC<double>)
403 args_cur.next_var<double>();
404 else if (cur_type_desc == TYPE_DESC<long double>)
405 args_cur.next_var<long double>();
406 #endif // LLVM_LIBC_PRINTF_DISABLE_FLOAT
407 // pointers may be stored separately from normal values.
408 else if (cur_type_desc == TYPE_DESC<void *>)
409 args_cur.next_var<void *>();
410 else
411 args_cur.next_var<uint32_t>();
412
413 ++args_index;
414 }
415 }
416
417 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
418
419 } // namespace printf_core
420 } // namespace __llvm_libc
421