1 //===-- Format string parser implementation for printf ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // #define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag.
10 
11 #include "parser.h"
12 
13 #include "src/__support/arg_list.h"
14 
15 #include "src/__support/CPP/Bit.h"
16 #include "src/__support/FPUtil/FPBits.h"
17 #include "src/__support/ctype_utils.h"
18 #include "src/__support/str_to_integer.h"
19 
20 namespace __llvm_libc {
21 namespace printf_core {
22 
// GET_ARG_VAL_SIMPLEST(arg_type, index) expands to a call that fetches one
// argument of type arg_type. With index mode disabled the second macro
// argument is dropped entirely (it is never evaluated) and
// get_next_arg_value<arg_type>() is used; otherwise the expansion is
// get_arg_value<arg_type>(index).
#ifdef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
#define GET_ARG_VAL_SIMPLEST(arg_type, unused) get_next_arg_value<arg_type>()
#else
#define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index)
#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
28 
get_next_section()29 FormatSection Parser::get_next_section() {
30   FormatSection section;
31   section.raw_string = str + cur_pos;
32   size_t starting_pos = cur_pos;
33   if (str[cur_pos] == '%') {
34     // format section
35     section.has_conv = true;
36 
37     ++cur_pos;
38     [[maybe_unused]] size_t conv_index = 0;
39 
40 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
41     conv_index = parse_index(&cur_pos);
42 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
43 
44     section.flags = parse_flags(&cur_pos);
45 
46     // handle width
47     section.min_width = 0;
48     if (str[cur_pos] == '*') {
49       ++cur_pos;
50 
51       section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
52     } else if (internal::isdigit(str[cur_pos])) {
53       char *int_end;
54       section.min_width =
55           internal::strtointeger<int>(str + cur_pos, &int_end, 10);
56       cur_pos = int_end - str;
57     }
58     if (section.min_width < 0) {
59       section.min_width = -section.min_width;
60       section.flags =
61           static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED);
62     }
63 
64     // handle precision
65     section.precision = -1; // negative precisions are ignored.
66     if (str[cur_pos] == '.') {
67       ++cur_pos;
68       section.precision = 0; // if there's a . but no specified precision, the
69                              // precision is implicitly 0.
70       if (str[cur_pos] == '*') {
71         ++cur_pos;
72 
73         section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
74 
75       } else if (internal::isdigit(str[cur_pos])) {
76         char *int_end;
77         section.precision =
78             internal::strtointeger<int>(str + cur_pos, &int_end, 10);
79         cur_pos = int_end - str;
80       }
81     }
82 
83     LengthModifier lm = parse_length_modifier(&cur_pos);
84 
85     section.length_modifier = lm;
86     section.conv_name = str[cur_pos];
87     switch (str[cur_pos]) {
88     case ('%'):
89       break;
90     case ('c'):
91       section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
92       break;
93     case ('d'):
94     case ('i'):
95     case ('o'):
96     case ('x'):
97     case ('X'):
98     case ('u'):
99       switch (lm) {
100       case (LengthModifier::hh):
101       case (LengthModifier::h):
102       case (LengthModifier::none):
103         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
104         break;
105       case (LengthModifier::l):
106         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index);
107         break;
108       case (LengthModifier::ll):
109       case (LengthModifier::L): // This isn't in the standard, but is in other
110                                 // libc implementations.
111         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index);
112         break;
113       case (LengthModifier::j):
114         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index);
115         break;
116       case (LengthModifier::z):
117         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index);
118         break;
119       case (LengthModifier::t):
120         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index);
121         break;
122       }
123       break;
124 #ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT
125     case ('f'):
126     case ('F'):
127     case ('e'):
128     case ('E'):
129     case ('a'):
130     case ('A'):
131     case ('g'):
132     case ('G'):
133       if (lm != LengthModifier::L)
134         section.conv_val_raw =
135             bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index));
136       else
137         section.conv_val_raw = bit_cast<fputil::FPBits<long double>::UIntType>(
138             GET_ARG_VAL_SIMPLEST(long double, conv_index));
139       break;
140 #endif // LLVM_LIBC_PRINTF_DISABLE_FLOAT
141 #ifndef LLVM_LIBC_PRINTF_DISABLE_WRITE_INT
142     case ('n'):
143 #endif // LLVM_LIBC_PRINTF_DISABLE_WRITE_INT
144     case ('p'):
145     case ('s'):
146       section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index);
147       break;
148     default:
149       // if the conversion is undefined, change this to a raw section.
150       section.has_conv = false;
151       break;
152     }
153     ++cur_pos;
154   } else {
155     // raw section
156     section.has_conv = false;
157     while (str[cur_pos] != '%' && str[cur_pos] != '\0')
158       ++cur_pos;
159   }
160   section.raw_len = cur_pos - starting_pos;
161   return section;
162 }
163 
parse_flags(size_t * local_pos)164 FormatFlags Parser::parse_flags(size_t *local_pos) {
165   bool found_flag = true;
166   FormatFlags flags = FormatFlags(0);
167   while (found_flag) {
168     switch (str[*local_pos]) {
169     case '-':
170       flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED);
171       break;
172     case '+':
173       flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN);
174       break;
175     case ' ':
176       flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX);
177       break;
178     case '#':
179       flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM);
180       break;
181     case '0':
182       flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES);
183       break;
184     default:
185       found_flag = false;
186     }
187     if (found_flag)
188       ++*local_pos;
189   }
190   return flags;
191 }
192 
parse_length_modifier(size_t * local_pos)193 LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
194   switch (str[*local_pos]) {
195   case ('l'):
196     if (str[*local_pos + 1] == 'l') {
197       *local_pos += 2;
198       return LengthModifier::ll;
199     } else {
200       ++*local_pos;
201       return LengthModifier::l;
202     }
203   case ('h'):
204     if (str[*local_pos + 1] == 'h') {
205       *local_pos += 2;
206       return LengthModifier::hh;
207     } else {
208       ++*local_pos;
209       return LengthModifier::h;
210     }
211   case ('L'):
212     ++*local_pos;
213     return LengthModifier::L;
214   case ('j'):
215     ++*local_pos;
216     return LengthModifier::j;
217   case ('z'):
218     ++*local_pos;
219     return LengthModifier::z;
220   case ('t'):
221     ++*local_pos;
222     return LengthModifier::t;
223   default:
224     return LengthModifier::none;
225   }
226 }
227 
228 //----------------------------------------------------
229 // INDEX MODE ONLY FUNCTIONS AFTER HERE:
230 //----------------------------------------------------
231 
232 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
233 
parse_index(size_t * local_pos)234 size_t Parser::parse_index(size_t *local_pos) {
235   if (internal::isdigit(str[*local_pos])) {
236     char *int_end;
237     size_t index =
238         internal::strtointeger<size_t>(str + *local_pos, &int_end, 10);
239     if (int_end[0] != '$')
240       return 0;
241     *local_pos = 1 + int_end - str;
242     return index;
243   }
244   return 0;
245 }
246 
get_type_desc(size_t index)247 Parser::TypeDesc Parser::get_type_desc(size_t index) {
248   // index mode is assumed, and the indicies start at 1, so an index
249   // of 0 is invalid.
250   size_t local_pos = 0;
251 
252   while (str[local_pos]) {
253     if (str[local_pos] == '%') {
254       ++local_pos;
255 
256       size_t conv_index = parse_index(&local_pos);
257 
258       // the flags aren't relevant for this situation, but I need to skip past
259       // them so they're parsed but the result is discarded.
260       parse_flags(&local_pos);
261 
262       // handle width
263       if (str[local_pos] == '*') {
264         ++local_pos;
265 
266         size_t width_index = parse_index(&local_pos);
267         set_type_desc(width_index, TYPE_DESC<int>);
268         if (width_index == index)
269           return TYPE_DESC<int>;
270 
271       } else if (internal::isdigit(str[local_pos])) {
272         while (internal::isdigit(str[local_pos]))
273           ++local_pos;
274       }
275 
276       // handle precision
277       if (str[local_pos] == '.') {
278         ++local_pos;
279         if (str[local_pos] == '*') {
280           ++local_pos;
281 
282           size_t precision_index = parse_index(&local_pos);
283           set_type_desc(precision_index, TYPE_DESC<int>);
284           if (precision_index == index)
285             return TYPE_DESC<int>;
286 
287         } else if (internal::isdigit(str[local_pos])) {
288           while (internal::isdigit(str[local_pos]))
289             ++local_pos;
290         }
291       }
292 
293       LengthModifier lm = parse_length_modifier(&local_pos);
294 
295       // if we don't have an index for this conversion, then its position is
296       // unknown and all this information is irrelevant. The rest of this logic
297       // has been for skipping past this conversion properly to avoid
298       // weirdness with %%.
299       if (conv_index == 0) {
300         ++local_pos;
301         continue;
302       }
303 
304       TypeDesc conv_size = TYPE_DESC<void>;
305       switch (str[local_pos]) {
306       case ('%'):
307         conv_size = TYPE_DESC<void>;
308         break;
309       case ('c'):
310         conv_size = TYPE_DESC<int>;
311         break;
312       case ('d'):
313       case ('i'):
314       case ('o'):
315       case ('x'):
316       case ('X'):
317       case ('u'):
318         switch (lm) {
319         case (LengthModifier::hh):
320         case (LengthModifier::h):
321         case (LengthModifier::none):
322           conv_size = TYPE_DESC<int>;
323           break;
324         case (LengthModifier::l):
325           conv_size = TYPE_DESC<long>;
326           break;
327         case (LengthModifier::ll):
328         case (LengthModifier::L): // This isn't in the standard, but is in other
329                                   // libc implementations.
330           conv_size = TYPE_DESC<long long>;
331           break;
332         case (LengthModifier::j):
333           conv_size = TYPE_DESC<intmax_t>;
334           break;
335         case (LengthModifier::z):
336           conv_size = TYPE_DESC<size_t>;
337           break;
338         case (LengthModifier::t):
339           conv_size = TYPE_DESC<ptrdiff_t>;
340           break;
341         }
342         break;
343 #ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT
344       case ('f'):
345       case ('F'):
346       case ('e'):
347       case ('E'):
348       case ('a'):
349       case ('A'):
350       case ('g'):
351       case ('G'):
352         if (lm != LengthModifier::L)
353           conv_size = TYPE_DESC<double>;
354         else
355           conv_size = TYPE_DESC<long double>;
356         break;
357 #endif // LLVM_LIBC_PRINTF_DISABLE_FLOAT
358 #ifndef LLVM_LIBC_PRINTF_DISABLE_WRITE_INT
359       case ('n'):
360 #endif // LLVM_LIBC_PRINTF_DISABLE_WRITE_INT
361       case ('p'):
362       case ('s'):
363         conv_size = TYPE_DESC<void *>;
364         break;
365       default:
366         conv_size = TYPE_DESC<int>;
367         break;
368       }
369 
370       set_type_desc(conv_index, conv_size);
371       if (conv_index == index)
372         return conv_size;
373     }
374     ++local_pos;
375   }
376 
377   // If there is no size for the requested index, then just guess that it's an
378   // int.
379   return TYPE_DESC<int>;
380 }
381 
args_to_index(size_t index)382 void Parser::args_to_index(size_t index) {
383   if (args_index > index) {
384     args_index = 1;
385     args_cur = args_start;
386   }
387 
388   while (args_index < index) {
389     Parser::TypeDesc cur_type_desc = TYPE_DESC<void>;
390     if (args_index <= DESC_ARR_LEN)
391       cur_type_desc = desc_arr[args_index - 1];
392 
393     if (cur_type_desc == TYPE_DESC<void>)
394       cur_type_desc = get_type_desc(args_index);
395 
396     if (cur_type_desc == TYPE_DESC<uint32_t>)
397       args_cur.next_var<uint32_t>();
398     else if (cur_type_desc == TYPE_DESC<uint64_t>)
399       args_cur.next_var<uint64_t>();
400 #ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT
401     // Floating point numbers are stored separately from the other arguments.
402     else if (cur_type_desc == TYPE_DESC<double>)
403       args_cur.next_var<double>();
404     else if (cur_type_desc == TYPE_DESC<long double>)
405       args_cur.next_var<long double>();
406 #endif // LLVM_LIBC_PRINTF_DISABLE_FLOAT
407     // pointers may be stored separately from normal values.
408     else if (cur_type_desc == TYPE_DESC<void *>)
409       args_cur.next_var<void *>();
410     else
411       args_cur.next_var<uint32_t>();
412 
413     ++args_index;
414   }
415 }
416 
417 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
418 
419 } // namespace printf_core
420 } // namespace __llvm_libc
421