1 //===-- Format string parser implementation for printf ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // #define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag.
10 
11 #include "parser.h"
12 
13 #include "src/__support/arg_list.h"
14 
15 #include "src/__support/CPP/Bit.h"
16 #include "src/__support/FPUtil/FPBits.h"
17 #include "src/__support/ctype_utils.h"
18 #include "src/__support/str_to_integer.h"
19 
20 namespace __llvm_libc {
21 namespace printf_core {
22 
// GET_ARG_VAL_SIMPLEST(arg_type, index) fetches one argument of type arg_type.
// When index mode is compiled out the second macro argument is discarded
// without being expanded, so index-only expressions (such as a call to
// parse_index) may still be passed at the call site.
#ifdef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
#define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>()
#else
#define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index)
#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
28 
29 FormatSection Parser::get_next_section() {
30   FormatSection section;
31   section.raw_string = str + cur_pos;
32   size_t starting_pos = cur_pos;
33   if (str[cur_pos] == '%') {
34     // format section
35     section.has_conv = true;
36 
37     ++cur_pos;
38     [[maybe_unused]] size_t conv_index = 0;
39 
40 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
41     conv_index = parse_index(&cur_pos);
42 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
43 
44     section.flags = parse_flags(&cur_pos);
45 
46     // handle width
47     section.min_width = 0;
48     if (str[cur_pos] == '*') {
49       ++cur_pos;
50 
51       section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
52     } else if (internal::isdigit(str[cur_pos])) {
53       char *int_end;
54       section.min_width =
55           internal::strtointeger<int>(str + cur_pos, &int_end, 10);
56       cur_pos = int_end - str;
57     }
58     if (section.min_width < 0) {
59       section.min_width = -section.min_width;
60       section.flags =
61           static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED);
62     }
63 
64     // handle precision
65     section.precision = -1; // negative precisions are ignored.
66     if (str[cur_pos] == '.') {
67       ++cur_pos;
68       section.precision = 0; // if there's a . but no specified precision, the
69                              // precision is implicitly 0.
70       if (str[cur_pos] == '*') {
71         ++cur_pos;
72 
73         section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
74 
75       } else if (internal::isdigit(str[cur_pos])) {
76         char *int_end;
77         section.precision =
78             internal::strtointeger<int>(str + cur_pos, &int_end, 10);
79         cur_pos = int_end - str;
80       }
81     }
82 
83     LengthModifier lm = parse_length_modifier(&cur_pos);
84 
85     section.length_modifier = lm;
86     section.conv_name = str[cur_pos];
87     switch (str[cur_pos]) {
88     case ('%'):
89       break;
90     case ('c'):
91       section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
92       break;
93     case ('d'):
94     case ('i'):
95     case ('o'):
96     case ('x'):
97     case ('X'):
98     case ('u'):
99       switch (lm) {
100       case (LengthModifier::hh):
101       case (LengthModifier::h):
102       case (LengthModifier::none):
103         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
104         break;
105       case (LengthModifier::l):
106         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index);
107         break;
108       case (LengthModifier::ll):
109       case (LengthModifier::L): // This isn't in the standard, but is in other
110                                 // libc implementations.
111         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index);
112         break;
113       case (LengthModifier::j):
114         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index);
115         break;
116       case (LengthModifier::z):
117         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index);
118         break;
119       case (LengthModifier::t):
120         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index);
121         break;
122       }
123       break;
124       // TODO(michaelrj): add a flag to disable float point values here
125     case ('f'):
126     case ('F'):
127     case ('e'):
128     case ('E'):
129     case ('a'):
130     case ('A'):
131     case ('g'):
132     case ('G'):
133       if (lm != LengthModifier::L)
134         section.conv_val_raw =
135             bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index));
136       else
137         section.conv_val_raw = bit_cast<fputil::FPBits<long double>::UIntType>(
138             GET_ARG_VAL_SIMPLEST(long double, conv_index));
139       break;
140 #ifndef LLVM_LIBC_PRINTF_DISABLE_WRITE_INT
141     case ('n'):
142 #endif // LLVM_LIBC_PRINTF_DISABLE_WRITE_INT
143     case ('p'):
144     case ('s'):
145       section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index);
146       break;
147     default:
148       // if the conversion is undefined, change this to a raw section.
149       section.has_conv = false;
150       break;
151     }
152     ++cur_pos;
153   } else {
154     // raw section
155     section.has_conv = false;
156     while (str[cur_pos] != '%' && str[cur_pos] != '\0')
157       ++cur_pos;
158   }
159   section.raw_len = cur_pos - starting_pos;
160   return section;
161 }
162 
163 FormatFlags Parser::parse_flags(size_t *local_pos) {
164   bool found_flag = true;
165   FormatFlags flags = FormatFlags(0);
166   while (found_flag) {
167     switch (str[*local_pos]) {
168     case '-':
169       flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED);
170       break;
171     case '+':
172       flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN);
173       break;
174     case ' ':
175       flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX);
176       break;
177     case '#':
178       flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM);
179       break;
180     case '0':
181       flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES);
182       break;
183     default:
184       found_flag = false;
185     }
186     if (found_flag)
187       ++*local_pos;
188   }
189   return flags;
190 }
191 
192 LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
193   switch (str[*local_pos]) {
194   case ('l'):
195     if (str[*local_pos + 1] == 'l') {
196       *local_pos += 2;
197       return LengthModifier::ll;
198     } else {
199       ++*local_pos;
200       return LengthModifier::l;
201     }
202   case ('h'):
203     if (str[*local_pos + 1] == 'h') {
204       *local_pos += 2;
205       return LengthModifier::hh;
206     } else {
207       ++*local_pos;
208       return LengthModifier::h;
209     }
210   case ('L'):
211     ++*local_pos;
212     return LengthModifier::L;
213   case ('j'):
214     ++*local_pos;
215     return LengthModifier::j;
216   case ('z'):
217     ++*local_pos;
218     return LengthModifier::z;
219   case ('t'):
220     ++*local_pos;
221     return LengthModifier::t;
222   default:
223     return LengthModifier::none;
224   }
225 }
226 
227 //----------------------------------------------------
228 // INDEX MODE ONLY FUNCTIONS AFTER HERE:
229 //----------------------------------------------------
230 
231 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
232 
233 size_t Parser::parse_index(size_t *local_pos) {
234   if (internal::isdigit(str[*local_pos])) {
235     char *int_end;
236     size_t index =
237         internal::strtointeger<size_t>(str + *local_pos, &int_end, 10);
238     if (int_end[0] != '$')
239       return 0;
240     *local_pos = 1 + int_end - str;
241     return index;
242   }
243   return 0;
244 }
245 
246 Parser::TypeDesc Parser::get_type_desc(size_t index) {
247   // index mode is assumed, and the indicies start at 1, so an index
248   // of 0 is invalid.
249   size_t local_pos = 0;
250 
251   while (str[local_pos]) {
252     if (str[local_pos] == '%') {
253       ++local_pos;
254 
255       size_t conv_index = parse_index(&local_pos);
256 
257       // the flags aren't relevant for this situation, but I need to skip past
258       // them so they're parsed but the result is discarded.
259       parse_flags(&local_pos);
260 
261       // handle width
262       if (str[local_pos] == '*') {
263         ++local_pos;
264 
265         size_t width_index = parse_index(&local_pos);
266         set_type_desc(width_index, TYPE_DESC<int>);
267         if (width_index == index)
268           return TYPE_DESC<int>;
269 
270       } else if (internal::isdigit(str[local_pos])) {
271         while (internal::isdigit(str[local_pos]))
272           ++local_pos;
273       }
274 
275       // handle precision
276       if (str[local_pos] == '.') {
277         ++local_pos;
278         if (str[local_pos] == '*') {
279           ++local_pos;
280 
281           size_t precision_index = parse_index(&local_pos);
282           set_type_desc(precision_index, TYPE_DESC<int>);
283           if (precision_index == index)
284             return TYPE_DESC<int>;
285 
286         } else if (internal::isdigit(str[local_pos])) {
287           while (internal::isdigit(str[local_pos]))
288             ++local_pos;
289         }
290       }
291 
292       LengthModifier lm = parse_length_modifier(&local_pos);
293 
294       // if we don't have an index for this conversion, then its position is
295       // unknown and all this information is irrelevant. The rest of this logic
296       // has been for skipping past this conversion properly to avoid
297       // weirdness with %%.
298       if (conv_index == 0) {
299         ++local_pos;
300         continue;
301       }
302 
303       TypeDesc conv_size = TYPE_DESC<void>;
304       switch (str[local_pos]) {
305       case ('%'):
306         conv_size = TYPE_DESC<void>;
307         break;
308       case ('c'):
309         conv_size = TYPE_DESC<int>;
310         break;
311       case ('d'):
312       case ('i'):
313       case ('o'):
314       case ('x'):
315       case ('X'):
316       case ('u'):
317         switch (lm) {
318         case (LengthModifier::hh):
319         case (LengthModifier::h):
320         case (LengthModifier::none):
321           conv_size = TYPE_DESC<int>;
322           break;
323         case (LengthModifier::l):
324           conv_size = TYPE_DESC<long>;
325           break;
326         case (LengthModifier::ll):
327         case (LengthModifier::L): // This isn't in the standard, but is in other
328                                   // libc implementations.
329           conv_size = TYPE_DESC<long long>;
330           break;
331         case (LengthModifier::j):
332           conv_size = TYPE_DESC<intmax_t>;
333           break;
334         case (LengthModifier::z):
335           conv_size = TYPE_DESC<size_t>;
336           break;
337         case (LengthModifier::t):
338           conv_size = TYPE_DESC<ptrdiff_t>;
339           break;
340         }
341         break;
342       // TODO(michaelrj): add a flag to disable float point values here
343       case ('f'):
344       case ('F'):
345       case ('e'):
346       case ('E'):
347       case ('a'):
348       case ('A'):
349       case ('g'):
350       case ('G'):
351         if (lm != LengthModifier::L)
352           conv_size = TYPE_DESC<double>;
353         else
354           conv_size = TYPE_DESC<long double>;
355         break;
356 #ifndef LLVM_LIBC_PRINTF_DISABLE_WRITE_INT
357       case ('n'):
358 #endif // LLVM_LIBC_PRINTF_DISABLE_WRITE_INT
359       case ('p'):
360       case ('s'):
361         conv_size = TYPE_DESC<void *>;
362         break;
363       default:
364         conv_size = TYPE_DESC<int>;
365         break;
366       }
367 
368       set_type_desc(conv_index, conv_size);
369       if (conv_index == index)
370         return conv_size;
371     }
372     ++local_pos;
373   }
374 
375   // If there is no size for the requested index, then just guess that it's an
376   // int.
377   return TYPE_DESC<int>;
378 }
379 
380 void Parser::args_to_index(size_t index) {
381   if (args_index > index) {
382     args_index = 1;
383     args_cur = args_start;
384   }
385 
386   while (args_index < index) {
387     Parser::TypeDesc cur_type_desc = TYPE_DESC<void>;
388     if (args_index <= DESC_ARR_LEN)
389       cur_type_desc = desc_arr[args_index - 1];
390 
391     if (cur_type_desc == TYPE_DESC<void>)
392       cur_type_desc = get_type_desc(args_index);
393 
394     if (cur_type_desc == TYPE_DESC<uint32_t>)
395       args_cur.next_var<uint32_t>();
396     else if (cur_type_desc == TYPE_DESC<uint64_t>)
397       args_cur.next_var<uint64_t>();
398     // TODO(michaelrj): add a flag to disable float point values here
399     // Floating point numbers are stored separately from the other arguments.
400     else if (cur_type_desc == TYPE_DESC<double>)
401       args_cur.next_var<double>();
402     else if (cur_type_desc == TYPE_DESC<long double>)
403       args_cur.next_var<long double>();
404     // pointers may be stored separately from normal values.
405     else if (cur_type_desc == TYPE_DESC<void *>)
406       args_cur.next_var<void *>();
407     else
408       args_cur.next_var<uint32_t>();
409 
410     ++args_index;
411   }
412 }
413 
414 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
415 
416 } // namespace printf_core
417 } // namespace __llvm_libc
418