1 //===-- Format string parser implementation for printf ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // #define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag.
10 
11 #include "parser.h"
12 
13 #include "src/__support/arg_list.h"
14 
15 #include "src/__support/CPP/Bit.h"
16 #include "src/__support/FPUtil/FPBits.h"
17 #include "src/__support/ctype_utils.h"
18 #include "src/__support/str_to_integer.h"
19 
20 namespace __llvm_libc {
21 namespace printf_core {
22 
// GET_ARG_VAL_SIMPLEST(arg_type, index) expands to the call that fetches one
// argument of type arg_type:
//  - With index mode enabled, it expands to get_arg_value<arg_type>(index).
//  - With index mode disabled, the second macro argument is dropped entirely
//    (it is never evaluated, so it may name index-mode-only functions) and the
//    macro expands to get_next_arg_value<arg_type>().
#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
#define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index)
#else
#define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>()
#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
28 
29 FormatSection Parser::get_next_section() {
30   FormatSection section;
31   section.raw_string = str + cur_pos;
32   size_t starting_pos = cur_pos;
33   if (str[cur_pos] == '%') {
34     // format section
35     section.has_conv = true;
36 
37     ++cur_pos;
38     [[maybe_unused]] size_t conv_index = 0;
39 
40 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
41     conv_index = parse_index(&cur_pos);
42 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
43 
44     section.flags = parse_flags(&cur_pos);
45 
46     // handle width
47     section.min_width = 0;
48     if (str[cur_pos] == '*') {
49       ++cur_pos;
50 
51       section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
52     } else if (internal::isdigit(str[cur_pos])) {
53       char *int_end;
54       section.min_width =
55           internal::strtointeger<int>(str + cur_pos, &int_end, 10);
56       cur_pos = int_end - str;
57     }
58     if (section.min_width < 0) {
59       section.min_width = -section.min_width;
60       section.flags =
61           static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED);
62     }
63 
64     // handle precision
65     section.precision = -1; // negative precisions are ignored.
66     if (str[cur_pos] == '.') {
67       ++cur_pos;
68       section.precision = 0; // if there's a . but no specified precision, the
69                              // precision is implicitly 0.
70       if (str[cur_pos] == '*') {
71         ++cur_pos;
72 
73         section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
74 
75       } else if (internal::isdigit(str[cur_pos])) {
76         char *int_end;
77         section.precision =
78             internal::strtointeger<int>(str + cur_pos, &int_end, 10);
79         cur_pos = int_end - str;
80       }
81     }
82 
83     LengthModifier lm = parse_length_modifier(&cur_pos);
84 
85     section.length_modifier = lm;
86     section.conv_name = str[cur_pos];
87     switch (str[cur_pos]) {
88     case ('%'):
89       break;
90     case ('c'):
91       section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
92       break;
93     case ('d'):
94     case ('i'):
95     case ('o'):
96     case ('x'):
97     case ('X'):
98     case ('u'):
99       switch (lm) {
100       case (LengthModifier::hh):
101       case (LengthModifier::h):
102       case (LengthModifier::none):
103         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
104         break;
105       case (LengthModifier::l):
106         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index);
107         break;
108       case (LengthModifier::ll):
109       case (LengthModifier::L): // This isn't in the standard, but is in other
110                                 // libc implementations.
111         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index);
112         break;
113       case (LengthModifier::j):
114         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index);
115         break;
116       case (LengthModifier::z):
117         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index);
118         break;
119       case (LengthModifier::t):
120         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index);
121         break;
122       }
123       break;
124       // TODO(michaelrj): add a flag to disable float point values here
125     case ('f'):
126     case ('F'):
127     case ('e'):
128     case ('E'):
129     case ('a'):
130     case ('A'):
131     case ('g'):
132     case ('G'):
133       if (lm != LengthModifier::L)
134         section.conv_val_raw =
135             bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index));
136       else
137         section.conv_val_raw = bit_cast<fputil::FPBits<long double>::UIntType>(
138             GET_ARG_VAL_SIMPLEST(long double, conv_index));
139       break;
140     case ('n'):
141     case ('p'):
142     case ('s'):
143       section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index);
144       break;
145     default:
146       // if the conversion is undefined, change this to a raw section.
147       section.has_conv = false;
148       break;
149     }
150     ++cur_pos;
151   } else {
152     // raw section
153     section.has_conv = false;
154     while (str[cur_pos] != '%' && str[cur_pos] != '\0')
155       ++cur_pos;
156   }
157   section.raw_len = cur_pos - starting_pos;
158   return section;
159 }
160 
161 FormatFlags Parser::parse_flags(size_t *local_pos) {
162   bool found_flag = true;
163   FormatFlags flags = FormatFlags(0);
164   while (found_flag) {
165     switch (str[*local_pos]) {
166     case '-':
167       flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED);
168       break;
169     case '+':
170       flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN);
171       break;
172     case ' ':
173       flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX);
174       break;
175     case '#':
176       flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM);
177       break;
178     case '0':
179       flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES);
180       break;
181     default:
182       found_flag = false;
183     }
184     if (found_flag)
185       ++*local_pos;
186   }
187   return flags;
188 }
189 
190 LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
191   switch (str[*local_pos]) {
192   case ('l'):
193     if (str[*local_pos + 1] == 'l') {
194       *local_pos += 2;
195       return LengthModifier::ll;
196     } else {
197       ++*local_pos;
198       return LengthModifier::l;
199     }
200   case ('h'):
201     if (str[*local_pos + 1] == 'h') {
202       *local_pos += 2;
203       return LengthModifier::hh;
204     } else {
205       ++*local_pos;
206       return LengthModifier::h;
207     }
208   case ('L'):
209     ++*local_pos;
210     return LengthModifier::L;
211   case ('j'):
212     ++*local_pos;
213     return LengthModifier::j;
214   case ('z'):
215     ++*local_pos;
216     return LengthModifier::z;
217   case ('t'):
218     ++*local_pos;
219     return LengthModifier::t;
220   default:
221     return LengthModifier::none;
222   }
223 }
224 
225 //----------------------------------------------------
226 // INDEX MODE ONLY FUNCTIONS AFTER HERE:
227 //----------------------------------------------------
228 
229 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
230 
231 size_t Parser::parse_index(size_t *local_pos) {
232   if (internal::isdigit(str[*local_pos])) {
233     char *int_end;
234     size_t index =
235         internal::strtointeger<size_t>(str + *local_pos, &int_end, 10);
236     if (int_end[0] != '$')
237       return 0;
238     *local_pos = 1 + int_end - str;
239     return index;
240   }
241   return 0;
242 }
243 
244 Parser::TypeDesc Parser::get_type_desc(size_t index) {
245   // index mode is assumed, and the indicies start at 1, so an index
246   // of 0 is invalid.
247   size_t local_pos = 0;
248 
249   while (str[local_pos]) {
250     if (str[local_pos] == '%') {
251       ++local_pos;
252 
253       size_t conv_index = parse_index(&local_pos);
254 
255       // the flags aren't relevant for this situation, but I need to skip past
256       // them so they're parsed but the result is discarded.
257       parse_flags(&local_pos);
258 
259       // handle width
260       if (str[local_pos] == '*') {
261         ++local_pos;
262 
263         size_t width_index = parse_index(&local_pos);
264         set_type_desc(width_index, TYPE_DESC<int>);
265         if (width_index == index)
266           return TYPE_DESC<int>;
267 
268       } else if (internal::isdigit(str[local_pos])) {
269         while (internal::isdigit(str[local_pos]))
270           ++local_pos;
271       }
272 
273       // handle precision
274       if (str[local_pos] == '.') {
275         ++local_pos;
276         if (str[local_pos] == '*') {
277           ++local_pos;
278 
279           size_t precision_index = parse_index(&local_pos);
280           set_type_desc(precision_index, TYPE_DESC<int>);
281           if (precision_index == index)
282             return TYPE_DESC<int>;
283 
284         } else if (internal::isdigit(str[local_pos])) {
285           while (internal::isdigit(str[local_pos]))
286             ++local_pos;
287         }
288       }
289 
290       LengthModifier lm = parse_length_modifier(&local_pos);
291 
292       // if we don't have an index for this conversion, then its position is
293       // unknown and all this information is irrelevant. The rest of this logic
294       // has been for skipping past this conversion properly to avoid
295       // weirdness with %%.
296       if (conv_index == 0) {
297         ++local_pos;
298         continue;
299       }
300 
301       TypeDesc conv_size = TYPE_DESC<void>;
302       switch (str[local_pos]) {
303       case ('%'):
304         conv_size = TYPE_DESC<void>;
305         break;
306       case ('c'):
307         conv_size = TYPE_DESC<int>;
308         break;
309       case ('d'):
310       case ('i'):
311       case ('o'):
312       case ('x'):
313       case ('X'):
314       case ('u'):
315         switch (lm) {
316         case (LengthModifier::hh):
317         case (LengthModifier::h):
318         case (LengthModifier::none):
319           conv_size = TYPE_DESC<int>;
320           break;
321         case (LengthModifier::l):
322           conv_size = TYPE_DESC<long>;
323           break;
324         case (LengthModifier::ll):
325         case (LengthModifier::L): // This isn't in the standard, but is in other
326                                   // libc implementations.
327           conv_size = TYPE_DESC<long long>;
328           break;
329         case (LengthModifier::j):
330           conv_size = TYPE_DESC<intmax_t>;
331           break;
332         case (LengthModifier::z):
333           conv_size = TYPE_DESC<size_t>;
334           break;
335         case (LengthModifier::t):
336           conv_size = TYPE_DESC<ptrdiff_t>;
337           break;
338         }
339         break;
340       // TODO(michaelrj): add a flag to disable float point values here
341       case ('f'):
342       case ('F'):
343       case ('e'):
344       case ('E'):
345       case ('a'):
346       case ('A'):
347       case ('g'):
348       case ('G'):
349         if (lm != LengthModifier::L)
350           conv_size = TYPE_DESC<double>;
351         else
352           conv_size = TYPE_DESC<long double>;
353         break;
354       case ('n'):
355       case ('p'):
356       case ('s'):
357         conv_size = TYPE_DESC<void *>;
358         break;
359       default:
360         conv_size = TYPE_DESC<int>;
361         break;
362       }
363 
364       set_type_desc(conv_index, conv_size);
365       if (conv_index == index)
366         return conv_size;
367     }
368     ++local_pos;
369   }
370 
371   // If there is no size for the requested index, then just guess that it's an
372   // int.
373   return TYPE_DESC<int>;
374 }
375 
376 void Parser::args_to_index(size_t index) {
377   if (args_index > index) {
378     args_index = 1;
379     args_cur = args_start;
380   }
381 
382   while (args_index < index) {
383     Parser::TypeDesc cur_type_desc = TYPE_DESC<void>;
384     if (args_index <= DESC_ARR_LEN)
385       cur_type_desc = desc_arr[args_index - 1];
386 
387     if (cur_type_desc == TYPE_DESC<void>)
388       cur_type_desc = get_type_desc(args_index);
389 
390     if (cur_type_desc == TYPE_DESC<uint32_t>)
391       args_cur.next_var<uint32_t>();
392     else if (cur_type_desc == TYPE_DESC<uint64_t>)
393       args_cur.next_var<uint64_t>();
394     // TODO(michaelrj): add a flag to disable float point values here
395     // Floating point numbers are stored separately from the other arguments.
396     else if (cur_type_desc == TYPE_DESC<double>)
397       args_cur.next_var<double>();
398     else if (cur_type_desc == TYPE_DESC<long double>)
399       args_cur.next_var<long double>();
400     // pointers may be stored separately from normal values.
401     else if (cur_type_desc == TYPE_DESC<void *>)
402       args_cur.next_var<void *>();
403     else
404       args_cur.next_var<uint32_t>();
405 
406     ++args_index;
407   }
408 }
409 
410 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
411 
412 } // namespace printf_core
413 } // namespace __llvm_libc
414