1 //===-- Format string parser implementation for printf ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // #define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag.
10 
#include "parser.h"

#include "src/__support/arg_list.h"

#include "src/__support/CPP/Bit.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/str_to_integer.h"

#include <limits.h> // INT_MIN, INT_MAX
18 
19 namespace __llvm_libc {
20 namespace printf_core {
21 
// GET_ARG_VAL_SIMPLEST(arg_type, index) reads one argument of type arg_type.
// When index mode is enabled it expands to get_arg_value<arg_type>(index).
// When index mode is disabled it expands to get_next_arg_value<arg_type>() and
// the second macro argument is dropped textually, so an expression passed as
// the index (such as a call to parse_index) is never expanded or evaluated.
#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
#define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index)
#else
#define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>()
#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
27 
28 FormatSection Parser::get_next_section() {
29   FormatSection section;
30   section.raw_string = str + cur_pos;
31   size_t starting_pos = cur_pos;
32   if (str[cur_pos] == '%') {
33     // format section
34     section.has_conv = true;
35 
36     ++cur_pos;
37     [[maybe_unused]] size_t conv_index = 0;
38 
39 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
40     conv_index = parse_index(&cur_pos);
41 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
42 
43     section.flags = parse_flags(&cur_pos);
44 
45     // handle width
46     section.min_width = 0;
47     if (str[cur_pos] == '*') {
48       ++cur_pos;
49 
50       section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
51     } else if (internal::isdigit(str[cur_pos])) {
52       char *int_end;
53       section.min_width =
54           internal::strtointeger<int>(str + cur_pos, &int_end, 10);
55       cur_pos = int_end - str;
56     }
57     if (section.min_width < 0) {
58       section.min_width = -section.min_width;
59       section.flags =
60           static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED);
61     }
62 
63     // handle precision
64     section.precision = -1; // negative precisions are ignored.
65     if (str[cur_pos] == '.') {
66       ++cur_pos;
67       section.precision = 0; // if there's a . but no specified precision, the
68                              // precision is implicitly 0.
69       if (str[cur_pos] == '*') {
70         ++cur_pos;
71 
72         section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
73 
74       } else if (internal::isdigit(str[cur_pos])) {
75         char *int_end;
76         section.precision =
77             internal::strtointeger<int>(str + cur_pos, &int_end, 10);
78         cur_pos = int_end - str;
79       }
80     }
81 
82     LengthModifier lm = parse_length_modifier(&cur_pos);
83 
84     section.length_modifier = lm;
85     section.conv_name = str[cur_pos];
86     switch (str[cur_pos]) {
87     case ('%'):
88       break;
89     case ('c'):
90       section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
91       break;
92     case ('d'):
93     case ('i'):
94     case ('o'):
95     case ('x'):
96     case ('X'):
97     case ('u'):
98       switch (lm) {
99       case (LengthModifier::hh):
100       case (LengthModifier::h):
101       case (LengthModifier::none):
102         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
103         break;
104       case (LengthModifier::l):
105         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index);
106         break;
107       case (LengthModifier::ll):
108       case (LengthModifier::L): // This isn't in the standard, but is in other
109                                 // libc implementations.
110         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index);
111         break;
112       case (LengthModifier::j):
113         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index);
114         break;
115       case (LengthModifier::z):
116         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index);
117         break;
118       case (LengthModifier::t):
119         section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index);
120         break;
121       }
122       break;
123     case ('f'):
124     case ('F'):
125     case ('e'):
126     case ('E'):
127     case ('a'):
128     case ('A'):
129     case ('g'):
130     case ('G'):
131       if (lm != LengthModifier::L)
132         section.conv_val_raw =
133             bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index));
134       else
135         section.conv_val_raw = bit_cast<__uint128_t>(
136             GET_ARG_VAL_SIMPLEST(long double, conv_index));
137       break;
138     case ('n'):
139     case ('p'):
140     case ('s'):
141       section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index);
142       break;
143     default:
144       // if the conversion is undefined, change this to a raw section.
145       section.has_conv = false;
146       break;
147     }
148     ++cur_pos;
149   } else {
150     // raw section
151     section.has_conv = false;
152     while (str[cur_pos] != '%' && str[cur_pos] != '\0')
153       ++cur_pos;
154   }
155   section.raw_len = cur_pos - starting_pos;
156   return section;
157 }
158 
159 FormatFlags Parser::parse_flags(size_t *local_pos) {
160   bool found_flag = true;
161   FormatFlags flags = FormatFlags(0);
162   while (found_flag) {
163     switch (str[*local_pos]) {
164     case '-':
165       flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED);
166       break;
167     case '+':
168       flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN);
169       break;
170     case ' ':
171       flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX);
172       break;
173     case '#':
174       flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM);
175       break;
176     case '0':
177       flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES);
178       break;
179     default:
180       found_flag = false;
181     }
182     if (found_flag)
183       ++*local_pos;
184   }
185   return flags;
186 }
187 
188 LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
189   switch (str[*local_pos]) {
190   case ('l'):
191     if (str[*local_pos + 1] == 'l') {
192       *local_pos += 2;
193       return LengthModifier::ll;
194     } else {
195       ++*local_pos;
196       return LengthModifier::l;
197     }
198   case ('h'):
199     if (str[*local_pos + 1] == 'h') {
200       *local_pos += 2;
201       return LengthModifier::hh;
202     } else {
203       ++*local_pos;
204       return LengthModifier::h;
205     }
206   case ('L'):
207     ++*local_pos;
208     return LengthModifier::L;
209   case ('j'):
210     ++*local_pos;
211     return LengthModifier::j;
212   case ('z'):
213     ++*local_pos;
214     return LengthModifier::z;
215   case ('t'):
216     ++*local_pos;
217     return LengthModifier::t;
218   default:
219     return LengthModifier::none;
220   }
221 }
222 
223 //----------------------------------------------------
224 // INDEX MODE ONLY FUNCTIONS AFTER HERE:
225 //----------------------------------------------------
226 
227 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
228 
229 size_t Parser::parse_index(size_t *local_pos) {
230   if (internal::isdigit(str[*local_pos])) {
231     char *int_end;
232     size_t index =
233         internal::strtointeger<size_t>(str + *local_pos, &int_end, 10);
234     if (int_end[0] != '$')
235       return 0;
236     *local_pos = 1 + int_end - str;
237     return index;
238   }
239   return 0;
240 }
241 
242 Parser::TypeDesc Parser::get_type_desc(size_t index) {
243   // index mode is assumed, and the indicies start at 1, so an index
244   // of 0 is invalid.
245   size_t local_pos = 0;
246 
247   while (str[local_pos]) {
248     if (str[local_pos] == '%') {
249       ++local_pos;
250 
251       size_t conv_index = parse_index(&local_pos);
252 
253       // the flags aren't relevant for this situation, but I need to skip past
254       // them so they're parsed but the result is discarded.
255       parse_flags(&local_pos);
256 
257       // handle width
258       if (str[local_pos] == '*') {
259         ++local_pos;
260 
261         size_t width_index = parse_index(&local_pos);
262         set_type_desc(width_index, TYPE_DESC<int>);
263         if (width_index == index)
264           return TYPE_DESC<int>;
265 
266       } else if (internal::isdigit(str[local_pos])) {
267         while (internal::isdigit(str[local_pos]))
268           ++local_pos;
269       }
270 
271       // handle precision
272       if (str[local_pos] == '.') {
273         ++local_pos;
274         if (str[local_pos] == '*') {
275           ++local_pos;
276 
277           size_t precision_index = parse_index(&local_pos);
278           set_type_desc(precision_index, TYPE_DESC<int>);
279           if (precision_index == index)
280             return TYPE_DESC<int>;
281 
282         } else if (internal::isdigit(str[local_pos])) {
283           while (internal::isdigit(str[local_pos]))
284             ++local_pos;
285         }
286       }
287 
288       LengthModifier lm = parse_length_modifier(&local_pos);
289 
290       // if we don't have an index for this conversion, then its position is
291       // unknown and all this information is irrelevant. The rest of this logic
292       // has been for skipping past this conversion properly to avoid
293       // weirdness with %%.
294       if (conv_index == 0) {
295         ++local_pos;
296         continue;
297       }
298 
299       TypeDesc conv_size = TYPE_DESC<void>;
300       switch (str[local_pos]) {
301       case ('%'):
302         conv_size = TYPE_DESC<void>;
303         break;
304       case ('c'):
305         conv_size = TYPE_DESC<int>;
306         break;
307       case ('d'):
308       case ('i'):
309       case ('o'):
310       case ('x'):
311       case ('X'):
312       case ('u'):
313         switch (lm) {
314         case (LengthModifier::hh):
315         case (LengthModifier::h):
316         case (LengthModifier::none):
317           conv_size = TYPE_DESC<int>;
318           break;
319         case (LengthModifier::l):
320           conv_size = TYPE_DESC<long>;
321           break;
322         case (LengthModifier::ll):
323         case (LengthModifier::L): // This isn't in the standard, but is in other
324                                   // libc implementations.
325           conv_size = TYPE_DESC<long long>;
326           break;
327         case (LengthModifier::j):
328           conv_size = TYPE_DESC<intmax_t>;
329           break;
330         case (LengthModifier::z):
331           conv_size = TYPE_DESC<size_t>;
332           break;
333         case (LengthModifier::t):
334           conv_size = TYPE_DESC<ptrdiff_t>;
335           break;
336         }
337         break;
338       case ('f'):
339       case ('F'):
340       case ('e'):
341       case ('E'):
342       case ('a'):
343       case ('A'):
344       case ('g'):
345       case ('G'):
346         if (lm != LengthModifier::L)
347           conv_size = TYPE_DESC<double>;
348         else
349           conv_size = TYPE_DESC<long double>;
350         break;
351       case ('n'):
352       case ('p'):
353       case ('s'):
354         conv_size = TYPE_DESC<void *>;
355         break;
356       default:
357         conv_size = TYPE_DESC<int>;
358         break;
359       }
360 
361       set_type_desc(conv_index, conv_size);
362       if (conv_index == index)
363         return conv_size;
364     }
365     ++local_pos;
366   }
367 
368   // If there is no size for the requested index, then just guess that it's an
369   // int.
370   return TYPE_DESC<int>;
371 }
372 
373 void Parser::args_to_index(size_t index) {
374   if (args_index > index) {
375     args_index = 1;
376     args_cur = args_start;
377   }
378 
379   while (args_index < index) {
380     Parser::TypeDesc cur_type_desc = TYPE_DESC<void>;
381     if (args_index <= DESC_ARR_LEN)
382       cur_type_desc = desc_arr[args_index - 1];
383 
384     if (cur_type_desc == TYPE_DESC<void>)
385       cur_type_desc = get_type_desc(args_index);
386 
387     if (cur_type_desc == TYPE_DESC<uint32_t>)
388       args_cur.next_var<uint32_t>();
389     else if (cur_type_desc == TYPE_DESC<uint64_t>)
390       args_cur.next_var<uint64_t>();
391     // Floating point numbers are stored separately from the other arguments.
392     else if (cur_type_desc == TYPE_DESC<double>)
393       args_cur.next_var<double>();
394     else if (cur_type_desc == TYPE_DESC<long double>)
395       args_cur.next_var<long double>();
396     // pointers may be stored separately from normal values.
397     else if (cur_type_desc == TYPE_DESC<void *>)
398       args_cur.next_var<void *>();
399     else
400       args_cur.next_var<uint32_t>();
401 
402     ++args_index;
403   }
404 }
405 
406 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
407 
408 } // namespace printf_core
409 } // namespace __llvm_libc
410