1 //===-- Format string parser implementation for printf ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "parser.h" 10 11 #include "src/__support/arg_list.h" 12 13 #include "src/__support/CPP/Bit.h" 14 #include "src/__support/ctype_utils.h" 15 #include "src/__support/str_to_integer.h" 16 17 namespace __llvm_libc { 18 namespace printf_core { 19 20 #define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag. 21 22 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 23 #define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index) 24 #else 25 #define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>() 26 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 27 28 FormatSection Parser::get_next_section() { 29 FormatSection section; 30 section.raw_string = str + cur_pos; 31 size_t starting_pos = cur_pos; 32 if (str[cur_pos] == '%') { 33 // format section 34 section.has_conv = true; 35 36 ++cur_pos; 37 [[maybe_unused]] size_t conv_index = 0; 38 39 section.flags = parse_flags(&cur_pos); 40 41 // handle width 42 section.min_width = 0; 43 if (str[cur_pos] == '*') { 44 ++cur_pos; 45 46 section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos)); 47 } else if (internal::isdigit(str[cur_pos])) { 48 char *int_end; 49 section.min_width = 50 internal::strtointeger<int>(str + cur_pos, &int_end, 10); 51 cur_pos = int_end - str; 52 } 53 if (section.min_width < 0) { 54 section.min_width = -section.min_width; 55 section.flags = 56 static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED); 57 } 58 59 // handle precision 60 section.precision = -1; // negative precisions are ignored. 61 if (str[cur_pos] == '.') { 62 ++cur_pos; 63 section.precision = 0; // if there's a . but no specified precision, the 64 // precision is implicitly 0. 65 if (str[cur_pos] == '*') { 66 ++cur_pos; 67 68 section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos)); 69 70 } else if (internal::isdigit(str[cur_pos])) { 71 char *int_end; 72 section.precision = 73 internal::strtointeger<int>(str + cur_pos, &int_end, 10); 74 cur_pos = int_end - str; 75 } 76 } 77 78 LengthModifier lm = parse_length_modifier(&cur_pos); 79 80 section.length_modifier = lm; 81 section.conv_name = str[cur_pos]; 82 switch (str[cur_pos]) { 83 case ('%'): 84 break; 85 case ('c'): 86 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index); 87 break; 88 case ('d'): 89 case ('i'): 90 case ('o'): 91 case ('x'): 92 case ('X'): 93 case ('u'): 94 switch (lm) { 95 case (LengthModifier::hh): 96 case (LengthModifier::h): 97 case (LengthModifier::none): 98 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index); 99 break; 100 case (LengthModifier::l): 101 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index); 102 break; 103 case (LengthModifier::ll): 104 case (LengthModifier::L): // This isn't in the standard, but is in other 105 // libc implementations. 106 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index); 107 break; 108 case (LengthModifier::j): 109 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index); 110 break; 111 case (LengthModifier::z): 112 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index); 113 break; 114 case (LengthModifier::t): 115 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index); 116 break; 117 } 118 break; 119 case ('f'): 120 case ('F'): 121 case ('e'): 122 case ('E'): 123 case ('a'): 124 case ('A'): 125 case ('g'): 126 case ('G'): 127 if (lm != LengthModifier::L) 128 section.conv_val_raw = 129 bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index)); 130 else 131 section.conv_val_raw = bit_cast<__uint128_t>( 132 GET_ARG_VAL_SIMPLEST(long double, conv_index)); 133 break; 134 case ('n'): 135 case ('p'): 136 case ('s'): 137 section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index); 138 break; 139 default: 140 // if the conversion is undefined, change this to a raw section. 141 section.has_conv = false; 142 break; 143 } 144 ++cur_pos; 145 } else { 146 // raw section 147 section.has_conv = false; 148 while (str[cur_pos] != '%' && str[cur_pos] != '\0') 149 ++cur_pos; 150 } 151 section.raw_len = cur_pos - starting_pos; 152 return section; 153 } 154 155 FormatFlags Parser::parse_flags(size_t *local_pos) { 156 bool found_flag = true; 157 FormatFlags flags = FormatFlags(0); 158 while (found_flag) { 159 switch (str[*local_pos]) { 160 case '-': 161 flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED); 162 break; 163 case '+': 164 flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN); 165 break; 166 case ' ': 167 flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX); 168 break; 169 case '#': 170 flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM); 171 break; 172 case '0': 173 flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES); 174 break; 175 default: 176 found_flag = false; 177 } 178 if (found_flag) 179 ++*local_pos; 180 } 181 return flags; 182 } 183 184 LengthModifier Parser::parse_length_modifier(size_t *local_pos) { 185 switch (str[*local_pos]) { 186 case ('l'): 187 if (str[*local_pos + 1] == 'l') { 188 *local_pos += 2; 189 return LengthModifier::ll; 190 } else { 191 ++*local_pos; 192 return LengthModifier::l; 193 } 194 case ('h'): 195 if (str[cur_pos + 1] == 'h') { 196 *local_pos += 2; 197 return LengthModifier::hh; 198 } else { 199 ++*local_pos; 200 return LengthModifier::h; 201 } 202 case ('L'): 203 ++*local_pos; 204 return LengthModifier::L; 205 case ('j'): 206 ++*local_pos; 207 return LengthModifier::j; 208 case ('z'): 209 ++*local_pos; 210 return LengthModifier::z; 211 case ('t'): 212 ++*local_pos; 213 return LengthModifier::t; 214 default: 215 return LengthModifier::none; 216 } 217 } 218 219 } // namespace printf_core 220 } // namespace __llvm_libc 221