1 //===-- Format string parser implementation for printf ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // #define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag. 10 11 #include "parser.h" 12 13 #include "src/__support/arg_list.h" 14 15 #include "src/__support/CPP/Bit.h" 16 #include "src/__support/ctype_utils.h" 17 #include "src/__support/str_to_integer.h" 18 19 namespace __llvm_libc { 20 namespace printf_core { 21 22 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 23 #define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index) 24 #else 25 #define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>() 26 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 27 28 FormatSection Parser::get_next_section() { 29 FormatSection section; 30 section.raw_string = str + cur_pos; 31 size_t starting_pos = cur_pos; 32 if (str[cur_pos] == '%') { 33 // format section 34 section.has_conv = true; 35 36 ++cur_pos; 37 [[maybe_unused]] size_t conv_index = 0; 38 39 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 40 conv_index = parse_index(&cur_pos); 41 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 42 43 section.flags = parse_flags(&cur_pos); 44 45 // handle width 46 section.min_width = 0; 47 if (str[cur_pos] == '*') { 48 ++cur_pos; 49 50 section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos)); 51 } else if (internal::isdigit(str[cur_pos])) { 52 char *int_end; 53 section.min_width = 54 internal::strtointeger<int>(str + cur_pos, &int_end, 10); 55 cur_pos = int_end - str; 56 } 57 if (section.min_width < 0) { 58 section.min_width = -section.min_width; 59 section.flags = 60 static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED); 61 } 62 63 // handle precision 64 section.precision = -1; // negative precisions are ignored. 65 if (str[cur_pos] == '.') { 66 ++cur_pos; 67 section.precision = 0; // if there's a . but no specified precision, the 68 // precision is implicitly 0. 69 if (str[cur_pos] == '*') { 70 ++cur_pos; 71 72 section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos)); 73 74 } else if (internal::isdigit(str[cur_pos])) { 75 char *int_end; 76 section.precision = 77 internal::strtointeger<int>(str + cur_pos, &int_end, 10); 78 cur_pos = int_end - str; 79 } 80 } 81 82 LengthModifier lm = parse_length_modifier(&cur_pos); 83 84 section.length_modifier = lm; 85 section.conv_name = str[cur_pos]; 86 switch (str[cur_pos]) { 87 case ('%'): 88 break; 89 case ('c'): 90 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index); 91 break; 92 case ('d'): 93 case ('i'): 94 case ('o'): 95 case ('x'): 96 case ('X'): 97 case ('u'): 98 switch (lm) { 99 case (LengthModifier::hh): 100 case (LengthModifier::h): 101 case (LengthModifier::none): 102 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index); 103 break; 104 case (LengthModifier::l): 105 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index); 106 break; 107 case (LengthModifier::ll): 108 case (LengthModifier::L): // This isn't in the standard, but is in other 109 // libc implementations. 110 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index); 111 break; 112 case (LengthModifier::j): 113 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index); 114 break; 115 case (LengthModifier::z): 116 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index); 117 break; 118 case (LengthModifier::t): 119 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index); 120 break; 121 } 122 break; 123 case ('f'): 124 case ('F'): 125 case ('e'): 126 case ('E'): 127 case ('a'): 128 case ('A'): 129 case ('g'): 130 case ('G'): 131 if (lm != LengthModifier::L) 132 section.conv_val_raw = 133 bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index)); 134 else 135 section.conv_val_raw = bit_cast<__uint128_t>( 136 GET_ARG_VAL_SIMPLEST(long double, conv_index)); 137 break; 138 case ('n'): 139 case ('p'): 140 case ('s'): 141 section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index); 142 break; 143 default: 144 // if the conversion is undefined, change this to a raw section. 145 section.has_conv = false; 146 break; 147 } 148 ++cur_pos; 149 } else { 150 // raw section 151 section.has_conv = false; 152 while (str[cur_pos] != '%' && str[cur_pos] != '\0') 153 ++cur_pos; 154 } 155 section.raw_len = cur_pos - starting_pos; 156 return section; 157 } 158 159 FormatFlags Parser::parse_flags(size_t *local_pos) { 160 bool found_flag = true; 161 FormatFlags flags = FormatFlags(0); 162 while (found_flag) { 163 switch (str[*local_pos]) { 164 case '-': 165 flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED); 166 break; 167 case '+': 168 flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN); 169 break; 170 case ' ': 171 flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX); 172 break; 173 case '#': 174 flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM); 175 break; 176 case '0': 177 flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES); 178 break; 179 default: 180 found_flag = false; 181 } 182 if (found_flag) 183 ++*local_pos; 184 } 185 return flags; 186 } 187 188 LengthModifier Parser::parse_length_modifier(size_t *local_pos) { 189 switch (str[*local_pos]) { 190 case ('l'): 191 if (str[*local_pos + 1] == 'l') { 192 *local_pos += 2; 193 return LengthModifier::ll; 194 } else { 195 ++*local_pos; 196 return LengthModifier::l; 197 } 198 case ('h'): 199 if (str[*local_pos + 1] == 'h') { 200 *local_pos += 2; 201 return LengthModifier::hh; 202 } else { 203 ++*local_pos; 204 return LengthModifier::h; 205 } 206 case ('L'): 207 ++*local_pos; 208 return LengthModifier::L; 209 case ('j'): 210 ++*local_pos; 211 return LengthModifier::j; 212 case ('z'): 213 ++*local_pos; 214 return LengthModifier::z; 215 case ('t'): 216 ++*local_pos; 217 return LengthModifier::t; 218 default: 219 return LengthModifier::none; 220 } 221 } 222 223 //---------------------------------------------------- 224 // INDEX MODE ONLY FUNCTIONS AFTER HERE: 225 //---------------------------------------------------- 226 227 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 228 229 size_t Parser::parse_index(size_t *local_pos) { 230 if (internal::isdigit(str[*local_pos])) { 231 char *int_end; 232 size_t index = 233 internal::strtointeger<size_t>(str + *local_pos, &int_end, 10); 234 if (int_end[0] != '$') 235 return 0; 236 *local_pos = 1 + int_end - str; 237 return index; 238 } 239 return 0; 240 } 241 242 Parser::TypeDesc Parser::get_type_desc(size_t index) { 243 // index mode is assumed, and the indicies start at 1, so an index 244 // of 0 is invalid. 245 size_t local_pos = 0; 246 247 while (str[local_pos]) { 248 if (str[local_pos] == '%') { 249 ++local_pos; 250 251 size_t conv_index = parse_index(&local_pos); 252 253 // the flags aren't relevant for this situation, but I need to skip past 254 // them so they're parsed but the result is discarded. 255 parse_flags(&local_pos); 256 257 // handle width 258 if (str[local_pos] == '*') { 259 ++local_pos; 260 261 size_t width_index = parse_index(&local_pos); 262 set_type_desc(width_index, TYPE_DESC<int>); 263 if (width_index == index) 264 return TYPE_DESC<int>; 265 266 } else if (internal::isdigit(str[local_pos])) { 267 while (internal::isdigit(str[local_pos])) 268 ++local_pos; 269 } 270 271 // handle precision 272 if (str[local_pos] == '.') { 273 ++local_pos; 274 if (str[local_pos] == '*') { 275 ++local_pos; 276 277 size_t precision_index = parse_index(&local_pos); 278 set_type_desc(precision_index, TYPE_DESC<int>); 279 if (precision_index == index) 280 return TYPE_DESC<int>; 281 282 } else if (internal::isdigit(str[local_pos])) { 283 while (internal::isdigit(str[local_pos])) 284 ++local_pos; 285 } 286 } 287 288 LengthModifier lm = parse_length_modifier(&local_pos); 289 290 // if we don't have an index for this conversion, then its position is 291 // unknown and all this information is irrelevant. The rest of this logic 292 // has been for skipping past this conversion properly to avoid 293 // weirdness with %%. 294 if (conv_index == 0) { 295 ++local_pos; 296 continue; 297 } 298 299 TypeDesc conv_size = TYPE_DESC<void>; 300 switch (str[local_pos]) { 301 case ('%'): 302 conv_size = TYPE_DESC<void>; 303 break; 304 case ('c'): 305 conv_size = TYPE_DESC<int>; 306 break; 307 case ('d'): 308 case ('i'): 309 case ('o'): 310 case ('x'): 311 case ('X'): 312 case ('u'): 313 switch (lm) { 314 case (LengthModifier::hh): 315 case (LengthModifier::h): 316 case (LengthModifier::none): 317 conv_size = TYPE_DESC<int>; 318 break; 319 case (LengthModifier::l): 320 conv_size = TYPE_DESC<long>; 321 break; 322 case (LengthModifier::ll): 323 case (LengthModifier::L): // This isn't in the standard, but is in other 324 // libc implementations. 325 conv_size = TYPE_DESC<long long>; 326 break; 327 case (LengthModifier::j): 328 conv_size = TYPE_DESC<intmax_t>; 329 break; 330 case (LengthModifier::z): 331 conv_size = TYPE_DESC<size_t>; 332 break; 333 case (LengthModifier::t): 334 conv_size = TYPE_DESC<ptrdiff_t>; 335 break; 336 } 337 break; 338 case ('f'): 339 case ('F'): 340 case ('e'): 341 case ('E'): 342 case ('a'): 343 case ('A'): 344 case ('g'): 345 case ('G'): 346 if (lm != LengthModifier::L) 347 conv_size = TYPE_DESC<double>; 348 else 349 conv_size = TYPE_DESC<long double>; 350 break; 351 case ('n'): 352 case ('p'): 353 case ('s'): 354 conv_size = TYPE_DESC<void *>; 355 break; 356 default: 357 conv_size = TYPE_DESC<int>; 358 break; 359 } 360 361 set_type_desc(conv_index, conv_size); 362 if (conv_index == index) 363 return conv_size; 364 } 365 ++local_pos; 366 } 367 368 // If there is no size for the requested index, then just guess that it's an 369 // int. 370 return TYPE_DESC<int>; 371 } 372 373 void Parser::args_to_index(size_t index) { 374 if (args_index > index) { 375 args_index = 1; 376 args_cur = args_start; 377 } 378 379 while (args_index < index) { 380 Parser::TypeDesc cur_type_desc = TYPE_DESC<void>; 381 if (args_index <= DESC_ARR_LEN) 382 cur_type_desc = desc_arr[args_index - 1]; 383 384 if (cur_type_desc == TYPE_DESC<void>) 385 cur_type_desc = get_type_desc(args_index); 386 387 if (cur_type_desc == TYPE_DESC<uint32_t>) 388 args_cur.next_var<uint32_t>(); 389 else if (cur_type_desc == TYPE_DESC<uint64_t>) 390 args_cur.next_var<uint64_t>(); 391 // Floating point numbers are stored separately from the other arguments. 392 else if (cur_type_desc == TYPE_DESC<double>) 393 args_cur.next_var<double>(); 394 else if (cur_type_desc == TYPE_DESC<long double>) 395 args_cur.next_var<long double>(); 396 // pointers may be stored separately from normal values. 397 else if (cur_type_desc == TYPE_DESC<void *>) 398 args_cur.next_var<void *>(); 399 else 400 args_cur.next_var<uint32_t>(); 401 402 ++args_index; 403 } 404 } 405 406 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 407 408 } // namespace printf_core 409 } // namespace __llvm_libc 410