1 //===-- Format string parser implementation for printf ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // #define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag. 10 11 #include "parser.h" 12 13 #include "src/__support/arg_list.h" 14 15 #include "src/__support/CPP/Bit.h" 16 #include "src/__support/FPUtil/FPBits.h" 17 #include "src/__support/ctype_utils.h" 18 #include "src/__support/str_to_integer.h" 19 20 namespace __llvm_libc { 21 namespace printf_core { 22 23 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 24 #define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index) 25 #else 26 #define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>() 27 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 28 29 FormatSection Parser::get_next_section() { 30 FormatSection section; 31 section.raw_string = str + cur_pos; 32 size_t starting_pos = cur_pos; 33 if (str[cur_pos] == '%') { 34 // format section 35 section.has_conv = true; 36 37 ++cur_pos; 38 [[maybe_unused]] size_t conv_index = 0; 39 40 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 41 conv_index = parse_index(&cur_pos); 42 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 43 44 section.flags = parse_flags(&cur_pos); 45 46 // handle width 47 section.min_width = 0; 48 if (str[cur_pos] == '*') { 49 ++cur_pos; 50 51 section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos)); 52 } else if (internal::isdigit(str[cur_pos])) { 53 char *int_end; 54 section.min_width = 55 internal::strtointeger<int>(str + cur_pos, &int_end, 10); 56 cur_pos = int_end - str; 57 } 58 if (section.min_width < 0) { 59 section.min_width = -section.min_width; 60 section.flags = 61 static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED); 62 } 63 64 // handle precision 65 section.precision = -1; // negative precisions are ignored. 66 if (str[cur_pos] == '.') { 67 ++cur_pos; 68 section.precision = 0; // if there's a . but no specified precision, the 69 // precision is implicitly 0. 70 if (str[cur_pos] == '*') { 71 ++cur_pos; 72 73 section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos)); 74 75 } else if (internal::isdigit(str[cur_pos])) { 76 char *int_end; 77 section.precision = 78 internal::strtointeger<int>(str + cur_pos, &int_end, 10); 79 cur_pos = int_end - str; 80 } 81 } 82 83 LengthModifier lm = parse_length_modifier(&cur_pos); 84 85 section.length_modifier = lm; 86 section.conv_name = str[cur_pos]; 87 switch (str[cur_pos]) { 88 case ('%'): 89 break; 90 case ('c'): 91 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index); 92 break; 93 case ('d'): 94 case ('i'): 95 case ('o'): 96 case ('x'): 97 case ('X'): 98 case ('u'): 99 switch (lm) { 100 case (LengthModifier::hh): 101 case (LengthModifier::h): 102 case (LengthModifier::none): 103 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index); 104 break; 105 case (LengthModifier::l): 106 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index); 107 break; 108 case (LengthModifier::ll): 109 case (LengthModifier::L): // This isn't in the standard, but is in other 110 // libc implementations. 111 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index); 112 break; 113 case (LengthModifier::j): 114 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index); 115 break; 116 case (LengthModifier::z): 117 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index); 118 break; 119 case (LengthModifier::t): 120 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index); 121 break; 122 } 123 break; 124 #ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT 125 case ('f'): 126 case ('F'): 127 case ('e'): 128 case ('E'): 129 case ('a'): 130 case ('A'): 131 case ('g'): 132 case ('G'): 133 if (lm != LengthModifier::L) 134 section.conv_val_raw = 135 bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index)); 136 else 137 section.conv_val_raw = bit_cast<fputil::FPBits<long double>::UIntType>( 138 GET_ARG_VAL_SIMPLEST(long double, conv_index)); 139 break; 140 #endif // LLVM_LIBC_PRINTF_DISABLE_FLOAT 141 #ifndef LLVM_LIBC_PRINTF_DISABLE_WRITE_INT 142 case ('n'): 143 #endif // LLVM_LIBC_PRINTF_DISABLE_WRITE_INT 144 case ('p'): 145 case ('s'): 146 section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index); 147 break; 148 default: 149 // if the conversion is undefined, change this to a raw section. 150 section.has_conv = false; 151 break; 152 } 153 ++cur_pos; 154 } else { 155 // raw section 156 section.has_conv = false; 157 while (str[cur_pos] != '%' && str[cur_pos] != '\0') 158 ++cur_pos; 159 } 160 section.raw_len = cur_pos - starting_pos; 161 return section; 162 } 163 164 FormatFlags Parser::parse_flags(size_t *local_pos) { 165 bool found_flag = true; 166 FormatFlags flags = FormatFlags(0); 167 while (found_flag) { 168 switch (str[*local_pos]) { 169 case '-': 170 flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED); 171 break; 172 case '+': 173 flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN); 174 break; 175 case ' ': 176 flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX); 177 break; 178 case '#': 179 flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM); 180 break; 181 case '0': 182 flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES); 183 break; 184 default: 185 found_flag = false; 186 } 187 if (found_flag) 188 ++*local_pos; 189 } 190 return flags; 191 } 192 193 LengthModifier Parser::parse_length_modifier(size_t *local_pos) { 194 switch (str[*local_pos]) { 195 case ('l'): 196 if (str[*local_pos + 1] == 'l') { 197 *local_pos += 2; 198 return LengthModifier::ll; 199 } else { 200 ++*local_pos; 201 return LengthModifier::l; 202 } 203 case ('h'): 204 if (str[*local_pos + 1] == 'h') { 205 *local_pos += 2; 206 return LengthModifier::hh; 207 } else { 208 ++*local_pos; 209 return LengthModifier::h; 210 } 211 case ('L'): 212 ++*local_pos; 213 return LengthModifier::L; 214 case ('j'): 215 ++*local_pos; 216 return LengthModifier::j; 217 case ('z'): 218 ++*local_pos; 219 return LengthModifier::z; 220 case ('t'): 221 ++*local_pos; 222 return LengthModifier::t; 223 default: 224 return LengthModifier::none; 225 } 226 } 227 228 //---------------------------------------------------- 229 // INDEX MODE ONLY FUNCTIONS AFTER HERE: 230 //---------------------------------------------------- 231 232 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 233 234 size_t Parser::parse_index(size_t *local_pos) { 235 if (internal::isdigit(str[*local_pos])) { 236 char *int_end; 237 size_t index = 238 internal::strtointeger<size_t>(str + *local_pos, &int_end, 10); 239 if (int_end[0] != '$') 240 return 0; 241 *local_pos = 1 + int_end - str; 242 return index; 243 } 244 return 0; 245 } 246 247 Parser::TypeDesc Parser::get_type_desc(size_t index) { 248 // index mode is assumed, and the indicies start at 1, so an index 249 // of 0 is invalid. 250 size_t local_pos = 0; 251 252 while (str[local_pos]) { 253 if (str[local_pos] == '%') { 254 ++local_pos; 255 256 size_t conv_index = parse_index(&local_pos); 257 258 // the flags aren't relevant for this situation, but I need to skip past 259 // them so they're parsed but the result is discarded. 260 parse_flags(&local_pos); 261 262 // handle width 263 if (str[local_pos] == '*') { 264 ++local_pos; 265 266 size_t width_index = parse_index(&local_pos); 267 set_type_desc(width_index, TYPE_DESC<int>); 268 if (width_index == index) 269 return TYPE_DESC<int>; 270 271 } else if (internal::isdigit(str[local_pos])) { 272 while (internal::isdigit(str[local_pos])) 273 ++local_pos; 274 } 275 276 // handle precision 277 if (str[local_pos] == '.') { 278 ++local_pos; 279 if (str[local_pos] == '*') { 280 ++local_pos; 281 282 size_t precision_index = parse_index(&local_pos); 283 set_type_desc(precision_index, TYPE_DESC<int>); 284 if (precision_index == index) 285 return TYPE_DESC<int>; 286 287 } else if (internal::isdigit(str[local_pos])) { 288 while (internal::isdigit(str[local_pos])) 289 ++local_pos; 290 } 291 } 292 293 LengthModifier lm = parse_length_modifier(&local_pos); 294 295 // if we don't have an index for this conversion, then its position is 296 // unknown and all this information is irrelevant. The rest of this logic 297 // has been for skipping past this conversion properly to avoid 298 // weirdness with %%. 299 if (conv_index == 0) { 300 ++local_pos; 301 continue; 302 } 303 304 TypeDesc conv_size = TYPE_DESC<void>; 305 switch (str[local_pos]) { 306 case ('%'): 307 conv_size = TYPE_DESC<void>; 308 break; 309 case ('c'): 310 conv_size = TYPE_DESC<int>; 311 break; 312 case ('d'): 313 case ('i'): 314 case ('o'): 315 case ('x'): 316 case ('X'): 317 case ('u'): 318 switch (lm) { 319 case (LengthModifier::hh): 320 case (LengthModifier::h): 321 case (LengthModifier::none): 322 conv_size = TYPE_DESC<int>; 323 break; 324 case (LengthModifier::l): 325 conv_size = TYPE_DESC<long>; 326 break; 327 case (LengthModifier::ll): 328 case (LengthModifier::L): // This isn't in the standard, but is in other 329 // libc implementations. 330 conv_size = TYPE_DESC<long long>; 331 break; 332 case (LengthModifier::j): 333 conv_size = TYPE_DESC<intmax_t>; 334 break; 335 case (LengthModifier::z): 336 conv_size = TYPE_DESC<size_t>; 337 break; 338 case (LengthModifier::t): 339 conv_size = TYPE_DESC<ptrdiff_t>; 340 break; 341 } 342 break; 343 #ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT 344 case ('f'): 345 case ('F'): 346 case ('e'): 347 case ('E'): 348 case ('a'): 349 case ('A'): 350 case ('g'): 351 case ('G'): 352 if (lm != LengthModifier::L) 353 conv_size = TYPE_DESC<double>; 354 else 355 conv_size = TYPE_DESC<long double>; 356 break; 357 #endif // LLVM_LIBC_PRINTF_DISABLE_FLOAT 358 #ifndef LLVM_LIBC_PRINTF_DISABLE_WRITE_INT 359 case ('n'): 360 #endif // LLVM_LIBC_PRINTF_DISABLE_WRITE_INT 361 case ('p'): 362 case ('s'): 363 conv_size = TYPE_DESC<void *>; 364 break; 365 default: 366 conv_size = TYPE_DESC<int>; 367 break; 368 } 369 370 set_type_desc(conv_index, conv_size); 371 if (conv_index == index) 372 return conv_size; 373 } 374 ++local_pos; 375 } 376 377 // If there is no size for the requested index, then just guess that it's an 378 // int. 379 return TYPE_DESC<int>; 380 } 381 382 void Parser::args_to_index(size_t index) { 383 if (args_index > index) { 384 args_index = 1; 385 args_cur = args_start; 386 } 387 388 while (args_index < index) { 389 Parser::TypeDesc cur_type_desc = TYPE_DESC<void>; 390 if (args_index <= DESC_ARR_LEN) 391 cur_type_desc = desc_arr[args_index - 1]; 392 393 if (cur_type_desc == TYPE_DESC<void>) 394 cur_type_desc = get_type_desc(args_index); 395 396 if (cur_type_desc == TYPE_DESC<uint32_t>) 397 args_cur.next_var<uint32_t>(); 398 else if (cur_type_desc == TYPE_DESC<uint64_t>) 399 args_cur.next_var<uint64_t>(); 400 #ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT 401 // Floating point numbers are stored separately from the other arguments. 402 else if (cur_type_desc == TYPE_DESC<double>) 403 args_cur.next_var<double>(); 404 else if (cur_type_desc == TYPE_DESC<long double>) 405 args_cur.next_var<long double>(); 406 #endif // LLVM_LIBC_PRINTF_DISABLE_FLOAT 407 // pointers may be stored separately from normal values. 408 else if (cur_type_desc == TYPE_DESC<void *>) 409 args_cur.next_var<void *>(); 410 else 411 args_cur.next_var<uint32_t>(); 412 413 ++args_index; 414 } 415 } 416 417 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 418 419 } // namespace printf_core 420 } // namespace __llvm_libc 421