1 //===-- Format string parser implementation for printf ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // #define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag. 10 11 #include "parser.h" 12 13 #include "src/__support/arg_list.h" 14 15 #include "src/__support/CPP/Bit.h" 16 #include "src/__support/FPUtil/FPBits.h" 17 #include "src/__support/ctype_utils.h" 18 #include "src/__support/str_to_integer.h" 19 20 namespace __llvm_libc { 21 namespace printf_core { 22 23 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 24 #define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index) 25 #else 26 #define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>() 27 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 28 29 FormatSection Parser::get_next_section() { 30 FormatSection section; 31 section.raw_string = str + cur_pos; 32 size_t starting_pos = cur_pos; 33 if (str[cur_pos] == '%') { 34 // format section 35 section.has_conv = true; 36 37 ++cur_pos; 38 [[maybe_unused]] size_t conv_index = 0; 39 40 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 41 conv_index = parse_index(&cur_pos); 42 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 43 44 section.flags = parse_flags(&cur_pos); 45 46 // handle width 47 section.min_width = 0; 48 if (str[cur_pos] == '*') { 49 ++cur_pos; 50 51 section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos)); 52 } else if (internal::isdigit(str[cur_pos])) { 53 char *int_end; 54 section.min_width = 55 internal::strtointeger<int>(str + cur_pos, &int_end, 10); 56 cur_pos = int_end - str; 57 } 58 if (section.min_width < 0) { 59 section.min_width = -section.min_width; 60 section.flags = 61 static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED); 62 } 63 64 // handle precision 65 section.precision = -1; // negative precisions are ignored. 66 if (str[cur_pos] == '.') { 67 ++cur_pos; 68 section.precision = 0; // if there's a . but no specified precision, the 69 // precision is implicitly 0. 70 if (str[cur_pos] == '*') { 71 ++cur_pos; 72 73 section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos)); 74 75 } else if (internal::isdigit(str[cur_pos])) { 76 char *int_end; 77 section.precision = 78 internal::strtointeger<int>(str + cur_pos, &int_end, 10); 79 cur_pos = int_end - str; 80 } 81 } 82 83 LengthModifier lm = parse_length_modifier(&cur_pos); 84 85 section.length_modifier = lm; 86 section.conv_name = str[cur_pos]; 87 switch (str[cur_pos]) { 88 case ('%'): 89 break; 90 case ('c'): 91 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index); 92 break; 93 case ('d'): 94 case ('i'): 95 case ('o'): 96 case ('x'): 97 case ('X'): 98 case ('u'): 99 switch (lm) { 100 case (LengthModifier::hh): 101 case (LengthModifier::h): 102 case (LengthModifier::none): 103 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index); 104 break; 105 case (LengthModifier::l): 106 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index); 107 break; 108 case (LengthModifier::ll): 109 case (LengthModifier::L): // This isn't in the standard, but is in other 110 // libc implementations. 111 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index); 112 break; 113 case (LengthModifier::j): 114 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index); 115 break; 116 case (LengthModifier::z): 117 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index); 118 break; 119 case (LengthModifier::t): 120 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index); 121 break; 122 } 123 break; 124 // TODO(michaelrj): add a flag to disable float point values here 125 case ('f'): 126 case ('F'): 127 case ('e'): 128 case ('E'): 129 case ('a'): 130 case ('A'): 131 case ('g'): 132 case ('G'): 133 if (lm != LengthModifier::L) 134 section.conv_val_raw = 135 bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index)); 136 else 137 section.conv_val_raw = bit_cast<fputil::FPBits<long double>::UIntType>( 138 GET_ARG_VAL_SIMPLEST(long double, conv_index)); 139 break; 140 #ifndef LLVM_LIBC_PRINTF_DISABLE_WRITE_INT 141 case ('n'): 142 #endif // LLVM_LIBC_PRINTF_DISABLE_WRITE_INT 143 case ('p'): 144 case ('s'): 145 section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index); 146 break; 147 default: 148 // if the conversion is undefined, change this to a raw section. 149 section.has_conv = false; 150 break; 151 } 152 ++cur_pos; 153 } else { 154 // raw section 155 section.has_conv = false; 156 while (str[cur_pos] != '%' && str[cur_pos] != '\0') 157 ++cur_pos; 158 } 159 section.raw_len = cur_pos - starting_pos; 160 return section; 161 } 162 163 FormatFlags Parser::parse_flags(size_t *local_pos) { 164 bool found_flag = true; 165 FormatFlags flags = FormatFlags(0); 166 while (found_flag) { 167 switch (str[*local_pos]) { 168 case '-': 169 flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED); 170 break; 171 case '+': 172 flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN); 173 break; 174 case ' ': 175 flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX); 176 break; 177 case '#': 178 flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM); 179 break; 180 case '0': 181 flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES); 182 break; 183 default: 184 found_flag = false; 185 } 186 if (found_flag) 187 ++*local_pos; 188 } 189 return flags; 190 } 191 192 LengthModifier Parser::parse_length_modifier(size_t *local_pos) { 193 switch (str[*local_pos]) { 194 case ('l'): 195 if (str[*local_pos + 1] == 'l') { 196 *local_pos += 2; 197 return LengthModifier::ll; 198 } else { 199 ++*local_pos; 200 return LengthModifier::l; 201 } 202 case ('h'): 203 if (str[*local_pos + 1] == 'h') { 204 *local_pos += 2; 205 return LengthModifier::hh; 206 } else { 207 ++*local_pos; 208 return LengthModifier::h; 209 } 210 case ('L'): 211 ++*local_pos; 212 return LengthModifier::L; 213 case ('j'): 214 ++*local_pos; 215 return LengthModifier::j; 216 case ('z'): 217 ++*local_pos; 218 return LengthModifier::z; 219 case ('t'): 220 ++*local_pos; 221 return LengthModifier::t; 222 default: 223 return LengthModifier::none; 224 } 225 } 226 227 //---------------------------------------------------- 228 // INDEX MODE ONLY FUNCTIONS AFTER HERE: 229 //---------------------------------------------------- 230 231 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 232 233 size_t Parser::parse_index(size_t *local_pos) { 234 if (internal::isdigit(str[*local_pos])) { 235 char *int_end; 236 size_t index = 237 internal::strtointeger<size_t>(str + *local_pos, &int_end, 10); 238 if (int_end[0] != '$') 239 return 0; 240 *local_pos = 1 + int_end - str; 241 return index; 242 } 243 return 0; 244 } 245 246 Parser::TypeDesc Parser::get_type_desc(size_t index) { 247 // index mode is assumed, and the indicies start at 1, so an index 248 // of 0 is invalid. 249 size_t local_pos = 0; 250 251 while (str[local_pos]) { 252 if (str[local_pos] == '%') { 253 ++local_pos; 254 255 size_t conv_index = parse_index(&local_pos); 256 257 // the flags aren't relevant for this situation, but I need to skip past 258 // them so they're parsed but the result is discarded. 259 parse_flags(&local_pos); 260 261 // handle width 262 if (str[local_pos] == '*') { 263 ++local_pos; 264 265 size_t width_index = parse_index(&local_pos); 266 set_type_desc(width_index, TYPE_DESC<int>); 267 if (width_index == index) 268 return TYPE_DESC<int>; 269 270 } else if (internal::isdigit(str[local_pos])) { 271 while (internal::isdigit(str[local_pos])) 272 ++local_pos; 273 } 274 275 // handle precision 276 if (str[local_pos] == '.') { 277 ++local_pos; 278 if (str[local_pos] == '*') { 279 ++local_pos; 280 281 size_t precision_index = parse_index(&local_pos); 282 set_type_desc(precision_index, TYPE_DESC<int>); 283 if (precision_index == index) 284 return TYPE_DESC<int>; 285 286 } else if (internal::isdigit(str[local_pos])) { 287 while (internal::isdigit(str[local_pos])) 288 ++local_pos; 289 } 290 } 291 292 LengthModifier lm = parse_length_modifier(&local_pos); 293 294 // if we don't have an index for this conversion, then its position is 295 // unknown and all this information is irrelevant. The rest of this logic 296 // has been for skipping past this conversion properly to avoid 297 // weirdness with %%. 298 if (conv_index == 0) { 299 ++local_pos; 300 continue; 301 } 302 303 TypeDesc conv_size = TYPE_DESC<void>; 304 switch (str[local_pos]) { 305 case ('%'): 306 conv_size = TYPE_DESC<void>; 307 break; 308 case ('c'): 309 conv_size = TYPE_DESC<int>; 310 break; 311 case ('d'): 312 case ('i'): 313 case ('o'): 314 case ('x'): 315 case ('X'): 316 case ('u'): 317 switch (lm) { 318 case (LengthModifier::hh): 319 case (LengthModifier::h): 320 case (LengthModifier::none): 321 conv_size = TYPE_DESC<int>; 322 break; 323 case (LengthModifier::l): 324 conv_size = TYPE_DESC<long>; 325 break; 326 case (LengthModifier::ll): 327 case (LengthModifier::L): // This isn't in the standard, but is in other 328 // libc implementations. 329 conv_size = TYPE_DESC<long long>; 330 break; 331 case (LengthModifier::j): 332 conv_size = TYPE_DESC<intmax_t>; 333 break; 334 case (LengthModifier::z): 335 conv_size = TYPE_DESC<size_t>; 336 break; 337 case (LengthModifier::t): 338 conv_size = TYPE_DESC<ptrdiff_t>; 339 break; 340 } 341 break; 342 // TODO(michaelrj): add a flag to disable float point values here 343 case ('f'): 344 case ('F'): 345 case ('e'): 346 case ('E'): 347 case ('a'): 348 case ('A'): 349 case ('g'): 350 case ('G'): 351 if (lm != LengthModifier::L) 352 conv_size = TYPE_DESC<double>; 353 else 354 conv_size = TYPE_DESC<long double>; 355 break; 356 #ifndef LLVM_LIBC_PRINTF_DISABLE_WRITE_INT 357 case ('n'): 358 #endif // LLVM_LIBC_PRINTF_DISABLE_WRITE_INT 359 case ('p'): 360 case ('s'): 361 conv_size = TYPE_DESC<void *>; 362 break; 363 default: 364 conv_size = TYPE_DESC<int>; 365 break; 366 } 367 368 set_type_desc(conv_index, conv_size); 369 if (conv_index == index) 370 return conv_size; 371 } 372 ++local_pos; 373 } 374 375 // If there is no size for the requested index, then just guess that it's an 376 // int. 377 return TYPE_DESC<int>; 378 } 379 380 void Parser::args_to_index(size_t index) { 381 if (args_index > index) { 382 args_index = 1; 383 args_cur = args_start; 384 } 385 386 while (args_index < index) { 387 Parser::TypeDesc cur_type_desc = TYPE_DESC<void>; 388 if (args_index <= DESC_ARR_LEN) 389 cur_type_desc = desc_arr[args_index - 1]; 390 391 if (cur_type_desc == TYPE_DESC<void>) 392 cur_type_desc = get_type_desc(args_index); 393 394 if (cur_type_desc == TYPE_DESC<uint32_t>) 395 args_cur.next_var<uint32_t>(); 396 else if (cur_type_desc == TYPE_DESC<uint64_t>) 397 args_cur.next_var<uint64_t>(); 398 // TODO(michaelrj): add a flag to disable float point values here 399 // Floating point numbers are stored separately from the other arguments. 400 else if (cur_type_desc == TYPE_DESC<double>) 401 args_cur.next_var<double>(); 402 else if (cur_type_desc == TYPE_DESC<long double>) 403 args_cur.next_var<long double>(); 404 // pointers may be stored separately from normal values. 405 else if (cur_type_desc == TYPE_DESC<void *>) 406 args_cur.next_var<void *>(); 407 else 408 args_cur.next_var<uint32_t>(); 409 410 ++args_index; 411 } 412 } 413 414 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 415 416 } // namespace printf_core 417 } // namespace __llvm_libc 418