1 //===-- Format string parser implementation for printf ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // #define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag. 10 11 #include "parser.h" 12 13 #include "src/__support/arg_list.h" 14 15 #include "src/__support/CPP/Bit.h" 16 #include "src/__support/FPUtil/FPBits.h" 17 #include "src/__support/ctype_utils.h" 18 #include "src/__support/str_to_integer.h" 19 20 namespace __llvm_libc { 21 namespace printf_core { 22 23 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 24 #define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index) 25 #else 26 #define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>() 27 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 28 29 FormatSection Parser::get_next_section() { 30 FormatSection section; 31 section.raw_string = str + cur_pos; 32 size_t starting_pos = cur_pos; 33 if (str[cur_pos] == '%') { 34 // format section 35 section.has_conv = true; 36 37 ++cur_pos; 38 [[maybe_unused]] size_t conv_index = 0; 39 40 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 41 conv_index = parse_index(&cur_pos); 42 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 43 44 section.flags = parse_flags(&cur_pos); 45 46 // handle width 47 section.min_width = 0; 48 if (str[cur_pos] == '*') { 49 ++cur_pos; 50 51 section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos)); 52 } else if (internal::isdigit(str[cur_pos])) { 53 char *int_end; 54 section.min_width = 55 internal::strtointeger<int>(str + cur_pos, &int_end, 10); 56 cur_pos = int_end - str; 57 } 58 if (section.min_width < 0) { 59 section.min_width = -section.min_width; 60 section.flags = 61 static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED); 62 } 63 64 // handle precision 65 section.precision = -1; // negative precisions are ignored. 66 if (str[cur_pos] == '.') { 67 ++cur_pos; 68 section.precision = 0; // if there's a . but no specified precision, the 69 // precision is implicitly 0. 70 if (str[cur_pos] == '*') { 71 ++cur_pos; 72 73 section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos)); 74 75 } else if (internal::isdigit(str[cur_pos])) { 76 char *int_end; 77 section.precision = 78 internal::strtointeger<int>(str + cur_pos, &int_end, 10); 79 cur_pos = int_end - str; 80 } 81 } 82 83 LengthModifier lm = parse_length_modifier(&cur_pos); 84 85 section.length_modifier = lm; 86 section.conv_name = str[cur_pos]; 87 switch (str[cur_pos]) { 88 case ('%'): 89 break; 90 case ('c'): 91 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index); 92 break; 93 case ('d'): 94 case ('i'): 95 case ('o'): 96 case ('x'): 97 case ('X'): 98 case ('u'): 99 switch (lm) { 100 case (LengthModifier::hh): 101 case (LengthModifier::h): 102 case (LengthModifier::none): 103 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index); 104 break; 105 case (LengthModifier::l): 106 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index); 107 break; 108 case (LengthModifier::ll): 109 case (LengthModifier::L): // This isn't in the standard, but is in other 110 // libc implementations. 111 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index); 112 break; 113 case (LengthModifier::j): 114 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index); 115 break; 116 case (LengthModifier::z): 117 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index); 118 break; 119 case (LengthModifier::t): 120 section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index); 121 break; 122 } 123 break; 124 // TODO(michaelrj): add a flag to disable float point values here 125 case ('f'): 126 case ('F'): 127 case ('e'): 128 case ('E'): 129 case ('a'): 130 case ('A'): 131 case ('g'): 132 case ('G'): 133 if (lm != LengthModifier::L) 134 section.conv_val_raw = 135 bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index)); 136 else 137 section.conv_val_raw = bit_cast<fputil::FPBits<long double>::UIntType>( 138 GET_ARG_VAL_SIMPLEST(long double, conv_index)); 139 break; 140 case ('n'): 141 case ('p'): 142 case ('s'): 143 section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index); 144 break; 145 default: 146 // if the conversion is undefined, change this to a raw section. 147 section.has_conv = false; 148 break; 149 } 150 ++cur_pos; 151 } else { 152 // raw section 153 section.has_conv = false; 154 while (str[cur_pos] != '%' && str[cur_pos] != '\0') 155 ++cur_pos; 156 } 157 section.raw_len = cur_pos - starting_pos; 158 return section; 159 } 160 161 FormatFlags Parser::parse_flags(size_t *local_pos) { 162 bool found_flag = true; 163 FormatFlags flags = FormatFlags(0); 164 while (found_flag) { 165 switch (str[*local_pos]) { 166 case '-': 167 flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED); 168 break; 169 case '+': 170 flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN); 171 break; 172 case ' ': 173 flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX); 174 break; 175 case '#': 176 flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM); 177 break; 178 case '0': 179 flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES); 180 break; 181 default: 182 found_flag = false; 183 } 184 if (found_flag) 185 ++*local_pos; 186 } 187 return flags; 188 } 189 190 LengthModifier Parser::parse_length_modifier(size_t *local_pos) { 191 switch (str[*local_pos]) { 192 case ('l'): 193 if (str[*local_pos + 1] == 'l') { 194 *local_pos += 2; 195 return LengthModifier::ll; 196 } else { 197 ++*local_pos; 198 return LengthModifier::l; 199 } 200 case ('h'): 201 if (str[*local_pos + 1] == 'h') { 202 *local_pos += 2; 203 return LengthModifier::hh; 204 } else { 205 ++*local_pos; 206 return LengthModifier::h; 207 } 208 case ('L'): 209 ++*local_pos; 210 return LengthModifier::L; 211 case ('j'): 212 ++*local_pos; 213 return LengthModifier::j; 214 case ('z'): 215 ++*local_pos; 216 return LengthModifier::z; 217 case ('t'): 218 ++*local_pos; 219 return LengthModifier::t; 220 default: 221 return LengthModifier::none; 222 } 223 } 224 225 //---------------------------------------------------- 226 // INDEX MODE ONLY FUNCTIONS AFTER HERE: 227 //---------------------------------------------------- 228 229 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 230 231 size_t Parser::parse_index(size_t *local_pos) { 232 if (internal::isdigit(str[*local_pos])) { 233 char *int_end; 234 size_t index = 235 internal::strtointeger<size_t>(str + *local_pos, &int_end, 10); 236 if (int_end[0] != '$') 237 return 0; 238 *local_pos = 1 + int_end - str; 239 return index; 240 } 241 return 0; 242 } 243 244 Parser::TypeDesc Parser::get_type_desc(size_t index) { 245 // index mode is assumed, and the indicies start at 1, so an index 246 // of 0 is invalid. 247 size_t local_pos = 0; 248 249 while (str[local_pos]) { 250 if (str[local_pos] == '%') { 251 ++local_pos; 252 253 size_t conv_index = parse_index(&local_pos); 254 255 // the flags aren't relevant for this situation, but I need to skip past 256 // them so they're parsed but the result is discarded. 257 parse_flags(&local_pos); 258 259 // handle width 260 if (str[local_pos] == '*') { 261 ++local_pos; 262 263 size_t width_index = parse_index(&local_pos); 264 set_type_desc(width_index, TYPE_DESC<int>); 265 if (width_index == index) 266 return TYPE_DESC<int>; 267 268 } else if (internal::isdigit(str[local_pos])) { 269 while (internal::isdigit(str[local_pos])) 270 ++local_pos; 271 } 272 273 // handle precision 274 if (str[local_pos] == '.') { 275 ++local_pos; 276 if (str[local_pos] == '*') { 277 ++local_pos; 278 279 size_t precision_index = parse_index(&local_pos); 280 set_type_desc(precision_index, TYPE_DESC<int>); 281 if (precision_index == index) 282 return TYPE_DESC<int>; 283 284 } else if (internal::isdigit(str[local_pos])) { 285 while (internal::isdigit(str[local_pos])) 286 ++local_pos; 287 } 288 } 289 290 LengthModifier lm = parse_length_modifier(&local_pos); 291 292 // if we don't have an index for this conversion, then its position is 293 // unknown and all this information is irrelevant. The rest of this logic 294 // has been for skipping past this conversion properly to avoid 295 // weirdness with %%. 296 if (conv_index == 0) { 297 ++local_pos; 298 continue; 299 } 300 301 TypeDesc conv_size = TYPE_DESC<void>; 302 switch (str[local_pos]) { 303 case ('%'): 304 conv_size = TYPE_DESC<void>; 305 break; 306 case ('c'): 307 conv_size = TYPE_DESC<int>; 308 break; 309 case ('d'): 310 case ('i'): 311 case ('o'): 312 case ('x'): 313 case ('X'): 314 case ('u'): 315 switch (lm) { 316 case (LengthModifier::hh): 317 case (LengthModifier::h): 318 case (LengthModifier::none): 319 conv_size = TYPE_DESC<int>; 320 break; 321 case (LengthModifier::l): 322 conv_size = TYPE_DESC<long>; 323 break; 324 case (LengthModifier::ll): 325 case (LengthModifier::L): // This isn't in the standard, but is in other 326 // libc implementations. 327 conv_size = TYPE_DESC<long long>; 328 break; 329 case (LengthModifier::j): 330 conv_size = TYPE_DESC<intmax_t>; 331 break; 332 case (LengthModifier::z): 333 conv_size = TYPE_DESC<size_t>; 334 break; 335 case (LengthModifier::t): 336 conv_size = TYPE_DESC<ptrdiff_t>; 337 break; 338 } 339 break; 340 // TODO(michaelrj): add a flag to disable float point values here 341 case ('f'): 342 case ('F'): 343 case ('e'): 344 case ('E'): 345 case ('a'): 346 case ('A'): 347 case ('g'): 348 case ('G'): 349 if (lm != LengthModifier::L) 350 conv_size = TYPE_DESC<double>; 351 else 352 conv_size = TYPE_DESC<long double>; 353 break; 354 case ('n'): 355 case ('p'): 356 case ('s'): 357 conv_size = TYPE_DESC<void *>; 358 break; 359 default: 360 conv_size = TYPE_DESC<int>; 361 break; 362 } 363 364 set_type_desc(conv_index, conv_size); 365 if (conv_index == index) 366 return conv_size; 367 } 368 ++local_pos; 369 } 370 371 // If there is no size for the requested index, then just guess that it's an 372 // int. 373 return TYPE_DESC<int>; 374 } 375 376 void Parser::args_to_index(size_t index) { 377 if (args_index > index) { 378 args_index = 1; 379 args_cur = args_start; 380 } 381 382 while (args_index < index) { 383 Parser::TypeDesc cur_type_desc = TYPE_DESC<void>; 384 if (args_index <= DESC_ARR_LEN) 385 cur_type_desc = desc_arr[args_index - 1]; 386 387 if (cur_type_desc == TYPE_DESC<void>) 388 cur_type_desc = get_type_desc(args_index); 389 390 if (cur_type_desc == TYPE_DESC<uint32_t>) 391 args_cur.next_var<uint32_t>(); 392 else if (cur_type_desc == TYPE_DESC<uint64_t>) 393 args_cur.next_var<uint64_t>(); 394 // TODO(michaelrj): add a flag to disable float point values here 395 // Floating point numbers are stored separately from the other arguments. 396 else if (cur_type_desc == TYPE_DESC<double>) 397 args_cur.next_var<double>(); 398 else if (cur_type_desc == TYPE_DESC<long double>) 399 args_cur.next_var<long double>(); 400 // pointers may be stored separately from normal values. 401 else if (cur_type_desc == TYPE_DESC<void *>) 402 args_cur.next_var<void *>(); 403 else 404 args_cur.next_var<uint32_t>(); 405 406 ++args_index; 407 } 408 } 409 410 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 411 412 } // namespace printf_core 413 } // namespace __llvm_libc 414