1 //===-- Format string parser for printf -------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H 10 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H 11 12 #include "src/__support/arg_list.h" 13 #include "src/stdio/printf_core/core_structs.h" 14 #include "src/string/memory_utils/memset_implementations.h" 15 16 #include <stddef.h> 17 18 namespace __llvm_libc { 19 namespace printf_core { 20 21 class Parser { 22 const char *__restrict str; 23 24 size_t cur_pos = 0; 25 internal::ArgList args_cur; 26 27 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 28 // args_start stores the start of the va_args, which is allows getting the 29 // value of arguments that have already been passed. args_index is tracked so 30 // that we know which argument args_cur is on. 31 internal::ArgList args_start; 32 size_t args_index = 1; 33 34 enum PrimaryType : uint8_t { Integer = 0, Float = 1, Pointer = 2 }; 35 36 // TypeDesc stores the information about a type that is relevant to printf in 37 // a relatively compact manner. 38 struct TypeDesc { 39 uint8_t size; 40 PrimaryType primary_type; 41 constexpr bool operator==(const TypeDesc &other) const { 42 return (size == other.size) && (primary_type == other.primary_type); 43 } 44 }; 45 // TODO: Make this size configurable via a compile option. 46 static constexpr size_t DESC_ARR_LEN = 32; 47 // desc_arr stores the sizes of the variables in the ArgList. This is used in 48 // index mode to reduce repeated string parsing. The sizes are stored as 49 // TypeDesc objects, which store the size as well as minimal type information. 50 // This is necessary because some systems separate the floating point and 51 // integer values in va_args. 52 TypeDesc desc_arr[DESC_ARR_LEN]; 53 54 // TODO: Look into object stores for optimization. 55 56 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 57 58 public: 59 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE Parser(const char * __restrict new_str,internal::ArgList & args)60 Parser(const char *__restrict new_str, internal::ArgList &args) 61 : str(new_str), args_cur(args), args_start(args) { 62 inline_memset(reinterpret_cast<char *>(desc_arr), 0, 63 DESC_ARR_LEN * sizeof(TypeDesc)); 64 } 65 #else 66 Parser(const char *__restrict new_str, internal::ArgList &args) 67 : str(new_str), args_cur(args) {} 68 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 69 70 // get_next_section will parse the format string until it has a fully 71 // specified format section. This can either be a raw format section with no 72 // conversion, or a format section with a conversion that has all of its 73 // variables stored in the format section. 74 FormatSection get_next_section(); 75 76 private: 77 // parse_flags parses the flags inside a format string. It assumes that 78 // str[*local_pos] is inside a format specifier, and parses any flags it 79 // finds. It returns a FormatFlags object containing the set of found flags 80 // arithmetically or'd together. local_pos will be moved past any flags found. 81 FormatFlags parse_flags(size_t *local_pos); 82 83 // parse_length_modifier parses the length modifier inside a format string. It 84 // assumes that str[*local_pos] is inside a format specifier. It returns a 85 // LengthModifier with the length modifier it found. It will advance local_pos 86 // after the format specifier if one is found. 87 LengthModifier parse_length_modifier(size_t *local_pos); 88 89 // get_next_arg_value gets the next value from the arg list as type T. get_next_arg_value()90 template <class T> T inline get_next_arg_value() { 91 return args_cur.next_var<T>(); 92 } 93 94 //---------------------------------------------------- 95 // INDEX MODE ONLY FUNCTIONS AFTER HERE: 96 //---------------------------------------------------- 97 98 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 99 100 // parse_index parses the index of a value inside a format string. It 101 // assumes that str[*local_pos] points to character after a '%' or '*', and 102 // returns 0 if there is no closing $, or if it finds no number. If it finds a 103 // number, it will move local_pos past the end of the $, else it will not move 104 // local_pos. 105 size_t parse_index(size_t *local_pos); 106 107 template <typename T> 108 static constexpr TypeDesc TYPE_DESC{sizeof(T), PrimaryType::Integer}; 109 template <> 110 static constexpr TypeDesc TYPE_DESC<double>{sizeof(double), 111 PrimaryType::Float}; 112 template <> 113 static constexpr TypeDesc TYPE_DESC<long double>{sizeof(long double), 114 PrimaryType::Float}; 115 template <> 116 static constexpr TypeDesc TYPE_DESC<void *>{sizeof(void *), 117 PrimaryType::Pointer}; 118 template <> 119 static constexpr TypeDesc TYPE_DESC<void>{0, PrimaryType::Integer}; 120 set_type_desc(size_t index,TypeDesc value)121 void inline set_type_desc(size_t index, TypeDesc value) { 122 if (index != 0 && index <= DESC_ARR_LEN) 123 desc_arr[index - 1] = value; 124 } 125 126 // get_arg_value gets the value from the arg list at index (starting at 1). 127 // This may require parsing the format string. An index of 0 is interpreted as 128 // the next value. get_arg_value(size_t index)129 template <class T> T inline get_arg_value(size_t index) { 130 if (!(index == 0 || index == args_index)) 131 args_to_index(index); 132 133 set_type_desc(index, TYPE_DESC<T>); 134 135 ++args_index; 136 return get_next_arg_value<T>(); 137 } 138 139 // the ArgList can only return the next item in the list. This function is 140 // used in index mode when the item that needs to be read is not the next one. 141 // It moves cur_args to the index requested so the the appropriate value may 142 // be read. This may involve parsing the format string, and is in the worst 143 // case an O(n^2) operation. 144 void args_to_index(size_t index); 145 146 // get_type_desc assumes that this format string uses index mode. It iterates 147 // through the format string until it finds a format specifier that defines 148 // the type of index, and returns a TypeDesc describing that type. It does not 149 // modify cur_pos. 150 TypeDesc get_type_desc(size_t index); 151 152 #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 153 }; 154 155 } // namespace printf_core 156 } // namespace __llvm_libc 157 158 #endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H 159