14f4752eeSMichael Jones //===-- Format string parser for printf -------------------------*- C++ -*-===// 24f4752eeSMichael Jones // 34f4752eeSMichael Jones // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 44f4752eeSMichael Jones // See https://llvm.org/LICENSE.txt for license information. 54f4752eeSMichael Jones // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 64f4752eeSMichael Jones // 74f4752eeSMichael Jones //===----------------------------------------------------------------------===// 84f4752eeSMichael Jones 94f4752eeSMichael Jones #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H 104f4752eeSMichael Jones #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H 114f4752eeSMichael Jones 124f4752eeSMichael Jones #include "src/__support/arg_list.h" 134f4752eeSMichael Jones #include "src/stdio/printf_core/core_structs.h" 14*945fa672SMichael Jones #include "src/string/memory_utils/memset_implementations.h" 154f4752eeSMichael Jones 164f4752eeSMichael Jones #include <stddef.h> 174f4752eeSMichael Jones 184f4752eeSMichael Jones namespace __llvm_libc { 194f4752eeSMichael Jones namespace printf_core { 204f4752eeSMichael Jones 214f4752eeSMichael Jones class Parser { 224f4752eeSMichael Jones const char *__restrict str; 234f4752eeSMichael Jones 244f4752eeSMichael Jones size_t cur_pos = 0; 254f4752eeSMichael Jones internal::ArgList args_cur; 26*945fa672SMichael Jones 27*945fa672SMichael Jones #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 28*945fa672SMichael Jones // args_start stores the start of the va_args, which is allows getting the 29*945fa672SMichael Jones // value of arguments that have already been passed. args_index is tracked so 30*945fa672SMichael Jones // that we know which argument args_cur is on. 31*945fa672SMichael Jones internal::ArgList args_start; 324f4752eeSMichael Jones size_t args_index = 1; 334f4752eeSMichael Jones 34*945fa672SMichael Jones enum PrimaryType : uint8_t { Integer = 0, Float = 1, Pointer = 2 }; 35*945fa672SMichael Jones 36*945fa672SMichael Jones // TypeDesc stores the information about a type that is relevant to printf in 37*945fa672SMichael Jones // a relatively compact manner. 38*945fa672SMichael Jones struct TypeDesc { 39*945fa672SMichael Jones uint8_t size; 40*945fa672SMichael Jones PrimaryType primary_type; 41*945fa672SMichael Jones constexpr bool operator==(const TypeDesc &other) const { 42*945fa672SMichael Jones return (size == other.size) && (primary_type == other.primary_type); 43*945fa672SMichael Jones } 44*945fa672SMichael Jones }; 45*945fa672SMichael Jones // TODO: Make this size configurable via a compile option. 46*945fa672SMichael Jones static constexpr size_t DESC_ARR_LEN = 32; 47*945fa672SMichael Jones // desc_arr stores the sizes of the variables in the ArgList. This is used in 48*945fa672SMichael Jones // index mode to reduce repeated string parsing. The sizes are stored as 49*945fa672SMichael Jones // TypeDesc objects, which store the size as well as minimal type information. 50*945fa672SMichael Jones // This is necessary because some systems separate the floating point and 51*945fa672SMichael Jones // integer values in va_args. 52*945fa672SMichael Jones TypeDesc desc_arr[DESC_ARR_LEN]; 53*945fa672SMichael Jones 544f4752eeSMichael Jones // TODO: Look into object stores for optimization. 554f4752eeSMichael Jones 56*945fa672SMichael Jones #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 57*945fa672SMichael Jones 584f4752eeSMichael Jones public: 59*945fa672SMichael Jones #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE Parser(const char * __restrict new_str,internal::ArgList & args)604f4752eeSMichael Jones Parser(const char *__restrict new_str, internal::ArgList &args) 61*945fa672SMichael Jones : str(new_str), args_cur(args), args_start(args) { 62*945fa672SMichael Jones inline_memset(reinterpret_cast<char *>(desc_arr), 0, 63*945fa672SMichael Jones DESC_ARR_LEN * sizeof(TypeDesc)); 64*945fa672SMichael Jones } 65*945fa672SMichael Jones #else 66*945fa672SMichael Jones Parser(const char *__restrict new_str, internal::ArgList &args) 67*945fa672SMichael Jones : str(new_str), args_cur(args) {} 68*945fa672SMichael Jones #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 694f4752eeSMichael Jones 704f4752eeSMichael Jones // get_next_section will parse the format string until it has a fully 714f4752eeSMichael Jones // specified format section. This can either be a raw format section with no 724f4752eeSMichael Jones // conversion, or a format section with a conversion that has all of its 734f4752eeSMichael Jones // variables stored in the format section. 744f4752eeSMichael Jones FormatSection get_next_section(); 754f4752eeSMichael Jones 764f4752eeSMichael Jones private: 774f4752eeSMichael Jones // parse_flags parses the flags inside a format string. It assumes that 784f4752eeSMichael Jones // str[*local_pos] is inside a format specifier, and parses any flags it 794f4752eeSMichael Jones // finds. It returns a FormatFlags object containing the set of found flags 804f4752eeSMichael Jones // arithmetically or'd together. local_pos will be moved past any flags found. 814f4752eeSMichael Jones FormatFlags parse_flags(size_t *local_pos); 824f4752eeSMichael Jones 834f4752eeSMichael Jones // parse_length_modifier parses the length modifier inside a format string. It 844f4752eeSMichael Jones // assumes that str[*local_pos] is inside a format specifier. It returns a 854f4752eeSMichael Jones // LengthModifier with the length modifier it found. It will advance local_pos 864f4752eeSMichael Jones // after the format specifier if one is found. 874f4752eeSMichael Jones LengthModifier parse_length_modifier(size_t *local_pos); 884f4752eeSMichael Jones 894f4752eeSMichael Jones // get_next_arg_value gets the next value from the arg list as type T. get_next_arg_value()904f4752eeSMichael Jones template <class T> T inline get_next_arg_value() { 914f4752eeSMichael Jones return args_cur.next_var<T>(); 924f4752eeSMichael Jones } 93*945fa672SMichael Jones 94*945fa672SMichael Jones //---------------------------------------------------- 95*945fa672SMichael Jones // INDEX MODE ONLY FUNCTIONS AFTER HERE: 96*945fa672SMichael Jones //---------------------------------------------------- 97*945fa672SMichael Jones 98*945fa672SMichael Jones #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 99*945fa672SMichael Jones 100*945fa672SMichael Jones // parse_index parses the index of a value inside a format string. It 101*945fa672SMichael Jones // assumes that str[*local_pos] points to character after a '%' or '*', and 102*945fa672SMichael Jones // returns 0 if there is no closing $, or if it finds no number. If it finds a 103*945fa672SMichael Jones // number, it will move local_pos past the end of the $, else it will not move 104*945fa672SMichael Jones // local_pos. 105*945fa672SMichael Jones size_t parse_index(size_t *local_pos); 106*945fa672SMichael Jones 107*945fa672SMichael Jones template <typename T> 108*945fa672SMichael Jones static constexpr TypeDesc TYPE_DESC{sizeof(T), PrimaryType::Integer}; 109*945fa672SMichael Jones template <> 110*945fa672SMichael Jones static constexpr TypeDesc TYPE_DESC<double>{sizeof(double), 111*945fa672SMichael Jones PrimaryType::Float}; 112*945fa672SMichael Jones template <> 113*945fa672SMichael Jones static constexpr TypeDesc TYPE_DESC<long double>{sizeof(long double), 114*945fa672SMichael Jones PrimaryType::Float}; 115*945fa672SMichael Jones template <> 116*945fa672SMichael Jones static constexpr TypeDesc TYPE_DESC<void *>{sizeof(void *), 117*945fa672SMichael Jones PrimaryType::Pointer}; 118*945fa672SMichael Jones template <> 119*945fa672SMichael Jones static constexpr TypeDesc TYPE_DESC<void>{0, PrimaryType::Integer}; 120*945fa672SMichael Jones set_type_desc(size_t index,TypeDesc value)121*945fa672SMichael Jones void inline set_type_desc(size_t index, TypeDesc value) { 122*945fa672SMichael Jones if (index != 0 && index <= DESC_ARR_LEN) 123*945fa672SMichael Jones desc_arr[index - 1] = value; 124*945fa672SMichael Jones } 125*945fa672SMichael Jones 126*945fa672SMichael Jones // get_arg_value gets the value from the arg list at index (starting at 1). 127*945fa672SMichael Jones // This may require parsing the format string. An index of 0 is interpreted as 128*945fa672SMichael Jones // the next value. get_arg_value(size_t index)129*945fa672SMichael Jones template <class T> T inline get_arg_value(size_t index) { 130*945fa672SMichael Jones if (!(index == 0 || index == args_index)) 131*945fa672SMichael Jones args_to_index(index); 132*945fa672SMichael Jones 133*945fa672SMichael Jones set_type_desc(index, TYPE_DESC<T>); 134*945fa672SMichael Jones 135*945fa672SMichael Jones ++args_index; 136*945fa672SMichael Jones return get_next_arg_value<T>(); 137*945fa672SMichael Jones } 138*945fa672SMichael Jones 139*945fa672SMichael Jones // the ArgList can only return the next item in the list. This function is 140*945fa672SMichael Jones // used in index mode when the item that needs to be read is not the next one. 141*945fa672SMichael Jones // It moves cur_args to the index requested so the the appropriate value may 142*945fa672SMichael Jones // be read. This may involve parsing the format string, and is in the worst 143*945fa672SMichael Jones // case an O(n^2) operation. 144*945fa672SMichael Jones void args_to_index(size_t index); 145*945fa672SMichael Jones 146*945fa672SMichael Jones // get_type_desc assumes that this format string uses index mode. It iterates 147*945fa672SMichael Jones // through the format string until it finds a format specifier that defines 148*945fa672SMichael Jones // the type of index, and returns a TypeDesc describing that type. It does not 149*945fa672SMichael Jones // modify cur_pos. 150*945fa672SMichael Jones TypeDesc get_type_desc(size_t index); 151*945fa672SMichael Jones 152*945fa672SMichael Jones #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1534f4752eeSMichael Jones }; 1544f4752eeSMichael Jones 1554f4752eeSMichael Jones } // namespace printf_core 1564f4752eeSMichael Jones } // namespace __llvm_libc 1574f4752eeSMichael Jones 1584f4752eeSMichael Jones #endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H 159