14f4752eeSMichael Jones //===-- Format string parser for printf -------------------------*- C++ -*-===//
24f4752eeSMichael Jones //
34f4752eeSMichael Jones // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44f4752eeSMichael Jones // See https://llvm.org/LICENSE.txt for license information.
54f4752eeSMichael Jones // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
64f4752eeSMichael Jones //
74f4752eeSMichael Jones //===----------------------------------------------------------------------===//
84f4752eeSMichael Jones 
94f4752eeSMichael Jones #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
104f4752eeSMichael Jones #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
114f4752eeSMichael Jones 
#include "src/__support/arg_list.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/string/memory_utils/memset_implementations.h"

#include <stddef.h>
#include <stdint.h>
174f4752eeSMichael Jones 
184f4752eeSMichael Jones namespace __llvm_libc {
194f4752eeSMichael Jones namespace printf_core {
204f4752eeSMichael Jones 
214f4752eeSMichael Jones class Parser {
224f4752eeSMichael Jones   const char *__restrict str;
234f4752eeSMichael Jones 
244f4752eeSMichael Jones   size_t cur_pos = 0;
254f4752eeSMichael Jones   internal::ArgList args_cur;
26*945fa672SMichael Jones 
27*945fa672SMichael Jones #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
28*945fa672SMichael Jones   // args_start stores the start of the va_args, which is allows getting the
29*945fa672SMichael Jones   // value of arguments that have already been passed. args_index is tracked so
30*945fa672SMichael Jones   // that we know which argument args_cur is on.
31*945fa672SMichael Jones   internal::ArgList args_start;
324f4752eeSMichael Jones   size_t args_index = 1;
334f4752eeSMichael Jones 
34*945fa672SMichael Jones   enum PrimaryType : uint8_t { Integer = 0, Float = 1, Pointer = 2 };
35*945fa672SMichael Jones 
36*945fa672SMichael Jones   // TypeDesc stores the information about a type that is relevant to printf in
37*945fa672SMichael Jones   // a relatively compact manner.
38*945fa672SMichael Jones   struct TypeDesc {
39*945fa672SMichael Jones     uint8_t size;
40*945fa672SMichael Jones     PrimaryType primary_type;
41*945fa672SMichael Jones     constexpr bool operator==(const TypeDesc &other) const {
42*945fa672SMichael Jones       return (size == other.size) && (primary_type == other.primary_type);
43*945fa672SMichael Jones     }
44*945fa672SMichael Jones   };
45*945fa672SMichael Jones   // TODO: Make this size configurable via a compile option.
46*945fa672SMichael Jones   static constexpr size_t DESC_ARR_LEN = 32;
47*945fa672SMichael Jones   // desc_arr stores the sizes of the variables in the ArgList. This is used in
48*945fa672SMichael Jones   // index mode to reduce repeated string parsing. The sizes are stored as
49*945fa672SMichael Jones   // TypeDesc objects, which store the size as well as minimal type information.
50*945fa672SMichael Jones   // This is necessary because some systems separate the floating point and
51*945fa672SMichael Jones   // integer values in va_args.
52*945fa672SMichael Jones   TypeDesc desc_arr[DESC_ARR_LEN];
53*945fa672SMichael Jones 
544f4752eeSMichael Jones   // TODO: Look into object stores for optimization.
554f4752eeSMichael Jones 
56*945fa672SMichael Jones #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
57*945fa672SMichael Jones 
584f4752eeSMichael Jones public:
59*945fa672SMichael Jones #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
Parser(const char * __restrict new_str,internal::ArgList & args)604f4752eeSMichael Jones   Parser(const char *__restrict new_str, internal::ArgList &args)
61*945fa672SMichael Jones       : str(new_str), args_cur(args), args_start(args) {
62*945fa672SMichael Jones     inline_memset(reinterpret_cast<char *>(desc_arr), 0,
63*945fa672SMichael Jones                   DESC_ARR_LEN * sizeof(TypeDesc));
64*945fa672SMichael Jones   }
65*945fa672SMichael Jones #else
66*945fa672SMichael Jones   Parser(const char *__restrict new_str, internal::ArgList &args)
67*945fa672SMichael Jones       : str(new_str), args_cur(args) {}
68*945fa672SMichael Jones #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
694f4752eeSMichael Jones 
704f4752eeSMichael Jones   // get_next_section will parse the format string until it has a fully
714f4752eeSMichael Jones   // specified format section. This can either be a raw format section with no
724f4752eeSMichael Jones   // conversion, or a format section with a conversion that has all of its
734f4752eeSMichael Jones   // variables stored in the format section.
744f4752eeSMichael Jones   FormatSection get_next_section();
754f4752eeSMichael Jones 
764f4752eeSMichael Jones private:
774f4752eeSMichael Jones   // parse_flags parses the flags inside a format string. It assumes that
784f4752eeSMichael Jones   // str[*local_pos] is inside a format specifier, and parses any flags it
794f4752eeSMichael Jones   // finds. It returns a FormatFlags object containing the set of found flags
804f4752eeSMichael Jones   // arithmetically or'd together. local_pos will be moved past any flags found.
814f4752eeSMichael Jones   FormatFlags parse_flags(size_t *local_pos);
824f4752eeSMichael Jones 
834f4752eeSMichael Jones   // parse_length_modifier parses the length modifier inside a format string. It
844f4752eeSMichael Jones   // assumes that str[*local_pos] is inside a format specifier. It returns a
854f4752eeSMichael Jones   // LengthModifier with the length modifier it found. It will advance local_pos
864f4752eeSMichael Jones   // after the format specifier if one is found.
874f4752eeSMichael Jones   LengthModifier parse_length_modifier(size_t *local_pos);
884f4752eeSMichael Jones 
894f4752eeSMichael Jones   // get_next_arg_value gets the next value from the arg list as type T.
get_next_arg_value()904f4752eeSMichael Jones   template <class T> T inline get_next_arg_value() {
914f4752eeSMichael Jones     return args_cur.next_var<T>();
924f4752eeSMichael Jones   }
93*945fa672SMichael Jones 
94*945fa672SMichael Jones   //----------------------------------------------------
95*945fa672SMichael Jones   // INDEX MODE ONLY FUNCTIONS AFTER HERE:
96*945fa672SMichael Jones   //----------------------------------------------------
97*945fa672SMichael Jones 
98*945fa672SMichael Jones #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
99*945fa672SMichael Jones 
100*945fa672SMichael Jones   // parse_index parses the index of a value inside a format string. It
101*945fa672SMichael Jones   // assumes that str[*local_pos] points to character after a '%' or '*', and
102*945fa672SMichael Jones   // returns 0 if there is no closing $, or if it finds no number. If it finds a
103*945fa672SMichael Jones   // number, it will move local_pos past the end of the $, else it will not move
104*945fa672SMichael Jones   // local_pos.
105*945fa672SMichael Jones   size_t parse_index(size_t *local_pos);
106*945fa672SMichael Jones 
107*945fa672SMichael Jones   template <typename T>
108*945fa672SMichael Jones   static constexpr TypeDesc TYPE_DESC{sizeof(T), PrimaryType::Integer};
109*945fa672SMichael Jones   template <>
110*945fa672SMichael Jones   static constexpr TypeDesc TYPE_DESC<double>{sizeof(double),
111*945fa672SMichael Jones                                               PrimaryType::Float};
112*945fa672SMichael Jones   template <>
113*945fa672SMichael Jones   static constexpr TypeDesc TYPE_DESC<long double>{sizeof(long double),
114*945fa672SMichael Jones                                                    PrimaryType::Float};
115*945fa672SMichael Jones   template <>
116*945fa672SMichael Jones   static constexpr TypeDesc TYPE_DESC<void *>{sizeof(void *),
117*945fa672SMichael Jones                                               PrimaryType::Pointer};
118*945fa672SMichael Jones   template <>
119*945fa672SMichael Jones   static constexpr TypeDesc TYPE_DESC<void>{0, PrimaryType::Integer};
120*945fa672SMichael Jones 
set_type_desc(size_t index,TypeDesc value)121*945fa672SMichael Jones   void inline set_type_desc(size_t index, TypeDesc value) {
122*945fa672SMichael Jones     if (index != 0 && index <= DESC_ARR_LEN)
123*945fa672SMichael Jones       desc_arr[index - 1] = value;
124*945fa672SMichael Jones   }
125*945fa672SMichael Jones 
126*945fa672SMichael Jones   // get_arg_value gets the value from the arg list at index (starting at 1).
127*945fa672SMichael Jones   // This may require parsing the format string. An index of 0 is interpreted as
128*945fa672SMichael Jones   // the next value.
get_arg_value(size_t index)129*945fa672SMichael Jones   template <class T> T inline get_arg_value(size_t index) {
130*945fa672SMichael Jones     if (!(index == 0 || index == args_index))
131*945fa672SMichael Jones       args_to_index(index);
132*945fa672SMichael Jones 
133*945fa672SMichael Jones     set_type_desc(index, TYPE_DESC<T>);
134*945fa672SMichael Jones 
135*945fa672SMichael Jones     ++args_index;
136*945fa672SMichael Jones     return get_next_arg_value<T>();
137*945fa672SMichael Jones   }
138*945fa672SMichael Jones 
139*945fa672SMichael Jones   // the ArgList can only return the next item in the list. This function is
140*945fa672SMichael Jones   // used in index mode when the item that needs to be read is not the next one.
141*945fa672SMichael Jones   // It moves cur_args to the index requested so the the appropriate value may
142*945fa672SMichael Jones   // be read. This may involve parsing the format string, and is in the worst
143*945fa672SMichael Jones   // case an O(n^2) operation.
144*945fa672SMichael Jones   void args_to_index(size_t index);
145*945fa672SMichael Jones 
146*945fa672SMichael Jones   // get_type_desc assumes that this format string uses index mode. It iterates
147*945fa672SMichael Jones   // through the format string until it finds a format specifier that defines
148*945fa672SMichael Jones   // the type of index, and returns a TypeDesc describing that type. It does not
149*945fa672SMichael Jones   // modify cur_pos.
150*945fa672SMichael Jones   TypeDesc get_type_desc(size_t index);
151*945fa672SMichael Jones 
152*945fa672SMichael Jones #endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
1534f4752eeSMichael Jones };
1544f4752eeSMichael Jones 
1554f4752eeSMichael Jones } // namespace printf_core
1564f4752eeSMichael Jones } // namespace __llvm_libc
1574f4752eeSMichael Jones 
1584f4752eeSMichael Jones #endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
159