1 //===- Format.h - Utilities for String Format -------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares utilities for formatting strings. They are specially
10 // tailored to the needs of TableGen'ing op definitions and rewrite rules,
11 // so they are not expected to be used as widely applicable utilities.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef MLIR_TABLEGEN_FORMAT_H_
16 #define MLIR_TABLEGEN_FORMAT_H_
17
18 #include "mlir/Support/LLVM.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/Support/FormatVariadic.h"
22
23 namespace mlir {
24 namespace tblgen {
25
26 /// Format context containing substitutions for special placeholders.
27 ///
28 /// This context divides special placeholders into two categories: builtin ones
29 /// and custom ones.
30 ///
31 /// Builtin placeholders are baked into `FmtContext` and each one of them has a
32 /// dedicated setter. They can be used in all dialects. Their names follow the
33 /// convention of `$_<name>`. The rationale of the leading underscore is to
34 /// avoid confusion and name collision: op arguments/attributes/results are
35 /// named as $<name>, and we can potentially support referencing those entities
36 /// directly in the format template in the future.
///
38 /// Custom ones are registered by dialect-specific TableGen backends and use the
39 /// same unified setter.
40 class FmtContext {
41 public:
42 // Placeholder kinds
43 enum class PHKind : char {
44 None,
45 Custom, // For custom placeholders
46 Builder, // For the $_builder placeholder
47 Op, // For the $_op placeholder
48 Self, // For the $_self placeholder
49 };
50
51 FmtContext() = default;
52
53 // Create a format context with a list of substitutions.
54 FmtContext(ArrayRef<std::pair<StringRef, StringRef>> subs);
55
56 // Setter for custom placeholders
57 FmtContext &addSubst(StringRef placeholder, const Twine &subst);
58
59 // Setters for builtin placeholders
60 FmtContext &withBuilder(Twine subst);
61 FmtContext &withOp(Twine subst);
62 FmtContext &withSelf(Twine subst);
63
64 Optional<StringRef> getSubstFor(PHKind placeholder) const;
65 Optional<StringRef> getSubstFor(StringRef placeholder) const;
66
67 static PHKind getPlaceHolderKind(StringRef str);
68
69 private:
70 struct PHKindInfo : DenseMapInfo<PHKind> {
71 using CharInfo = DenseMapInfo<char>;
72
getEmptyKeyPHKindInfo73 static inline PHKind getEmptyKey() {
74 return static_cast<PHKind>(CharInfo::getEmptyKey());
75 }
getTombstoneKeyPHKindInfo76 static inline PHKind getTombstoneKey() {
77 return static_cast<PHKind>(CharInfo::getTombstoneKey());
78 }
getHashValuePHKindInfo79 static unsigned getHashValue(const PHKind &val) {
80 return CharInfo::getHashValue(static_cast<char>(val));
81 }
82
isEqualPHKindInfo83 static bool isEqual(const PHKind &lhs, const PHKind &rhs) {
84 return lhs == rhs;
85 }
86 };
87
88 llvm::SmallDenseMap<PHKind, std::string, 4, PHKindInfo> builtinSubstMap;
89 llvm::StringMap<std::string> customSubstMap;
90 };
91
92 /// Struct representing a replacement segment for the formatted string. It can
93 /// be a segment of the formatting template (for `Literal`) or a replacement
94 /// parameter (for `PositionalPH`, `PositionalRangePH` and `SpecialPH`).
95 struct FmtReplacement {
96 enum class Type {
97 Empty,
98 Literal,
99 PositionalPH,
100 PositionalRangePH,
101 SpecialPH
102 };
103
104 FmtReplacement() = default;
FmtReplacementFmtReplacement105 explicit FmtReplacement(StringRef literal)
106 : type(Type::Literal), spec(literal) {}
FmtReplacementFmtReplacement107 FmtReplacement(StringRef spec, size_t index)
108 : type(Type::PositionalPH), spec(spec), index(index) {}
FmtReplacementFmtReplacement109 FmtReplacement(StringRef spec, size_t index, size_t end)
110 : type(Type::PositionalRangePH), spec(spec), index(index), end(end) {}
FmtReplacementFmtReplacement111 FmtReplacement(StringRef spec, FmtContext::PHKind placeholder)
112 : type(Type::SpecialPH), spec(spec), placeholder(placeholder) {}
113
114 Type type = Type::Empty;
115 StringRef spec;
116 size_t index = 0;
117 size_t end = kUnset;
118 FmtContext::PHKind placeholder = FmtContext::PHKind::None;
119
120 static constexpr size_t kUnset = -1;
121 };
122
123 class FmtObjectBase {
124 private:
125 static std::pair<FmtReplacement, StringRef> splitFmtSegment(StringRef fmt);
126 static std::vector<FmtReplacement> parseFormatString(StringRef fmt);
127
128 protected:
129 // The parameters are stored in a std::tuple, which does not provide runtime
130 // indexing capabilities. In order to enable runtime indexing, we use this
131 // structure to put the parameters into a std::vector. Since the parameters
132 // are not all the same type, we use some type-erasure by wrapping the
133 // parameters in a template class that derives from a non-template superclass.
134 // Essentially, we are converting a std::tuple<Derived<Ts...>> to a
135 // std::vector<Base*>.
136 struct CreateAdapters {
137 template <typename... Ts>
operatorCreateAdapters138 std::vector<llvm::detail::format_adapter *> operator()(Ts &...items) {
139 return std::vector<llvm::detail::format_adapter *>{&items...};
140 }
141 };
142
143 StringRef fmt;
144 const FmtContext *context;
145 std::vector<llvm::detail::format_adapter *> adapters;
146 std::vector<FmtReplacement> replacements;
147
148 public:
FmtObjectBase(StringRef fmt,const FmtContext * ctx,size_t numParams)149 FmtObjectBase(StringRef fmt, const FmtContext *ctx, size_t numParams)
150 : fmt(fmt), context(ctx), replacements(parseFormatString(fmt)) {}
151
152 FmtObjectBase(const FmtObjectBase &that) = delete;
153
FmtObjectBase(FmtObjectBase && that)154 FmtObjectBase(FmtObjectBase &&that)
155 : fmt(that.fmt), context(that.context),
156 adapters(), // adapters are initialized by FmtObject
157 replacements(std::move(that.replacements)) {}
158
159 void format(llvm::raw_ostream &s) const;
160
str()161 std::string str() const {
162 std::string result;
163 llvm::raw_string_ostream s(result);
164 format(s);
165 return s.str();
166 }
167
168 template <unsigned N>
sstr()169 SmallString<N> sstr() const {
170 SmallString<N> result;
171 llvm::raw_svector_ostream s(result);
172 format(s);
173 return result;
174 }
175
176 template <unsigned N>
177 operator SmallString<N>() const {
178 return sstr<N>();
179 }
180
string()181 operator std::string() const { return str(); }
182 };
183
184 template <typename Tuple>
185 class FmtObject : public FmtObjectBase {
186 // Storage for the parameter adapters. Since the base class erases the type
187 // of the parameters, we have to own the storage for the parameters here, and
188 // have the base class store type-erased pointers into this tuple.
189 Tuple parameters;
190
191 public:
FmtObject(StringRef fmt,const FmtContext * ctx,Tuple && params)192 FmtObject(StringRef fmt, const FmtContext *ctx, Tuple &¶ms)
193 : FmtObjectBase(fmt, ctx, std::tuple_size<Tuple>::value),
194 parameters(std::move(params)) {
195 adapters.reserve(std::tuple_size<Tuple>::value);
196 adapters = llvm::apply_tuple(CreateAdapters(), parameters);
197 }
198
199 FmtObject(FmtObject const &that) = delete;
200
FmtObject(FmtObject && that)201 FmtObject(FmtObject &&that)
202 : FmtObjectBase(std::move(that)), parameters(std::move(that.parameters)) {
203 adapters.reserve(that.adapters.size());
204 adapters = llvm::apply_tuple(CreateAdapters(), parameters);
205 }
206 };
207
208 class FmtStrVecObject : public FmtObjectBase {
209 public:
210 using StrFormatAdapter =
211 decltype(llvm::detail::build_format_adapter(std::declval<std::string>()));
212
213 FmtStrVecObject(StringRef fmt, const FmtContext *ctx,
214 ArrayRef<std::string> params);
215 FmtStrVecObject(FmtStrVecObject const &that) = delete;
216 FmtStrVecObject(FmtStrVecObject &&that);
217
218 private:
219 SmallVector<StrFormatAdapter, 16> parameters;
220 };
221
222 /// Formats text by substituting placeholders in format string with replacement
223 /// parameters.
224 ///
225 /// There are two categories of placeholders accepted, both led by a '$' sign:
226 ///
227 /// 1.a Positional placeholder: $[0-9]+
228 /// 1.b Positional range placeholder: $[0-9]+...
229 /// 2. Special placeholder: $[a-zA-Z_][a-zA-Z0-9_]*
230 ///
231 /// Replacement parameters for positional placeholders are supplied as the
232 /// `vals` parameter pack with 1:1 mapping. That is, $0 will be replaced by the
/// first parameter in `vals`, $1 by the second one, and so on. Note that you
/// can use the positional placeholders in any order and repeat them any number
/// of times; for example, "$2 $1 $1 $0" is accepted.
236 ///
/// Replacement parameters for positional range placeholders are supplied as if
/// positional placeholders were specified with commas separating them.
239 ///
240 /// Replacement parameters for special placeholders are supplied using the `ctx`
241 /// format context.
242 ///
243 /// The `fmt` is recorded as a `StringRef` inside the returned `FmtObject`.
244 /// The caller needs to make sure the underlying data is available when the
245 /// `FmtObject` is used.
246 ///
/// `ctx` may be nullptr if no special placeholder is used.
248 ///
249 /// If no substitution is provided for a placeholder or any error happens during
250 /// format string parsing or replacement, the placeholder will be outputted
251 /// as-is with an additional marker '<no-subst-found>', to aid debugging.
252 ///
253 /// To print a '$' literally, escape it with '$$'.
254 ///
255 /// This utility function is inspired by LLVM formatv(), with modifications
256 /// specially tailored for TableGen C++ generation usage:
257 ///
/// 1. This utility uses '$' instead of '{' and '}' for denoting the placeholder
///    because '{' and '}' are frequently used in C++ code.
260 /// 2. This utility does not support format layout because it is rarely needed
261 /// in C++ code generation.
262 template <typename... Ts>
263 inline auto tgfmt(StringRef fmt, const FmtContext *ctx, Ts &&...vals)
264 -> FmtObject<decltype(std::make_tuple(
265 llvm::detail::build_format_adapter(std::forward<Ts>(vals))...))> {
266 using ParamTuple = decltype(std::make_tuple(
267 llvm::detail::build_format_adapter(std::forward<Ts>(vals))...));
268 return FmtObject<ParamTuple>(
269 fmt, ctx,
270 std::make_tuple(
271 llvm::detail::build_format_adapter(std::forward<Ts>(vals))...));
272 }
273
tgfmt(StringRef fmt,const FmtContext * ctx,ArrayRef<std::string> params)274 inline FmtStrVecObject tgfmt(StringRef fmt, const FmtContext *ctx,
275 ArrayRef<std::string> params) {
276 return FmtStrVecObject(fmt, ctx, params);
277 }
278
279 } // namespace tblgen
280 } // namespace mlir
281
282 #endif // MLIR_TABLEGEN_FORMAT_H_
283