1 //===--- amdgpu/impl/msgpack.h ------------------------------------ C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef MSGPACK_H
9 #define MSGPACK_H
10 
11 #include <functional>
12 
13 namespace msgpack {
14 
15 // The message pack format is dynamically typed, schema-less. Format is:
16 // message: [type][header][payload]
// where type is one byte and the header length is a fixed function of type;
// the payload is zero to N bytes, with its length encoded in [type][header]
19 
20 // Scalar fields include boolean, signed integer, float, string etc
21 // Composite types are sequences of messages
22 // Array field is [header][element][element]...
23 // Map field is [header][key][value][key][value]...
24 
25 // Multibyte integer fields are big endian encoded
26 // The map key can be any message type
27 // Maps may contain duplicate keys
28 // Data is not uniquely encoded, e.g. integer "8" may be stored as one byte or
29 // in as many as nine, as signed or unsigned. Implementation defined.
30 // Similarly "foo" may embed the length in the type field or in multiple bytes
31 
32 // This parser is structured as an iterator over a sequence of bytes.
33 // It calls a user provided function on each message in order to extract fields
34 // The default implementation for each scalar type is to do nothing. For map or
35 // arrays, the default implementation returns just after that message to support
36 // iterating to the next message, but otherwise has no effect.
37 
// A pair of pointers delimiting the bytes still to be parsed. The parser in
// this header computes the number of available bytes as end - start, so end
// is one past the last byte and start == end denotes an empty range.
// Member order matters: ranges are built with brace initialisation
// ({start, end}) elsewhere in this header.
struct byte_range {
  const unsigned char *start; // first byte of the range
  const unsigned char *end;   // one past the last byte of the range
};
42 
// Declared here, defined elsewhere. The default sequence handlers in
// functors_defaults call it with the start of a message and the end of the
// enclosing range; they treat a null result as failure and any other result
// as the position at which the following message begins.
const unsigned char *skip_next_message(const unsigned char *start,
                                       const unsigned char *end);
45 
46 template <typename Derived> class functors_defaults {
47 public:
cb_string(size_t N,const unsigned char * str)48   void cb_string(size_t N, const unsigned char *str) {
49     derived().handle_string(N, str);
50   }
cb_boolean(bool x)51   void cb_boolean(bool x) { derived().handle_boolean(x); }
cb_signed(int64_t x)52   void cb_signed(int64_t x) { derived().handle_signed(x); }
cb_unsigned(uint64_t x)53   void cb_unsigned(uint64_t x) { derived().handle_unsigned(x); }
cb_array_elements(byte_range bytes)54   void cb_array_elements(byte_range bytes) {
55     derived().handle_array_elements(bytes);
56   }
cb_map_elements(byte_range key,byte_range value)57   void cb_map_elements(byte_range key, byte_range value) {
58     derived().handle_map_elements(key, value);
59   }
cb_array(uint64_t N,byte_range bytes)60   const unsigned char *cb_array(uint64_t N, byte_range bytes) {
61     return derived().handle_array(N, bytes);
62   }
cb_map(uint64_t N,byte_range bytes)63   const unsigned char *cb_map(uint64_t N, byte_range bytes) {
64     return derived().handle_map(N, bytes);
65   }
66 
67 private:
derived()68   Derived &derived() { return *static_cast<Derived *>(this); }
69 
70   // Default implementations for scalar ops are no-ops
handle_string(size_t,const unsigned char *)71   void handle_string(size_t, const unsigned char *) {}
handle_boolean(bool)72   void handle_boolean(bool) {}
handle_signed(int64_t)73   void handle_signed(int64_t) {}
handle_unsigned(uint64_t)74   void handle_unsigned(uint64_t) {}
handle_array_elements(byte_range)75   void handle_array_elements(byte_range) {}
handle_map_elements(byte_range,byte_range)76   void handle_map_elements(byte_range, byte_range) {}
77 
78   // Default implementation for sequences is to skip over the messages
handle_array(uint64_t N,byte_range bytes)79   const unsigned char *handle_array(uint64_t N, byte_range bytes) {
80     for (uint64_t i = 0; i < N; i++) {
81       const unsigned char *next = skip_next_message(bytes.start, bytes.end);
82       if (!next) {
83         return nullptr;
84       }
85       cb_array_elements(bytes);
86       bytes.start = next;
87     }
88     return bytes.start;
89   }
handle_map(uint64_t N,byte_range bytes)90   const unsigned char *handle_map(uint64_t N, byte_range bytes) {
91     for (uint64_t i = 0; i < N; i++) {
92       const unsigned char *start_key = bytes.start;
93       const unsigned char *end_key = skip_next_message(start_key, bytes.end);
94       if (!end_key) {
95         return nullptr;
96       }
97       const unsigned char *start_value = end_key;
98       const unsigned char *end_value =
99           skip_next_message(start_value, bytes.end);
100       if (!end_value) {
101         return nullptr;
102       }
103       cb_map_elements({start_key, end_key}, {start_value, end_value});
104       bytes.start = end_value;
105     }
106     return bytes.start;
107   }
108 };
109 
// Enumeration of message kinds, stored in a uint8_t. The enumerators are not
// spelled out here: every X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) entry in
// msgpack.def contributes one enumerator called NAME, in the order the
// entries appear. Only NAME is used by this expansion; the other fields are
// ignored here.
typedef enum : uint8_t {
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) NAME,
#include "msgpack.def"
#undef X
} type;
115 
// Does not return. The parsing functions below call it after switches that
// are expected to have returned for every enumerator of type.
[[noreturn]] void internal_error();
// Classifies a byte as a message type. handle_msgpack applies it to the first
// byte of the message being parsed.
type parse_type(unsigned char x);
// Number of bytes at the front of a message of type ty that precede the
// payload; handle_msgpack_given_type adds it to the message start to locate
// the payload.
unsigned bytes_used_fixed(type ty);

// A payload_info_t is handed a pointer to the start of a message and yields a
// 64-bit number. handle_msgpack_given_type interprets that number according
// to the message type: the value of a boolean or integer, the byte length of
// a string, the element count of an array or map, or the number of payload
// bytes to step over for the remaining types.
typedef uint64_t (*payload_info_t)(const unsigned char *);
// Returns the payload_info_t to use for messages of type ty.
payload_info_t payload_info(msgpack::type ty);

// Converts a T to an R. Only declared here; this header uses it to turn the
// uint64_t read for a signed integer message into an int64_t.
// NOTE(review): presumably preserves the bit pattern - confirm against the
// definition.
template <typename T, typename R> R bitcast(T x);
124 
125 template <typename F, msgpack::type ty>
handle_msgpack_given_type(byte_range bytes,F f)126 const unsigned char *handle_msgpack_given_type(byte_range bytes, F f) {
127   const unsigned char *start = bytes.start;
128   const unsigned char *end = bytes.end;
129   const uint64_t available = end - start;
130   assert(available != 0);
131   assert(ty == parse_type(*start));
132 
133   const uint64_t bytes_used = bytes_used_fixed(ty);
134   if (available < bytes_used) {
135     return 0;
136   }
137   const uint64_t available_post_header = available - bytes_used;
138 
139   const payload_info_t info = payload_info(ty);
140   const uint64_t N = info(start);
141 
142   switch (ty) {
143   case msgpack::t:
144   case msgpack::f: {
145     // t is 0b11000010, f is 0b11000011, masked with 0x1
146     f.cb_boolean(N);
147     return start + bytes_used;
148   }
149 
150   case msgpack::posfixint:
151   case msgpack::uint8:
152   case msgpack::uint16:
153   case msgpack::uint32:
154   case msgpack::uint64: {
155     f.cb_unsigned(N);
156     return start + bytes_used;
157   }
158 
159   case msgpack::negfixint:
160   case msgpack::int8:
161   case msgpack::int16:
162   case msgpack::int32:
163   case msgpack::int64: {
164     f.cb_signed(bitcast<uint64_t, int64_t>(N));
165     return start + bytes_used;
166   }
167 
168   case msgpack::fixstr:
169   case msgpack::str8:
170   case msgpack::str16:
171   case msgpack::str32: {
172     if (available_post_header < N) {
173       return 0;
174     } else {
175       f.cb_string(N, start + bytes_used);
176       return start + bytes_used + N;
177     }
178   }
179 
180   case msgpack::fixarray:
181   case msgpack::array16:
182   case msgpack::array32: {
183     return f.cb_array(N, {start + bytes_used, end});
184   }
185 
186   case msgpack::fixmap:
187   case msgpack::map16:
188   case msgpack::map32: {
189     return f.cb_map(N, {start + bytes_used, end});
190   }
191 
192   case msgpack::nil:
193   case msgpack::bin8:
194   case msgpack::bin16:
195   case msgpack::bin32:
196   case msgpack::float32:
197   case msgpack::float64:
198   case msgpack::ext8:
199   case msgpack::ext16:
200   case msgpack::ext32:
201   case msgpack::fixext1:
202   case msgpack::fixext2:
203   case msgpack::fixext4:
204   case msgpack::fixext8:
205   case msgpack::fixext16:
206   case msgpack::never_used: {
207     if (available_post_header < N) {
208       return 0;
209     }
210     return start + bytes_used + N;
211   }
212   }
213   internal_error();
214 }
215 
216 template <typename F>
handle_msgpack(byte_range bytes,F f)217 const unsigned char *handle_msgpack(byte_range bytes, F f) {
218   const unsigned char *start = bytes.start;
219   const unsigned char *end = bytes.end;
220   const uint64_t available = end - start;
221   if (available == 0) {
222     return 0;
223   }
224   const type ty = parse_type(*start);
225 
226   switch (ty) {
227 #define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER)                                  \
228   case msgpack::NAME:                                                          \
229     return handle_msgpack_given_type<F, msgpack::NAME>(bytes, f);
230 #include "msgpack.def"
231 #undef X
232   }
233 
234   internal_error();
235 }
236 
// Declared here, defined elsewhere.
// NOTE(review): presumably reports whether the message at the front of bytes
// is a string equal to the NUL-terminated str - confirm against the
// definition.
bool message_is_string(byte_range bytes, const char *str);
238 
foronly_string(byte_range bytes,C callback)239 template <typename C> void foronly_string(byte_range bytes, C callback) {
240   struct inner : functors_defaults<inner> {
241     inner(C &cb) : cb(cb) {}
242     C &cb;
243     void handle_string(size_t N, const unsigned char *str) { cb(N, str); }
244   };
245   handle_msgpack<inner>(bytes, {callback});
246 }
247 
foronly_unsigned(byte_range bytes,C callback)248 template <typename C> void foronly_unsigned(byte_range bytes, C callback) {
249   struct inner : functors_defaults<inner> {
250     inner(C &cb) : cb(cb) {}
251     C &cb;
252     void handle_unsigned(uint64_t x) { cb(x); }
253   };
254   handle_msgpack<inner>(bytes, {callback});
255 }
256 
foreach_array(byte_range bytes,C callback)257 template <typename C> void foreach_array(byte_range bytes, C callback) {
258   struct inner : functors_defaults<inner> {
259     inner(C &cb) : cb(cb) {}
260     C &cb;
261     void handle_array_elements(byte_range element) { cb(element); }
262   };
263   handle_msgpack<inner>(bytes, {callback});
264 }
265 
foreach_map(byte_range bytes,C callback)266 template <typename C> void foreach_map(byte_range bytes, C callback) {
267   struct inner : functors_defaults<inner> {
268     inner(C &cb) : cb(cb) {}
269     C &cb;
270     void handle_map_elements(byte_range key, byte_range value) {
271       cb(key, value);
272     }
273   };
274   handle_msgpack<inner>(bytes, {callback});
275 }
276 
// Crude approximation to json
// Declared here, defined elsewhere.
// NOTE(review): presumably prints the message(s) in the given range in a
// JSON-like form for debugging - confirm destination and format against the
// definition.
void dump(byte_range);
279 
280 } // namespace msgpack
281 
282 #endif
283