#ifndef MSGPACK_H
#define MSGPACK_H

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>

namespace msgpack {

// The message pack format is dynamically typed, schema-less. Format is:
// message: [type][header][payload]
// where type is one byte, header length is a fixed length function of type
// payload is zero to N bytes, with the length encoded in [type][header]

// Scalar fields include boolean, signed integer, float, string etc
// Composite types are sequences of messages
// Array field is [header][element][element]...
// Map field is [header][key][value][key][value]...

// Multibyte integer fields are big endian encoded
// The map key can be any message type
// Maps may contain duplicate keys
// Data is not uniquely encoded, e.g. integer "8" may be stored as one byte or
// in as many as nine, as signed or unsigned. Implementation defined.
// Similarly "foo" may embed the length in the type field or in multiple bytes

// This parser is structured as an iterator over a sequence of bytes.
// It calls a user provided function on each message in order to extract fields
// The default implementation for each scalar type is to do nothing. For map or
// arrays, the default implementation returns just after that message to support
// iterating to the next message, but otherwise has no effect.
30 31 struct byte_range { 32 const unsigned char *start; 33 const unsigned char *end; 34 }; 35 36 const unsigned char *skip_next_message(const unsigned char *start, 37 const unsigned char *end); 38 39 template <typename Derived> class functors_defaults { 40 public: 41 void cb_string(size_t N, const unsigned char *str) { 42 derived().handle_string(N, str); 43 } 44 void cb_boolean(bool x) { derived().handle_boolean(x); } 45 void cb_signed(int64_t x) { derived().handle_signed(x); } 46 void cb_unsigned(uint64_t x) { derived().handle_unsigned(x); } 47 void cb_array_elements(byte_range bytes) { 48 derived().handle_array_elements(bytes); 49 } 50 void cb_map_elements(byte_range key, byte_range value) { 51 derived().handle_map_elements(key, value); 52 } 53 const unsigned char *cb_array(uint64_t N, byte_range bytes) { 54 return derived().handle_array(N, bytes); 55 } 56 const unsigned char *cb_map(uint64_t N, byte_range bytes) { 57 return derived().handle_map(N, bytes); 58 } 59 60 private: 61 Derived &derived() { return *static_cast<Derived *>(this); } 62 63 // Default implementations for scalar ops are no-ops 64 void handle_string(size_t, const unsigned char *) {} 65 void handle_boolean(bool) {} 66 void handle_signed(int64_t) {} 67 void handle_unsigned(uint64_t) {} 68 void handle_array_elements(byte_range) {} 69 void handle_map_elements(byte_range, byte_range) {} 70 71 // Default implementation for sequences is to skip over the messages 72 const unsigned char *handle_array(uint64_t N, byte_range bytes) { 73 for (uint64_t i = 0; i < N; i++) { 74 const unsigned char *next = skip_next_message(bytes.start, bytes.end); 75 if (!next) { 76 return nullptr; 77 } 78 cb_array_elements(bytes); 79 bytes.start = next; 80 } 81 return bytes.start; 82 } 83 const unsigned char *handle_map(uint64_t N, byte_range bytes) { 84 for (uint64_t i = 0; i < N; i++) { 85 const unsigned char *start_key = bytes.start; 86 const unsigned char *end_key = skip_next_message(start_key, bytes.end); 87 if 
(!end_key) { 88 return nullptr; 89 } 90 const unsigned char *start_value = end_key; 91 const unsigned char *end_value = 92 skip_next_message(start_value, bytes.end); 93 if (!end_value) { 94 return nullptr; 95 } 96 cb_map_elements({start_key, end_key}, {start_value, end_value}); 97 bytes.start = end_value; 98 } 99 return bytes.start; 100 } 101 }; 102 103 typedef enum : uint8_t { 104 #define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) NAME, 105 #include "msgpack.def" 106 #undef X 107 } type; 108 109 [[noreturn]] void internal_error(); 110 type parse_type(unsigned char x); 111 unsigned bytes_used_fixed(type ty); 112 113 typedef uint64_t (*payload_info_t)(const unsigned char *); 114 payload_info_t payload_info(msgpack::type ty); 115 116 template <typename T, typename R> R bitcast(T x); 117 118 template <typename F, msgpack::type ty> 119 const unsigned char *handle_msgpack_given_type(byte_range bytes, F f) { 120 const unsigned char *start = bytes.start; 121 const unsigned char *end = bytes.end; 122 const uint64_t available = end - start; 123 assert(available != 0); 124 assert(ty == parse_type(*start)); 125 126 const uint64_t bytes_used = bytes_used_fixed(ty); 127 if (available < bytes_used) { 128 return 0; 129 } 130 const uint64_t available_post_header = available - bytes_used; 131 132 const payload_info_t info = payload_info(ty); 133 const uint64_t N = info(start); 134 135 switch (ty) { 136 case msgpack::t: 137 case msgpack::f: { 138 // t is 0b11000010, f is 0b11000011, masked with 0x1 139 f.cb_boolean(N); 140 return start + bytes_used; 141 } 142 143 case msgpack::posfixint: 144 case msgpack::uint8: 145 case msgpack::uint16: 146 case msgpack::uint32: 147 case msgpack::uint64: { 148 f.cb_unsigned(N); 149 return start + bytes_used; 150 } 151 152 case msgpack::negfixint: 153 case msgpack::int8: 154 case msgpack::int16: 155 case msgpack::int32: 156 case msgpack::int64: { 157 f.cb_signed(bitcast<uint64_t, int64_t>(N)); 158 return start + bytes_used; 159 } 160 161 case 
msgpack::fixstr: 162 case msgpack::str8: 163 case msgpack::str16: 164 case msgpack::str32: { 165 if (available_post_header < N) { 166 return 0; 167 } else { 168 f.cb_string(N, start + bytes_used); 169 return start + bytes_used + N; 170 } 171 } 172 173 case msgpack::fixarray: 174 case msgpack::array16: 175 case msgpack::array32: { 176 return f.cb_array(N, {start + bytes_used, end}); 177 } 178 179 case msgpack::fixmap: 180 case msgpack::map16: 181 case msgpack::map32: { 182 return f.cb_map(N, {start + bytes_used, end}); 183 } 184 185 case msgpack::nil: 186 case msgpack::bin8: 187 case msgpack::bin16: 188 case msgpack::bin32: 189 case msgpack::float32: 190 case msgpack::float64: 191 case msgpack::ext8: 192 case msgpack::ext16: 193 case msgpack::ext32: 194 case msgpack::fixext1: 195 case msgpack::fixext2: 196 case msgpack::fixext4: 197 case msgpack::fixext8: 198 case msgpack::fixext16: 199 case msgpack::never_used: { 200 if (available_post_header < N) { 201 return 0; 202 } 203 return start + bytes_used + N; 204 } 205 } 206 internal_error(); 207 } 208 209 template <typename F> 210 const unsigned char *handle_msgpack(byte_range bytes, F f) { 211 const unsigned char *start = bytes.start; 212 const unsigned char *end = bytes.end; 213 const uint64_t available = end - start; 214 if (available == 0) { 215 return 0; 216 } 217 const type ty = parse_type(*start); 218 219 switch (ty) { 220 #define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) \ 221 case msgpack::NAME: \ 222 return handle_msgpack_given_type<F, msgpack::NAME>(bytes, f); 223 #include "msgpack.def" 224 #undef X 225 } 226 227 internal_error(); 228 } 229 230 bool message_is_string(byte_range bytes, const char *str); 231 232 template <typename C> void foronly_string(byte_range bytes, C callback) { 233 struct inner : functors_defaults<inner> { 234 inner(C &cb) : cb(cb) {} 235 C &cb; 236 void handle_string(size_t N, const unsigned char *str) { cb(N, str); } 237 }; 238 handle_msgpack<inner>(bytes, {callback}); 239 } 240 241 
template <typename C> void foronly_unsigned(byte_range bytes, C callback) { 242 struct inner : functors_defaults<inner> { 243 inner(C &cb) : cb(cb) {} 244 C &cb; 245 void handle_unsigned(uint64_t x) { cb(x); } 246 }; 247 handle_msgpack<inner>(bytes, {callback}); 248 } 249 250 template <typename C> void foreach_array(byte_range bytes, C callback) { 251 struct inner : functors_defaults<inner> { 252 inner(C &cb) : cb(cb) {} 253 C &cb; 254 void handle_array_elements(byte_range element) { cb(element); } 255 }; 256 handle_msgpack<inner>(bytes, {callback}); 257 } 258 259 template <typename C> void foreach_map(byte_range bytes, C callback) { 260 struct inner : functors_defaults<inner> { 261 inner(C &cb) : cb(cb) {} 262 C &cb; 263 void handle_map_elements(byte_range key, byte_range value) { 264 cb(key, value); 265 } 266 }; 267 handle_msgpack<inner>(bytes, {callback}); 268 } 269 270 // Crude approximation to json 271 void dump(byte_range); 272 273 } // namespace msgpack 274 275 #endif 276