1 #ifndef __MOS_API_H_ 2 #define __MOS_API_H_ 3 4 #ifdef DARWIN 5 #include <netinet/tcp.h> 6 #include <netinet/if_ether.h> 7 #else 8 #include <linux/tcp.h> 9 #include <linux/if_ether.h> 10 #endif 11 #include <netinet/in.h> 12 #include <arpa/inet.h> 13 #include <netinet/ip.h> 14 #include <stddef.h> /* for offsetof */ 15 #include "mtcp_epoll.h" 16 #include <stdbool.h> 17 18 #ifndef __MTCP_MANAGER 19 #define __MTCP_MANAGER 20 typedef struct mtcp_manager * mtcp_manager_t; 21 #endif 22 #ifndef __SOCKET_MAP 23 #define __SOCKET_MAP 24 typedef struct socket_map * socket_map_t; 25 #endif 26 27 /** Available hooking points */ 28 enum mtcp_hook_point 29 { 30 /* NOTE: The value of hooking points should not overlap with any of 31 * mos_event_types */ 32 33 /** Very first hooking point of incoming packet even before flow 34 * identification*/ 35 MOS_NULL = (1 << 29), 36 /** Hooking point before TCP receiver */ 37 MOS_HK_RCV = (1 << 30), 38 /** Hooking point after TCP sender */ 39 MOS_HK_SND = (1 << 31), 40 }; 41 42 /** Built-in events provided by mOS */ 43 enum mos_event_type 44 { 45 /** invalid event */ 46 MOS_NULL_EVENT = (0), 47 /* mos-defined tcp build-in events */ 48 /** A packet is coming in. */ 49 MOS_ON_PKT_IN = (0x1<<0), 50 /** A packet is going out. */ 51 /* THIS EVENT IS NOW DEPRECATED (USED ONLY FOR DEBUGGING) */ 52 MOS_ON_PKT_OUT = (0x1<<1), 53 /** SYN packet as seen by the monitor 54 * client side: activated when the client state is set to SYN_SENT 55 * server side: activated when the server state is set to SYN_RCVD 56 * 57 * Retransmitted SYN packets don't activate this event. 58 */ 59 MOS_ON_CONN_START = (0x1<<2), 60 /** 3-way handshake is finished. 61 * server side: ACK is coming in as a response of SYNACK. 62 * client side: SYNACK is coming in as a response of SYN. */ 63 /* THIS EVENT IS NOW DEPRECATED */ 64 MOS_ON_CONN_SETUP = (0x1<<3), 65 /** New data is now readable. 66 * This event is available in only MOS_NULL hook point. 67 * mOS raises this event only once while batched packet processing. */ 68 MOS_ON_CONN_NEW_DATA = (0x1<<4), 69 /** Abnormal behavior is detected. 70 * NOTE: This is not fully implemented yet. */ 71 MOS_ON_ERROR = (0x1<<5), 72 /** No packet is seen for a long time. 73 * This is implemented as mtcp_cb_settimer() 74 */ 75 MOS_ON_TIMEOUT = (0x1<<6), 76 /** TCP state is being changed. */ 77 MOS_ON_TCP_STATE_CHANGE = (0x1<<7), 78 /** A packet is not SYN and has no identified flow. */ 79 MOS_ON_ORPHAN = (0x1<<8), 80 /** Retransmission is detected */ 81 MOS_ON_REXMIT = (0x1<<9), 82 /** A flow is about to be destroyed. 83 * 4-way handshake, RST packet or timeout could be the reason. 84 * NOTE: In current implementation, mOS raises this event while destroying 85 * `struct tcp_stream`. There is possibility of false-positive especially 86 * when mOS is running out of memory. */ 87 MOS_ON_CONN_END = (0x1<<10), 88 89 /** This event is for debugging. We can easily mute this later. */ 90 MOS_ON_DEBUG_MESSAGE = (0x1<<11), 91 }; 92 93 #if 0 94 /* This may go away in future revisions */ 95 typedef union event_data { 96 uint32_t u32; 97 uint64_t u64; 98 void *ptr; 99 } event_data_t; 100 #endif 101 102 /* Macros for updating packet context */ 103 #define MOS_ETH_HDR (1 << 0) 104 #define MOS_IP_HDR (1 << 1) 105 #define MOS_TCP_HDR (1 << 2) 106 #define MOS_TCP_PAYLOAD (1 << 3) 107 #define MOS_UPDATE_IP_CHKSUM (1 << 4) 108 #define MOS_UPDATE_TCP_CHKSUM (1 << 5) 109 #define MOS_DROP (1 << 6) 110 #define MOS_OVERWRITE (1 << 7) 111 #define MOS_CHOMP (1 << 8) 112 #define MOS_INSERT (1 << 9) 113 114 /** 115 * struct pkt_info is the struct that is actually 116 * exposed to the monitor application. 117 * 118 * NOTE: When you retrieve the packet information using mtcp_getlastpkt() 119 * via MOS_SOCK_MONITOR_RAW socket, you can only use up to L3 information. 120 * (cur_ts, eth_len, ip_len, ethh, iph) 121 */ 122 struct pkt_info { 123 uint32_t cur_ts; /**< packet receiving time (read-only:ro) */ 124 125 /* ETH */ 126 uint16_t eth_len; 127 128 /* IP */ 129 uint16_t ip_len; 130 131 /* TCP */ 132 uint64_t offset; /**< TCP ring buffer offset */ 133 uint16_t payloadlen; 134 uint32_t seq; 135 uint32_t ack_seq; 136 uint16_t window; 137 138 /* ~~ 28 byte boundary ~~ */ 139 140 /* 141 * CAUTION!!! 142 * It is extremely critical that the last 5 fields (ethh .. frame) 143 * are always placed at the end of the definition. MOS relies on 144 * this specific arrangement when it is creating a new instantiation 145 * of pctx during mtcp_getlastpkt() invocation. 146 */ 147 struct ethhdr *ethh; 148 struct iphdr *iph; 149 struct tcphdr *tcph; 150 uint8_t *payload; 151 }; 152 153 /** 154 * PACKET CONTEXT is the packet structure that goes through 155 * the mOS core... 156 */ 157 struct pkt_ctx { 158 struct pkt_info p; 159 160 int8_t direction; /**< where does this packet originate from? (ro)*/ 161 uint8_t forward; /**< 0: drop, 1: forward to out_ifidx (rw) */ 162 int8_t in_ifidx; /**< input interface (ro) */ 163 int8_t out_ifidx; /**< output interface (rw) */ 164 int8_t batch_index; /**< index of packet in the rx batch */ 165 /* ~~ 64 byte boundary ~~ */ 166 }; 167 #define PKT_INFO_LEN offsetof(struct pkt_info, ethh) 168 169 /* 170 * Sequence number change structure. 171 * Used for MOS_SEQ_REMAP. 172 */ 173 typedef struct { 174 int64_t seq_off; /* the amount of sequence number drift */ 175 int side; /* which side does this sequence number change apply to? */ 176 uint32_t base_seq; /* seq # of the flow where the actual sequence # translation starts */ 177 } seq_remap_info; 178 179 typedef struct filter_arg { 180 void *arg; 181 size_t len; 182 } filter_arg_t; 183 184 /** 185 * The available level number in the POSIX library for sockets is 186 * on SOL_SOCKET 187 */ 188 #ifndef SOL_SOCKET 189 /* Level number for (get/set)sockopt() to apply to socket itself. */ 190 #define SOL_SOCKET 0xffff /* options for socket level */ 191 #endif 192 #define SOL_MONSOCKET 0xfffe /* MOS monitor socket level */ 193 194 /** 195 * MOS monitor socket option names (and values) 196 * This will contain options pertaining to monitor stream sockets 197 * 198 * MOS_FRAGINFO_CLIBUF : Gives back offsets to fragments of buffers 199 * (optname) currently stored in client's TCP ring buffer. 200 * (getsockopt) 201 * 202 * MOS_FRAGINFO_SVRBUF : Gives back offsets to fragments of buffers 203 * (optname) currently stored in server's TCP ring buffer. 204 * (getsockopt) 205 * 206 * MOS_INFO_CLIBUF : Gives back tcp info for client-side ring buffer. 207 * (optname) (getsockopt) 208 * 209 * MOS_INFO_SVRBUF : Gives back tcp info for server-side ring buffer. 210 * (optname) (getsockopt) 211 * 212 * MOS_TCP_STATE_CLI : Retrieves current TCP state for client side 213 * (optname) (getsockopt) 214 * 215 * MOS_TCP_STATE_SVR : Retrieves current TCP state for server side 216 * (optname) (getsockopt) 217 * 218 * MOS_TIMESTAMP : Retrieves timestamp of last packet seen for 219 * (optname) given flow. (in usecs) 220 * (getsockopt) 221 * 222 * MOS_SEQ_REMAP : Changes the sequence number change 223 * (optname) (setsockopt) 224 * 225 * MOS_STOP_MON : Stop monitoring 226 * (optname) (setsockopt) 227 */ 228 enum mos_socket_opts { 229 MOS_FRAGINFO_CLIBUF = 0x01, 230 MOS_FRAGINFO_SVRBUF = 0x02, 231 MOS_INFO_CLIBUF = 0x03, 232 MOS_INFO_SVRBUF = 0x04, 233 MOS_TCP_STATE_CLI = 0x05, 234 MOS_TCP_STATE_SVR = 0x06, 235 MOS_TIMESTAMP = 0x07, 236 MOS_MONLEVEL = 0x08, 237 MOS_CLIBUF = 0x09, 238 MOS_SVRBUF = 0x0a, 239 MOS_SEQ_REMAP = 0x0b, 240 MOS_STOP_MON = 0x0c, 241 MOS_FRAG_CLIBUF = 0x0d, 242 MOS_FRAG_SVRBUF = 0x0e, 243 #ifdef OLD_API 244 MOS_NO_CLIBUF = 0x0f, 245 MOS_NO_SVRBUF = 0x10, 246 #endif 247 }; 248 249 /** 250 * MOS tcp buf info structure. 251 * Used by the monitor application to retreive 252 * tcp_stream-related info. Usually called via 253 * getsockopt() function 254 */ 255 struct tcp_buf_info { 256 /** The initial TCP sequence number of TCP ring buffer. */ 257 uint32_t tcpbi_init_seq; 258 /** TCP sequence number of the 'last byte of payload that has 259 * already been read by the end application' (applies in the case 260 * of embedded monitor setup) 261 */ 262 uint32_t tcpbi_last_byte_read; 263 /** TCP sequence number of the 'last byte of the payload that 264 * is currently buffered and needs to be read by the end 265 * application' (applies in the case of embedded monitor setup). 266 * 267 * In case of standalone monitors, tcpbi_last_byte_read = 268 * tcpbi_next_byte_expected 269 */ 270 uint32_t tcpbi_next_byte_expected; 271 /** TCP sequence number of the 'last byte of the payload that 272 * is currently stored' in the TCP ring buffer. This value 273 * may be greater than tcpbi_next_byte_expected if packets 274 * arrive out of order. 275 */ 276 uint32_t tcpbi_last_byte_received; 277 }; 278 279 /** Structure to expose TCP ring buffer's fragment information. */ 280 struct tcp_ring_fragment { 281 uint64_t offset; 282 uint32_t len; 283 }; 284 285 /** 286 * mOS tcp stream states. 287 * used by the monitor application to retreive 288 * tcp_stream-state info. Usually called via 289 * getsockopt() function 290 */ 291 enum tcpstate 292 { 293 TCP_CLOSED = 0, 294 TCP_LISTEN = 1, 295 TCP_SYN_SENT = 2, 296 TCP_SYN_RCVD = 3, 297 TCP_ESTABLISHED = 4, 298 TCP_FIN_WAIT_1 = 5, 299 TCP_FIN_WAIT_2 = 6, 300 TCP_CLOSE_WAIT = 7, 301 TCP_CLOSING = 8, 302 TCP_LAST_ACK = 9, 303 TCP_TIME_WAIT = 10 304 }; 305 306 /** Definition of event type */ 307 typedef uint64_t event_t; 308 309 /** Definition of monitor side */ 310 enum {MOS_SIDE_CLI=0, MOS_SIDE_SVR, MOS_SIDE_BOTH}; 311 312 /* mos callback/filter function type definition */ 313 /** Prototype of callback function */ 314 typedef void (*callback_t)(mctx_t mctx, int sock, int side, 315 event_t event, filter_arg_t *arg); 316 /** Prototype of UDE's filter function */ 317 typedef bool (*filter_t)(mctx_t mctx, int sock, int side, 318 event_t event, filter_arg_t *arg); 319 320 /*----------------------------------------------------------------------------*/ 321 /* Definition of monitor_filter type */ 322 union monitor_filter { 323 /** For MOS_SOCK_MONITOR_RAW type socket **/ 324 char *raw_pkt_filter; 325 /** For MOS_SOCK_MONITOR_STREAM type socket **/ 326 struct { 327 char *stream_syn_filter; 328 char *stream_orphan_filter; 329 }; 330 }; 331 typedef union monitor_filter *monitor_filter_t; 332 333 /* Assign an address range (specified by ft) to monitor via sock 334 * 335 * (1) If sock is MOS_SOCK_MONITOR_RAW type, ft.raw_pkt_filter is applied to 336 * every packet coming in. 337 * (2) If sock is MOS_SOCK_MONITOR_STREAM type, 338 * ft.stream_syn_filter is applied to the first SYN pkt of the flow. 339 * (The succeeding packets of that flow will bypass the filter operation.) 340 * ft.stream_orphan_filter is applied to the pkts that don't belong to any 341 * of the existing TCP streams which are being monitored. 342 * (e.g., non-SYN pkt with no identified flow) 343 * [*] ft.stream_syn_filter and ft.stream_orphan_filter should be consisted 344 * only of the following keywords: 345 * - 'tcp, 'host', 'src', 'dst', 'net', 'mask', 'port', 'portrange' 346 * - 'and', 'or', '&', '|' 347 * 348 * @param [in] mctx: mtcp context 349 * @param [in] sock: socket id (should be MOS_SOCK_MONITOR_RAW 350 * or MOS_SOCK_MONITOR_STREAM type) 351 * @param [in] cf: Describe a set of connections to accept 352 * in a BPF (Berkerley Packet Filter) format 353 * NULL if you want to monitor any packet 354 * @return zero on success, -1 on error 355 */ 356 int 357 mtcp_bind_monitor_filter(mctx_t mctx, int sock, monitor_filter_t ft); 358 /*----------------------------------------------------------------------------*/ 359 360 /** Register a callback function in hook_point 361 * @param [in] mctx: mtcp context 362 * @param [in] sock: socket id 363 * @param [in] event: event id 364 * @param [in] hook_point: MOS_HK_RCV, MOS_HK_SND, MOS_DONTCARE 365 * @param [in] cb: callback fucntion 366 * @return zero on success, -1 on error 367 * 368 * (both for packet-level and flow-level) for events in hook_point 369 */ 370 int 371 mtcp_register_callback(mctx_t mctx, int sock, event_t event, 372 int hook_point, callback_t cb); 373 374 /** Remove registered callback functions 375 * @param [in] mctx: mtcp context 376 * @param [in] sock: socket id 377 * @param [in] event: event id 378 * @param [in] hook_point: MOS_HK_RCV, MOS_HK_SND, MOS_NULL 379 * @return zero on success, -1 on error 380 * 381 * (both for packet-level and flow-level) for events in hook_point 382 */ 383 //int 384 //mtcp_unregister_callback(mctx_t mctx, int sock, event_t event, 385 // int hook_point); 386 387 /** Allocate a child event 388 * @param [in] event: event id 389 * @return new event id on success, 0 on error 390 */ 391 event_t 392 mtcp_alloc_event(event_t event); 393 394 /** Define a user-defined event function 395 * @param [in] event: event id 396 * @param [in] filter: filter fucntion for new event 397 * @param [in] arg: a filter argument to be delivered to the filter 398 * @return new event id on success, 0 on error 399 * 400 * (both for packet-level and flow-level) 401 */ 402 event_t 403 mtcp_define_event(event_t event, filter_t filter, struct filter_arg *arg); 404 405 /** Raise a event 406 * @param [in] mctx: mtcp context 407 * @param [in] event: event id 408 * @return 0 on success, -1 on error 409 */ 410 int 411 mtcp_raise_event(mctx_t mctx, event_t event); 412 413 /* 414 * Callback only functions 415 */ 416 417 /** Set user-level context 418 * (e.g., to store any per-flow user-defined meatadata) 419 * @param [in] mctx: mtcp context 420 * @param [in] sock: the monitor socket id 421 * @param [in] uctx: user-level context 422 */ 423 void 424 mtcp_set_uctx(mctx_t mctx, int sock, void *uctx); 425 426 /** Get user-level context 427 * (e.g., to retrieve user-defined metadata stored in mtcp_set_uctx()) 428 * @param [in] mctx: mtcp context 429 * @param [in] sock: the monitor socket id 430 * @return user-level context for input flow_ocntext 431 */ 432 void * 433 mtcp_get_uctx(mctx_t mctx, int sock); 434 435 /** Peeking bytestream from flow_context 436 * @param [in] mctx: mtcp context 437 * @param [in] sock: monitoring stream socket id 438 * @param [in] side: side of monitoring (client side, server side or both) 439 * @param [in] buf: buffer for read byte stream 440 * @param [in] len: requested length 441 * 442 * It will return the number of bytes actually read. 443 * It will return -1 if there is an error 444 */ 445 ssize_t 446 mtcp_peek(mctx_t mctx, int sock, int side, 447 char *buf, size_t len); 448 449 /** 450 * The mtcp_ppeek() function reads up to count bytes from the TCP ring 451 * buffer of the monitor socket sock in mctx into buf, starting from 452 * the TCP sequence number seq_num. 453 * Note that seq_num can point the data in the fragmented buffer list 454 * of the TCP ring buffer. If there is no received byte with TCP sequence 455 * number seq_num in the TCP ring buffer, it returns error. If there are 456 * received bytes starting from seq_num, count is set to be the number 457 * of bytes read from the buffer. After mtcp_ppeek(), the data in the 458 * TCP ring buffer will not be flushed, and the monitor offset used by 459 * mtcp_peek() is not changed. 460 * 461 * @param [in] mctx: mtcp context 462 * @param [in] sock: monitoring stream socket id 463 * @param [in] side: side of monitoring (client side, server side or both) 464 * @param [in] buf: buffer for read byte stream 465 * @param [in] count: No. of bytes to be read 466 * @param [in] seq_num: byte offset of the TCP bytestream (absolute offset: offset 0 = init_seq_num) 467 * @return # of bytes actually read on success, -1 for error 468 */ 469 ssize_t mtcp_ppeek(mctx_t mctx, int sock, int side, 470 char *buf, size_t count, uint64_t off); 471 472 /* Use this macro to copy packets when mtcp_getlastpkt is called */ 473 #define MTCP_CB_GETCURPKT_CREATE_COPY 474 475 /** Get current packet of mtcp context 476 * @param [in] mctx: mTCP/mOS context 477 * @param [in] sock: monitoring stream socket id 478 * @param [in] side: side of monitoring 479 * (MOS_NULL for MOS_SOCK_MONITOR_RAW socket) 480 * @param [in] p: ptr to packet info ptr 481 * (only L2-L3 information is available for MOS_SOCK_MONITOR_RAW socket) 482 * @return 0 on success, -1 on failure 483 * This is useful for running callback-only applications 484 */ 485 int 486 mtcp_getlastpkt(mctx_t mctx, int sock, int side, struct pkt_info *p); 487 488 /** Register user's custom timer 489 * @param [in] mctx: mtcp context 490 * @param [in] id: timer id 491 * @param [in] timeout: timeout length 492 * @param [in] cb: callback function 493 */ 494 int 495 mtcp_settimer(mctx_t mctx, int id, struct timeval *timeout, callback_t cb); 496 497 /** A sibling function to mtcp_settimer that returns 498 * the current timestamp of the machine in microseconds. 499 * This avoids the monitor application to call current 500 * time getter functions (e.g. gettimeofday) that may 501 * incur overhead. 502 * 503 * @param [in] mctx: mtcp context 504 * Returns timestamp on success, 0 on failure. 505 */ 506 uint32_t 507 mtcp_cb_get_ts(mctx_t mctx); 508 509 /** Pause mtcp application context since it is not running anything 510 * @param [in] mctx: mtcp context 511 * 512 * This is useful for running callback-only applications 513 */ 514 void 515 mtcp_app_join(mctx_t mctx); 516 517 /** Get IP addrs/ports for both sides. 518 * (Server IP/port in 0th element) (Client IP/port in 1st element) 519 * Should only be called with MOS_SOCK_MONITOR_STREAM_ACTIVE socket 520 * _NOTE_: Code is currently not set for MOS_SOCK_STREAM!!! 521 * Returns 0 on success, -1 on failure 522 */ 523 int 524 mtcp_getpeername(mctx_t mctx, int sock, struct sockaddr *saddr, socklen_t *addrlen, int side); 525 526 /** 527 * Updates the Ethernet frame at a given offset across 528 * datalen bytes. 529 * 530 * @param [in] mctx: mtcp context 531 * @param [in] sock: monitoring socket 532 * @param [in] side: monitoring side 533 * (MOS_NULL for MOS_SOCK_MONITOR_RAW socket) 534 * @param [in] offset: the offset from where the data needs to be written 535 * @param [in] data: the data buffer that needs to be written 536 * @param [in] datalen: the length of data that needs to be written 537 * @param [in] option: disjunction of MOS_ETH_HDR, MOS_IP_HDR, MOS_TCP_HDR, 538 * MOS_TCP_PAYLOAD, MOS_DROP_PKT, MOS_UPDATE_TCP_CHKSUM, 539 * MOS_UPDATE_IP_CHKSUM 540 * @return Returns 0 on success, -1 on failure 541 * 542 * If you want to chomp/insert something in the payload: 543 * (i) first update the ip header to adjust iph->tot_len field; (MOS_OVERWRITE) 544 * (ii) then update the tcp payload accordingly (MOS_CHOMP or MOS_INSERT) 545 * 546 * MOS_DROP, MOS_OVERWRITE, MOS_CHOMP and MOS_INSERT are mutually 547 * exclusive operations 548 */ 549 int 550 mtcp_setlastpkt(mctx_t mctx, int sock, int side, off_t offset, 551 byte *data, uint16_t datalen, int option); 552 553 /** Drop current packet (don't forward it to the peer node) 554 * @param [in] mctx: mtcp context 555 * 556 * This is useful for running callback-only applications 557 * This function is now deprecated... 558 */ 559 //int 560 //mtcp_cb_dropcurpkt(mctx_t mctx); 561 562 /* Reset the connection (send RST to both sides) 563 * (This API will be updated after discussion.) 564 */ 565 int 566 mtcp_reset_conn(mctx_t mctx, int sock); 567 568 int 569 mtcp_set_debug_string(mtcp_manager_t mtcp, const char *fmt, ...); 570 571 int 572 mtcp_get_debug_string(mctx_t mctx, char *buf, int len); 573 574 #endif /* __MOS_API_H_ */ 575