1 #ifndef __MOS_API_H_ 2 #define __MOS_API_H_ 3 4 #ifdef DARWIN 5 #include <netinet/tcp.h> 6 #include <netinet/if_ether.h> 7 #else 8 #include <linux/tcp.h> 9 #include <linux/if_ether.h> 10 #endif 11 #include <netinet/in.h> 12 #include <arpa/inet.h> 13 #include <netinet/ip.h> 14 #include <stddef.h> /* for offsetof */ 15 #include "mtcp_epoll.h" 16 #include <stdbool.h> 17 18 #ifndef __MTCP_MANAGER 19 #define __MTCP_MANAGER 20 typedef struct mtcp_manager * mtcp_manager_t; 21 #endif 22 #ifndef __SOCKET_MAP 23 #define __SOCKET_MAP 24 typedef struct socket_map * socket_map_t; 25 #endif 26 27 /** Available hooking points */ 28 enum mtcp_hook_point 29 { 30 /* NOTE: The value of hooking points should not overlap with any of 31 * mos_event_types */ 32 33 /** Very first hooking point of incoming packet even before flow 34 * identification*/ 35 MOS_NULL = (1 << 29), 36 /** Hooking point before TCP receiver */ 37 MOS_HK_RCV = (1 << 30), 38 /** Hooking point after TCP sender */ 39 MOS_HK_SND = (1 << 31), 40 }; 41 42 /** Built-in events provided by mOS */ 43 enum mos_event_type 44 { 45 /** invalid event */ 46 MOS_NULL_EVENT = (0), 47 /* mos-defined tcp build-in events */ 48 /** A packet is coming in. */ 49 MOS_ON_PKT_IN = (0x1<<0), 50 /** A packet is going out. */ 51 /* THIS EVENT IS NOW DEPRECATED (USED ONLY FOR DEBUGGING) */ 52 MOS_ON_PKT_OUT = (0x1<<1), 53 /** SYN packet as seen by the monitor 54 * client side: activated when the client state is set to SYN_SENT 55 * server side: activated when the server state is set to SYN_RCVD 56 * 57 * Retransmitted SYN packets don't activate this event. 58 */ 59 MOS_ON_CONN_START = (0x1<<2), 60 /** 3-way handshake is finished. 61 * server side: ACK is coming in as a response of SYNACK. 62 * client side: SYNACK is coming in as a response of SYN. */ 63 /* THIS EVENT IS NOW DEPRECATED */ 64 MOS_ON_CONN_SETUP = (0x1<<3), 65 /** New data is now readable. 66 * This event is available in only MOS_NULL hook point. 67 * mOS raises this event only once while batched packet processing. */ 68 MOS_ON_CONN_NEW_DATA = (0x1<<4), 69 /** Abnormal behavior is detected. 70 * NOTE: This is not fully implemented yet. */ 71 MOS_ON_ERROR = (0x1<<5), 72 /** No packet is seen for a long time. 73 * This is implemented as mtcp_cb_settimer() 74 */ 75 MOS_ON_TIMEOUT = (0x1<<6), 76 /** TCP state is being changed. */ 77 MOS_ON_TCP_STATE_CHANGE = (0x1<<7), 78 /** A packet is not SYN and has no identified flow. */ 79 MOS_ON_ORPHAN = (0x1<<8), 80 /** Retransmission is detected */ 81 MOS_ON_REXMIT = (0x1<<9), 82 /** A flow is about to be destroyed. 83 * 4-way handshake, RST packet or timeout could be the reason. 84 * NOTE: In current implementation, mOS raises this event while destroying 85 * `struct tcp_stream`. There is possibility of false-positive especially 86 * when mOS is running out of memory. */ 87 MOS_ON_CONN_END = (0x1<<10), 88 89 /** This event is for debugging. We can easily mute this later. */ 90 MOS_ON_DEBUG_MESSAGE = (0x1<<11), 91 }; 92 93 #if 0 94 /* This may go away in future revisions */ 95 typedef union event_data { 96 uint32_t u32; 97 uint64_t u64; 98 void *ptr; 99 } event_data_t; 100 #endif 101 102 /* Macros for updating packet context */ 103 #define MOS_ETH_HDR (1 << 0) 104 #define MOS_IP_HDR (1 << 1) 105 #define MOS_TCP_HDR (1 << 2) 106 #define MOS_TCP_PAYLOAD (1 << 3) 107 #define MOS_UPDATE_IP_CHKSUM (1 << 4) 108 #define MOS_UPDATE_TCP_CHKSUM (1 << 5) 109 #define MOS_DROP (1 << 6) 110 #define MOS_OVERWRITE (1 << 7) 111 #define MOS_CHOMP (1 << 8) 112 #define MOS_INSERT (1 << 9) 113 114 /** 115 * struct pkt_info is the struct that is actually 116 * exposed to the monitor application. 117 * 118 * NOTE: When you retrieve the packet information using mtcp_getlastpkt() 119 * via MOS_SOCK_MONITOR_RAW socket, you can only use up to L3 information. 120 * (cur_ts, eth_len, ip_len, ethh, iph) 121 */ 122 struct pkt_info { 123 uint32_t cur_ts; /**< packet receiving time (read-only:ro) */ 124 int8_t in_ifidx; /**< input interface (ro) */ 125 126 /* ETH */ 127 uint16_t eth_len; 128 129 /* IP */ 130 uint16_t ip_len; 131 132 /* TCP */ 133 uint64_t offset; /**< TCP ring buffer offset */ 134 uint16_t payloadlen; 135 uint32_t seq; 136 uint32_t ack_seq; 137 uint16_t window; 138 139 /* ~~ 28 byte boundary ~~ */ 140 141 /* 142 * CAUTION!!! 143 * It is extremely critical that the last 5 fields (ethh .. frame) 144 * are always placed at the end of the definition. MOS relies on 145 * this specific arrangement when it is creating a new instantiation 146 * of pctx during mtcp_getlastpkt() invocation. 147 */ 148 struct ethhdr *ethh; 149 struct iphdr *iph; 150 struct tcphdr *tcph; 151 uint8_t *payload; 152 }; 153 154 /** 155 * PACKET CONTEXT is the packet structure that goes through 156 * the mOS core... 157 */ 158 struct pkt_ctx { 159 struct pkt_info p; 160 161 int8_t direction; /**< where does this packet originate from? (ro)*/ 162 uint8_t forward; /**< 0: drop, 1: forward to out_ifidx (rw) */ 163 int8_t out_ifidx; /**< output interface (rw) */ 164 int8_t batch_index; /**< index of packet in the rx batch */ 165 /* ~~ 64 byte boundary ~~ */ 166 }; 167 #define PKT_INFO_LEN offsetof(struct pkt_info, ethh) 168 169 /* 170 * Sequence number change structure. 171 * Used for MOS_SEQ_REMAP. 172 */ 173 typedef struct { 174 int64_t seq_off; /* the amount of sequence number drift */ 175 int side; /* which side does this sequence number change apply to? */ 176 uint32_t base_seq; /* seq # of the flow where the actual sequence # translation starts */ 177 } seq_remap_info; 178 179 typedef struct filter_arg { 180 void *arg; 181 size_t len; 182 } filter_arg_t; 183 184 /** 185 * The available level number in the POSIX library for sockets is 186 * on SOL_SOCKET 187 */ 188 #ifndef SOL_SOCKET 189 /* Level number for (get/set)sockopt() to apply to socket itself. */ 190 #define SOL_SOCKET 0xffff /* options for socket level */ 191 #endif 192 #define SOL_MONSOCKET 0xfffe /* MOS monitor socket level */ 193 194 /** 195 * MOS monitor socket option names (and values) 196 * This will contain options pertaining to monitor stream sockets 197 * 198 * MOS_FRAGINFO_CLIBUF : Gives back offsets to fragments of buffers 199 * (optname) currently stored in client's TCP ring buffer. 200 * (getsockopt) 201 * 202 * MOS_FRAGINFO_SVRBUF : Gives back offsets to fragments of buffers 203 * (optname) currently stored in server's TCP ring buffer. 204 * (getsockopt) 205 * 206 * MOS_INFO_CLIBUF : Gives back tcp info for client-side ring buffer. 207 * (optname) (getsockopt) 208 * 209 * MOS_INFO_SVRBUF : Gives back tcp info for server-side ring buffer. 210 * (optname) (getsockopt) 211 * 212 * MOS_TCP_STATE_CLI : Retrieves current TCP state for client side 213 * (optname) (getsockopt) 214 * 215 * MOS_TCP_STATE_SVR : Retrieves current TCP state for server side 216 * (optname) (getsockopt) 217 * 218 * MOS_TIMESTAMP : Retrieves timestamp of last packet seen for 219 * (optname) given flow. (in usecs) 220 * (getsockopt) 221 * 222 * MOS_SEQ_REMAP : Changes the sequence number change 223 * (optname) (setsockopt) 224 * 225 * MOS_STOP_MON : Stop monitoring 226 * (optname) (setsockopt) 227 */ 228 enum mos_socket_opts { 229 MOS_FRAGINFO_CLIBUF = 0x01, 230 MOS_FRAGINFO_SVRBUF = 0x02, 231 MOS_INFO_CLIBUF = 0x03, 232 MOS_INFO_SVRBUF = 0x04, 233 MOS_TCP_STATE_CLI = 0x05, 234 MOS_TCP_STATE_SVR = 0x06, 235 MOS_TIMESTAMP = 0x07, 236 MOS_MONLEVEL = 0x08, 237 MOS_CLIBUF = 0x09, 238 MOS_SVRBUF = 0x0a, 239 MOS_SEQ_REMAP = 0x0b, 240 MOS_STOP_MON = 0x0c, 241 MOS_FRAG_CLIBUF = 0x0d, 242 MOS_FRAG_SVRBUF = 0x0e, 243 MOS_CLIOVERLAP = 0x0f, 244 MOS_SVROVERLAP = 0x10, 245 #ifdef OLD_API 246 MOS_NO_CLIBUF = 0x0f, 247 MOS_NO_SVRBUF = 0x10, 248 #endif 249 }; 250 251 /** 252 * MOS tcp buf info structure. 253 * Used by the monitor application to retreive 254 * tcp_stream-related info. Usually called via 255 * getsockopt() function 256 */ 257 struct tcp_buf_info { 258 /** The initial TCP sequence number of TCP ring buffer. */ 259 uint32_t tcpbi_init_seq; 260 /** TCP sequence number of the 'last byte of payload that has 261 * already been read by the end application' (applies in the case 262 * of embedded monitor setup) 263 */ 264 uint32_t tcpbi_last_byte_read; 265 /** TCP sequence number of the 'last byte of the payload that 266 * is currently buffered and needs to be read by the end 267 * application' (applies in the case of embedded monitor setup). 268 * 269 * In case of standalone monitors, tcpbi_last_byte_read = 270 * tcpbi_next_byte_expected 271 */ 272 uint32_t tcpbi_next_byte_expected; 273 /** TCP sequence number of the 'last byte of the payload that 274 * is currently stored' in the TCP ring buffer. This value 275 * may be greater than tcpbi_next_byte_expected if packets 276 * arrive out of order. 277 */ 278 uint32_t tcpbi_last_byte_received; 279 }; 280 281 /** Structure to expose TCP ring buffer's fragment information. */ 282 struct tcp_ring_fragment { 283 uint64_t offset; 284 uint32_t len; 285 }; 286 287 /** 288 * mOS tcp stream states. 289 * used by the monitor application to retreive 290 * tcp_stream-state info. Usually called via 291 * getsockopt() function 292 */ 293 enum tcpstate 294 { 295 TCP_CLOSED = 0, 296 TCP_LISTEN = 1, 297 TCP_SYN_SENT = 2, 298 TCP_SYN_RCVD = 3, 299 TCP_ESTABLISHED = 4, 300 TCP_FIN_WAIT_1 = 5, 301 TCP_FIN_WAIT_2 = 6, 302 TCP_CLOSE_WAIT = 7, 303 TCP_CLOSING = 8, 304 TCP_LAST_ACK = 9, 305 TCP_TIME_WAIT = 10 306 }; 307 308 /** mOS segment overlapping policies */ 309 enum { 310 MOS_OVERLAP_POLICY_FIRST=0, 311 MOS_OVERLAP_POLICY_LAST, 312 MOS_OVERLAP_CNT 313 }; 314 315 /** Definition of event type */ 316 typedef uint64_t event_t; 317 318 /** Definition of monitor side */ 319 enum {MOS_SIDE_CLI=0, MOS_SIDE_SVR, MOS_SIDE_BOTH}; 320 321 /* mos callback/filter function type definition */ 322 /** Prototype of callback function */ 323 typedef void (*callback_t)(mctx_t mctx, int sock, int side, 324 event_t event, filter_arg_t *arg); 325 /** Prototype of UDE's filter function */ 326 typedef bool (*filter_t)(mctx_t mctx, int sock, int side, 327 event_t event, filter_arg_t *arg); 328 329 /*----------------------------------------------------------------------------*/ 330 /* Definition of monitor_filter type */ 331 union monitor_filter { 332 /** For MOS_SOCK_MONITOR_RAW type socket **/ 333 char *raw_pkt_filter; 334 /** For MOS_SOCK_MONITOR_STREAM type socket **/ 335 struct { 336 char *stream_syn_filter; 337 char *stream_orphan_filter; 338 }; 339 }; 340 typedef union monitor_filter *monitor_filter_t; 341 342 /* Assign an address range (specified by ft) to monitor via sock 343 * 344 * (1) If sock is MOS_SOCK_MONITOR_RAW type, ft.raw_pkt_filter is applied to 345 * every packet coming in. 346 * (2) If sock is MOS_SOCK_MONITOR_STREAM type, 347 * ft.stream_syn_filter is applied to the first SYN pkt of the flow. 348 * (The succeeding packets of that flow will bypass the filter operation.) 349 * ft.stream_orphan_filter is applied to the pkts that don't belong to any 350 * of the existing TCP streams which are being monitored. 351 * (e.g., non-SYN pkt with no identified flow) 352 * [*] ft.stream_syn_filter and ft.stream_orphan_filter should be consisted 353 * only of the following keywords: 354 * - 'tcp, 'host', 'src', 'dst', 'net', 'mask', 'port', 'portrange' 355 * - 'and', 'or', '&', '|' 356 * 357 * @param [in] mctx: mtcp context 358 * @param [in] sock: socket id (should be MOS_SOCK_MONITOR_RAW 359 * or MOS_SOCK_MONITOR_STREAM type) 360 * @param [in] cf: Describe a set of connections to accept 361 * in a BPF (Berkerley Packet Filter) format 362 * NULL if you want to monitor any packet 363 * @return zero on success, -1 on error 364 */ 365 int 366 mtcp_bind_monitor_filter(mctx_t mctx, int sock, monitor_filter_t ft); 367 /*----------------------------------------------------------------------------*/ 368 369 /** Register a callback function in hook_point 370 * @param [in] mctx: mtcp context 371 * @param [in] sock: socket id 372 * @param [in] event: event id 373 * @param [in] hook_point: MOS_HK_RCV, MOS_HK_SND, MOS_DONTCARE 374 * @param [in] cb: callback fucntion 375 * @return zero on success, -1 on error 376 * 377 * (both for packet-level and flow-level) for events in hook_point 378 */ 379 int 380 mtcp_register_callback(mctx_t mctx, int sock, event_t event, 381 int hook_point, callback_t cb); 382 383 /** Remove registered callback functions 384 * @param [in] mctx: mtcp context 385 * @param [in] sock: socket id 386 * @param [in] event: event id 387 * @param [in] hook_point: MOS_HK_RCV, MOS_HK_SND, MOS_NULL 388 * @return zero on success, -1 on error 389 * 390 * (both for packet-level and flow-level) for events in hook_point 391 */ 392 //int 393 //mtcp_unregister_callback(mctx_t mctx, int sock, event_t event, 394 // int hook_point); 395 396 /** Allocate a child event 397 * @param [in] event: event id 398 * @return new event id on success, 0 on error 399 */ 400 event_t 401 mtcp_alloc_event(event_t event); 402 403 /** Define a user-defined event function 404 * @param [in] event: event id 405 * @param [in] filter: filter fucntion for new event 406 * @param [in] arg: a filter argument to be delivered to the filter 407 * @return new event id on success, 0 on error 408 * 409 * (both for packet-level and flow-level) 410 */ 411 event_t 412 mtcp_define_event(event_t event, filter_t filter, struct filter_arg *arg); 413 414 /** Raise a event 415 * @param [in] mctx: mtcp context 416 * @param [in] event: event id 417 * @return 0 on success, -1 on error 418 */ 419 int 420 mtcp_raise_event(mctx_t mctx, event_t event); 421 422 /* 423 * Callback only functions 424 */ 425 426 /** Set user-level context 427 * (e.g., to store any per-flow user-defined meatadata) 428 * @param [in] mctx: mtcp context 429 * @param [in] sock: the monitor socket id 430 * @param [in] uctx: user-level context 431 */ 432 void 433 mtcp_set_uctx(mctx_t mctx, int sock, void *uctx); 434 435 /** Get user-level context 436 * (e.g., to retrieve user-defined metadata stored in mtcp_set_uctx()) 437 * @param [in] mctx: mtcp context 438 * @param [in] sock: the monitor socket id 439 * @return user-level context for input flow_ocntext 440 */ 441 void * 442 mtcp_get_uctx(mctx_t mctx, int sock); 443 444 /** Peeking bytestream from flow_context 445 * @param [in] mctx: mtcp context 446 * @param [in] sock: monitoring stream socket id 447 * @param [in] side: side of monitoring (client side, server side or both) 448 * @param [in] buf: buffer for read byte stream 449 * @param [in] len: requested length 450 * 451 * It will return the number of bytes actually read. 452 * It will return -1 if there is an error 453 */ 454 ssize_t 455 mtcp_peek(mctx_t mctx, int sock, int side, 456 char *buf, size_t len); 457 458 /** 459 * The mtcp_ppeek() function reads up to count bytes from the TCP ring 460 * buffer of the monitor socket sock in mctx into buf, starting from 461 * the TCP sequence number seq_num. 462 * Note that seq_num can point the data in the fragmented buffer list 463 * of the TCP ring buffer. If there is no received byte with TCP sequence 464 * number seq_num in the TCP ring buffer, it returns error. If there are 465 * received bytes starting from seq_num, count is set to be the number 466 * of bytes read from the buffer. After mtcp_ppeek(), the data in the 467 * TCP ring buffer will not be flushed, and the monitor offset used by 468 * mtcp_peek() is not changed. 469 * 470 * @param [in] mctx: mtcp context 471 * @param [in] sock: monitoring stream socket id 472 * @param [in] side: side of monitoring (client side, server side or both) 473 * @param [in] buf: buffer for read byte stream 474 * @param [in] count: No. of bytes to be read 475 * @param [in] seq_num: byte offset of the TCP bytestream (absolute offset: offset 0 = init_seq_num) 476 * @return # of bytes actually read on success, -1 for error 477 */ 478 ssize_t mtcp_ppeek(mctx_t mctx, int sock, int side, 479 char *buf, size_t count, uint64_t off); 480 481 /* Use this macro to copy packets when mtcp_getlastpkt is called */ 482 #define MTCP_CB_GETCURPKT_CREATE_COPY 483 484 /** Get current packet of mtcp context 485 * @param [in] mctx: mTCP/mOS context 486 * @param [in] sock: monitoring stream socket id 487 * @param [in] side: side of monitoring 488 * (MOS_NULL for MOS_SOCK_MONITOR_RAW socket) 489 * @param [in] p: ptr to packet info ptr 490 * (only L2-L3 information is available for MOS_SOCK_MONITOR_RAW socket) 491 * @return 0 on success, -1 on failure 492 * This is useful for running callback-only applications 493 */ 494 int 495 mtcp_getlastpkt(mctx_t mctx, int sock, int side, struct pkt_info *p); 496 497 /** Register user's custom timer 498 * @param [in] mctx: mtcp context 499 * @param [in] id: timer id 500 * @param [in] timeout: timeout length 501 * @param [in] cb: callback function 502 */ 503 int 504 mtcp_settimer(mctx_t mctx, int id, struct timeval *timeout, callback_t cb); 505 506 /** A sibling function to mtcp_settimer that returns 507 * the current timestamp of the machine in microseconds. 508 * This avoids the monitor application to call current 509 * time getter functions (e.g. gettimeofday) that may 510 * incur overhead. 511 * 512 * @param [in] mctx: mtcp context 513 * Returns timestamp on success, 0 on failure. 514 */ 515 uint32_t 516 mtcp_cb_get_ts(mctx_t mctx); 517 518 /** Pause mtcp application context since it is not running anything 519 * @param [in] mctx: mtcp context 520 * 521 * This is useful for running callback-only applications 522 */ 523 void 524 mtcp_app_join(mctx_t mctx); 525 526 /** Get IP addrs/ports for both sides. 527 * (Server IP/port in 0th element) (Client IP/port in 1st element) 528 * Should only be called with MOS_SOCK_MONITOR_STREAM_ACTIVE socket 529 * _NOTE_: Code is currently not set for MOS_SOCK_STREAM!!! 530 * Returns 0 on success, -1 on failure 531 */ 532 int 533 mtcp_getpeername(mctx_t mctx, int sock, struct sockaddr *saddr, socklen_t *addrlen, int side); 534 535 /** 536 * Updates the Ethernet frame at a given offset across 537 * datalen bytes. 538 * 539 * @param [in] mctx: mtcp context 540 * @param [in] sock: monitoring socket 541 * @param [in] side: monitoring side 542 * (MOS_NULL for MOS_SOCK_MONITOR_RAW socket) 543 * @param [in] offset: the offset from where the data needs to be written 544 * @param [in] data: the data buffer that needs to be written 545 * @param [in] datalen: the length of data that needs to be written 546 * @param [in] option: disjunction of MOS_ETH_HDR, MOS_IP_HDR, MOS_TCP_HDR, 547 * MOS_TCP_PAYLOAD, MOS_DROP_PKT, MOS_UPDATE_TCP_CHKSUM, 548 * MOS_UPDATE_IP_CHKSUM 549 * @return Returns 0 on success, -1 on failure 550 * 551 * If you want to chomp/insert something in the payload: 552 * (i) first update the ip header to adjust iph->tot_len field; (MOS_OVERWRITE) 553 * (ii) then update the tcp payload accordingly (MOS_CHOMP or MOS_INSERT) 554 * 555 * MOS_DROP, MOS_OVERWRITE, MOS_CHOMP and MOS_INSERT are mutually 556 * exclusive operations 557 */ 558 int 559 mtcp_setlastpkt(mctx_t mctx, int sock, int side, off_t offset, 560 byte *data, uint16_t datalen, int option); 561 562 /** Drop current packet (don't forward it to the peer node) 563 * @param [in] mctx: mtcp context 564 * 565 * This is useful for running callback-only applications 566 * This function is now deprecated... 567 */ 568 //int 569 //mtcp_cb_dropcurpkt(mctx_t mctx); 570 571 /* Reset the connection (send RST to both sides) 572 * (This API will be updated after discussion.) 573 */ 574 int 575 mtcp_reset_conn(mctx_t mctx, int sock); 576 577 int 578 mtcp_set_debug_string(mtcp_manager_t mtcp, const char *fmt, ...); 579 580 int 581 mtcp_get_debug_string(mctx_t mctx, char *buf, int len); 582 583 /**************************************************************************/ 584 /** Send a TCP packet of struct pkt_info 585 * @param [in] mctx: mTCP/mOS context 586 * @param [in] sock: monitoring stream socket id 587 * @param [in] pkt: ptr to packet info (e.g., captured by mtcp_getlastpkt) 588 * @return 0 on success, -1 on failure 589 * (NOTE: this function supports only TCP packet for now. 590 * we will add the support for any ethernet packets when required) 591 */ 592 int 593 mtcp_sendpkt(mctx_t mctx, int sock, const struct pkt_info *pkt); 594 595 /**************************************************************************/ 596 597 #endif /* __MOS_API_H_ */ 598