1 #ifndef __MOS_API_H_ 2 #define __MOS_API_H_ 3 4 #ifdef DARWIN 5 #include <netinet/tcp.h> 6 #include <netinet/if_ether.h> 7 #else 8 #include <linux/tcp.h> 9 #include <linux/if_ether.h> 10 #endif 11 #include <netinet/in.h> 12 #include <arpa/inet.h> 13 #include <netinet/ip.h> 14 #include <stddef.h> /* for offsetof */ 15 #include "mtcp_epoll.h" 16 #include <stdbool.h> 17 18 #ifndef __MTCP_MANAGER 19 #define __MTCP_MANAGER 20 typedef struct mtcp_manager * mtcp_manager_t; 21 #endif 22 #ifndef __SOCKET_MAP 23 #define __SOCKET_MAP 24 typedef struct socket_map * socket_map_t; 25 #endif 26 27 /** Available hooking points */ 28 enum mtcp_hook_point 29 { 30 /* NOTE: The value of hooking points should not overlap with any of 31 * mos_event_types */ 32 33 /** Very first hooking point of incoming packet even before flow 34 * identification*/ 35 MOS_NULL = (1 << 29), 36 /** Hooking point before TCP receiver */ 37 MOS_HK_RCV = (1 << 30), 38 /** Hooking point after TCP sender */ 39 MOS_HK_SND = (1 << 31), 40 }; 41 42 /** Built-in events provided by mOS */ 43 enum mos_event_type 44 { 45 /** invalid event */ 46 MOS_NULL_EVENT = (0), 47 /* mos-defined tcp build-in events */ 48 /** A packet is coming in. */ 49 MOS_ON_PKT_IN = (0x1<<0), 50 /** A packet is going out. */ 51 /* THIS EVENT IS NOW DEPRECATED (USED ONLY FOR DEBUGGING) */ 52 MOS_ON_PKT_OUT = (0x1<<1), 53 /** SYN packet as seen by the monitor 54 * client side: activated when the client state is set to SYN_SENT 55 * server side: activated when the server state is set to SYN_RCVD 56 * 57 * Retransmitted SYN packets don't activate this event. 58 */ 59 MOS_ON_CONN_START = (0x1<<2), 60 /** 3-way handshake is finished. 61 * server side: ACK is coming in as a response of SYNACK. 62 * client side: SYNACK is coming in as a response of SYN. */ 63 /* THIS EVENT IS NOW DEPRECATED */ 64 MOS_ON_CONN_SETUP = (0x1<<3), 65 /** New data is now readable. 66 * This event is available in only MOS_NULL hook point. 67 * mOS raises this event only once while batched packet processing. */ 68 MOS_ON_CONN_NEW_DATA = (0x1<<4), 69 /** Abnormal behavior is detected. 70 * NOTE: This is not fully implemented yet. */ 71 MOS_ON_ERROR = (0x1<<5), 72 /** No packet is seen for a long time. 73 * This is implemented as mtcp_cb_settimer() 74 */ 75 MOS_ON_TIMEOUT = (0x1<<6), 76 /** TCP state is being changed. */ 77 MOS_ON_TCP_STATE_CHANGE = (0x1<<7), 78 /** A packet is not SYN and has no identified flow. */ 79 MOS_ON_ORPHAN = (0x1<<8), 80 /** Retransmission is detected */ 81 MOS_ON_REXMIT = (0x1<<9), 82 /** A flow is about to be destroyed. 83 * 4-way handshake, RST packet or timeout could be the reason. 84 * NOTE: In current implementation, mOS raises this event while destroying 85 * `struct tcp_stream`. There is possibility of false-positive especially 86 * when mOS is running out of memory. */ 87 MOS_ON_CONN_END = (0x1<<10), 88 89 /** This event is for debugging. We can easily mute this later. */ 90 MOS_ON_DEBUG_MESSAGE = (0x1<<11), 91 }; 92 93 #if 0 94 /* This may go away in future revisions */ 95 typedef union event_data { 96 uint32_t u32; 97 uint64_t u64; 98 void *ptr; 99 } event_data_t; 100 #endif 101 102 /* Macros for updating packet context */ 103 #define MOS_ETH_HDR (1 << 0) 104 #define MOS_IP_HDR (1 << 1) 105 #define MOS_TCP_HDR (1 << 2) 106 #define MOS_TCP_PAYLOAD (1 << 3) 107 #define MOS_UPDATE_IP_CHKSUM (1 << 4) 108 #define MOS_UPDATE_TCP_CHKSUM (1 << 5) 109 #define MOS_DROP (1 << 6) 110 #define MOS_OVERWRITE (1 << 7) 111 #define MOS_CHOMP (1 << 8) 112 #define MOS_INSERT (1 << 9) 113 114 /** 115 * struct pkt_info is the struct that is actually 116 * exposed to the monitor application. 117 * 118 * NOTE: When you retrieve the packet information using mtcp_getlastpkt() 119 * via MOS_SOCK_MONITOR_RAW socket, you can only use up to L3 information. 120 * (cur_ts, eth_len, ip_len, ethh, iph) 121 */ 122 struct pkt_info { 123 uint32_t cur_ts; /**< packet receiving time (read-only:ro) */ 124 int8_t in_ifidx; /**< input interface (ro) */ 125 126 /* ETH */ 127 uint16_t eth_len; 128 129 /* IP */ 130 uint16_t ip_len; 131 132 /* TCP */ 133 uint64_t offset; /**< TCP ring buffer offset */ 134 uint16_t payloadlen; 135 uint32_t seq; 136 uint32_t ack_seq; 137 uint16_t window; 138 139 /* ~~ 28 byte boundary ~~ */ 140 141 /* 142 * CAUTION!!! 143 * It is extremely critical that the last 5 fields (ethh .. frame) 144 * are always placed at the end of the definition. MOS relies on 145 * this specific arrangement when it is creating a new instantiation 146 * of pctx during mtcp_getlastpkt() invocation. 147 */ 148 struct ethhdr *ethh; 149 struct iphdr *iph; 150 struct tcphdr *tcph; 151 uint8_t *payload; 152 }; 153 154 /** 155 * PACKET CONTEXT is the packet structure that goes through 156 * the mOS core... 157 */ 158 struct pkt_ctx { 159 struct pkt_info p; 160 161 int8_t direction; /**< where does this packet originate from? (ro)*/ 162 uint8_t forward; /**< 0: drop, 1: forward to out_ifidx (rw) */ 163 int8_t out_ifidx; /**< output interface (rw) */ 164 int8_t batch_index; /**< index of packet in the rx batch */ 165 /* ~~ 64 byte boundary ~~ */ 166 }; 167 #define PKT_INFO_LEN offsetof(struct pkt_info, ethh) 168 169 /* 170 * Sequence number change structure. 171 * Used for MOS_SEQ_REMAP. 172 */ 173 typedef struct { 174 int64_t seq_off; /* the amount of sequence number drift */ 175 int side; /* which side does this sequence number change apply to? */ 176 uint32_t base_seq; /* seq # of the flow where the actual sequence # translation starts */ 177 } seq_remap_info; 178 179 typedef struct filter_arg { 180 void *arg; 181 size_t len; 182 } filter_arg_t; 183 184 /** 185 * The available level number in the POSIX library for sockets is 186 * on SOL_SOCKET 187 */ 188 #ifndef SOL_SOCKET 189 /* Level number for (get/set)sockopt() to apply to socket itself. */ 190 #define SOL_SOCKET 0xffff /* options for socket level */ 191 #endif 192 #define SOL_MONSOCKET 0xfffe /* MOS monitor socket level */ 193 194 /** 195 * MOS monitor socket option names (and values) 196 * This will contain options pertaining to monitor stream sockets 197 * See mtcp_getsockopt() and mtcp_setsockopt() the mtcp_api.h file. 198 */ 199 enum mos_socket_opts { 200 MOS_FRAGINFO_CLIBUF = 0x01, 201 MOS_FRAGINFO_SVRBUF = 0x02, 202 MOS_INFO_CLIBUF = 0x03, 203 MOS_INFO_SVRBUF = 0x04, 204 MOS_TCP_STATE_CLI = 0x05, 205 MOS_TCP_STATE_SVR = 0x06, 206 MOS_CLIBUF = 0x09, 207 MOS_SVRBUF = 0x0a, 208 MOS_STOP_MON = 0x0c, 209 MOS_CLIOVERLAP = 0x0f, 210 MOS_SVROVERLAP = 0x10, 211 212 MOS_TIMESTAMP = 0x07, /* supressed (not used) */ 213 MOS_SEQ_REMAP = 0x0b, /* supressed (not used) */ 214 MOS_FRAG_CLIBUF = 0x0d, /* supressed (not used) */ 215 MOS_FRAG_SVRBUF = 0x0e, /* supressed (not used) */ 216 217 }; 218 219 /** 220 * MOS tcp buf info structure. 221 * Used by the monitor application to retreive 222 * tcp_stream-related info. Usually called via 223 * mtcp_getsockopt() function 224 */ 225 struct tcp_buf_info { 226 /** The initial TCP sequence number of TCP ring buffer. */ 227 uint32_t tcpbi_init_seq; 228 /** TCP sequence number of the 'last byte of payload that has 229 * already been read by the end application' (applies in the case 230 * of embedded monitor setup) 231 */ 232 uint32_t tcpbi_last_byte_read; 233 /** TCP sequence number of the 'last byte of the payload that 234 * is currently buffered and needs to be read by the end 235 * application' (applies in the case of embedded monitor setup). 236 * 237 * In case of standalone monitors, tcpbi_last_byte_read = 238 * tcpbi_next_byte_expected 239 */ 240 uint32_t tcpbi_next_byte_expected; 241 /** TCP sequence number of the 'last byte of the payload that 242 * is currently stored' in the TCP ring buffer. This value 243 * may be greater than tcpbi_next_byte_expected if packets 244 * arrive out of order. 245 */ 246 uint32_t tcpbi_last_byte_received; 247 }; 248 249 /** Structure to expose TCP ring buffer's fragment information. */ 250 struct tcp_ring_fragment { 251 uint64_t offset; 252 uint32_t len; 253 }; 254 255 /** 256 * mOS tcp stream states. 257 * used by the monitor application to retreive 258 * tcp_stream-state info. Usually called via 259 * getsockopt() function 260 */ 261 enum tcpstate 262 { 263 TCP_CLOSED = 0, 264 TCP_LISTEN = 1, 265 TCP_SYN_SENT = 2, 266 TCP_SYN_RCVD = 3, 267 TCP_ESTABLISHED = 4, 268 TCP_FIN_WAIT_1 = 5, 269 TCP_FIN_WAIT_2 = 6, 270 TCP_CLOSE_WAIT = 7, 271 TCP_CLOSING = 8, 272 TCP_LAST_ACK = 9, 273 TCP_TIME_WAIT = 10 274 }; 275 276 /** mOS segment overlapping policies */ 277 enum { 278 MOS_OVERLAP_POLICY_FIRST=0, 279 MOS_OVERLAP_POLICY_LAST, 280 MOS_OVERLAP_CNT 281 }; 282 283 /** Definition of event type */ 284 typedef uint64_t event_t; 285 286 /** Definition of monitor side */ 287 enum {MOS_SIDE_CLI=0, MOS_SIDE_SVR, MOS_SIDE_BOTH}; 288 289 /* mos callback/filter function type definition */ 290 /** Prototype of callback function */ 291 typedef void (*callback_t)(mctx_t mctx, int sock, int side, 292 event_t event, filter_arg_t *arg); 293 /** Prototype of UDE's filter function */ 294 typedef bool (*filter_t)(mctx_t mctx, int sock, int side, 295 event_t event, filter_arg_t *arg); 296 297 /*----------------------------------------------------------------------------*/ 298 /* Definition of monitor_filter type */ 299 union monitor_filter { 300 /** For MOS_SOCK_MONITOR_RAW type socket **/ 301 char *raw_pkt_filter; 302 /** For MOS_SOCK_MONITOR_STREAM type socket **/ 303 struct { 304 char *stream_syn_filter; 305 char *stream_orphan_filter; 306 }; 307 }; 308 typedef union monitor_filter *monitor_filter_t; 309 310 /* Assign an address range (specified by ft) to monitor via sock 311 * 312 * (1) If sock is MOS_SOCK_MONITOR_RAW type, ft.raw_pkt_filter is applied to 313 * every packet coming in. 314 * (2) If sock is MOS_SOCK_MONITOR_STREAM type, 315 * ft.stream_syn_filter is applied to the first SYN pkt of the flow. 316 * (The succeeding packets of that flow will bypass the filter operation.) 317 * ft.stream_orphan_filter is applied to the pkts that don't belong to any 318 * of the existing TCP streams which are being monitored. 319 * (e.g., non-SYN pkt with no identified flow) 320 * [*] ft.stream_syn_filter and ft.stream_orphan_filter should be consisted 321 * only of the following keywords: 322 * - 'tcp, 'host', 'src', 'dst', 'net', 'mask', 'port', 'portrange' 323 * - 'and', 'or', '&', '|' 324 * 325 * @param [in] mctx: mtcp context 326 * @param [in] sock: socket id (should be MOS_SOCK_MONITOR_RAW 327 * or MOS_SOCK_MONITOR_STREAM type) 328 * @param [in] cf: Describe a set of connections to accept 329 * in a BPF (Berkerley Packet Filter) format 330 * NULL if you want to monitor any packet 331 * @return zero on success, -1 on error 332 */ 333 int 334 mtcp_bind_monitor_filter(mctx_t mctx, int sock, monitor_filter_t ft); 335 /*----------------------------------------------------------------------------*/ 336 337 /** Register a callback function in hook_point 338 * @param [in] mctx: mtcp context 339 * @param [in] sock: socket id 340 * @param [in] event: event id 341 * @param [in] hook_point: MOS_HK_RCV, MOS_HK_SND, MOS_DONTCARE 342 * @param [in] cb: callback fucntion 343 * @return zero on success, -1 on error 344 * 345 * (both for packet-level and flow-level) for events in hook_point 346 */ 347 int 348 mtcp_register_callback(mctx_t mctx, int sock, event_t event, 349 int hook_point, callback_t cb); 350 351 /** Remove registered callback functions 352 * @param [in] mctx: mtcp context 353 * @param [in] sock: socket id 354 * @param [in] event: event id 355 * @param [in] hook_point: MOS_HK_RCV, MOS_HK_SND, MOS_NULL 356 * @return zero on success, -1 on error 357 * 358 * (both for packet-level and flow-level) for events in hook_point 359 */ 360 //int 361 //mtcp_unregister_callback(mctx_t mctx, int sock, event_t event, 362 // int hook_point); 363 364 /** Allocate a child event 365 * @param [in] event: event id 366 * @return new event id on success, 0 on error 367 */ 368 event_t 369 mtcp_alloc_event(event_t event); 370 371 /** Define a user-defined event function 372 * @param [in] event: event id 373 * @param [in] filter: filter fucntion for new event 374 * @param [in] arg: a filter argument to be delivered to the filter 375 * @return new event id on success, 0 on error 376 * 377 * (both for packet-level and flow-level) 378 */ 379 event_t 380 mtcp_define_event(event_t event, filter_t filter, struct filter_arg *arg); 381 382 /** Raise a event 383 * @param [in] mctx: mtcp context 384 * @param [in] event: event id 385 * @return 0 on success, -1 on error 386 */ 387 int 388 mtcp_raise_event(mctx_t mctx, event_t event); 389 390 /* 391 * Callback only functions 392 */ 393 394 /** Set user-level context 395 * (e.g., to store any per-flow user-defined meatadata) 396 * @param [in] mctx: mtcp context 397 * @param [in] sock: the monitor socket id 398 * @param [in] uctx: user-level context 399 */ 400 void 401 mtcp_set_uctx(mctx_t mctx, int sock, void *uctx); 402 403 /** Get user-level context 404 * (e.g., to retrieve user-defined metadata stored in mtcp_set_uctx()) 405 * @param [in] mctx: mtcp context 406 * @param [in] sock: the monitor socket id 407 * @return user-level context for input flow_ocntext 408 */ 409 void * 410 mtcp_get_uctx(mctx_t mctx, int sock); 411 412 /** Peeking bytestream from flow_context 413 * @param [in] mctx: mtcp context 414 * @param [in] sock: monitoring stream socket id 415 * @param [in] side: side of monitoring (client side, server side or both) 416 * @param [in] buf: buffer for read byte stream 417 * @param [in] len: requested length 418 * 419 * It will return the number of bytes actually read. 420 * It will return -1 if there is an error 421 */ 422 ssize_t 423 mtcp_peek(mctx_t mctx, int sock, int side, 424 char *buf, size_t len); 425 426 /** 427 * The mtcp_ppeek() function reads up to count bytes from the TCP ring 428 * buffer of the monitor socket sock in mctx into buf, starting from 429 * the TCP sequence number seq_num. 430 * Note that seq_num can point the data in the fragmented buffer list 431 * of the TCP ring buffer. If there is no received byte with TCP sequence 432 * number seq_num in the TCP ring buffer, it returns error. If there are 433 * received bytes starting from seq_num, count is set to be the number 434 * of bytes read from the buffer. After mtcp_ppeek(), the data in the 435 * TCP ring buffer will not be flushed, and the monitor offset used by 436 * mtcp_peek() is not changed. 437 * 438 * @param [in] mctx: mtcp context 439 * @param [in] sock: monitoring stream socket id 440 * @param [in] side: side of monitoring (client side, server side or both) 441 * @param [in] buf: buffer for read byte stream 442 * @param [in] count: No. of bytes to be read 443 * @param [in] seq_num: byte offset of the TCP bytestream (absolute offset: offset 0 = init_seq_num) 444 * @return # of bytes actually read on success, -1 for error 445 */ 446 ssize_t mtcp_ppeek(mctx_t mctx, int sock, int side, 447 char *buf, size_t count, uint64_t off); 448 449 /* Use this macro to copy packets when mtcp_getlastpkt is called */ 450 #define MTCP_CB_GETCURPKT_CREATE_COPY 451 452 /** Get current packet of mtcp context 453 * @param [in] mctx: mTCP/mOS context 454 * @param [in] sock: monitoring stream socket id 455 * @param [in] side: side of monitoring 456 * (MOS_NULL for MOS_SOCK_MONITOR_RAW socket) 457 * @param [in] p: ptr to packet info ptr 458 * (only L2-L3 information is available for MOS_SOCK_MONITOR_RAW socket) 459 * @return 0 on success, -1 on failure 460 * This is useful for running callback-only applications 461 */ 462 int 463 mtcp_getlastpkt(mctx_t mctx, int sock, int side, struct pkt_info *p); 464 465 /** Register user's custom timer 466 * @param [in] mctx: mtcp context 467 * @param [in] id: timer id 468 * @param [in] timeout: timeout length 469 * @param [in] cb: callback function 470 */ 471 int 472 mtcp_settimer(mctx_t mctx, int id, struct timeval *timeout, callback_t cb); 473 474 /** A sibling function to mtcp_settimer that returns 475 * the current timestamp of the machine in microseconds. 476 * This avoids the monitor application to call current 477 * time getter functions (e.g. gettimeofday) that may 478 * incur overhead. 479 * 480 * @param [in] mctx: mtcp context 481 * Returns timestamp on success, 0 on failure. 482 */ 483 uint32_t 484 mtcp_cb_get_ts(mctx_t mctx); 485 486 /** Pause mtcp application context since it is not running anything 487 * @param [in] mctx: mtcp context 488 * 489 * This is useful for running callback-only applications 490 */ 491 void 492 mtcp_app_join(mctx_t mctx); 493 494 /** Get IP addrs/ports for both sides. 495 * (Server IP/port in 0th element) (Client IP/port in 1st element) 496 * Should only be called with MOS_SOCK_MONITOR_STREAM_ACTIVE socket 497 * _NOTE_: Code is currently not set for MOS_SOCK_STREAM!!! 498 * Returns 0 on success, -1 on failure 499 */ 500 int 501 mtcp_getpeername(mctx_t mctx, int sock, struct sockaddr *saddr, socklen_t *addrlen, int side); 502 503 /** 504 * Updates the Ethernet frame at a given offset across 505 * datalen bytes. 506 * 507 * @param [in] mctx: mtcp context 508 * @param [in] sock: monitoring socket 509 * @param [in] side: monitoring side 510 * (MOS_NULL for MOS_SOCK_MONITOR_RAW socket) 511 * @param [in] offset: the offset from where the data needs to be written 512 * @param [in] data: the data buffer that needs to be written 513 * @param [in] datalen: the length of data that needs to be written 514 * @param [in] option: disjunction of MOS_ETH_HDR, MOS_IP_HDR, MOS_TCP_HDR, 515 * MOS_TCP_PAYLOAD, MOS_DROP_PKT, MOS_UPDATE_TCP_CHKSUM, 516 * MOS_UPDATE_IP_CHKSUM 517 * @return Returns 0 on success, -1 on failure 518 * 519 * If you want to chomp/insert something in the payload: 520 * (i) first update the ip header to adjust iph->tot_len field; (MOS_OVERWRITE) 521 * (ii) then update the tcp payload accordingly (MOS_CHOMP or MOS_INSERT) 522 * 523 * MOS_DROP, MOS_OVERWRITE, MOS_CHOMP and MOS_INSERT are mutually 524 * exclusive operations 525 */ 526 int 527 mtcp_setlastpkt(mctx_t mctx, int sock, int side, off_t offset, 528 byte *data, uint16_t datalen, int option); 529 530 /** Drop current packet (don't forward it to the peer node) 531 * @param [in] mctx: mtcp context 532 * 533 * This is useful for running callback-only applications 534 * This function is now deprecated... 535 */ 536 //int 537 //mtcp_cb_dropcurpkt(mctx_t mctx); 538 539 /* Reset the connection (send RST to both sides) 540 * (This API will be updated after discussion.) 541 */ 542 int 543 mtcp_reset_conn(mctx_t mctx, int sock); 544 545 int 546 mtcp_set_debug_string(mtcp_manager_t mtcp, const char *fmt, ...); 547 548 int 549 mtcp_get_debug_string(mctx_t mctx, char *buf, int len); 550 551 /**************************************************************************/ 552 /** Send a TCP packet of struct pkt_info 553 * @param [in] mctx: mTCP/mOS context 554 * @param [in] sock: monitoring stream socket id 555 * @param [in] pkt: ptr to packet info (e.g., captured by mtcp_getlastpkt) 556 * @return 0 on success, -1 on failure 557 * (NOTE: this function supports only TCP packet for now. 558 * we will add the support for any ethernet packets when required) 559 */ 560 int 561 mtcp_sendpkt(mctx_t mctx, int sock, const struct pkt_info *pkt); 562 563 /**************************************************************************/ 564 565 #endif /* __MOS_API_H_ */ 566