176404edcSAsim Jamshed #ifndef __TCP_STREAM_H_ 276404edcSAsim Jamshed #define __TCP_STREAM_H_ 376404edcSAsim Jamshed 476404edcSAsim Jamshed #ifdef DARWIN 576404edcSAsim Jamshed #include <netinet/tcp.h> 676404edcSAsim Jamshed #else 776404edcSAsim Jamshed #include <linux/tcp.h> 876404edcSAsim Jamshed #endif 976404edcSAsim Jamshed #include <netinet/ip.h> 1076404edcSAsim Jamshed #include <sys/queue.h> 1176404edcSAsim Jamshed 1276404edcSAsim Jamshed #include "mtcp.h" 1376404edcSAsim Jamshed #include "socket.h" 1476404edcSAsim Jamshed #include "memory_mgt.h" 1576404edcSAsim Jamshed #include "tcp_rb.h" 1676404edcSAsim Jamshed 1776404edcSAsim Jamshed #define TCP_MAX_SEQ 4294967295 1876404edcSAsim Jamshed 1976404edcSAsim Jamshed #define HAS_STREAM_TYPE(str, type) (str->stream_type & (1 << type)) 2076404edcSAsim Jamshed #define IS_STREAM_TYPE(str, type) (str->stream_type == (1 << type)) 2176404edcSAsim Jamshed #define STREAM_TYPE(type) (1 << type) 2276404edcSAsim Jamshed 2376404edcSAsim Jamshed /*----------------------------------------------------------------------------*/ 2476404edcSAsim Jamshed /** 2576404edcSAsim Jamshed * routines for traversing stream + raw sockets 2676404edcSAsim Jamshed */ 2776404edcSAsim Jamshed struct sockent { 2876404edcSAsim Jamshed struct socket_map *sock; 2976404edcSAsim Jamshed TAILQ_ENTRY(sockent) link; 3076404edcSAsim Jamshed }; 3176404edcSAsim Jamshed #define SOCKQ_HEAD(name) TAILQ_HEAD(name, sockent) 3276404edcSAsim Jamshed #define SOCKQ_INIT(head) TAILQ_INIT(head) 3376404edcSAsim Jamshed #define SOCKQ_INSERT_TAIL(head, socket) \ 3476404edcSAsim Jamshed do { \ 3576404edcSAsim Jamshed struct sockent *__s = \ 3676404edcSAsim Jamshed (struct sockent *)MPAllocateChunk(mtcp->sockent_pool); \ 3776404edcSAsim Jamshed if (__s) { \ 3876404edcSAsim Jamshed __s->sock = (socket); \ 3976404edcSAsim Jamshed TAILQ_INSERT_TAIL(head, __s, link); \ 4076404edcSAsim Jamshed } \ 4176404edcSAsim Jamshed } while (0) 4276404edcSAsim Jamshed #define SOCKQ_REMOVE(head, socket) \ 4376404edcSAsim Jamshed do { \ 4476404edcSAsim Jamshed struct sockent *__walk, *__temp; \ 4576404edcSAsim Jamshed for (__walk = TAILQ_FIRST(head); __walk != NULL; __walk = __temp) { \ 4676404edcSAsim Jamshed __temp = TAILQ_NEXT(__walk, link); \ 4776404edcSAsim Jamshed if ((socket) == __walk->sock) break; \ 4876404edcSAsim Jamshed } \ 4976404edcSAsim Jamshed if (__walk) { \ 5076404edcSAsim Jamshed TAILQ_REMOVE(head, __walk, link); \ 5176404edcSAsim Jamshed MPFreeChunk(mtcp->sockent_pool, __walk); \ 5276404edcSAsim Jamshed } \ 5376404edcSAsim Jamshed } while (0) 5476404edcSAsim Jamshed #define SOCKQ_FOREACH_START(var, head) \ 5576404edcSAsim Jamshed do { \ 5676404edcSAsim Jamshed struct sockent *__walk, *__temp; \ 5776404edcSAsim Jamshed for (__walk = ((head)->tqh_first); \ 5876404edcSAsim Jamshed ((var) = __walk ? __walk->sock : NULL, __walk); \ 5976404edcSAsim Jamshed __walk = __temp) { \ 6076404edcSAsim Jamshed __temp = ((__walk)->link.tqe_next); 61*a14d6bd4SAsim Jamshed #define SOCKQ_FOREACH_REVERSE(var, head) \ 62*a14d6bd4SAsim Jamshed do { \ 63*a14d6bd4SAsim Jamshed struct sockent *__walk, *__temp; \ 64*a14d6bd4SAsim Jamshed for (__walk = (*(((struct mlist *)((head)->tqh_last))->tqh_last)); \ 65*a14d6bd4SAsim Jamshed ((var) = __walk ? __walk->sock : NULL, __walk); \ 66*a14d6bd4SAsim Jamshed __walk = __temp) { \ 67*a14d6bd4SAsim Jamshed __temp = (*(((struct mlist *)((__walk)->link.tqe_prev))->tqh_last)); 6876404edcSAsim Jamshed #define SOCKQ_FOREACH_END }} while (0) 6976404edcSAsim Jamshed /*----------------------------------------------------------------------------*/ 7076404edcSAsim Jamshed 7176404edcSAsim Jamshed struct rtm_stat 7276404edcSAsim Jamshed { 7376404edcSAsim Jamshed uint32_t tdp_ack_cnt; 7476404edcSAsim Jamshed uint32_t tdp_ack_bytes; 7576404edcSAsim Jamshed uint32_t ack_upd_cnt; 7676404edcSAsim Jamshed uint32_t ack_upd_bytes; 7776404edcSAsim Jamshed #if TCP_OPT_SACK_ENABLED 7876404edcSAsim Jamshed uint32_t sack_cnt; 7976404edcSAsim Jamshed uint32_t sack_bytes; 8076404edcSAsim Jamshed uint32_t tdp_sack_cnt; 8176404edcSAsim Jamshed uint32_t tdp_sack_bytes; 8276404edcSAsim Jamshed #endif /* TCP_OPT_SACK_ENABLED */ 8376404edcSAsim Jamshed uint32_t rto_cnt; 8476404edcSAsim Jamshed uint32_t rto_bytes; 8576404edcSAsim Jamshed }; 8676404edcSAsim Jamshed 8776404edcSAsim Jamshed typedef struct seq_remap_entry { 8876404edcSAsim Jamshed uint32_t seq_base; 8976404edcSAsim Jamshed uint32_t seq_off; 9076404edcSAsim Jamshed } seq_remap_entry; 9176404edcSAsim Jamshed 9276404edcSAsim Jamshed #define SRE_MAX 8 9376404edcSAsim Jamshed 9476404edcSAsim Jamshed #if TCP_OPT_SACK_ENABLED 9576404edcSAsim Jamshed struct sack_entry 9676404edcSAsim Jamshed { 9776404edcSAsim Jamshed uint32_t left_edge; 9876404edcSAsim Jamshed uint32_t right_edge; 9976404edcSAsim Jamshed uint32_t expire; 10076404edcSAsim Jamshed }; 10176404edcSAsim Jamshed #endif /* TCP_OPT_SACK_ENABLED */ 10276404edcSAsim Jamshed 10376404edcSAsim Jamshed struct tcp_recv_vars 10476404edcSAsim Jamshed { 10576404edcSAsim Jamshed /* receiver variables */ 10676404edcSAsim Jamshed uint32_t rcv_wnd; /* receive window (unscaled) */ 10776404edcSAsim Jamshed //uint32_t rcv_up; /* receive urgent pointer */ 10876404edcSAsim Jamshed uint32_t irs; /* initial receiving sequence */ 10976404edcSAsim Jamshed uint32_t snd_wl1; /* segment seq number for last window update */ 11076404edcSAsim Jamshed uint32_t snd_wl2; /* segment ack number for last window update */ 11176404edcSAsim Jamshed 11276404edcSAsim Jamshed /* variables for fast retransmission */ 11376404edcSAsim Jamshed uint32_t last_ack_seq; /* highest ackd seq */ 11476404edcSAsim Jamshed uint8_t dup_acks; /* number of duplicated acks */ 11576404edcSAsim Jamshed 11676404edcSAsim Jamshed /* timestamps */ 11776404edcSAsim Jamshed uint32_t ts_recent; /* recent peer timestamp */ 11876404edcSAsim Jamshed uint32_t ts_lastack_rcvd; /* last ack rcvd time */ 11976404edcSAsim Jamshed uint32_t ts_last_ts_upd; /* last peer ts update time */ 12076404edcSAsim Jamshed uint32_t ts_tw_expire; // timestamp for timewait expire 12176404edcSAsim Jamshed 12276404edcSAsim Jamshed /* RTT estimation variables */ 12376404edcSAsim Jamshed uint32_t srtt; /* smoothed round trip time << 3 (scaled) */ 12476404edcSAsim Jamshed uint32_t mdev; /* medium deviation */ 12576404edcSAsim Jamshed uint32_t mdev_max; /* maximal mdev ffor the last rtt period */ 12676404edcSAsim Jamshed uint32_t rttvar; /* smoothed mdev_max */ 12776404edcSAsim Jamshed uint32_t rtt_seq; /* sequence number to update rttvar */ 12876404edcSAsim Jamshed 12976404edcSAsim Jamshed #if TCP_OPT_SACK_ENABLED /* currently not used */ 13076404edcSAsim Jamshed #define MAX_SACK_ENTRY 8 13176404edcSAsim Jamshed struct sack_entry sack_table[MAX_SACK_ENTRY]; 13276404edcSAsim Jamshed uint8_t sacks:3; 13376404edcSAsim Jamshed #endif /* TCP_OPT_SACK_ENABLED */ 13476404edcSAsim Jamshed 13576404edcSAsim Jamshed tcprb_t *rcvbuf; 13676404edcSAsim Jamshed 13776404edcSAsim Jamshed #if USE_SPIN_LOCK 13876404edcSAsim Jamshed pthread_spinlock_t read_lock; 13976404edcSAsim Jamshed #else 14076404edcSAsim Jamshed pthread_mutex_t read_lock; 14176404edcSAsim Jamshed #endif 14276404edcSAsim Jamshed struct hash_bucket_head *he_mybucket; 14376404edcSAsim Jamshed TAILQ_ENTRY(tcp_stream) he_link; /* hash table entry link */ 14476404edcSAsim Jamshed }; 14576404edcSAsim Jamshed 14676404edcSAsim Jamshed struct tcp_send_vars 14776404edcSAsim Jamshed { 14876404edcSAsim Jamshed /* IP-level information */ 14976404edcSAsim Jamshed uint16_t ip_id; 15076404edcSAsim Jamshed 15176404edcSAsim Jamshed uint16_t mss; /* maximum segment size */ 15276404edcSAsim Jamshed uint16_t eff_mss; /* effective segment size (excluding tcp option) */ 15376404edcSAsim Jamshed 15476404edcSAsim Jamshed uint8_t wscale_mine; /* my window scale (advertising window) */ 15576404edcSAsim Jamshed uint8_t wscale_peer; /* peer's window scale (advertised window) */ 15676404edcSAsim Jamshed int8_t nif_out; /* cached output network interface */ 15776404edcSAsim Jamshed unsigned char *d_haddr; /* cached destination MAC address */ 15876404edcSAsim Jamshed 15976404edcSAsim Jamshed /* send sequence variables */ 16076404edcSAsim Jamshed uint32_t snd_una; /* send unacknoledged */ 16176404edcSAsim Jamshed uint32_t snd_wnd; /* send window (unscaled) */ 16276404edcSAsim Jamshed uint32_t peer_wnd; /* client window size */ 16376404edcSAsim Jamshed //uint32_t snd_up; /* send urgent pointer (not used) */ 16476404edcSAsim Jamshed uint32_t iss; /* initial sending sequence */ 16576404edcSAsim Jamshed uint32_t fss; /* final sending sequence */ 16676404edcSAsim Jamshed 16776404edcSAsim Jamshed /* retransmission timeout variables */ 16876404edcSAsim Jamshed uint8_t nrtx; /* number of retransmission */ 16976404edcSAsim Jamshed uint8_t max_nrtx; /* max number of retransmission */ 17076404edcSAsim Jamshed uint32_t rto; /* retransmission timeout */ 17176404edcSAsim Jamshed uint32_t ts_rto; /* timestamp for retransmission timeout */ 17276404edcSAsim Jamshed 17376404edcSAsim Jamshed /* congestion control variables */ 17476404edcSAsim Jamshed uint32_t cwnd; /* congestion window */ 17576404edcSAsim Jamshed uint32_t ssthresh; /* slow start threshold */ 17676404edcSAsim Jamshed 17776404edcSAsim Jamshed /* timestamp */ 17876404edcSAsim Jamshed uint32_t ts_lastack_sent; /* last ack sent time */ 17976404edcSAsim Jamshed 18076404edcSAsim Jamshed uint8_t is_wack:1, /* is ack for window adertisement? */ 18176404edcSAsim Jamshed ack_cnt:6; /* number of acks to send. max 64 */ 18276404edcSAsim Jamshed 18376404edcSAsim Jamshed uint8_t on_control_list; 18476404edcSAsim Jamshed uint8_t on_send_list; 18576404edcSAsim Jamshed uint8_t on_ack_list; 18676404edcSAsim Jamshed uint8_t on_sendq; 18776404edcSAsim Jamshed uint8_t on_ackq; 18876404edcSAsim Jamshed uint8_t on_closeq; 18976404edcSAsim Jamshed uint8_t on_resetq; 19076404edcSAsim Jamshed 19176404edcSAsim Jamshed uint8_t on_closeq_int:1, 19276404edcSAsim Jamshed on_resetq_int:1, 19376404edcSAsim Jamshed is_fin_sent:1, 19476404edcSAsim Jamshed is_fin_ackd:1; 19576404edcSAsim Jamshed 19676404edcSAsim Jamshed TAILQ_ENTRY(tcp_stream) control_link; 19776404edcSAsim Jamshed TAILQ_ENTRY(tcp_stream) send_link; 19876404edcSAsim Jamshed TAILQ_ENTRY(tcp_stream) ack_link; 19976404edcSAsim Jamshed 20076404edcSAsim Jamshed TAILQ_ENTRY(tcp_stream) timer_link; /* timer link (rto list, tw list) */ 20176404edcSAsim Jamshed TAILQ_ENTRY(tcp_stream) timeout_link; /* connection timeout link */ 20276404edcSAsim Jamshed 20376404edcSAsim Jamshed struct tcp_send_buffer *sndbuf; 20476404edcSAsim Jamshed struct seq_remap_entry sre[SRE_MAX]; /* seq # translation table */ 20576404edcSAsim Jamshed uint8_t sre_index; /* seq # translation index */ 20676404edcSAsim Jamshed 20776404edcSAsim Jamshed #if USE_SPIN_LOCK 20876404edcSAsim Jamshed pthread_spinlock_t write_lock; 20976404edcSAsim Jamshed #else 21076404edcSAsim Jamshed pthread_mutex_t write_lock; 21176404edcSAsim Jamshed #endif 21276404edcSAsim Jamshed 21376404edcSAsim Jamshed #if RTM_STAT 21476404edcSAsim Jamshed struct rtm_stat rstat; /* retransmission statistics */ 21576404edcSAsim Jamshed #endif 21676404edcSAsim Jamshed }; 21776404edcSAsim Jamshed 21876404edcSAsim Jamshed typedef struct tcp_stream 21976404edcSAsim Jamshed { 22076404edcSAsim Jamshed /* 22176404edcSAsim Jamshed * This is a direct replacement for fctx... 22276404edcSAsim Jamshed * However this could be replaced by some 22376404edcSAsim Jamshed * more elaborate data structure that supports 22476404edcSAsim Jamshed * multiple monitors in the future... 22576404edcSAsim Jamshed * 22676404edcSAsim Jamshed * In case no monitor is attached, msock will be 22776404edcSAsim Jamshed * NULL. 22876404edcSAsim Jamshed * 22976404edcSAsim Jamshed * Support for standalone monitors will be patched 23076404edcSAsim Jamshed * in future revisions... 23176404edcSAsim Jamshed */ 23276404edcSAsim Jamshed 233*a14d6bd4SAsim Jamshed SOCKQ_HEAD(mlist) msocks; /* in case monitoring is enabled */ 23476404edcSAsim Jamshed socket_map_t socket; /* relating to MOS_SOCK_STREAM */ 23576404edcSAsim Jamshed 23676404edcSAsim Jamshed uint32_t id; 23776404edcSAsim Jamshed uint32_t stream_type; /* to identify sock_stream/mon_stream */ 23876404edcSAsim Jamshed 23976404edcSAsim Jamshed uint32_t saddr; /* in network order */ 24076404edcSAsim Jamshed uint32_t daddr; /* in network order */ 24176404edcSAsim Jamshed uint16_t sport; /* in network order */ 24276404edcSAsim Jamshed uint16_t dport; /* in network order */ 24376404edcSAsim Jamshed 24476404edcSAsim Jamshed uint32_t actions; 24576404edcSAsim Jamshed uint64_t cb_events; 24676404edcSAsim Jamshed 24776404edcSAsim Jamshed uint8_t state; /* tcp state */ 24876404edcSAsim Jamshed uint8_t close_reason; /* close reason */ 24976404edcSAsim Jamshed uint8_t on_hash_table; 25076404edcSAsim Jamshed uint8_t on_timewait_list; 25176404edcSAsim Jamshed uint8_t ht_idx; 25276404edcSAsim Jamshed uint8_t closed; 25376404edcSAsim Jamshed uint8_t is_bound_addr; 25476404edcSAsim Jamshed uint8_t need_wnd_adv; 25576404edcSAsim Jamshed int16_t on_rto_idx; 25676404edcSAsim Jamshed 25776404edcSAsim Jamshed uint16_t on_timeout_list:1, 25876404edcSAsim Jamshed on_rcv_br_list:1, 25976404edcSAsim Jamshed on_snd_br_list:1, 26076404edcSAsim Jamshed saw_timestamp:1, /* whether peer sends timestamp */ 26176404edcSAsim Jamshed sack_permit:1, /* whether peer permits SACK */ 26276404edcSAsim Jamshed control_list_waiting:1, 26376404edcSAsim Jamshed have_reset:1, 26476404edcSAsim Jamshed side:2, 26576404edcSAsim Jamshed buffer_mgmt:2, 26676404edcSAsim Jamshed status_mgmt:1, 26776404edcSAsim Jamshed allow_pkt_modification:1; 26876404edcSAsim Jamshed 26976404edcSAsim Jamshed uint32_t snd_nxt; /* send next */ 27076404edcSAsim Jamshed uint32_t rcv_nxt; /* receive next */ 27176404edcSAsim Jamshed 27276404edcSAsim Jamshed struct tcp_recv_vars *rcvvar; 27376404edcSAsim Jamshed struct tcp_send_vars *sndvar; 27476404edcSAsim Jamshed 27576404edcSAsim Jamshed uint32_t last_active_ts; /* ts_last_ack_sent or ts_last_ts_upd */ 27676404edcSAsim Jamshed 27776404edcSAsim Jamshed struct tcp_stream *pair_stream; /* pair stream in case of monitor / proxy socket */ 278*a14d6bd4SAsim Jamshed #ifdef RECORDPKT_PER_STREAM 27976404edcSAsim Jamshed struct pkt_ctx last_pctx; 28076404edcSAsim Jamshed unsigned char last_pkt_data[ETHERNET_FRAME_LEN]; 281*a14d6bd4SAsim Jamshed #endif 28276404edcSAsim Jamshed 28376404edcSAsim Jamshed } tcp_stream; 28476404edcSAsim Jamshed 28576404edcSAsim Jamshed extern inline char * 28676404edcSAsim Jamshed TCPStateToString(const tcp_stream *cur_stream); 28776404edcSAsim Jamshed 28876404edcSAsim Jamshed extern inline int 28976404edcSAsim Jamshed AddEpollEvent(struct mtcp_epoll *ep, 29076404edcSAsim Jamshed int queue_type, socket_map_t socket, uint32_t event); 29176404edcSAsim Jamshed 29276404edcSAsim Jamshed extern inline void 29376404edcSAsim Jamshed RaiseReadEvent(mtcp_manager_t mtcp, tcp_stream *stream); 29476404edcSAsim Jamshed 29576404edcSAsim Jamshed extern inline void 29676404edcSAsim Jamshed RaiseWriteEvent(mtcp_manager_t mtcp, tcp_stream *stream); 29776404edcSAsim Jamshed 29876404edcSAsim Jamshed extern inline void 29976404edcSAsim Jamshed RaiseCloseEvent(mtcp_manager_t mtcp, tcp_stream *stream); 30076404edcSAsim Jamshed 30176404edcSAsim Jamshed extern inline int 30276404edcSAsim Jamshed RaiseErrorEvent(mtcp_manager_t mtcp, tcp_stream *stream); 30376404edcSAsim Jamshed 30476404edcSAsim Jamshed tcp_stream * 30576404edcSAsim Jamshed CreateTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, 30676404edcSAsim Jamshed uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport, 30776404edcSAsim Jamshed unsigned int *hash); 30876404edcSAsim Jamshed 30976404edcSAsim Jamshed extern inline tcp_stream * 31076404edcSAsim Jamshed CreateDualTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, uint32_t saddr, 31176404edcSAsim Jamshed uint16_t sport, uint32_t daddr, uint16_t dport, unsigned int *hash); 31276404edcSAsim Jamshed 31376404edcSAsim Jamshed extern inline tcp_stream * 31476404edcSAsim Jamshed CreateClientTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, 31576404edcSAsim Jamshed uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport, unsigned int *hash); 31676404edcSAsim Jamshed 31776404edcSAsim Jamshed extern inline tcp_stream * 31876404edcSAsim Jamshed AttachServerTCPStream(mtcp_manager_t mtcp, tcp_stream *cs, int type, 31976404edcSAsim Jamshed uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport); 32076404edcSAsim Jamshed 32176404edcSAsim Jamshed void 32276404edcSAsim Jamshed DestroyTCPStream(mtcp_manager_t mtcp, tcp_stream *stream); 32376404edcSAsim Jamshed 32476404edcSAsim Jamshed void 32576404edcSAsim Jamshed DumpStream(mtcp_manager_t mtcp, tcp_stream *stream); 32676404edcSAsim Jamshed 32776404edcSAsim Jamshed int 32876404edcSAsim Jamshed GetFragInfo(socket_map_t sock, int side, void *optval, socklen_t *optlen); 32976404edcSAsim Jamshed 33076404edcSAsim Jamshed int 33176404edcSAsim Jamshed GetBufInfo(socket_map_t sock, int side, void *optval, socklen_t *optlen); 33276404edcSAsim Jamshed 33376404edcSAsim Jamshed int 33476404edcSAsim Jamshed GetTCPState(struct tcp_stream *stream, int side, 33576404edcSAsim Jamshed void *optval, socklen_t *optlen); 33676404edcSAsim Jamshed 33776404edcSAsim Jamshed int 33876404edcSAsim Jamshed DisableBuf(socket_map_t sock, int side); 33976404edcSAsim Jamshed 34076404edcSAsim Jamshed int 34176404edcSAsim Jamshed GetLastTimestamp(struct tcp_stream *stream, uint32_t *usecs, socklen_t *sz); 34276404edcSAsim Jamshed 34376404edcSAsim Jamshed void 34476404edcSAsim Jamshed posix_seq_srand(unsigned seed); 34576404edcSAsim Jamshed 34676404edcSAsim Jamshed #endif /* __TCP_STREAM_H_ */ 347