176404edcSAsim Jamshed #ifndef __TCP_STREAM_H_
276404edcSAsim Jamshed #define __TCP_STREAM_H_
376404edcSAsim Jamshed 
476404edcSAsim Jamshed #ifdef DARWIN
576404edcSAsim Jamshed #include <netinet/tcp.h>
676404edcSAsim Jamshed #else
776404edcSAsim Jamshed #include <linux/tcp.h>
876404edcSAsim Jamshed #endif
976404edcSAsim Jamshed #include <netinet/ip.h>
1076404edcSAsim Jamshed #include <sys/queue.h>
1176404edcSAsim Jamshed 
1276404edcSAsim Jamshed #include "mtcp.h"
1376404edcSAsim Jamshed #include "socket.h"
1476404edcSAsim Jamshed #include "memory_mgt.h"
1576404edcSAsim Jamshed #include "tcp_rb.h"
1676404edcSAsim Jamshed 
/* Largest value of a 32-bit TCP sequence number (2^32 - 1). */
#define TCP_MAX_SEQ 		4294967295

/*
 * Stream-type helpers.
 *   str  - pointer to an object that has a `stream_type` bitmask member
 *   type - bit position of the stream type within that mask
 *
 * HAS_STREAM_TYPE: non-zero when bit `type` is set in str->stream_type.
 * IS_STREAM_TYPE:  true when str->stream_type equals exactly that one bit.
 * STREAM_TYPE:     the mask value for bit position `type`.
 *
 * Every argument is parenthesized so that expression arguments such as
 * `a | b` or `&obj` expand with the intended precedence.
 */
#define HAS_STREAM_TYPE(str, type)	((str)->stream_type & (1 << (type)))
#define IS_STREAM_TYPE(str, type)	((str)->stream_type == (1 << (type)))
#define STREAM_TYPE(type)		(1 << (type))
2276404edcSAsim Jamshed 
2376404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
/**
 * Socket queue ("sockq") building blocks: routines for traversing
 * stream + raw sockets.
 *
 * A sockq is a <sys/queue.h> tail queue whose nodes each carry one
 * socket_map pointer.
 */
struct sockent {
	struct socket_map *sock;	/* socket referenced by this node */
	TAILQ_ENTRY(sockent) link;	/* tail-queue linkage to neighbouring nodes */
};

/* Declare a queue-head struct tagged `name` whose elements are sockent nodes. */
#define SOCKQ_HEAD(name) TAILQ_HEAD(name, sockent)

/* Initialise `head` (pointer to a SOCKQ_HEAD struct) as an empty queue. */
#define SOCKQ_INIT(head) TAILQ_INIT(head)
/*
 * Append `socket` to the tail of the queue at `head`.
 *
 * The node is obtained from mtcp->sockent_pool, so a variable named `mtcp`
 * must be in scope wherever this macro is expanded. When the pool hands back
 * NULL the socket is silently NOT enqueued; the caller gets no indication.
 */
#define SOCKQ_INSERT_TAIL(head, socket) \
do { \
	struct sockent *__ent; \
	__ent = (struct sockent *)MPAllocateChunk(mtcp->sockent_pool); \
	if (__ent != NULL) { \
		__ent->sock = (socket); \
		TAILQ_INSERT_TAIL(head, __ent, link); \
	} \
} while (0)
/*
 * Remove the first node of the queue at `head` whose sock pointer equals
 * `socket`, and return that node to mtcp->sockent_pool (a variable named
 * `mtcp` must be in scope at the expansion site).
 *
 * Nothing happens when no node matches. `socket` is evaluated once per
 * node visited, so it should be free of side effects.
 */
#define SOCKQ_REMOVE(head, socket) \
do { \
	struct sockent *__ent; \
	TAILQ_FOREACH(__ent, head, link) { \
		if (__ent->sock == (socket)) \
			break; \
	} \
	if (__ent != NULL) { \
		TAILQ_REMOVE(head, __ent, link); \
		MPFreeChunk(mtcp->sockent_pool, __ent); \
	} \
} while (0)
/*
 * Iteration over a socket queue. The opening macro (forward or reverse) and
 * SOCKQ_FOREACH_END must always be used as a pair:
 *
 *     SOCKQ_FOREACH_START(var, head) {
 *             ... use var ...
 *     } SOCKQ_FOREACH_END;
 *
 * On every pass `var` is assigned the sock pointer of the current node; when
 * the loop runs to completion `var` is left as NULL. The neighbouring node is
 * read into __temp before the body executes, so the loop step never touches
 * the current node again after the body has run.
 *
 * SOCKQ_FOREACH_REVERSE walks from the last node to the first. It names the
 * head struct tag `mlist` explicitly, so it only works on heads declared
 * with SOCKQ_HEAD(mlist).
 */
#define SOCKQ_FOREACH_START(var, head) \
do { \
	struct sockent *__walk, *__temp; \
	for (__walk = TAILQ_FIRST(head); \
	     ((var) = (__walk != NULL) ? __walk->sock : NULL, __walk != NULL); \
	     __walk = __temp) { \
		__temp = TAILQ_NEXT(__walk, link);
#define SOCKQ_FOREACH_REVERSE(var, head) \
do { \
	struct sockent *__walk, *__temp; \
	for (__walk = TAILQ_LAST(head, mlist); \
	     ((var) = (__walk != NULL) ? __walk->sock : NULL, __walk != NULL); \
	     __walk = __temp) { \
		__temp = TAILQ_PREV(__walk, mlist, link);
/* Closes the `for` body and the `do` opened by either macro above. */
#define SOCKQ_FOREACH_END }} while (0)
6976404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
7076404edcSAsim Jamshed 
/*
 * Retransmission statistics.
 * Fields come in <name>_cnt / <name>_bytes pairs.
 * NOTE(review): presumably an event count and the matching byte total for
 * each retransmission trigger -- confirm against the code that updates them.
 */
struct rtm_stat {
	uint32_t tdp_ack_cnt;
	uint32_t tdp_ack_bytes;

	uint32_t ack_upd_cnt;
	uint32_t ack_upd_bytes;

#if TCP_OPT_SACK_ENABLED
	/* SACK counters exist only when SACK support is compiled in */
	uint32_t sack_cnt;
	uint32_t sack_bytes;

	uint32_t tdp_sack_cnt;
	uint32_t tdp_sack_bytes;
#endif /* TCP_OPT_SACK_ENABLED */

	uint32_t rto_cnt;
	uint32_t rto_bytes;
};
8676404edcSAsim Jamshed 
/*
 * One entry of the sequence-number translation table held in
 * struct tcp_send_vars.
 * NOTE(review): seq_base / seq_off look like "starting sequence number" and
 * "offset applied from there on" -- confirm against the remapping code.
 */
struct seq_remap_entry {
	uint32_t seq_base;
	uint32_t seq_off;
};
typedef struct seq_remap_entry seq_remap_entry;

/* Number of seq_remap_entry slots in a translation table. */
#define SRE_MAX			8
9376404edcSAsim Jamshed 
#if TCP_OPT_SACK_ENABLED
/*
 * One SACK table entry: a [left_edge, right_edge] sequence range plus an
 * expiry value. Compiled in only when TCP_OPT_SACK_ENABLED is non-zero.
 * NOTE(review): units/clock of `expire` are not visible here -- confirm.
 */
struct sack_entry {
	uint32_t left_edge;
	uint32_t right_edge;
	uint32_t expire;
};
#endif /* TCP_OPT_SACK_ENABLED */
10276404edcSAsim Jamshed 
10376404edcSAsim Jamshed struct tcp_recv_vars
10476404edcSAsim Jamshed {
10576404edcSAsim Jamshed 	/* receiver variables */
10676404edcSAsim Jamshed 	uint32_t rcv_wnd;		/* receive window (unscaled) */
10776404edcSAsim Jamshed 	//uint32_t rcv_up;		/* receive urgent pointer */
10876404edcSAsim Jamshed 	uint32_t irs;			/* initial receiving sequence */
10976404edcSAsim Jamshed 	uint32_t snd_wl1;		/* segment seq number for last window update */
11076404edcSAsim Jamshed 	uint32_t snd_wl2;		/* segment ack number for last window update */
11176404edcSAsim Jamshed 
11276404edcSAsim Jamshed 	/* variables for fast retransmission */
11376404edcSAsim Jamshed 	uint32_t last_ack_seq;	/* highest ackd seq */
11476404edcSAsim Jamshed 	uint8_t dup_acks;		/* number of duplicated acks */
11576404edcSAsim Jamshed 
11676404edcSAsim Jamshed 	/* timestamps */
11776404edcSAsim Jamshed 	uint32_t ts_recent;			/* recent peer timestamp */
11876404edcSAsim Jamshed 	uint32_t ts_lastack_rcvd;	/* last ack rcvd time */
11976404edcSAsim Jamshed 	uint32_t ts_last_ts_upd;	/* last peer ts update time */
12076404edcSAsim Jamshed 	uint32_t ts_tw_expire;	// timestamp for timewait expire
12176404edcSAsim Jamshed 
12276404edcSAsim Jamshed 	/* RTT estimation variables */
12376404edcSAsim Jamshed 	uint32_t srtt;			/* smoothed round trip time << 3 (scaled) */
12476404edcSAsim Jamshed 	uint32_t mdev;			/* medium deviation */
12576404edcSAsim Jamshed 	uint32_t mdev_max;		/* maximal mdev ffor the last rtt period */
12676404edcSAsim Jamshed 	uint32_t rttvar;		/* smoothed mdev_max */
12776404edcSAsim Jamshed 	uint32_t rtt_seq;		/* sequence number to update rttvar */
12876404edcSAsim Jamshed 
12976404edcSAsim Jamshed #if TCP_OPT_SACK_ENABLED		/* currently not used */
13076404edcSAsim Jamshed #define MAX_SACK_ENTRY 8
13176404edcSAsim Jamshed 	struct sack_entry sack_table[MAX_SACK_ENTRY];
13276404edcSAsim Jamshed 	uint8_t sacks:3;
13376404edcSAsim Jamshed #endif /* TCP_OPT_SACK_ENABLED */
13476404edcSAsim Jamshed 
13576404edcSAsim Jamshed 	tcprb_t *rcvbuf;
13676404edcSAsim Jamshed 
13776404edcSAsim Jamshed #if USE_SPIN_LOCK
13876404edcSAsim Jamshed 	pthread_spinlock_t read_lock;
13976404edcSAsim Jamshed #else
14076404edcSAsim Jamshed 	pthread_mutex_t read_lock;
14176404edcSAsim Jamshed #endif
14276404edcSAsim Jamshed 	struct hash_bucket_head *he_mybucket;
14376404edcSAsim Jamshed 	TAILQ_ENTRY(tcp_stream) he_link;	/* hash table entry link */
14476404edcSAsim Jamshed };
14576404edcSAsim Jamshed 
14676404edcSAsim Jamshed struct tcp_send_vars
14776404edcSAsim Jamshed {
14876404edcSAsim Jamshed 	/* IP-level information */
14976404edcSAsim Jamshed 	uint16_t ip_id;
15076404edcSAsim Jamshed 
15176404edcSAsim Jamshed 	uint16_t mss;			/* maximum segment size */
15276404edcSAsim Jamshed 	uint16_t eff_mss;		/* effective segment size (excluding tcp option) */
15376404edcSAsim Jamshed 
15476404edcSAsim Jamshed 	uint8_t wscale_mine;		/* my window scale (advertising window) */
15576404edcSAsim Jamshed 	uint8_t wscale_peer;		/* peer's window scale (advertised window) */
15676404edcSAsim Jamshed 	int8_t nif_out;			/* cached output network interface */
15776404edcSAsim Jamshed 	unsigned char *d_haddr;	/* cached destination MAC address */
15876404edcSAsim Jamshed 
15976404edcSAsim Jamshed 	/* send sequence variables */
16076404edcSAsim Jamshed 	uint32_t snd_una;		/* send unacknoledged */
16176404edcSAsim Jamshed 	uint32_t snd_wnd;		/* send window (unscaled) */
16276404edcSAsim Jamshed 	uint32_t peer_wnd;		/* client window size */
16376404edcSAsim Jamshed 	//uint32_t snd_up;		/* send urgent pointer (not used) */
16476404edcSAsim Jamshed 	uint32_t iss;			/* initial sending sequence */
16576404edcSAsim Jamshed 	uint32_t fss;			/* final sending sequence */
16676404edcSAsim Jamshed 
16776404edcSAsim Jamshed 	/* retransmission timeout variables */
16876404edcSAsim Jamshed 	uint8_t nrtx;			/* number of retransmission */
16976404edcSAsim Jamshed 	uint8_t max_nrtx;		/* max number of retransmission */
17076404edcSAsim Jamshed 	uint32_t rto;			/* retransmission timeout */
17176404edcSAsim Jamshed 	uint32_t ts_rto;		/* timestamp for retransmission timeout */
17276404edcSAsim Jamshed 
17376404edcSAsim Jamshed 	/* congestion control variables */
17476404edcSAsim Jamshed 	uint32_t cwnd;				/* congestion window */
17576404edcSAsim Jamshed 	uint32_t ssthresh;			/* slow start threshold */
17676404edcSAsim Jamshed 
17776404edcSAsim Jamshed 	/* timestamp */
17876404edcSAsim Jamshed 	uint32_t ts_lastack_sent;	/* last ack sent time */
17976404edcSAsim Jamshed 
18076404edcSAsim Jamshed 	uint8_t is_wack:1, 			/* is ack for window adertisement? */
18176404edcSAsim Jamshed 			ack_cnt:6;			/* number of acks to send. max 64 */
18276404edcSAsim Jamshed 
18376404edcSAsim Jamshed 	uint8_t on_control_list;
18476404edcSAsim Jamshed 	uint8_t on_send_list;
18576404edcSAsim Jamshed 	uint8_t on_ack_list;
18676404edcSAsim Jamshed 	uint8_t on_sendq;
18776404edcSAsim Jamshed 	uint8_t on_ackq;
18876404edcSAsim Jamshed 	uint8_t on_closeq;
18976404edcSAsim Jamshed 	uint8_t on_resetq;
19076404edcSAsim Jamshed 
19176404edcSAsim Jamshed 	uint8_t on_closeq_int:1,
19276404edcSAsim Jamshed 			on_resetq_int:1,
19376404edcSAsim Jamshed 			is_fin_sent:1,
19476404edcSAsim Jamshed 			is_fin_ackd:1;
19576404edcSAsim Jamshed 
19676404edcSAsim Jamshed 	TAILQ_ENTRY(tcp_stream) control_link;
19776404edcSAsim Jamshed 	TAILQ_ENTRY(tcp_stream) send_link;
19876404edcSAsim Jamshed 	TAILQ_ENTRY(tcp_stream) ack_link;
19976404edcSAsim Jamshed 
20076404edcSAsim Jamshed 	TAILQ_ENTRY(tcp_stream) timer_link;		/* timer link (rto list, tw list) */
20176404edcSAsim Jamshed 	TAILQ_ENTRY(tcp_stream) timeout_link;	/* connection timeout link */
20276404edcSAsim Jamshed 
20376404edcSAsim Jamshed 	struct tcp_send_buffer *sndbuf;
20476404edcSAsim Jamshed 	struct seq_remap_entry sre[SRE_MAX];	/* seq # translation table */
20576404edcSAsim Jamshed 	uint8_t sre_index;			/* seq # translation index */
20676404edcSAsim Jamshed 
20776404edcSAsim Jamshed #if USE_SPIN_LOCK
20876404edcSAsim Jamshed 	pthread_spinlock_t write_lock;
20976404edcSAsim Jamshed #else
21076404edcSAsim Jamshed 	pthread_mutex_t write_lock;
21176404edcSAsim Jamshed #endif
21276404edcSAsim Jamshed 
21376404edcSAsim Jamshed #if RTM_STAT
21476404edcSAsim Jamshed 	struct rtm_stat rstat;			/* retransmission statistics */
21576404edcSAsim Jamshed #endif
21676404edcSAsim Jamshed };
21776404edcSAsim Jamshed 
21876404edcSAsim Jamshed typedef struct tcp_stream
21976404edcSAsim Jamshed {
22076404edcSAsim Jamshed 	/*
22176404edcSAsim Jamshed 	 * This is a direct replacement for fctx...
22276404edcSAsim Jamshed 	 * However this could be replaced by some
22376404edcSAsim Jamshed 	 * more elaborate data structure that supports
22476404edcSAsim Jamshed 	 * multiple monitors in the future...
22576404edcSAsim Jamshed 	 *
22676404edcSAsim Jamshed 	 * In case no monitor is attached, msock will be
22776404edcSAsim Jamshed 	 * NULL.
22876404edcSAsim Jamshed 	 *
22976404edcSAsim Jamshed 	 * Support for standalone monitors will be patched
23076404edcSAsim Jamshed 	 * in future revisions...
23176404edcSAsim Jamshed 	 */
23276404edcSAsim Jamshed 
233*a14d6bd4SAsim Jamshed 	SOCKQ_HEAD(mlist) msocks;        /* in case monitoring is enabled */
23476404edcSAsim Jamshed 	socket_map_t socket;		/* relating to MOS_SOCK_STREAM */
23576404edcSAsim Jamshed 
23676404edcSAsim Jamshed 	uint32_t id;
23776404edcSAsim Jamshed 	uint32_t stream_type;		/* to identify sock_stream/mon_stream */
23876404edcSAsim Jamshed 
23976404edcSAsim Jamshed 	uint32_t saddr;			/* in network order */
24076404edcSAsim Jamshed 	uint32_t daddr;			/* in network order */
24176404edcSAsim Jamshed 	uint16_t sport;			/* in network order */
24276404edcSAsim Jamshed 	uint16_t dport;			/* in network order */
24376404edcSAsim Jamshed 
24476404edcSAsim Jamshed 	uint32_t actions;
24576404edcSAsim Jamshed 	uint64_t cb_events;
24676404edcSAsim Jamshed 
24776404edcSAsim Jamshed 	uint8_t state;			/* tcp state */
24876404edcSAsim Jamshed 	uint8_t close_reason;	/* close reason */
24976404edcSAsim Jamshed 	uint8_t on_hash_table;
25076404edcSAsim Jamshed 	uint8_t on_timewait_list;
25176404edcSAsim Jamshed 	uint8_t ht_idx;
25276404edcSAsim Jamshed 	uint8_t closed;
25376404edcSAsim Jamshed 	uint8_t is_bound_addr;
25476404edcSAsim Jamshed 	uint8_t need_wnd_adv;
25576404edcSAsim Jamshed 	int16_t on_rto_idx;
25676404edcSAsim Jamshed 
25776404edcSAsim Jamshed 	uint16_t on_timeout_list:1,
25876404edcSAsim Jamshed 		on_rcv_br_list:1,
25976404edcSAsim Jamshed 		on_snd_br_list:1,
26076404edcSAsim Jamshed 		saw_timestamp:1,	/* whether peer sends timestamp */
26176404edcSAsim Jamshed 		sack_permit:1,		/* whether peer permits SACK */
26276404edcSAsim Jamshed 		control_list_waiting:1,
26376404edcSAsim Jamshed 		have_reset:1,
26476404edcSAsim Jamshed 		side:2,
26576404edcSAsim Jamshed 		buffer_mgmt:2,
26676404edcSAsim Jamshed 		status_mgmt:1,
26776404edcSAsim Jamshed 		allow_pkt_modification:1;
26876404edcSAsim Jamshed 
26976404edcSAsim Jamshed 	uint32_t snd_nxt;		/* send next */
27076404edcSAsim Jamshed 	uint32_t rcv_nxt;		/* receive next */
27176404edcSAsim Jamshed 
27276404edcSAsim Jamshed 	struct tcp_recv_vars *rcvvar;
27376404edcSAsim Jamshed 	struct tcp_send_vars *sndvar;
27476404edcSAsim Jamshed 
27576404edcSAsim Jamshed 	uint32_t last_active_ts;		/* ts_last_ack_sent or ts_last_ts_upd */
27676404edcSAsim Jamshed 
27776404edcSAsim Jamshed 	struct tcp_stream *pair_stream; /* pair stream in case of monitor / proxy socket */
278*a14d6bd4SAsim Jamshed #ifdef RECORDPKT_PER_STREAM
27976404edcSAsim Jamshed 	struct pkt_ctx last_pctx;
28076404edcSAsim Jamshed 	unsigned char  last_pkt_data[ETHERNET_FRAME_LEN];
281*a14d6bd4SAsim Jamshed #endif
28276404edcSAsim Jamshed 
28376404edcSAsim Jamshed } tcp_stream;
28476404edcSAsim Jamshed 
28576404edcSAsim Jamshed extern inline char *
28676404edcSAsim Jamshed TCPStateToString(const tcp_stream *cur_stream);
28776404edcSAsim Jamshed 
28876404edcSAsim Jamshed extern inline int
28976404edcSAsim Jamshed AddEpollEvent(struct mtcp_epoll *ep,
29076404edcSAsim Jamshed 		int queue_type, socket_map_t socket, uint32_t event);
29176404edcSAsim Jamshed 
29276404edcSAsim Jamshed extern inline void
29376404edcSAsim Jamshed RaiseReadEvent(mtcp_manager_t mtcp, tcp_stream *stream);
29476404edcSAsim Jamshed 
29576404edcSAsim Jamshed extern inline void
29676404edcSAsim Jamshed RaiseWriteEvent(mtcp_manager_t mtcp, tcp_stream *stream);
29776404edcSAsim Jamshed 
29876404edcSAsim Jamshed extern inline void
29976404edcSAsim Jamshed RaiseCloseEvent(mtcp_manager_t mtcp, tcp_stream *stream);
30076404edcSAsim Jamshed 
30176404edcSAsim Jamshed extern inline int
30276404edcSAsim Jamshed RaiseErrorEvent(mtcp_manager_t mtcp, tcp_stream *stream);
30376404edcSAsim Jamshed 
30476404edcSAsim Jamshed tcp_stream *
30576404edcSAsim Jamshed CreateTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type,
30676404edcSAsim Jamshed 		uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport,
30776404edcSAsim Jamshed 		unsigned int *hash);
30876404edcSAsim Jamshed 
30976404edcSAsim Jamshed extern inline tcp_stream *
31076404edcSAsim Jamshed CreateDualTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, uint32_t saddr,
31176404edcSAsim Jamshed 		    uint16_t sport, uint32_t daddr, uint16_t dport, unsigned int *hash);
31276404edcSAsim Jamshed 
31376404edcSAsim Jamshed extern inline tcp_stream *
31476404edcSAsim Jamshed CreateClientTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type,
31576404edcSAsim Jamshed 			uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport, unsigned int *hash);
31676404edcSAsim Jamshed 
31776404edcSAsim Jamshed extern inline tcp_stream *
31876404edcSAsim Jamshed AttachServerTCPStream(mtcp_manager_t mtcp, tcp_stream *cs, int type,
31976404edcSAsim Jamshed 			uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport);
32076404edcSAsim Jamshed 
32176404edcSAsim Jamshed void
32276404edcSAsim Jamshed DestroyTCPStream(mtcp_manager_t mtcp, tcp_stream *stream);
32376404edcSAsim Jamshed 
32476404edcSAsim Jamshed void
32576404edcSAsim Jamshed DumpStream(mtcp_manager_t mtcp, tcp_stream *stream);
32676404edcSAsim Jamshed 
32776404edcSAsim Jamshed int
32876404edcSAsim Jamshed GetFragInfo(socket_map_t sock, int side, void *optval, socklen_t *optlen);
32976404edcSAsim Jamshed 
33076404edcSAsim Jamshed int
33176404edcSAsim Jamshed GetBufInfo(socket_map_t sock, int side, void *optval, socklen_t *optlen);
33276404edcSAsim Jamshed 
33376404edcSAsim Jamshed int
33476404edcSAsim Jamshed GetTCPState(struct tcp_stream *stream, int side,
33576404edcSAsim Jamshed 			void *optval, socklen_t *optlen);
33676404edcSAsim Jamshed 
33776404edcSAsim Jamshed int
33876404edcSAsim Jamshed DisableBuf(socket_map_t sock, int side);
33976404edcSAsim Jamshed 
34076404edcSAsim Jamshed int
34176404edcSAsim Jamshed GetLastTimestamp(struct tcp_stream *stream, uint32_t *usecs, socklen_t *sz);
34276404edcSAsim Jamshed 
34376404edcSAsim Jamshed void
34476404edcSAsim Jamshed posix_seq_srand(unsigned seed);
34576404edcSAsim Jamshed 
34676404edcSAsim Jamshed #endif /* __TCP_STREAM_H_ */
347