1 #ifndef __TCP_STREAM_H_
2 #define __TCP_STREAM_H_
3 
4 #ifdef DARWIN
5 #include <netinet/tcp.h>
6 #else
7 #include <linux/tcp.h>
8 #endif
9 #include <netinet/ip.h>
10 #include <sys/queue.h>
11 
12 #include "mtcp.h"
13 #include "socket.h"
14 #include "memory_mgt.h"
15 #include "tcp_rb.h"
16 
/* largest value a 32-bit TCP sequence number can take (2^32 - 1) */
#define TCP_MAX_SEQ 		4294967295
18 
/*
 * Stream-type helpers. `stream_type` is used as a bit mask in which bit
 * number `type` stands for one stream type.
 *
 * Every macro argument is parenthesized so that compound argument
 * expressions (e.g. `&stream` or `cond ? A : B`) expand with the intended
 * precedence.
 */
/* non-zero when bit `type` is set in (str)->stream_type */
#define HAS_STREAM_TYPE(str, type)	((str)->stream_type & (1 << (type)))
/* non-zero when (str)->stream_type consists of exactly the single bit `type` */
#define IS_STREAM_TYPE(str, type)	((str)->stream_type == (1 << (type)))
/* bit-mask value for stream type number `type` */
#define STREAM_TYPE(type)		(1 << (type))
22 
23 /*----------------------------------------------------------------------------*/
24 /**
25  * routines for traversing stream + raw sockets
26  */
/* one node of a socket queue: wraps a socket_map pointer for TAILQ linkage */
struct sockent {
	struct socket_map *sock;	/* socket referenced by this node */
	TAILQ_ENTRY(sockent) link;	/* queue linkage used by the SOCKQ_* macros */
};
/* declares a TAILQ head structure, tagged `name`, whose elements are struct sockent */
#define SOCKQ_HEAD(name) TAILQ_HEAD(name, sockent)
/* thin alias for TAILQ_INIT on a socket queue head */
#define SOCKQ_INIT(head) TAILQ_INIT(head)
/*
 * Append `socket` to the tail of the queue `head`.
 *
 * The list node comes from mtcp->sockent_pool, so a variable named `mtcp`
 * must be in scope wherever this macro is expanded. When the allocator
 * returns NULL the queue is left untouched and nothing is reported.
 */
#define SOCKQ_INSERT_TAIL(head, socket) \
do { \
	struct sockent *__ent; \
	__ent = (struct sockent *)MPAllocateChunk(mtcp->sockent_pool); \
	if (__ent == NULL) \
		break; \
	__ent->sock = (socket); \
	TAILQ_INSERT_TAIL(head, __ent, link); \
} while (0)
/*
 * Unlink the first entry of the queue `head` whose sock pointer equals
 * `socket`, and hand its node back to mtcp->sockent_pool (a variable named
 * `mtcp` must be in scope wherever this macro is expanded).
 * Nothing happens when no entry matches.
 */
#define SOCKQ_REMOVE(head, socket) \
do { \
	struct sockent *__hit = TAILQ_FIRST(head); \
	while (__hit != NULL && __hit->sock != (socket)) \
		__hit = TAILQ_NEXT(__hit, link); \
	if (__hit == NULL) \
		break; \
	TAILQ_REMOVE(head, __hit, link); \
	MPFreeChunk(mtcp->sockent_pool, __hit); \
} while (0)
/*
 * Iterate over every socket stored in the queue `head`:
 *
 *	SOCKQ_FOREACH_START(var, head) {
 *		... use var ...
 *	} SOCKQ_FOREACH_END;
 *
 * `var` receives each entry's sock pointer in turn and is set to NULL once
 * the walk runs off the end of the queue. The successor of the current
 * entry is fetched before the loop body runs, so the walk never touches
 * the current entry again after the body has executed.
 */
#define SOCKQ_FOREACH_START(var, head) \
do { \
	struct sockent *__walk, *__temp; \
	__walk = TAILQ_FIRST(head); \
	for (;; __walk = __temp) { \
		(var) = (__walk != NULL) ? __walk->sock : NULL; \
		if (__walk == NULL) \
			break; \
		__temp = TAILQ_NEXT(__walk, link);
#define SOCKQ_FOREACH_END }} while (0)
62 /*----------------------------------------------------------------------------*/
63 
/*
 * Retransmission statistics, kept as pairs of an event counter (*_cnt) and
 * a byte counter (*_bytes).
 * NOTE(review): the meaning of the "tdp" prefix and the exact conditions
 * under which each counter is incremented are not visible in this header.
 */
struct rtm_stat
{
	uint32_t tdp_ack_cnt;
	uint32_t tdp_ack_bytes;
	uint32_t ack_upd_cnt;
	uint32_t ack_upd_bytes;
#if TCP_OPT_SACK_ENABLED
	/* SACK-related counters exist only when SACK support is compiled in */
	uint32_t sack_cnt;
	uint32_t sack_bytes;
	uint32_t tdp_sack_cnt;
	uint32_t tdp_sack_bytes;
#endif /* TCP_OPT_SACK_ENABLED */
	uint32_t rto_cnt;		/* presumably retransmission-timeout events -- confirm */
	uint32_t rto_bytes;
};
79 
/*
 * One entry of the per-stream sequence number translation table.
 * NOTE(review): presumably seq_off is the offset applied from seq_base
 * onwards -- confirm against the code that fills the table.
 */
typedef struct seq_remap_entry {
	uint32_t seq_base;
	uint32_t seq_off;
} seq_remap_entry;

/* number of seq_remap_entry slots kept per stream */
#define SRE_MAX			8
86 
#if TCP_OPT_SACK_ENABLED
/*
 * One selective-acknowledgement block, described by its two sequence-number
 * edges. NOTE(review): unit and time base of `expire` are not visible here.
 */
struct sack_entry
{
	uint32_t left_edge;
	uint32_t right_edge;
	uint32_t expire;
};
#endif /* TCP_OPT_SACK_ENABLED */
95 
/*
 * Receive-side state of a TCP stream (referenced through
 * tcp_stream::rcvvar).
 */
struct tcp_recv_vars
{
	/* receiver variables */
	uint32_t rcv_wnd;		/* receive window (unscaled) */
	//uint32_t rcv_up;		/* receive urgent pointer */
	uint32_t irs;			/* initial receiving sequence */
	uint32_t snd_wl1;		/* segment seq number for last window update */
	uint32_t snd_wl2;		/* segment ack number for last window update */

	/* variables for fast retransmission */
	uint32_t last_ack_seq;	/* highest acked seq */
	uint8_t dup_acks;		/* number of duplicated acks */

	/* timestamps */
	uint32_t ts_recent;			/* recent peer timestamp */
	uint32_t ts_lastack_rcvd;	/* last ack rcvd time */
	uint32_t ts_last_ts_upd;	/* last peer ts update time */
	uint32_t ts_tw_expire;	// timestamp for timewait expire

	/* RTT estimation variables */
	uint32_t srtt;			/* smoothed round trip time << 3 (scaled) */
	uint32_t mdev;			/* medium deviation */
	uint32_t mdev_max;		/* maximal mdev for the last rtt period */
	uint32_t rttvar;		/* smoothed mdev_max */
	uint32_t rtt_seq;		/* sequence number to update rttvar */

#if TCP_OPT_SACK_ENABLED		/* currently not used */
#define MAX_SACK_ENTRY 8
	struct sack_entry sack_table[MAX_SACK_ENTRY];
	/*
	 * NOTE(review): a 3-bit field holds 0..7, so it cannot represent a
	 * count of MAX_SACK_ENTRY (8) -- confirm whether this is a count or
	 * an index before enabling SACK.
	 */
	uint8_t sacks:3;
#endif /* TCP_OPT_SACK_ENABLED */

	tcprb_t *rcvbuf;		/* receive buffer */

	/* lock type is selected at build time by USE_SPIN_LOCK */
#if USE_SPIN_LOCK
	pthread_spinlock_t read_lock;
#else
	pthread_mutex_t read_lock;
#endif
	struct hash_bucket_head *he_mybucket;
	TAILQ_ENTRY(tcp_stream) he_link;	/* hash table entry link */
};
138 
/*
 * Send-side state of a TCP stream (referenced through tcp_stream::sndvar).
 */
struct tcp_send_vars
{
	/* IP-level information */
	uint16_t ip_id;

	uint16_t mss;			/* maximum segment size */
	uint16_t eff_mss;		/* effective segment size (excluding tcp option) */

	uint8_t wscale_mine;		/* my window scale (advertising window) */
	uint8_t wscale_peer;		/* peer's window scale (advertised window) */
	int8_t nif_out;			/* cached output network interface */
	unsigned char *d_haddr;	/* cached destination MAC address */

	/* send sequence variables */
	uint32_t snd_una;		/* send unacknowledged */
	uint32_t snd_wnd;		/* send window (unscaled) */
	uint32_t peer_wnd;		/* client window size */
	//uint32_t snd_up;		/* send urgent pointer (not used) */
	uint32_t iss;			/* initial sending sequence */
	uint32_t fss;			/* final sending sequence */

	/* retransmission timeout variables */
	uint8_t nrtx;			/* number of retransmission */
	uint8_t max_nrtx;		/* max number of retransmission */
	uint32_t rto;			/* retransmission timeout */
	uint32_t ts_rto;		/* timestamp for retransmission timeout */

	/* congestion control variables */
	uint32_t cwnd;				/* congestion window */
	uint32_t ssthresh;			/* slow start threshold */

	/* timestamp */
	uint32_t ts_lastack_sent;	/* last ack sent time */

	uint8_t is_wack:1, 			/* is ack for window advertisement? */
			ack_cnt:6;			/* number of acks to send; a 6-bit field holds at most 63 */

	/* list/queue membership flags (one per list or queue named below) */
	uint8_t on_control_list;
	uint8_t on_send_list;
	uint8_t on_ack_list;
	uint8_t on_sendq;
	uint8_t on_ackq;
	uint8_t on_closeq;
	uint8_t on_resetq;

	uint8_t on_closeq_int:1,
			on_resetq_int:1,
			is_fin_sent:1,
			is_fin_ackd:1;

	TAILQ_ENTRY(tcp_stream) control_link;
	TAILQ_ENTRY(tcp_stream) send_link;
	TAILQ_ENTRY(tcp_stream) ack_link;

	TAILQ_ENTRY(tcp_stream) timer_link;		/* timer link (rto list, tw list) */
	TAILQ_ENTRY(tcp_stream) timeout_link;	/* connection timeout link */

	struct tcp_send_buffer *sndbuf;
	struct seq_remap_entry sre[SRE_MAX];	/* seq # translation table */
	uint8_t sre_index;			/* seq # translation index */

	/* lock type is selected at build time by USE_SPIN_LOCK */
#if USE_SPIN_LOCK
	pthread_spinlock_t write_lock;
#else
	pthread_mutex_t write_lock;
#endif

#if RTM_STAT
	struct rtm_stat rstat;			/* retransmission statistics */
#endif
};
210 
/*
 * Per-connection TCP state: addressing, state flags, sequence numbers and
 * pointers to the receive-side (rcvvar) and send-side (sndvar) variables.
 */
typedef struct tcp_stream
{
	/*
	 * This is a direct replacement for fctx...
	 * However this could be replaced by some
	 * more elaborate data structure that supports
	 * multiple monitors in the future...
	 *
	 * In case no monitor is attached, msock will be
	 * NULL.
	 * NOTE(review): the sentence above speaks of "msock", but the
	 * member below is the queue head `msocks` -- confirm whether an
	 * empty queue is what is meant.
	 *
	 * Support for standalone monitors will be patched
	 * in future revisions...
	 */

	SOCKQ_HEAD() msocks;        /* in case monitoring is enabled */
	socket_map_t socket;		/* relating to MOS_SOCK_STREAM */

	uint32_t id;
	uint32_t stream_type;		/* to identify sock_stream/mon_stream */

	uint32_t saddr;			/* in network order */
	uint32_t daddr;			/* in network order */
	uint16_t sport;			/* in network order */
	uint16_t dport;			/* in network order */

	uint32_t actions;
	uint64_t cb_events;

	uint8_t state;			/* tcp state */
	uint8_t close_reason;	/* close reason */
	uint8_t on_hash_table;
	uint8_t on_timewait_list;
	uint8_t ht_idx;
	uint8_t closed;
	uint8_t is_bound_addr;
	uint8_t need_wnd_adv;
	int16_t on_rto_idx;

	/* bit flags; the widths below add up to 13 of the 16 available bits */
	uint16_t on_timeout_list:1,
		on_rcv_br_list:1,
		on_snd_br_list:1,
		saw_timestamp:1,	/* whether peer sends timestamp */
		sack_permit:1,		/* whether peer permits SACK */
		control_list_waiting:1,
		have_reset:1,
		side:2,
		buffer_mgmt:2,
		status_mgmt:1,
		allow_pkt_modification:1;

	uint32_t snd_nxt;		/* send next */
	uint32_t rcv_nxt;		/* receive next */

	struct tcp_recv_vars *rcvvar;	/* receive-side variables */
	struct tcp_send_vars *sndvar;	/* send-side variables */

	uint32_t last_active_ts;		/* ts_last_ack_sent or ts_last_ts_upd */

	struct tcp_stream *pair_stream; /* pair stream in case of monitor / proxy socket */

	struct pkt_ctx last_pctx;
	unsigned char  last_pkt_data[ETHERNET_FRAME_LEN];

} tcp_stream;
276 
277 extern inline char *
278 TCPStateToString(const tcp_stream *cur_stream);
279 
280 extern inline int
281 AddEpollEvent(struct mtcp_epoll *ep,
282 		int queue_type, socket_map_t socket, uint32_t event);
283 
284 extern inline void
285 RaiseReadEvent(mtcp_manager_t mtcp, tcp_stream *stream);
286 
287 extern inline void
288 RaiseWriteEvent(mtcp_manager_t mtcp, tcp_stream *stream);
289 
290 extern inline void
291 RaiseCloseEvent(mtcp_manager_t mtcp, tcp_stream *stream);
292 
293 extern inline int
294 RaiseErrorEvent(mtcp_manager_t mtcp, tcp_stream *stream);
295 
/*
 * Creates a stream for the connection identified by saddr/sport and
 * daddr/dport. NOTE(review): byte order expected for the address/port
 * arguments, the role of `hash`, and the failure return value are not
 * visible in this header -- confirm against the definition.
 */
tcp_stream *
CreateTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type,
		uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport,
		unsigned int *hash);
300 
301 extern inline tcp_stream *
302 CreateDualTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, uint32_t saddr,
303 		    uint16_t sport, uint32_t daddr, uint16_t dport, unsigned int *hash);
304 
305 extern inline tcp_stream *
306 CreateClientTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type,
307 			uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport, unsigned int *hash);
308 
309 extern inline tcp_stream *
310 AttachServerTCPStream(mtcp_manager_t mtcp, tcp_stream *cs, int type,
311 			uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport);
312 
/*
 * Tears down `stream`. NOTE(review): which resources are released and
 * whether `stream` may be used afterwards is not visible here -- confirm.
 */
void
DestroyTCPStream(mtcp_manager_t mtcp, tcp_stream *stream);

/* presumably prints the state of `stream` for debugging -- confirm */
void
DumpStream(mtcp_manager_t mtcp, tcp_stream *stream);
318 
/*
 * Query/control helpers that follow a getsockopt-like shape: results are
 * written through `optval` with `optlen` as an in/out length.
 * NOTE(review): return-value conventions and the accepted values of `side`
 * are not visible in this header -- confirm against the definitions.
 */
int
GetFragInfo(socket_map_t sock, int side, void *optval, socklen_t *optlen);

int
GetBufInfo(socket_map_t sock, int side, void *optval, socklen_t *optlen);

int
GetTCPState(struct tcp_stream *stream, int side,
			void *optval, socklen_t *optlen);

/* presumably turns off buffering for the given side of `sock` -- confirm */
int
DisableBuf(socket_map_t sock, int side);

/* presumably stores a timestamp in *usecs and its size in *sz -- confirm */
int
GetLastTimestamp(struct tcp_stream *stream, uint32_t *usecs, socklen_t *sz);
334 
/*
 * Sequence-number translation helpers.
 * NOTE(review): presumably TcpSeqChange records a drift of `seq_drift`
 * starting at `seqno` for one side of the connection, and FetchSeqDrift
 * looks up the drift that applies to `seqno` -- confirm against the
 * definitions.
 */
int
TcpSeqChange(socket_map_t socket, uint32_t seq_drift, int side, uint32_t seqno);

uint32_t
FetchSeqDrift(struct tcp_stream *stream, uint32_t seqno);

/* presumably seeds the generator used for initial sequence numbers -- confirm */
void
posix_seq_srand(unsigned seed);
343 
344 #endif /* __TCP_STREAM_H_ */
345