xref: /mOS-networking-stack/core/src/tcp_out.c (revision 30883c59)
1 #include <unistd.h>
2 #include <string.h>
3 
4 #include "tcp_out.h"
5 #include "mtcp.h"
6 #include "ip_in.h"
7 #include "ip_out.h"
8 #include "tcp_in.h"
9 #include "tcp.h"
10 #include "tcp_stream.h"
11 #include "eventpoll.h"
12 #include "timer.h"
13 #include "debug.h"
14 #include "config.h"
15 
/* Compile-time switches local to this file. */
#define TCP_CALCULATE_CHECKSUM		TRUE
#define ACK_PIGGYBACK			TRUE
/* Enable this for higher concurrency rate experiments */
#define TRY_SEND_BEFORE_QUEUE		TRUE	/* alternative: FALSE */

/* Upper bound applied to a window value before it is stored in the header. */
#define TCP_MAX_WINDOW 65535

/* NOTE: both macros evaluate one of their arguments twice. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
25 
26 /*----------------------------------------------------------------------------*/
27 static inline uint16_t
CalculateOptionLength(uint8_t flags)28 CalculateOptionLength(uint8_t flags)
29 {
30 	uint16_t optlen = 0;
31 
32 	if (flags & TCP_FLAG_SYN) {
33 		optlen += TCP_OPT_MSS_LEN;
34 #if TCP_OPT_SACK_ENABLED
35 		optlen += TCP_OPT_SACK_PERMIT_LEN;
36 #if !TCP_OPT_TIMESTAMP_ENABLED
37 		optlen += 2;	// insert NOP padding
38 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
39 #endif /* TCP_OPT_SACK_ENABLED */
40 
41 #if TCP_OPT_TIMESTAMP_ENABLED
42 		optlen += TCP_OPT_TIMESTAMP_LEN;
43 #if !TCP_OPT_SACK_ENABLED
44 		optlen += 2;	// insert NOP padding
45 #endif /* TCP_OPT_SACK_ENABLED */
46 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
47 
48 		optlen += TCP_OPT_WSCALE_LEN + 1;
49 
50 	} else {
51 
52 #if TCP_OPT_TIMESTAMP_ENABLED
53 		optlen += TCP_OPT_TIMESTAMP_LEN + 2;
54 #endif
55 
56 #if TCP_OPT_SACK_ENABLED
57 		if (flags & TCP_FLAG_SACK) {
58 			optlen += TCP_OPT_SACK_LEN + 2;
59 		}
60 #endif
61 	}
62 
63 	assert(optlen % 4 == 0);
64 
65 	return optlen;
66 }
67 /*----------------------------------------------------------------------------*/
68 static inline void
GenerateTCPTimestamp(tcp_stream * cur_stream,uint8_t * tcpopt,uint32_t cur_ts)69 GenerateTCPTimestamp(tcp_stream *cur_stream, uint8_t *tcpopt, uint32_t cur_ts)
70 {
71 	uint32_t *ts = (uint32_t *)(tcpopt + 2);
72 
73 	tcpopt[0] = TCP_OPT_TIMESTAMP;
74 	tcpopt[1] = TCP_OPT_TIMESTAMP_LEN;
75 	ts[0] = htonl(cur_ts);
76 	ts[1] = htonl(cur_stream->rcvvar->ts_recent);
77 }
78 /*----------------------------------------------------------------------------*/
79 static inline void
GenerateTCPOptions(tcp_stream * cur_stream,uint32_t cur_ts,uint8_t flags,uint8_t * tcpopt,uint16_t optlen)80 GenerateTCPOptions(tcp_stream *cur_stream, uint32_t cur_ts,
81 		uint8_t flags, uint8_t *tcpopt, uint16_t optlen)
82 {
83 	int i = 0;
84 
85 	if (flags & TCP_FLAG_SYN) {
86 		uint16_t mss;
87 
88 		/* MSS option */
89 		mss = cur_stream->sndvar->mss;
90 		tcpopt[i++] = TCP_OPT_MSS;
91 		tcpopt[i++] = TCP_OPT_MSS_LEN;
92 		tcpopt[i++] = mss >> 8;
93 		tcpopt[i++] = mss % 256;
94 
95 		/* SACK permit */
96 #if TCP_OPT_SACK_ENABLED
97 #if !TCP_OPT_TIMESTAMP_ENABLED
98 		tcpopt[i++] = TCP_OPT_NOP;
99 		tcpopt[i++] = TCP_OPT_NOP;
100 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
101 		tcpopt[i++] = TCP_OPT_SACK_PERMIT;
102 		tcpopt[i++] = TCP_OPT_SACK_PERMIT_LEN;
103 		TRACE_SACK("Local SACK permited.\n");
104 #endif /* TCP_OPT_SACK_ENABLED */
105 
106 		/* Timestamp */
107 #if TCP_OPT_TIMESTAMP_ENABLED
108 #if !TCP_OPT_SACK_ENABLED
109 		tcpopt[i++] = TCP_OPT_NOP;
110 		tcpopt[i++] = TCP_OPT_NOP;
111 #endif /* TCP_OPT_SACK_ENABLED */
112 		GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
113 		i += TCP_OPT_TIMESTAMP_LEN;
114 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
115 
116 		/* Window scale */
117 		tcpopt[i++] = TCP_OPT_NOP;
118 		tcpopt[i++] = TCP_OPT_WSCALE;
119 		tcpopt[i++] = TCP_OPT_WSCALE_LEN;
120 		tcpopt[i++] = cur_stream->sndvar->wscale_mine;
121 
122 	} else {
123 
124 #if TCP_OPT_TIMESTAMP_ENABLED
125 		tcpopt[i++] = TCP_OPT_NOP;
126 		tcpopt[i++] = TCP_OPT_NOP;
127 		GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
128 		i += TCP_OPT_TIMESTAMP_LEN;
129 #endif
130 
131 #if TCP_OPT_SACK_ENABLED
132 		if (flags & TCP_OPT_SACK) {
133 			// TODO: implement SACK support
134 		}
135 #endif
136 	}
137 
138 	assert (i == optlen);
139 }
140 /*----------------------------------------------------------------------------*/
141 int
SendTCPPacketStandalone(struct mtcp_manager * mtcp,uint32_t saddr,uint16_t sport,uint32_t daddr,uint16_t dport,uint32_t seq,uint32_t ack_seq,uint16_t window,uint8_t flags,uint8_t * payload,uint16_t payloadlen,uint32_t cur_ts,uint32_t echo_ts,uint16_t ip_id,int8_t in_ifidx)142 SendTCPPacketStandalone(struct mtcp_manager *mtcp,
143 		uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport,
144 		uint32_t seq, uint32_t ack_seq, uint16_t window, uint8_t flags,
145 		uint8_t *payload, uint16_t payloadlen,
146 		uint32_t cur_ts, uint32_t echo_ts, uint16_t ip_id, int8_t in_ifidx)
147 {
148 	struct tcphdr *tcph;
149 	uint8_t *tcpopt;
150 	uint32_t *ts;
151 	uint16_t optlen;
152 	struct pkt_ctx pctx;
153 	int rc = -1;
154 
155 	memset(&pctx, 0, sizeof(pctx));
156 	pctx.p.in_ifidx = in_ifidx;
157 	optlen = CalculateOptionLength(flags);
158 	if (payloadlen > TCP_DEFAULT_MSS + optlen) {
159 		TRACE_ERROR("Payload size exceeds MSS.\n");
160 		assert(0);
161 		return ERROR;
162 	}
163 
164 	tcph = (struct tcphdr *)IPOutputStandalone(mtcp, htons(ip_id),
165 			saddr, daddr, TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
166 	if (tcph == NULL) {
167 		return ERROR;
168 	}
169 	memset(tcph, 0, TCP_HEADER_LEN + optlen);
170 
171 	tcph->source = sport;
172 	tcph->dest = dport;
173 
174 	if (flags & TCP_FLAG_SYN)
175 		tcph->syn = TRUE;
176 	if (flags & TCP_FLAG_FIN)
177 		tcph->fin = TRUE;
178 	if (flags & TCP_FLAG_RST)
179 		tcph->rst = TRUE;
180 	if (flags & TCP_FLAG_PSH)
181 		tcph->psh = TRUE;
182 
183 	tcph->seq = htonl(seq);
184 	if (flags & TCP_FLAG_ACK) {
185 		tcph->ack = TRUE;
186 		tcph->ack_seq = htonl(ack_seq);
187 	}
188 
189 	tcph->window = htons(MIN(window, TCP_MAX_WINDOW));
190 
191 	tcpopt = (uint8_t *)tcph + TCP_HEADER_LEN;
192 	ts = (uint32_t *)(tcpopt + 4);
193 
194 	tcpopt[0] = TCP_OPT_NOP;
195 	tcpopt[1] = TCP_OPT_NOP;
196 	tcpopt[2] = TCP_OPT_TIMESTAMP;
197 	tcpopt[3] = TCP_OPT_TIMESTAMP_LEN;
198 	ts[0] = htonl(cur_ts);
199 	ts[1] = htonl(echo_ts);
200 
201 	tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
202 	// copy payload if exist
203 	if (payloadlen > 0) {
204 		memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
205 	}
206 
207 #if TCP_CALCULATE_CHECKSUM
208 	/* offload TCP checkum if possible */
209 	if (likely(mtcp->iom->dev_ioctl != NULL))
210 		rc = mtcp->iom->dev_ioctl(mtcp->ctx,
211 					  pctx.out_ifidx,
212 					  PKT_TX_TCP_CSUM,
213 					  pctx.p.iph);
214 	/* otherwise calculate TCP checksum in S/W */
215 	if (rc == -1)
216 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
217 					      TCP_HEADER_LEN +
218 					      optlen + payloadlen,
219 					      saddr, daddr);
220 #endif
221 
222 	if (tcph->syn || tcph->fin) {
223 		payloadlen++;
224 	}
225 
226 #ifdef PKTDUMP
227 	DumpPacket(mtcp,
228 			(char *)tcph - sizeof(struct iphdr) - sizeof(struct ethhdr),
229 			payloadlen + sizeof(struct iphdr) + sizeof(struct ethhdr),
230 			"OUT", -1);
231 #endif
232 
233 	struct mon_listener *walk;
234 	/* callback for monitor raw socket */
235 	TAILQ_FOREACH(walk, &mtcp->monitors, link)
236 		if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
237 			if (ISSET_BPFFILTER(walk->raw_pkt_fcode) &&
238 				EVAL_BPFFILTER(walk->raw_pkt_fcode, (uint8_t *)pctx.p.ethh,
239 							   pctx.p.eth_len))
240 				HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
241 							   &pctx, MOS_ON_PKT_IN);
242 
243 	return payloadlen;
244 }
245 /*----------------------------------------------------------------------------*/
246 int
SendTCPPacket(struct mtcp_manager * mtcp,tcp_stream * cur_stream,uint32_t cur_ts,uint8_t flags,uint8_t * payload,uint16_t payloadlen)247 SendTCPPacket(struct mtcp_manager *mtcp, tcp_stream *cur_stream,
248 		uint32_t cur_ts, uint8_t flags, uint8_t *payload, uint16_t payloadlen)
249 {
250 	struct tcphdr *tcph;
251 	uint16_t optlen;
252 	uint8_t wscale = 0;
253 	uint32_t window32 = 0;
254 	struct pkt_ctx pctx;
255 	int rc = -1;
256 
257 	memset(&pctx, 0, sizeof(pctx));
258 	optlen = CalculateOptionLength(flags);
259 	if (payloadlen > cur_stream->sndvar->mss + optlen) {
260 		TRACE_ERROR("Payload size exceeds MSS\n");
261 		return ERROR;
262 	}
263 
264 	tcph = (struct tcphdr *)IPOutput(mtcp, cur_stream,
265 			TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
266 	if (tcph == NULL) {
267 		return -2;
268 	}
269 	memset(tcph, 0, TCP_HEADER_LEN + optlen);
270 
271 	tcph->source = cur_stream->sport;
272 	tcph->dest = cur_stream->dport;
273 
274 	if (flags & TCP_FLAG_SYN) {
275 		tcph->syn = TRUE;
276 		if (cur_stream->snd_nxt != cur_stream->sndvar->iss) {
277 			TRACE_DBG("Stream %d: weird SYN sequence. "
278 					"snd_nxt: %u, iss: %u\n", cur_stream->id,
279 					cur_stream->snd_nxt, cur_stream->sndvar->iss);
280 		}
281 		TRACE_DBG("Stream %d: Sending SYN. seq: %u, ack_seq: %u\n",
282 			  cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
283 	}
284 	if (flags & TCP_FLAG_RST) {
285 		TRACE_FIN("Stream %d: Sending RST.\n", cur_stream->id);
286 		tcph->rst = TRUE;
287 	}
288 	if (flags & TCP_FLAG_PSH)
289 		tcph->psh = TRUE;
290 
291 	if (flags & TCP_FLAG_WACK) {
292 		tcph->seq = htonl(cur_stream->snd_nxt - 1);
293 		TRACE_CLWND("%u Sending ACK to get new window advertisement. "
294 				"seq: %u, peer_wnd: %u, snd_nxt - snd_una: %u\n",
295 				cur_stream->id,
296 				cur_stream->snd_nxt - 1, cur_stream->sndvar->peer_wnd,
297 				cur_stream->snd_nxt - cur_stream->sndvar->snd_una);
298 	} else if (flags & TCP_FLAG_FIN) {
299 		tcph->fin = TRUE;
300 
301 		if (cur_stream->sndvar->fss == 0) {
302 			TRACE_ERROR("Stream %u: not fss set. closed: %u\n",
303 					cur_stream->id, cur_stream->closed);
304 		}
305 		tcph->seq = htonl(cur_stream->sndvar->fss);
306 		cur_stream->sndvar->is_fin_sent = TRUE;
307 		TRACE_FIN("Stream %d: Sending FIN. seq: %u, ack_seq: %u\n",
308 				cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
309 	} else {
310 		tcph->seq = htonl(cur_stream->snd_nxt);
311 	}
312 
313 	if (flags & TCP_FLAG_ACK) {
314 		tcph->ack = TRUE;
315 		tcph->ack_seq = htonl(cur_stream->rcv_nxt);
316 		cur_stream->sndvar->ts_lastack_sent = cur_ts;
317 		cur_stream->last_active_ts = cur_ts;
318 		UpdateTimeoutList(mtcp, cur_stream);
319 	}
320 
321 	if (flags & TCP_FLAG_SYN) {
322 		wscale = 0;
323 	} else {
324 		wscale = cur_stream->sndvar->wscale_mine;
325 	}
326 
327 	window32 = cur_stream->rcvvar->rcv_wnd >> wscale;
328 	tcph->window = htons((uint16_t)MIN(window32, TCP_MAX_WINDOW));
329 	/* if the advertised window is 0, we need to advertise again later */
330 	if (window32 == 0) {
331 		cur_stream->need_wnd_adv = TRUE;
332 	}
333 
334 	GenerateTCPOptions(cur_stream, cur_ts, flags,
335 			(uint8_t *)tcph + TCP_HEADER_LEN, optlen);
336 
337 	tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
338 	// copy payload if exist
339 	if (payloadlen > 0) {
340 		memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
341 	}
342 
343 #if TCP_CALCULATE_CHECKSUM
344 	if (likely(mtcp->iom->dev_ioctl != NULL))
345 		rc = mtcp->iom->dev_ioctl(mtcp->ctx,
346 					  pctx.out_ifidx,
347 					  PKT_TX_TCP_CSUM,
348 					  pctx.p.iph);
349 	if (rc == -1)
350 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
351 					      TCP_HEADER_LEN +
352 					      optlen + payloadlen,
353 					      cur_stream->saddr,
354 					      cur_stream->daddr);
355 #endif
356 	cur_stream->snd_nxt += payloadlen;
357 
358 	if (tcph->syn || tcph->fin) {
359 		cur_stream->snd_nxt++;
360 		payloadlen++;
361 	}
362 
363 	if (payloadlen > 0) {
364 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
365 			TRACE_FIN("Payload after ESTABLISHED: length: %d, snd_nxt: %u\n",
366 				  payloadlen, cur_stream->snd_nxt);
367 		}
368 
369 		/* update retransmission timer if have payload */
370 		cur_stream->sndvar->ts_rto = cur_ts + cur_stream->sndvar->rto;
371 		TRACE_RTO("Updating retransmission timer. "
372 				"cur_ts: %u, rto: %u, ts_rto: %u\n",
373 				cur_ts, cur_stream->sndvar->rto, cur_stream->sndvar->ts_rto);
374 		AddtoRTOList(mtcp, cur_stream);
375 	}
376 
377 	struct mon_listener *walk;
378 	/* callback for monitor raw socket */
379 	TAILQ_FOREACH(walk, &mtcp->monitors, link)
380 		if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
381 			if (ISSET_BPFFILTER(walk->raw_pkt_fcode) &&
382 				EVAL_BPFFILTER(walk->raw_pkt_fcode, (uint8_t *)pctx.p.ethh,
383 							   pctx.p.eth_len))
384 				HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
385 							   &pctx, MOS_ON_PKT_IN);
386 
387 	if (mtcp->num_msp /* this means that stream monitor is on */) {
388 		FillPacketContextTCPInfo(&pctx, tcph);
389 
390 		/* New abstraction for monitor stream */
391 		struct tcp_stream *recvside_stream = cur_stream->pair_stream;
392 		struct tcp_stream *sendside_stream = cur_stream;
393 
394 		if (recvside_stream) {
395 			if (recvside_stream->rcvvar && recvside_stream->rcvvar->rcvbuf)
396 				pctx.p.offset = (uint64_t)seq2loff(recvside_stream->rcvvar->rcvbuf,
397 												   pctx.p.seq,
398 												   recvside_stream->rcvvar->irs + 1);
399 			UpdateMonitor(mtcp, sendside_stream, recvside_stream, &pctx, false);
400 		}
401 	}
402 
403 #ifdef PKTDUMP
404 	DumpPacket(mtcp,
405 			(char *)tcph - sizeof(struct iphdr) - sizeof(struct ethhdr),
406 			payloadlen + sizeof(struct iphdr) + sizeof(struct ethhdr),
407 			"OUT", -1);
408 #endif
409 
410 
411 	return payloadlen;
412 }
413 /*----------------------------------------------------------------------------*/
414 static int
FlushTCPSendingBuffer(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t cur_ts)415 FlushTCPSendingBuffer(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
416 {
417 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
418 	const uint32_t maxlen = sndvar->mss - CalculateOptionLength(TCP_FLAG_ACK);
419 	uint8_t *data;
420 	uint32_t buffered_len;
421 	uint32_t seq;
422 	uint16_t len;
423 	int16_t sndlen;
424 	uint32_t window;
425 	int packets = 0;
426 	uint8_t wack_sent = 0;
427 
428 	if (!sndvar->sndbuf) {
429 		TRACE_ERROR("Stream %d: No send buffer available.\n", cur_stream->id);
430 		assert(0);
431 		return 0;
432 	}
433 
434 	SBUF_LOCK(&sndvar->write_lock);
435 
436 	if (sndvar->sndbuf->len == 0) {
437 		packets = 0;
438 		goto out;
439 	}
440 
441 	window = MIN(sndvar->cwnd, sndvar->peer_wnd);
442 
443 	while (1) {
444 		seq = cur_stream->snd_nxt;
445 
446 		if (TCP_SEQ_LT(seq, sndvar->sndbuf->head_seq)) {
447 			TRACE_ERROR("Stream %d: Invalid sequence to send. "
448 					"state: %s, seq: %u, head_seq: %u.\n",
449 					cur_stream->id, TCPStateToString(cur_stream),
450 					seq, sndvar->sndbuf->head_seq);
451 			assert(0);
452 			break;
453 		}
454 		buffered_len = sndvar->sndbuf->head_seq + sndvar->sndbuf->len - seq;
455 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
456 			TRACE_FIN("head_seq: %u, len: %u, seq: %u, "
457 					"buffered_len: %u\n", sndvar->sndbuf->head_seq,
458 					sndvar->sndbuf->len, seq, buffered_len);
459 		}
460 		if (buffered_len == 0)
461 			break;
462 
463 		data = sndvar->sndbuf->head +
464 				(seq - sndvar->sndbuf->head_seq);
465 
466 		if (buffered_len > maxlen) {
467 			len = maxlen;
468 		} else {
469 			len = buffered_len;
470 		}
471 
472 		if (len > window)
473 			len = window;
474 
475 		if (len <= 0)
476 			break;
477 
478 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
479 			TRACE_FIN("Flushing after ESTABLISHED: seq: %u, len: %u, "
480 					"buffered_len: %u\n", seq, len, buffered_len);
481 		}
482 
483 		if (seq - sndvar->snd_una + len > window) {
484 			/* Ask for new window advertisement to peer */
485 			if (seq - sndvar->snd_una + len > sndvar->peer_wnd) {
486 				TRACE_DBG("Full peer window. "
487 					  "peer_wnd: %u, (snd_nxt-snd_una): %u\n",
488 					  sndvar->peer_wnd, seq - sndvar->snd_una);
489 				if (!wack_sent && TS_TO_MSEC(cur_ts - sndvar->ts_lastack_sent) > 500) {
490 					EnqueueACK(mtcp, cur_stream, cur_ts, ACK_OPT_WACK);
491 				} else
492 					wack_sent = 1;
493 			}
494 			packets = -3;
495 			goto out;
496 		}
497 
498 		sndlen = SendTCPPacket(mtcp, cur_stream, cur_ts,
499 				TCP_FLAG_ACK, data, len);
500 		if (sndlen < 0) {
501 			packets = sndlen;
502 			goto out;
503 		}
504 		packets++;
505 
506 		window -= len;
507 	}
508 
509  out:
510 	SBUF_UNLOCK(&sndvar->write_lock);
511 	return packets;
512 }
513 /*----------------------------------------------------------------------------*/
514 static inline int
SendControlPacket(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t cur_ts)515 SendControlPacket(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
516 {
517 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
518 	int ret = 0;
519     int flag = 0;
520 
521     switch (cur_stream->state) {
522        case TCP_ST_SYN_SENT: 		/* Send SYN here */
523           flag = TCP_FLAG_SYN;
524           break;
525        case TCP_ST_SYN_RCVD:        /* Send SYN/ACK here */
526           cur_stream->snd_nxt = sndvar->iss;
527           flag = TCP_FLAG_SYN | TCP_FLAG_ACK;
528           break;
529        case TCP_ST_ESTABLISHED:     /* Send ACK here */
530        case TCP_ST_CLOSE_WAIT:	    /* Send ACK for the FIN here */
531        case TCP_ST_FIN_WAIT_2:      /* Send ACK here */
532        case TCP_ST_TIME_WAIT:       /* Send ACK here */
533           flag = TCP_FLAG_ACK;
534           break;
535        case TCP_ST_LAST_ACK:
536        case TCP_ST_FIN_WAIT_1:
537           /* if it is on ack_list, send it after sending ack */
538           if (sndvar->on_send_list || sndvar->on_ack_list)
539              return (-1);
540           flag = TCP_FLAG_FIN | TCP_FLAG_ACK; /* Send FIN/ACK here */
541           break;
542        case TCP_ST_CLOSING:
543           if (sndvar->is_fin_sent) {
544              /* if the sequence is for FIN, send FIN */
545              flag = (cur_stream->snd_nxt == sndvar->fss) ?
546                 (TCP_FLAG_FIN | TCP_FLAG_ACK) : TCP_FLAG_ACK;
547           } else {
548              /* if FIN is not sent, send fin with ack */
549              flag = TCP_FLAG_FIN | TCP_FLAG_ACK;
550           }
551        case TCP_ST_CLOSED_RSVD: /* Send RST here */
552           TRACE_DBG("Stream %d: Try sending RST (TCP_ST_CLOSED_RSVD)\n",
553                     cur_stream->id);
554           /* first flush the data and ack */
555           if (sndvar->on_send_list || sndvar->on_ack_list)
556              return (-1);
557           ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_RST, NULL, 0);
558           if (ret >= 0)
559              DestroyTCPStream(mtcp, cur_stream);
560           return (ret);
561        default:
562           TRACE_ERROR("Stream %d: shouldn't send a control packet\n",
563                       cur_stream->id);
564           assert(0); /* can't reach here! */
565           return (0);
566     }
567 
568     return SendTCPPacket(mtcp, cur_stream, cur_ts, flag, NULL, 0);
569 }
570 /*----------------------------------------------------------------------------*/
571 inline int
WriteTCPControlList(mtcp_manager_t mtcp,struct mtcp_sender * sender,uint32_t cur_ts,int thresh)572 WriteTCPControlList(mtcp_manager_t mtcp,
573 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
574 {
575 	tcp_stream *cur_stream;
576 	tcp_stream *next, *last;
577 	int cnt = 0;
578 	int ret;
579 
580 	thresh = MIN(thresh, sender->control_list_cnt);
581 
582 	/* Send TCP control messages */
583 	cnt = 0;
584 	cur_stream = TAILQ_FIRST(&sender->control_list);
585 	last = TAILQ_LAST(&sender->control_list, control_head);
586 	while (cur_stream) {
587 		if (++cnt > thresh)
588 			break;
589 
590 		TRACE_LOOP("Inside control loop. cnt: %u, stream: %d\n",
591 				cnt, cur_stream->id);
592 		next = TAILQ_NEXT(cur_stream, sndvar->control_link);
593 
594 		TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
595 		sender->control_list_cnt--;
596 
597 		if (cur_stream->sndvar->on_control_list) {
598 			cur_stream->sndvar->on_control_list = FALSE;
599 			//TRACE_DBG("Stream %u: Sending control packet\n", cur_stream->id);
600 			ret = SendControlPacket(mtcp, cur_stream, cur_ts);
601 			if (ret < 0) {
602 				TAILQ_INSERT_HEAD(&sender->control_list,
603 						cur_stream, sndvar->control_link);
604 				cur_stream->sndvar->on_control_list = TRUE;
605 				sender->control_list_cnt++;
606 				/* since there is no available write buffer, break */
607 				break;
608 			}
609 		} else {
610 			TRACE_ERROR("Stream %d: not on control list.\n", cur_stream->id);
611 		}
612 
613 		if (cur_stream == last)
614 			break;
615 		cur_stream = next;
616 	}
617 
618 	return cnt;
619 }
620 /*----------------------------------------------------------------------------*/
621 inline int
WriteTCPDataList(mtcp_manager_t mtcp,struct mtcp_sender * sender,uint32_t cur_ts,int thresh)622 WriteTCPDataList(mtcp_manager_t mtcp,
623 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
624 {
625 	tcp_stream *cur_stream;
626 	tcp_stream *next, *last;
627 	int cnt = 0;
628 	int ret;
629 
630 	/* Send data */
631 	cnt = 0;
632 	cur_stream = TAILQ_FIRST(&sender->send_list);
633 	last = TAILQ_LAST(&sender->send_list, send_head);
634 	while (cur_stream) {
635 		if (++cnt > thresh)
636 			break;
637 
638 		TRACE_LOOP("Inside send loop. cnt: %u, stream: %d\n",
639 				cnt, cur_stream->id);
640 		next = TAILQ_NEXT(cur_stream, sndvar->send_link);
641 
642 		TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
643 		if (cur_stream->sndvar->on_send_list) {
644 			ret = 0;
645 
646 			/* Send data here */
647 			/* Only can send data when ESTABLISHED or CLOSE_WAIT */
648 			if (cur_stream->state == TCP_ST_ESTABLISHED) {
649 				if (cur_stream->sndvar->on_control_list) {
650 					/* delay sending data after until on_control_list becomes off */
651 					//TRACE_DBG("Stream %u: delay sending data.\n", cur_stream->id);
652 					ret = -1;
653 				} else {
654 					ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
655 				}
656 			} else if (cur_stream->state == TCP_ST_CLOSE_WAIT ||
657 					cur_stream->state == TCP_ST_FIN_WAIT_1 ||
658 					cur_stream->state == TCP_ST_LAST_ACK) {
659 				ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
660 			} else {
661 				TRACE_DBG("Stream %d: on_send_list at state %s\n",
662 						cur_stream->id, TCPStateToString(cur_stream));
663 #if DUMP_STREAM
664 				DumpStream(mtcp, cur_stream);
665 #endif
666 			}
667 
668 			if (ret < 0) {
669 				TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
670 				/* since there is no available write buffer, break */
671 				break;
672 
673 			} else {
674 				cur_stream->sndvar->on_send_list = FALSE;
675 				sender->send_list_cnt--;
676 				/* the ret value is the number of packets sent. */
677 				/* decrease ack_cnt for the piggybacked acks */
678 #if ACK_PIGGYBACK
679 				if (cur_stream->sndvar->ack_cnt > 0) {
680 					if (cur_stream->sndvar->ack_cnt > ret) {
681 						cur_stream->sndvar->ack_cnt -= ret;
682 					} else {
683 						cur_stream->sndvar->ack_cnt = 0;
684 					}
685 				}
686 #endif
687 #if 1
688 				if (cur_stream->control_list_waiting) {
689 					if (!cur_stream->sndvar->on_ack_list) {
690 						cur_stream->control_list_waiting = FALSE;
691 						AddtoControlList(mtcp, cur_stream, cur_ts);
692 					}
693 				}
694 #endif
695 			}
696 		} else {
697 			TRACE_ERROR("Stream %d: not on send list.\n", cur_stream->id);
698 #ifdef DUMP_STREAM
699 			DumpStream(mtcp, cur_stream);
700 #endif
701 		}
702 
703 		if (cur_stream == last)
704 			break;
705 		cur_stream = next;
706 	}
707 
708 	return cnt;
709 }
710 /*----------------------------------------------------------------------------*/
711 inline int
WriteTCPACKList(mtcp_manager_t mtcp,struct mtcp_sender * sender,uint32_t cur_ts,int thresh)712 WriteTCPACKList(mtcp_manager_t mtcp,
713 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
714 {
715 	tcp_stream *cur_stream;
716 	tcp_stream *next, *last;
717 	int to_ack;
718 	int cnt = 0;
719 	int ret;
720 
721 	/* Send aggregated acks */
722 	cnt = 0;
723 	cur_stream = TAILQ_FIRST(&sender->ack_list);
724 	last = TAILQ_LAST(&sender->ack_list, ack_head);
725 	while (cur_stream) {
726 		if (++cnt > thresh)
727 			break;
728 
729 		TRACE_LOOP("Inside ack loop. cnt: %u\n", cnt);
730 		next = TAILQ_NEXT(cur_stream, sndvar->ack_link);
731 
732 		if (cur_stream->sndvar->on_ack_list) {
733 			/* this list is only to ack the data packets */
734 			/* if the ack is not data ack, then it will not process here */
735 			to_ack = FALSE;
736 			if (cur_stream->state == TCP_ST_ESTABLISHED ||
737 					cur_stream->state == TCP_ST_CLOSE_WAIT ||
738 					cur_stream->state == TCP_ST_FIN_WAIT_1 ||
739 					cur_stream->state == TCP_ST_FIN_WAIT_2 ||
740 					cur_stream->state == TCP_ST_TIME_WAIT) {
741 				/* TIMEWAIT is possible since the ack is queued
742 				   at FIN_WAIT_2 */
743 				tcprb_t *rb;
744 				if ((rb = cur_stream->rcvvar->rcvbuf) &&
745 					TCP_SEQ_LEQ(cur_stream->rcv_nxt,
746 						(cur_stream->rcvvar->irs + 1) + rb->pile
747 						+ tcprb_cflen(rb))) {
748 					to_ack = TRUE;
749 				}
750 			} else {
751 				TRACE_DBG("Stream %u (%s): "
752 						"Try sending ack at not proper state. "
753 						"seq: %u, ack_seq: %u, on_control_list: %u\n",
754 						cur_stream->id, TCPStateToString(cur_stream),
755 						cur_stream->snd_nxt, cur_stream->rcv_nxt,
756 						cur_stream->sndvar->on_control_list);
757 #ifdef DUMP_STREAM
758 				DumpStream(mtcp, cur_stream);
759 #endif
760 			}
761 
762 			if (to_ack) {
763 				/* send the queued ack packets */
764 				while (cur_stream->sndvar->ack_cnt > 0) {
765 					ret = SendTCPPacket(mtcp, cur_stream,
766 							cur_ts, TCP_FLAG_ACK, NULL, 0);
767 					if (ret < 0) {
768 						/* since there is no available write buffer, break */
769 						break;
770 					}
771 					cur_stream->sndvar->ack_cnt--;
772 				}
773 
774 				/* if is_wack is set, send packet to get window advertisement */
775 				if (cur_stream->sndvar->is_wack) {
776 					cur_stream->sndvar->is_wack = FALSE;
777 					ret = SendTCPPacket(mtcp, cur_stream,
778 							cur_ts, TCP_FLAG_ACK | TCP_FLAG_WACK, NULL, 0);
779 					if (ret < 0) {
780 						/* since there is no available write buffer, break */
781 						cur_stream->sndvar->is_wack = TRUE;
782 					}
783 				}
784 
785 				if (!(cur_stream->sndvar->ack_cnt || cur_stream->sndvar->is_wack)) {
786 					cur_stream->sndvar->on_ack_list = FALSE;
787 					TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
788 					sender->ack_list_cnt--;
789 				}
790 			} else {
791 				cur_stream->sndvar->on_ack_list = FALSE;
792 				cur_stream->sndvar->ack_cnt = 0;
793 				cur_stream->sndvar->is_wack = 0;
794 				TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
795 				sender->ack_list_cnt--;
796 			}
797 
798 			if (cur_stream->control_list_waiting) {
799 				if (!cur_stream->sndvar->on_send_list) {
800 					cur_stream->control_list_waiting = FALSE;
801 					AddtoControlList(mtcp, cur_stream, cur_ts);
802 				}
803 			}
804 		} else {
805 			TRACE_ERROR("Stream %d: not on ack list.\n", cur_stream->id);
806 			TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
807 			sender->ack_list_cnt--;
808 #ifdef DUMP_STREAM
809 			thread_printf(mtcp, mtcp->log_fp,
810 					"Stream %u: not on ack list.\n", cur_stream->id);
811 			DumpStream(mtcp, cur_stream);
812 #endif
813 		}
814 
815 		if (cur_stream == last)
816 			break;
817 		cur_stream = next;
818 	}
819 
820 	return cnt;
821 }
822 /*----------------------------------------------------------------------------*/
823 inline struct mtcp_sender *
GetSender(mtcp_manager_t mtcp,tcp_stream * cur_stream)824 GetSender(mtcp_manager_t mtcp, tcp_stream *cur_stream)
825 {
826 	if (cur_stream->sndvar->nif_out < 0) {
827 		return mtcp->g_sender;
828 
829 	} else if (cur_stream->sndvar->nif_out >= g_config.mos->netdev_table->num) {
830 		TRACE_ERROR("(NEVER HAPPEN) Failed to find appropriate sender.\n");
831 		return NULL;
832 
833 	} else {
834 		return mtcp->n_sender[cur_stream->sndvar->nif_out];
835 	}
836 }
837 /*----------------------------------------------------------------------------*/
838 inline void
AddtoControlList(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t cur_ts)839 AddtoControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
840 {
841 #if TRY_SEND_BEFORE_QUEUE
842 	int ret;
843 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
844 	assert(sender != NULL);
845 
846 	ret = SendControlPacket(mtcp, cur_stream, cur_ts);
847 	if (ret < 0) {
848 #endif
849 		if (!cur_stream->sndvar->on_control_list) {
850 			struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
851 			assert(sender != NULL);
852 
853 			cur_stream->sndvar->on_control_list = TRUE;
854 			TAILQ_INSERT_TAIL(&sender->control_list, cur_stream, sndvar->control_link);
855 			sender->control_list_cnt++;
856 			//TRACE_DBG("Stream %u: added to control list (cnt: %d)\n",
857 			//		cur_stream->id, sender->control_list_cnt);
858 		}
859 #if TRY_SEND_BEFORE_QUEUE
860 	} else {
861 		if (cur_stream->sndvar->on_control_list) {
862 			cur_stream->sndvar->on_control_list = FALSE;
863 			TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
864 			sender->control_list_cnt--;
865 		}
866 	}
867 #endif
868 }
869 /*----------------------------------------------------------------------------*/
870 inline void
AddtoSendList(mtcp_manager_t mtcp,tcp_stream * cur_stream)871 AddtoSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
872 {
873 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
874 	assert(sender != NULL);
875 
876 	if(!cur_stream->sndvar->sndbuf) {
877 		TRACE_ERROR("[%d] Stream %d: No send buffer available.\n",
878 				mtcp->ctx->cpu,
879 				cur_stream->id);
880 		assert(0);
881 		return;
882 	}
883 
884 	if (!cur_stream->sndvar->on_send_list) {
885 		cur_stream->sndvar->on_send_list = TRUE;
886 		TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
887 		sender->send_list_cnt++;
888 	}
889 }
890 /*----------------------------------------------------------------------------*/
891 inline void
AddtoACKList(mtcp_manager_t mtcp,tcp_stream * cur_stream)892 AddtoACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
893 {
894 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
895 	assert(sender != NULL);
896 
897 	if (!cur_stream->sndvar->on_ack_list) {
898 		cur_stream->sndvar->on_ack_list = TRUE;
899 		TAILQ_INSERT_TAIL(&sender->ack_list, cur_stream, sndvar->ack_link);
900 		sender->ack_list_cnt++;
901 	}
902 }
903 /*----------------------------------------------------------------------------*/
904 inline void
RemoveFromControlList(mtcp_manager_t mtcp,tcp_stream * cur_stream)905 RemoveFromControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
906 {
907 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
908 	assert(sender != NULL);
909 
910 	if (cur_stream->sndvar->on_control_list) {
911 		cur_stream->sndvar->on_control_list = FALSE;
912 		TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
913 		sender->control_list_cnt--;
914 		//TRACE_DBG("Stream %u: Removed from control list (cnt: %d)\n",
915 		//		cur_stream->id, sender->control_list_cnt);
916 	}
917 }
918 /*----------------------------------------------------------------------------*/
919 inline void
RemoveFromSendList(mtcp_manager_t mtcp,tcp_stream * cur_stream)920 RemoveFromSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
921 {
922 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
923 	assert(sender != NULL);
924 
925 	if (cur_stream->sndvar->on_send_list) {
926 		cur_stream->sndvar->on_send_list = FALSE;
927 		TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
928 		sender->send_list_cnt--;
929 	}
930 }
931 /*----------------------------------------------------------------------------*/
932 inline void
RemoveFromACKList(mtcp_manager_t mtcp,tcp_stream * cur_stream)933 RemoveFromACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
934 {
935 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
936 	assert(sender != NULL);
937 
938 	if (cur_stream->sndvar->on_ack_list) {
939 		cur_stream->sndvar->on_ack_list = FALSE;
940 		TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
941 		sender->ack_list_cnt--;
942 	}
943 }
944 /*----------------------------------------------------------------------------*/
945 inline void
EnqueueACK(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t cur_ts,uint8_t opt)946 EnqueueACK(mtcp_manager_t mtcp,
947 		tcp_stream *cur_stream, uint32_t cur_ts, uint8_t opt)
948 {
949 	if (!(cur_stream->state == TCP_ST_ESTABLISHED ||
950 			cur_stream->state == TCP_ST_CLOSE_WAIT ||
951 			cur_stream->state == TCP_ST_FIN_WAIT_1 ||
952 			cur_stream->state == TCP_ST_FIN_WAIT_2)) {
953 		TRACE_DBG("Stream %u: Enqueueing ack at state %s\n",
954 				cur_stream->id, TCPStateToString(cur_stream));
955 	}
956 
957 	if (opt == ACK_OPT_NOW) {
958 		if (cur_stream->sndvar->ack_cnt < cur_stream->sndvar->ack_cnt + 1) {
959 			cur_stream->sndvar->ack_cnt++;
960 		}
961 	} else if (opt == ACK_OPT_AGGREGATE) {
962 		if (cur_stream->sndvar->ack_cnt == 0) {
963 			cur_stream->sndvar->ack_cnt = 1;
964 		}
965 	} else if (opt == ACK_OPT_WACK) {
966 		cur_stream->sndvar->is_wack = TRUE;
967 	}
968 	AddtoACKList(mtcp, cur_stream);
969 }
970 /*----------------------------------------------------------------------------*/
971 inline void
DumpControlList(mtcp_manager_t mtcp,struct mtcp_sender * sender)972 DumpControlList(mtcp_manager_t mtcp, struct mtcp_sender *sender)
973 {
974 	tcp_stream *stream;
975 
976 	TRACE_DBG("Dumping control list (count: %d):\n", sender->control_list_cnt);
977 	TAILQ_FOREACH(stream, &sender->control_list, sndvar->control_link) {
978 		TRACE_DBG("Stream id: %u in control list\n", stream->id);
979 	}
980 }
981 /*----------------------------------------------------------------------------*/
982 static inline void
UpdatePassiveSendTCPContext_SynSent(struct tcp_stream * cur_stream,struct pkt_ctx * pctx)983 UpdatePassiveSendTCPContext_SynSent(struct tcp_stream *cur_stream,
984 				    struct pkt_ctx *pctx)
985 {
986 	assert(cur_stream);
987 	assert(pctx);
988 
989 	/* add event */
990 	if (cur_stream->state < TCP_ST_SYN_SENT) {
991 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
992 		cur_stream->cb_events |= MOS_ON_CONN_START;
993 	}
994 	/* initialize TCP send variables of send-side stream */
995 	cur_stream->sndvar->cwnd = 1;
996 	cur_stream->sndvar->ssthresh = cur_stream->sndvar->mss * 10;
997 	cur_stream->sndvar->ip_id = htons(pctx->p.iph->id);
998 	cur_stream->sndvar->iss = pctx->p.seq;
999 	cur_stream->snd_nxt = pctx->p.seq + 1;
1000 	cur_stream->state = TCP_ST_SYN_SENT;
1001 	cur_stream->last_active_ts = pctx->p.cur_ts;
1002 
1003 	/* receive-side conn start event can also be tagged here */
1004 	/* blocked since tcp_in.c takes care of this.. */
1005 	/* cur_stream->pair_stream->cb_events |= MOS_ON_CONN_START; */
1006 }
1007 /*----------------------------------------------------------------------------*/
1008 /**
1009  * Called (when monitoring mode is enabled).. for every incoming packet from the
1010  * NIC.
1011  */
1012 void
UpdatePassiveSendTCPContext(mtcp_manager_t mtcp,struct tcp_stream * cur_stream,struct pkt_ctx * pctx)1013 UpdatePassiveSendTCPContext(mtcp_manager_t mtcp, struct tcp_stream *cur_stream,
1014 			    struct pkt_ctx *pctx)
1015 {
1016 	struct tcphdr *tcph;
1017 
1018 	assert(cur_stream);
1019 	tcph = pctx->p.tcph;
1020 
1021 	/* if it is a new TCP stream from client */
1022 	if (tcph->syn && !tcph->ack && cur_stream->state <= TCP_ST_SYN_SENT) {
1023 		TRACE_STATE("Stream %d: %s\n",
1024 			    cur_stream->id, TCPStateToString(cur_stream));
1025 		UpdatePassiveSendTCPContext_SynSent(cur_stream, pctx);
1026 		AddtoTimeoutList(mtcp, cur_stream);
1027 		return;
1028 	}
1029 
1030 	if (tcph->ack) {
1031 		cur_stream->sndvar->ts_lastack_sent = pctx->p.cur_ts;
1032 		cur_stream->last_active_ts = pctx->p.cur_ts;
1033 	}
1034 
1035 	cur_stream->snd_nxt = pctx->p.seq + pctx->p.payloadlen;
1036 
1037 	/* test for reset packet */
1038 	if (tcph->rst) {
1039 		cur_stream->have_reset = TRUE;
1040 		/* test for reset packet */
1041 		cur_stream->state = TCP_ST_CLOSED_RSVD;
1042 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1043 		TRACE_STATE("Stream %d: %s\n",
1044 				cur_stream->id,
1045 				TCPStateToString(cur_stream));
1046 		return;
1047 	}
1048 
1049 	/*
1050 	 * for all others, state transitioning is based on
1051 	 * current tcp_stream state
1052 	 */
1053 	switch (cur_stream->state) {
1054 	case TCP_ST_SYN_SENT:
1055 		/* control should not come here */
1056 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1057 #ifdef BE_RESILIENT_TO_PACKET_DROP
1058 		if (tcph->ack && TCP_SEQ_GT(pctx->p.seq, cur_stream->sndvar->iss)) {
1059 			cur_stream->state = TCP_ST_ESTABLISHED;
1060 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1061 			cur_stream->snd_nxt = pctx->p.seq;
1062 			cur_stream->rcv_nxt = pctx->p.ack_seq;
1063 			goto __Handle_TCP_ST_ESTABLISHED;
1064 		}
1065 #endif
1066 		break;
1067 	case TCP_ST_SYN_RCVD:
1068 		if (!tcph->ack)
1069 			break;
1070 
1071 		if (tcph->syn) {
1072 			cur_stream->sndvar->iss = pctx->p.seq;
1073 			cur_stream->snd_nxt = cur_stream->sndvar->iss + 1;
1074 			TRACE_DBG("Stream %d (TCP_ST_SYN_RCVD): "
1075 				  "setting seq: %u = iss\n",
1076 				  cur_stream->id, pctx->p.seq);
1077 		}
1078 #ifdef BE_RESILIENT_TO_PACKET_DROP
1079 		else {
1080 			cur_stream->state = TCP_ST_ESTABLISHED;
1081 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1082 			cur_stream->snd_nxt = pctx->p.seq;
1083 			cur_stream->rcv_nxt = pctx->p.ack_seq;
1084 			goto __Handle_TCP_ST_ESTABLISHED;
1085 		}
1086 #endif
1087 		TRACE_STATE("Stream %d: %s\n",
1088 			    cur_stream->id,
1089 			    TCPStateToString(cur_stream));
1090 		break;
1091 	case TCP_ST_ESTABLISHED:
1092 #ifdef BE_RESILIENT_TO_PACKET_DROP
1093 __Handle_TCP_ST_ESTABLISHED:
1094 #endif
1095 		/* if application decides to close, fin pkt is sent */
1096 #ifdef BE_RESILIENT_TO_PACKET_DROP
1097 		if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
1098 		{
1099 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
1100 					"Move rcv_nxt from %u to %u.\n",
1101 					cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
1102 			cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
1103 		}
1104 #endif
1105 		if (tcph->fin) {
1106 			cur_stream->state = TCP_ST_FIN_WAIT_1;
1107 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1108 			cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
1109 			cur_stream->sndvar->is_fin_sent = TRUE;
1110 			cur_stream->snd_nxt++;
1111 			TRACE_STATE("Stream %d: %s\n",
1112 				    cur_stream->id,
1113 				    TCPStateToString(cur_stream));
1114 		} else {
1115 			/* creating tcp send buffer still pending.. */
1116 			/* do we need peek for send buffer? */
1117 		}
1118 		break;
1119 	case TCP_ST_CLOSE_WAIT:
1120 		/* if application decides to close, fin pkt is sent */
1121 #ifdef BE_RESILIENT_TO_PACKET_DROP
1122 		if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
1123 		{
1124 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
1125 					"Move rcv_nxt from %u to %u.\n",
1126 					cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
1127 			cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
1128 		}
1129 #endif
1130 		if (tcph->fin) {
1131 			cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
1132 			cur_stream->sndvar->is_fin_sent = TRUE;
1133 			cur_stream->snd_nxt++;
1134 
1135 			/* verify whether the FIN from the other end is acked */
1136 			if ((tcph->ack) && (ntohl(tcph->ack_seq) == cur_stream->rcv_nxt))
1137 				cur_stream->state = TCP_ST_LAST_ACK;
1138 			else
1139 				cur_stream->state = TCP_ST_CLOSING;
1140 
1141 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1142 			TRACE_STATE("Stream %d: %s\n",
1143 				    cur_stream->id,
1144 				    TCPStateToString(cur_stream));
1145 		} else if (tcph->ack) {
1146 			TRACE_STATE("Stream %d: %s\n",
1147 				    cur_stream->id,
1148 				    TCPStateToString(cur_stream));
1149 		}
1150 		break;
1151 	case TCP_ST_LAST_ACK:
1152 		/* control should not come here */
1153 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1154 		break;
1155 	case TCP_ST_FIN_WAIT_1:
1156 		/* control should not come here */
1157 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1158 		break;
1159 	case TCP_ST_FIN_WAIT_2:
1160 		/* control should not come here */
1161 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1162 		break;
1163 	case TCP_ST_CLOSING:
1164 		/* control should not come here */
1165 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1166 		break;
1167 	case TCP_ST_TIME_WAIT:
1168 		/* control may come here but... */
1169 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1170 		if (tcph->ack) {
1171 			TRACE_STATE("Stream %d: %s\n",
1172 				    cur_stream->id,
1173 				    TCPStateToString(cur_stream));
1174 		}
1175 		break;
1176 	case TCP_ST_CLOSED:
1177 	case TCP_ST_CLOSED_RSVD:
1178 		/* Waiting to be destroyed */
1179 		break;
1180 	default:
1181 		TRACE_DBG("This should not happen.. Error state: %s reached!\n"
1182 			  "tcph->syn: %d, tcph->ack: %d\n",
1183 			  TCPStateToString(cur_stream), pctx->p.tcph->syn,
1184 			  pctx->p.tcph->ack);
1185 		assert(0);
1186 		/* This will be enabled once passiverecvcontext is completed */
1187 		/*exit(EXIT_FAILURE);*/
1188 	}
1189 
1190 	UNUSED(mtcp);
1191 	return;
1192 }
1193 /*----------------------------------------------------------------------------*/
1194 void
PostSendTCPAction(mtcp_manager_t mtcp,struct pkt_ctx * pctx,struct tcp_stream * recvside_stream,struct tcp_stream * sendside_stream)1195 PostSendTCPAction(mtcp_manager_t mtcp, struct pkt_ctx *pctx,
1196 		  struct tcp_stream *recvside_stream,
1197 		  struct tcp_stream *sendside_stream)
1198 {
1199 	/* this is empty for the time being */
1200 }
1201 /*----------------------------------------------------------------------------*/
1202