xref: /mOS-networking-stack/core/src/tcp_out.c (revision a834ea89)
1 #include <unistd.h>
2 #include <string.h>
3 
4 #include "tcp_out.h"
5 #include "mtcp.h"
6 #include "ip_in.h"
7 #include "ip_out.h"
8 #include "tcp_in.h"
9 #include "tcp.h"
10 #include "tcp_stream.h"
11 #include "eventpoll.h"
12 #include "timer.h"
13 #include "debug.h"
14 #include "config.h"
15 
/* Fill in the TCP checksum on transmit (NIC offload first, software fallback). */
#define TCP_CALCULATE_CHECKSUM		TRUE
/* Let outgoing data segments consume pending ACK credits of the stream. */
#define ACK_PIGGYBACK			TRUE
/*
 * Try to transmit a control packet immediately before queueing it.
 * Enable this for higher concurrency rate experiments (set to FALSE to
 * always queue first).
 */
#define TRY_SEND_BEFORE_QUEUE		TRUE

/* Largest value that fits in the 16-bit TCP window field. */
#define TCP_MAX_WINDOW 65535

/* NOTE: both macros evaluate their arguments more than once. */
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define MIN(x, y) ((x) < (y) ? (x) : (y))
25 
26 extern struct pkt_info *
27 ClonePacketCtx(struct pkt_info *to, unsigned char *frame, struct pkt_ctx *from);
28 
29 /*----------------------------------------------------------------------------*/
30 static inline uint16_t
31 CalculateOptionLength(uint8_t flags)
32 {
33 	uint16_t optlen = 0;
34 
35 	if (flags & TCP_FLAG_SYN) {
36 		optlen += TCP_OPT_MSS_LEN;
37 #if TCP_OPT_SACK_ENABLED
38 		optlen += TCP_OPT_SACK_PERMIT_LEN;
39 #if !TCP_OPT_TIMESTAMP_ENABLED
40 		optlen += 2;	// insert NOP padding
41 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
42 #endif /* TCP_OPT_SACK_ENABLED */
43 
44 #if TCP_OPT_TIMESTAMP_ENABLED
45 		optlen += TCP_OPT_TIMESTAMP_LEN;
46 #if !TCP_OPT_SACK_ENABLED
47 		optlen += 2;	// insert NOP padding
48 #endif /* TCP_OPT_SACK_ENABLED */
49 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
50 
51 		optlen += TCP_OPT_WSCALE_LEN + 1;
52 
53 	} else {
54 
55 #if TCP_OPT_TIMESTAMP_ENABLED
56 		optlen += TCP_OPT_TIMESTAMP_LEN + 2;
57 #endif
58 
59 #if TCP_OPT_SACK_ENABLED
60 		if (flags & TCP_FLAG_SACK) {
61 			optlen += TCP_OPT_SACK_LEN + 2;
62 		}
63 #endif
64 	}
65 
66 	assert(optlen % 4 == 0);
67 
68 	return optlen;
69 }
70 /*----------------------------------------------------------------------------*/
71 static inline void
72 GenerateTCPTimestamp(tcp_stream *cur_stream, uint8_t *tcpopt, uint32_t cur_ts)
73 {
74 	uint32_t *ts = (uint32_t *)(tcpopt + 2);
75 
76 	tcpopt[0] = TCP_OPT_TIMESTAMP;
77 	tcpopt[1] = TCP_OPT_TIMESTAMP_LEN;
78 	ts[0] = htonl(cur_ts);
79 	ts[1] = htonl(cur_stream->rcvvar->ts_recent);
80 }
81 /*----------------------------------------------------------------------------*/
82 static inline void
83 GenerateTCPOptions(tcp_stream *cur_stream, uint32_t cur_ts,
84 		uint8_t flags, uint8_t *tcpopt, uint16_t optlen)
85 {
86 	int i = 0;
87 
88 	if (flags & TCP_FLAG_SYN) {
89 		uint16_t mss;
90 
91 		/* MSS option */
92 		mss = cur_stream->sndvar->mss;
93 		tcpopt[i++] = TCP_OPT_MSS;
94 		tcpopt[i++] = TCP_OPT_MSS_LEN;
95 		tcpopt[i++] = mss >> 8;
96 		tcpopt[i++] = mss % 256;
97 
98 		/* SACK permit */
99 #if TCP_OPT_SACK_ENABLED
100 #if !TCP_OPT_TIMESTAMP_ENABLED
101 		tcpopt[i++] = TCP_OPT_NOP;
102 		tcpopt[i++] = TCP_OPT_NOP;
103 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
104 		tcpopt[i++] = TCP_OPT_SACK_PERMIT;
105 		tcpopt[i++] = TCP_OPT_SACK_PERMIT_LEN;
106 		TRACE_SACK("Local SACK permited.\n");
107 #endif /* TCP_OPT_SACK_ENABLED */
108 
109 		/* Timestamp */
110 #if TCP_OPT_TIMESTAMP_ENABLED
111 #if !TCP_OPT_SACK_ENABLED
112 		tcpopt[i++] = TCP_OPT_NOP;
113 		tcpopt[i++] = TCP_OPT_NOP;
114 #endif /* TCP_OPT_SACK_ENABLED */
115 		GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
116 		i += TCP_OPT_TIMESTAMP_LEN;
117 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
118 
119 		/* Window scale */
120 		tcpopt[i++] = TCP_OPT_NOP;
121 		tcpopt[i++] = TCP_OPT_WSCALE;
122 		tcpopt[i++] = TCP_OPT_WSCALE_LEN;
123 		tcpopt[i++] = cur_stream->sndvar->wscale_mine;
124 
125 	} else {
126 
127 #if TCP_OPT_TIMESTAMP_ENABLED
128 		tcpopt[i++] = TCP_OPT_NOP;
129 		tcpopt[i++] = TCP_OPT_NOP;
130 		GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
131 		i += TCP_OPT_TIMESTAMP_LEN;
132 #endif
133 
134 #if TCP_OPT_SACK_ENABLED
135 		if (flags & TCP_OPT_SACK) {
136 			// TODO: implement SACK support
137 		}
138 #endif
139 	}
140 
141 	assert (i == optlen);
142 }
143 /*----------------------------------------------------------------------------*/
144 int
145 SendTCPPacketStandalone(struct mtcp_manager *mtcp,
146 		uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport,
147 		uint32_t seq, uint32_t ack_seq, uint16_t window, uint8_t flags,
148 		uint8_t *payload, uint16_t payloadlen,
149 		uint32_t cur_ts, uint32_t echo_ts, uint16_t ip_id, int8_t in_ifidx)
150 {
151 	struct tcphdr *tcph;
152 	uint8_t *tcpopt;
153 	uint32_t *ts;
154 	uint16_t optlen;
155 	struct pkt_ctx pctx;
156 	int rc = -1;
157 
158 	memset(&pctx, 0, sizeof(pctx));
159 	pctx.p.in_ifidx = in_ifidx;
160 	optlen = CalculateOptionLength(flags);
161 	if (payloadlen > TCP_DEFAULT_MSS + optlen) {
162 		TRACE_ERROR("Payload size exceeds MSS.\n");
163 		assert(0);
164 		return ERROR;
165 	}
166 
167 	tcph = (struct tcphdr *)IPOutputStandalone(mtcp, htons(ip_id),
168 			saddr, daddr, TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
169 	if (tcph == NULL) {
170 		return ERROR;
171 	}
172 	memset(tcph, 0, TCP_HEADER_LEN + optlen);
173 
174 	tcph->source = sport;
175 	tcph->dest = dport;
176 
177 	if (flags & TCP_FLAG_SYN)
178 		tcph->syn = TRUE;
179 	if (flags & TCP_FLAG_FIN)
180 		tcph->fin = TRUE;
181 	if (flags & TCP_FLAG_RST)
182 		tcph->rst = TRUE;
183 	if (flags & TCP_FLAG_PSH)
184 		tcph->psh = TRUE;
185 
186 	tcph->seq = htonl(seq);
187 	if (flags & TCP_FLAG_ACK) {
188 		tcph->ack = TRUE;
189 		tcph->ack_seq = htonl(ack_seq);
190 	}
191 
192 	tcph->window = htons(MIN(window, TCP_MAX_WINDOW));
193 
194 	tcpopt = (uint8_t *)tcph + TCP_HEADER_LEN;
195 	ts = (uint32_t *)(tcpopt + 4);
196 
197 	tcpopt[0] = TCP_OPT_NOP;
198 	tcpopt[1] = TCP_OPT_NOP;
199 	tcpopt[2] = TCP_OPT_TIMESTAMP;
200 	tcpopt[3] = TCP_OPT_TIMESTAMP_LEN;
201 	ts[0] = htonl(cur_ts);
202 	ts[1] = htonl(echo_ts);
203 
204 	tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
205 	// copy payload if exist
206 	if (payloadlen > 0) {
207 		memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
208 	}
209 
210 #if TCP_CALCULATE_CHECKSUM
211 	/* offload TCP checkum if possible */
212 	if (likely(mtcp->iom->dev_ioctl != NULL))
213 		rc = mtcp->iom->dev_ioctl(mtcp->ctx,
214 					  pctx.out_ifidx,
215 					  PKT_TX_TCP_CSUM,
216 					  pctx.p.iph);
217 	/* otherwise calculate TCP checksum in S/W */
218 	if (rc == -1)
219 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
220 					      TCP_HEADER_LEN +
221 					      optlen + payloadlen,
222 					      saddr, daddr);
223 #endif
224 
225 	if (tcph->syn || tcph->fin) {
226 		payloadlen++;
227 	}
228 
229 	struct mon_listener *walk;
230 	/* callback for monitor raw socket */
231 	TAILQ_FOREACH(walk, &mtcp->monitors, link)
232 		if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
233 			HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
234 				       &pctx, MOS_ON_PKT_IN);
235 	return payloadlen;
236 }
237 /*----------------------------------------------------------------------------*/
238 int
239 SendTCPPacket(struct mtcp_manager *mtcp, tcp_stream *cur_stream,
240 		uint32_t cur_ts, uint8_t flags, uint8_t *payload, uint16_t payloadlen)
241 {
242 	struct tcphdr *tcph;
243 	uint16_t optlen;
244 	uint8_t wscale = 0;
245 	uint32_t window32 = 0;
246 	struct pkt_ctx pctx;
247 	int rc = -1;
248 
249 	memset(&pctx, 0, sizeof(pctx));
250 	optlen = CalculateOptionLength(flags);
251 	if (payloadlen > cur_stream->sndvar->mss + optlen) {
252 		TRACE_ERROR("Payload size exceeds MSS\n");
253 		return ERROR;
254 	}
255 
256 	tcph = (struct tcphdr *)IPOutput(mtcp, cur_stream,
257 			TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
258 	if (tcph == NULL) {
259 		return -2;
260 	}
261 	memset(tcph, 0, TCP_HEADER_LEN + optlen);
262 
263 	tcph->source = cur_stream->sport;
264 	tcph->dest = cur_stream->dport;
265 
266 	if (flags & TCP_FLAG_SYN) {
267 		tcph->syn = TRUE;
268 		if (cur_stream->snd_nxt != cur_stream->sndvar->iss) {
269 			TRACE_DBG("Stream %d: weird SYN sequence. "
270 					"snd_nxt: %u, iss: %u\n", cur_stream->id,
271 					cur_stream->snd_nxt, cur_stream->sndvar->iss);
272 		}
273 		TRACE_DBG("Stream %d: Sending SYN. seq: %u, ack_seq: %u\n",
274 			  cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
275 	}
276 	if (flags & TCP_FLAG_RST) {
277 		TRACE_FIN("Stream %d: Sending RST.\n", cur_stream->id);
278 		tcph->rst = TRUE;
279 	}
280 	if (flags & TCP_FLAG_PSH)
281 		tcph->psh = TRUE;
282 
283 	if (flags & TCP_FLAG_WACK) {
284 		tcph->seq = htonl(cur_stream->snd_nxt - 1);
285 		TRACE_CLWND("%u Sending ACK to get new window advertisement. "
286 				"seq: %u, peer_wnd: %u, snd_nxt - snd_una: %u\n",
287 				cur_stream->id,
288 				cur_stream->snd_nxt - 1, cur_stream->sndvar->peer_wnd,
289 				cur_stream->snd_nxt - cur_stream->sndvar->snd_una);
290 	} else if (flags & TCP_FLAG_FIN) {
291 		tcph->fin = TRUE;
292 
293 		if (cur_stream->sndvar->fss == 0) {
294 			TRACE_ERROR("Stream %u: not fss set. closed: %u\n",
295 					cur_stream->id, cur_stream->closed);
296 		}
297 		tcph->seq = htonl(cur_stream->sndvar->fss);
298 		cur_stream->sndvar->is_fin_sent = TRUE;
299 		TRACE_FIN("Stream %d: Sending FIN. seq: %u, ack_seq: %u\n",
300 				cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
301 	} else {
302 		tcph->seq = htonl(cur_stream->snd_nxt);
303 	}
304 
305 	if (flags & TCP_FLAG_ACK) {
306 		tcph->ack = TRUE;
307 		tcph->ack_seq = htonl(cur_stream->rcv_nxt);
308 		cur_stream->sndvar->ts_lastack_sent = cur_ts;
309 		cur_stream->last_active_ts = cur_ts;
310 		UpdateTimeoutList(mtcp, cur_stream);
311 	}
312 
313 	if (flags & TCP_FLAG_SYN) {
314 		wscale = 0;
315 	} else {
316 		wscale = cur_stream->sndvar->wscale_mine;
317 	}
318 
319 	window32 = cur_stream->rcvvar->rcv_wnd >> wscale;
320 	tcph->window = htons((uint16_t)MIN(window32, TCP_MAX_WINDOW));
321 	/* if the advertised window is 0, we need to advertise again later */
322 	if (window32 == 0) {
323 		cur_stream->need_wnd_adv = TRUE;
324 	}
325 
326 	GenerateTCPOptions(cur_stream, cur_ts, flags,
327 			(uint8_t *)tcph + TCP_HEADER_LEN, optlen);
328 
329 	tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
330 	// copy payload if exist
331 	if (payloadlen > 0) {
332 		memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
333 	}
334 
335 #if TCP_CALCULATE_CHECKSUM
336 	if (likely(mtcp->iom->dev_ioctl != NULL))
337 		rc = mtcp->iom->dev_ioctl(mtcp->ctx,
338 					  pctx.out_ifidx,
339 					  PKT_TX_TCP_CSUM,
340 					  pctx.p.iph);
341 	if (rc == -1)
342 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
343 					      TCP_HEADER_LEN +
344 					      optlen + payloadlen,
345 					      cur_stream->saddr,
346 					      cur_stream->daddr);
347 #endif
348 	cur_stream->snd_nxt += payloadlen;
349 
350 	if (tcph->syn || tcph->fin) {
351 		cur_stream->snd_nxt++;
352 		payloadlen++;
353 	}
354 
355 	if (payloadlen > 0) {
356 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
357 			TRACE_FIN("Payload after ESTABLISHED: length: %d, snd_nxt: %u\n",
358 				  payloadlen, cur_stream->snd_nxt);
359 		}
360 
361 		/* update retransmission timer if have payload */
362 		cur_stream->sndvar->ts_rto = cur_ts + cur_stream->sndvar->rto;
363 		TRACE_RTO("Updating retransmission timer. "
364 				"cur_ts: %u, rto: %u, ts_rto: %u\n",
365 				cur_ts, cur_stream->sndvar->rto, cur_stream->sndvar->ts_rto);
366 		AddtoRTOList(mtcp, cur_stream);
367 	}
368 
369 	struct mon_listener *walk;
370 	/* callback for monitor raw socket */
371 	TAILQ_FOREACH(walk, &mtcp->monitors, link)
372 		if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
373 			HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
374 				       &pctx, MOS_ON_PKT_IN);
375 
376 	if (mtcp->num_msp /* this means that stream monitor is on */) {
377 		FillPacketContextTCPInfo(&pctx, tcph);
378 
379 		/* New abstraction for monitor stream */
380 		struct tcp_stream *recvside_stream = cur_stream->pair_stream;
381 		struct tcp_stream *sendside_stream = cur_stream;
382 
383 		if (recvside_stream->rcvvar && recvside_stream->rcvvar->rcvbuf)
384 			pctx.p.offset = (uint64_t)seq2loff(recvside_stream->rcvvar->rcvbuf,
385 					pctx.p.seq, recvside_stream->rcvvar->irs + 1);
386 
387 		UpdateMonitor(mtcp, sendside_stream, recvside_stream, &pctx, false);
388 	}
389 
390 #ifdef PKTDUMP
391 	DumpPacket(mtcp,
392 			(char *)tcph - sizeof(struct iphdr) - sizeof(struct ethhdr),
393 			payloadlen + sizeof(struct iphdr) + sizeof(struct ethhdr),
394 			"OUT", -1);
395 #endif
396 
397 
398 	return payloadlen;
399 }
400 /*----------------------------------------------------------------------------*/
401 static int
402 FlushTCPSendingBuffer(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
403 {
404 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
405 	const uint32_t maxlen = sndvar->mss - CalculateOptionLength(TCP_FLAG_ACK);
406 	uint8_t *data;
407 	uint32_t buffered_len;
408 	uint32_t seq;
409 	uint16_t len;
410 	int16_t sndlen;
411 	uint32_t window;
412 	int packets = 0;
413 
414 	if (!sndvar->sndbuf) {
415 		TRACE_ERROR("Stream %d: No send buffer available.\n", cur_stream->id);
416 		assert(0);
417 		return 0;
418 	}
419 
420 	SBUF_LOCK(&sndvar->write_lock);
421 
422 	if (sndvar->sndbuf->len == 0) {
423 		packets = 0;
424 		goto out;
425 	}
426 
427 	window = MIN(sndvar->cwnd, sndvar->peer_wnd);
428 
429 	while (1) {
430 		seq = cur_stream->snd_nxt;
431 
432 		if (TCP_SEQ_LT(seq, sndvar->sndbuf->head_seq)) {
433 			TRACE_ERROR("Stream %d: Invalid sequence to send. "
434 					"state: %s, seq: %u, head_seq: %u.\n",
435 					cur_stream->id, TCPStateToString(cur_stream),
436 					seq, sndvar->sndbuf->head_seq);
437 			assert(0);
438 			break;
439 		}
440 		buffered_len = sndvar->sndbuf->head_seq + sndvar->sndbuf->len - seq;
441 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
442 			TRACE_FIN("head_seq: %u, len: %u, seq: %u, "
443 					"buffered_len: %u\n", sndvar->sndbuf->head_seq,
444 					sndvar->sndbuf->len, seq, buffered_len);
445 		}
446 		if (buffered_len == 0)
447 			break;
448 
449 		data = sndvar->sndbuf->head +
450 				(seq - sndvar->sndbuf->head_seq);
451 
452 		if (buffered_len > maxlen) {
453 			len = maxlen;
454 		} else {
455 			len = buffered_len;
456 		}
457 
458 		if (len <= 0)
459 			break;
460 
461 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
462 			TRACE_FIN("Flushing after ESTABLISHED: seq: %u, len: %u, "
463 					"buffered_len: %u\n", seq, len, buffered_len);
464 		}
465 
466 		if (seq - sndvar->snd_una + len > window) {
467 			/* Ask for new window advertisement to peer */
468 			if (seq - sndvar->snd_una + len > sndvar->peer_wnd) {
469 				TRACE_DBG("Full peer window. "
470 					  "peer_wnd: %u, (snd_nxt-snd_una): %u\n",
471 					  sndvar->peer_wnd, seq - sndvar->snd_una);
472 				if (TS_TO_MSEC(cur_ts - sndvar->ts_lastack_sent) > 500) {
473 					EnqueueACK(mtcp, cur_stream, cur_ts, ACK_OPT_WACK);
474 				}
475 			}
476 			packets = -3;
477 			goto out;
478 		}
479 
480 		sndlen = SendTCPPacket(mtcp, cur_stream, cur_ts,
481 				TCP_FLAG_ACK, data, len);
482 		if (sndlen < 0) {
483 			packets = sndlen;
484 			goto out;
485 		}
486 		packets++;
487 	}
488 
489  out:
490 	SBUF_UNLOCK(&sndvar->write_lock);
491 	return packets;
492 }
493 /*----------------------------------------------------------------------------*/
494 static inline int
495 SendControlPacket(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
496 {
497 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
498 	int ret = 0;
499     int flag = 0;
500 
501     switch (cur_stream->state) {
502        case TCP_ST_SYN_SENT: 		/* Send SYN here */
503           flag = TCP_FLAG_SYN;
504           break;
505        case TCP_ST_SYN_RCVD:        /* Send SYN/ACK here */
506           cur_stream->snd_nxt = sndvar->iss;
507           flag = TCP_FLAG_SYN | TCP_FLAG_ACK;
508           break;
509        case TCP_ST_ESTABLISHED:     /* Send ACK here */
510        case TCP_ST_CLOSE_WAIT:	    /* Send ACK for the FIN here */
511        case TCP_ST_FIN_WAIT_2:      /* Send ACK here */
512        case TCP_ST_TIME_WAIT:       /* Send ACK here */
513           flag = TCP_FLAG_ACK;
514           break;
515        case TCP_ST_LAST_ACK:
516        case TCP_ST_FIN_WAIT_1:
517           /* if it is on ack_list, send it after sending ack */
518           if (sndvar->on_send_list || sndvar->on_ack_list)
519              return (-1);
520           flag = TCP_FLAG_FIN | TCP_FLAG_ACK; /* Send FIN/ACK here */
521           break;
522        case TCP_ST_CLOSING:
523           if (sndvar->is_fin_sent) {
524              /* if the sequence is for FIN, send FIN */
525              flag = (cur_stream->snd_nxt == sndvar->fss) ?
526                 (TCP_FLAG_FIN | TCP_FLAG_ACK) : TCP_FLAG_ACK;
527           } else {
528              /* if FIN is not sent, send fin with ack */
529              flag = TCP_FLAG_FIN | TCP_FLAG_ACK;
530           }
531        case TCP_ST_CLOSED_RSVD: /* Send RST here */
532           TRACE_DBG("Stream %d: Try sending RST (TCP_ST_CLOSED_RSVD)\n",
533                     cur_stream->id);
534           /* first flush the data and ack */
535           if (sndvar->on_send_list || sndvar->on_ack_list)
536              return (-1);
537           ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_RST, NULL, 0);
538           if (ret >= 0)
539              DestroyTCPStream(mtcp, cur_stream);
540           return (ret);
541        default:
542           TRACE_ERROR("Stream %d: shouldn't send a control packet\n",
543                       cur_stream->id);
544           assert(0); /* can't reach here! */
545           return (0);
546     }
547 
548     return SendTCPPacket(mtcp, cur_stream, cur_ts, flag, NULL, 0);
549 }
550 /*----------------------------------------------------------------------------*/
551 inline int
552 WriteTCPControlList(mtcp_manager_t mtcp,
553 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
554 {
555 	tcp_stream *cur_stream;
556 	tcp_stream *next, *last;
557 	int cnt = 0;
558 	int ret;
559 
560 	thresh = MIN(thresh, sender->control_list_cnt);
561 
562 	/* Send TCP control messages */
563 	cnt = 0;
564 	cur_stream = TAILQ_FIRST(&sender->control_list);
565 	last = TAILQ_LAST(&sender->control_list, control_head);
566 	while (cur_stream) {
567 		if (++cnt > thresh)
568 			break;
569 
570 		TRACE_LOOP("Inside control loop. cnt: %u, stream: %d\n",
571 				cnt, cur_stream->id);
572 		next = TAILQ_NEXT(cur_stream, sndvar->control_link);
573 
574 		TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
575 		sender->control_list_cnt--;
576 
577 		if (cur_stream->sndvar->on_control_list) {
578 			cur_stream->sndvar->on_control_list = FALSE;
579 			//TRACE_DBG("Stream %u: Sending control packet\n", cur_stream->id);
580 			ret = SendControlPacket(mtcp, cur_stream, cur_ts);
581 			if (ret < 0) {
582 				TAILQ_INSERT_HEAD(&sender->control_list,
583 						cur_stream, sndvar->control_link);
584 				cur_stream->sndvar->on_control_list = TRUE;
585 				sender->control_list_cnt++;
586 				/* since there is no available write buffer, break */
587 				break;
588 			}
589 		} else {
590 			TRACE_ERROR("Stream %d: not on control list.\n", cur_stream->id);
591 		}
592 
593 		if (cur_stream == last)
594 			break;
595 		cur_stream = next;
596 	}
597 
598 	return cnt;
599 }
600 /*----------------------------------------------------------------------------*/
601 inline int
602 WriteTCPDataList(mtcp_manager_t mtcp,
603 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
604 {
605 	tcp_stream *cur_stream;
606 	tcp_stream *next, *last;
607 	int cnt = 0;
608 	int ret;
609 
610 	/* Send data */
611 	cnt = 0;
612 	cur_stream = TAILQ_FIRST(&sender->send_list);
613 	last = TAILQ_LAST(&sender->send_list, send_head);
614 	while (cur_stream) {
615 		if (++cnt > thresh)
616 			break;
617 
618 		TRACE_LOOP("Inside send loop. cnt: %u, stream: %d\n",
619 				cnt, cur_stream->id);
620 		next = TAILQ_NEXT(cur_stream, sndvar->send_link);
621 
622 		TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
623 		if (cur_stream->sndvar->on_send_list) {
624 			ret = 0;
625 
626 			/* Send data here */
627 			/* Only can send data when ESTABLISHED or CLOSE_WAIT */
628 			if (cur_stream->state == TCP_ST_ESTABLISHED) {
629 				if (cur_stream->sndvar->on_control_list) {
630 					/* delay sending data after until on_control_list becomes off */
631 					//TRACE_DBG("Stream %u: delay sending data.\n", cur_stream->id);
632 					ret = -1;
633 				} else {
634 					ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
635 				}
636 			} else if (cur_stream->state == TCP_ST_CLOSE_WAIT ||
637 					cur_stream->state == TCP_ST_FIN_WAIT_1 ||
638 					cur_stream->state == TCP_ST_LAST_ACK) {
639 				ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
640 			} else {
641 				TRACE_DBG("Stream %d: on_send_list at state %s\n",
642 						cur_stream->id, TCPStateToString(cur_stream));
643 #if DUMP_STREAM
644 				DumpStream(mtcp, cur_stream);
645 #endif
646 			}
647 
648 			if (ret < 0) {
649 				TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
650 				/* since there is no available write buffer, break */
651 				break;
652 
653 			} else {
654 				cur_stream->sndvar->on_send_list = FALSE;
655 				sender->send_list_cnt--;
656 				/* the ret value is the number of packets sent. */
657 				/* decrease ack_cnt for the piggybacked acks */
658 #if ACK_PIGGYBACK
659 				if (cur_stream->sndvar->ack_cnt > 0) {
660 					if (cur_stream->sndvar->ack_cnt > ret) {
661 						cur_stream->sndvar->ack_cnt -= ret;
662 					} else {
663 						cur_stream->sndvar->ack_cnt = 0;
664 					}
665 				}
666 #endif
667 #if 1
668 				if (cur_stream->control_list_waiting) {
669 					if (!cur_stream->sndvar->on_ack_list) {
670 						cur_stream->control_list_waiting = FALSE;
671 						AddtoControlList(mtcp, cur_stream, cur_ts);
672 					}
673 				}
674 #endif
675 			}
676 		} else {
677 			TRACE_ERROR("Stream %d: not on send list.\n", cur_stream->id);
678 #ifdef DUMP_STREAM
679 			DumpStream(mtcp, cur_stream);
680 #endif
681 		}
682 
683 		if (cur_stream == last)
684 			break;
685 		cur_stream = next;
686 	}
687 
688 	return cnt;
689 }
690 /*----------------------------------------------------------------------------*/
691 inline int
692 WriteTCPACKList(mtcp_manager_t mtcp,
693 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
694 {
695 	tcp_stream *cur_stream;
696 	tcp_stream *next, *last;
697 	int to_ack;
698 	int cnt = 0;
699 	int ret;
700 
701 	/* Send aggregated acks */
702 	cnt = 0;
703 	cur_stream = TAILQ_FIRST(&sender->ack_list);
704 	last = TAILQ_LAST(&sender->ack_list, ack_head);
705 	while (cur_stream) {
706 		if (++cnt > thresh)
707 			break;
708 
709 		TRACE_LOOP("Inside ack loop. cnt: %u\n", cnt);
710 		next = TAILQ_NEXT(cur_stream, sndvar->ack_link);
711 
712 		if (cur_stream->sndvar->on_ack_list) {
713 			/* this list is only to ack the data packets */
714 			/* if the ack is not data ack, then it will not process here */
715 			to_ack = FALSE;
716 			if (cur_stream->state == TCP_ST_ESTABLISHED ||
717 					cur_stream->state == TCP_ST_CLOSE_WAIT ||
718 					cur_stream->state == TCP_ST_FIN_WAIT_1 ||
719 					cur_stream->state == TCP_ST_FIN_WAIT_2 ||
720 					cur_stream->state == TCP_ST_TIME_WAIT) {
721 				/* TIMEWAIT is possible since the ack is queued
722 				   at FIN_WAIT_2 */
723 				tcprb_t *rb;
724 				if ((rb = cur_stream->rcvvar->rcvbuf) &&
725 					TCP_SEQ_LEQ(cur_stream->rcv_nxt,
726 						(cur_stream->rcvvar->irs + 1) + rb->pile
727 						+ tcprb_cflen(rb))) {
728 					to_ack = TRUE;
729 				}
730 			} else {
731 				TRACE_DBG("Stream %u (%s): "
732 						"Try sending ack at not proper state. "
733 						"seq: %u, ack_seq: %u, on_control_list: %u\n",
734 						cur_stream->id, TCPStateToString(cur_stream),
735 						cur_stream->snd_nxt, cur_stream->rcv_nxt,
736 						cur_stream->sndvar->on_control_list);
737 #ifdef DUMP_STREAM
738 				DumpStream(mtcp, cur_stream);
739 #endif
740 			}
741 
742 			if (to_ack) {
743 				/* send the queued ack packets */
744 				while (cur_stream->sndvar->ack_cnt > 0) {
745 					ret = SendTCPPacket(mtcp, cur_stream,
746 							cur_ts, TCP_FLAG_ACK, NULL, 0);
747 					if (ret < 0) {
748 						/* since there is no available write buffer, break */
749 						break;
750 					}
751 					cur_stream->sndvar->ack_cnt--;
752 				}
753 
754 				/* if is_wack is set, send packet to get window advertisement */
755 				if (cur_stream->sndvar->is_wack) {
756 					cur_stream->sndvar->is_wack = FALSE;
757 					ret = SendTCPPacket(mtcp, cur_stream,
758 							cur_ts, TCP_FLAG_ACK | TCP_FLAG_WACK, NULL, 0);
759 					if (ret < 0) {
760 						/* since there is no available write buffer, break */
761 						cur_stream->sndvar->is_wack = TRUE;
762 					}
763 				}
764 
765 				if (!(cur_stream->sndvar->ack_cnt || cur_stream->sndvar->is_wack)) {
766 					cur_stream->sndvar->on_ack_list = FALSE;
767 					TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
768 					sender->ack_list_cnt--;
769 				}
770 			} else {
771 				cur_stream->sndvar->on_ack_list = FALSE;
772 				cur_stream->sndvar->ack_cnt = 0;
773 				cur_stream->sndvar->is_wack = 0;
774 				TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
775 				sender->ack_list_cnt--;
776 			}
777 
778 			if (cur_stream->control_list_waiting) {
779 				if (!cur_stream->sndvar->on_send_list) {
780 					cur_stream->control_list_waiting = FALSE;
781 					AddtoControlList(mtcp, cur_stream, cur_ts);
782 				}
783 			}
784 		} else {
785 			TRACE_ERROR("Stream %d: not on ack list.\n", cur_stream->id);
786 			TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
787 			sender->ack_list_cnt--;
788 #ifdef DUMP_STREAM
789 			thread_printf(mtcp, mtcp->log_fp,
790 					"Stream %u: not on ack list.\n", cur_stream->id);
791 			DumpStream(mtcp, cur_stream);
792 #endif
793 		}
794 
795 		if (cur_stream == last)
796 			break;
797 		cur_stream = next;
798 	}
799 
800 	return cnt;
801 }
802 /*----------------------------------------------------------------------------*/
803 inline struct mtcp_sender *
804 GetSender(mtcp_manager_t mtcp, tcp_stream *cur_stream)
805 {
806 	if (cur_stream->sndvar->nif_out < 0) {
807 		return mtcp->g_sender;
808 
809 	} else if (cur_stream->sndvar->nif_out >= g_config.mos->netdev_table->num) {
810 		TRACE_ERROR("(NEVER HAPPEN) Failed to find appropriate sender.\n");
811 		return NULL;
812 
813 	} else {
814 		return mtcp->n_sender[cur_stream->sndvar->nif_out];
815 	}
816 }
817 /*----------------------------------------------------------------------------*/
818 inline void
819 AddtoControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
820 {
821 #if TRY_SEND_BEFORE_QUEUE
822 	int ret;
823 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
824 	assert(sender != NULL);
825 
826 	ret = SendControlPacket(mtcp, cur_stream, cur_ts);
827 	if (ret < 0) {
828 #endif
829 		if (!cur_stream->sndvar->on_control_list) {
830 			struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
831 			assert(sender != NULL);
832 
833 			cur_stream->sndvar->on_control_list = TRUE;
834 			TAILQ_INSERT_TAIL(&sender->control_list, cur_stream, sndvar->control_link);
835 			sender->control_list_cnt++;
836 			//TRACE_DBG("Stream %u: added to control list (cnt: %d)\n",
837 			//		cur_stream->id, sender->control_list_cnt);
838 		}
839 #if TRY_SEND_BEFORE_QUEUE
840 	} else {
841 		if (cur_stream->sndvar->on_control_list) {
842 			cur_stream->sndvar->on_control_list = FALSE;
843 			TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
844 			sender->control_list_cnt--;
845 		}
846 	}
847 #endif
848 }
849 /*----------------------------------------------------------------------------*/
850 inline void
851 AddtoSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
852 {
853 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
854 	assert(sender != NULL);
855 
856 	if(!cur_stream->sndvar->sndbuf) {
857 		TRACE_ERROR("[%d] Stream %d: No send buffer available.\n",
858 				mtcp->ctx->cpu,
859 				cur_stream->id);
860 		assert(0);
861 		return;
862 	}
863 
864 	if (!cur_stream->sndvar->on_send_list) {
865 		cur_stream->sndvar->on_send_list = TRUE;
866 		TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
867 		sender->send_list_cnt++;
868 	}
869 }
870 /*----------------------------------------------------------------------------*/
871 inline void
872 AddtoACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
873 {
874 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
875 	assert(sender != NULL);
876 
877 	if (!cur_stream->sndvar->on_ack_list) {
878 		cur_stream->sndvar->on_ack_list = TRUE;
879 		TAILQ_INSERT_TAIL(&sender->ack_list, cur_stream, sndvar->ack_link);
880 		sender->ack_list_cnt++;
881 	}
882 }
883 /*----------------------------------------------------------------------------*/
884 inline void
885 RemoveFromControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
886 {
887 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
888 	assert(sender != NULL);
889 
890 	if (cur_stream->sndvar->on_control_list) {
891 		cur_stream->sndvar->on_control_list = FALSE;
892 		TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
893 		sender->control_list_cnt--;
894 		//TRACE_DBG("Stream %u: Removed from control list (cnt: %d)\n",
895 		//		cur_stream->id, sender->control_list_cnt);
896 	}
897 }
898 /*----------------------------------------------------------------------------*/
899 inline void
900 RemoveFromSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
901 {
902 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
903 	assert(sender != NULL);
904 
905 	if (cur_stream->sndvar->on_send_list) {
906 		cur_stream->sndvar->on_send_list = FALSE;
907 		TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
908 		sender->send_list_cnt--;
909 	}
910 }
911 /*----------------------------------------------------------------------------*/
912 inline void
913 RemoveFromACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
914 {
915 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
916 	assert(sender != NULL);
917 
918 	if (cur_stream->sndvar->on_ack_list) {
919 		cur_stream->sndvar->on_ack_list = FALSE;
920 		TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
921 		sender->ack_list_cnt--;
922 	}
923 }
924 /*----------------------------------------------------------------------------*/
925 inline void
926 EnqueueACK(mtcp_manager_t mtcp,
927 		tcp_stream *cur_stream, uint32_t cur_ts, uint8_t opt)
928 {
929 	if (!(cur_stream->state == TCP_ST_ESTABLISHED ||
930 			cur_stream->state == TCP_ST_CLOSE_WAIT ||
931 			cur_stream->state == TCP_ST_FIN_WAIT_1 ||
932 			cur_stream->state == TCP_ST_FIN_WAIT_2)) {
933 		TRACE_DBG("Stream %u: Enqueueing ack at state %s\n",
934 				cur_stream->id, TCPStateToString(cur_stream));
935 	}
936 
937 	if (opt == ACK_OPT_NOW) {
938 		if (cur_stream->sndvar->ack_cnt < cur_stream->sndvar->ack_cnt + 1) {
939 			cur_stream->sndvar->ack_cnt++;
940 		}
941 	} else if (opt == ACK_OPT_AGGREGATE) {
942 		if (cur_stream->sndvar->ack_cnt == 0) {
943 			cur_stream->sndvar->ack_cnt = 1;
944 		}
945 	} else if (opt == ACK_OPT_WACK) {
946 		cur_stream->sndvar->is_wack = TRUE;
947 	}
948 	AddtoACKList(mtcp, cur_stream);
949 }
950 /*----------------------------------------------------------------------------*/
951 inline void
952 DumpControlList(mtcp_manager_t mtcp, struct mtcp_sender *sender)
953 {
954 	tcp_stream *stream;
955 
956 	TRACE_DBG("Dumping control list (count: %d):\n", sender->control_list_cnt);
957 	TAILQ_FOREACH(stream, &sender->control_list, sndvar->control_link) {
958 		TRACE_DBG("Stream id: %u in control list\n", stream->id);
959 	}
960 }
961 /*----------------------------------------------------------------------------*/
962 static inline void
963 UpdatePassiveSendTCPContext_SynSent(struct tcp_stream *cur_stream,
964 				    struct pkt_ctx *pctx)
965 {
966 	assert(cur_stream);
967 	assert(pctx);
968 
969 	/* add event */
970 	if (cur_stream->state < TCP_ST_SYN_SENT) {
971 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
972 		cur_stream->cb_events |= MOS_ON_CONN_START;
973 	}
974 	/* initialize TCP send variables of send-side stream */
975 	cur_stream->sndvar->cwnd = 1;
976 	cur_stream->sndvar->ssthresh = cur_stream->sndvar->mss * 10;
977 	cur_stream->sndvar->ip_id = htons(pctx->p.iph->id);
978 	cur_stream->sndvar->iss = pctx->p.seq;
979 	cur_stream->snd_nxt = pctx->p.seq + 1;
980 	cur_stream->state = TCP_ST_SYN_SENT;
981 	cur_stream->last_active_ts = pctx->p.cur_ts;
982 
983 	/* receive-side conn start event can also be tagged here */
984 	/* blocked since tcp_in.c takes care of this.. */
985 	/* cur_stream->pair_stream->cb_events |= MOS_ON_CONN_START; */
986 }
987 /*----------------------------------------------------------------------------*/
988 /**
989  * Called (when monitoring mode is enabled).. for every incoming packet from the
990  * NIC.
991  */
992 void
993 UpdatePassiveSendTCPContext(mtcp_manager_t mtcp, struct tcp_stream *cur_stream,
994 			    struct pkt_ctx *pctx)
995 {
996 	struct tcphdr *tcph;
997 
998 	assert(cur_stream);
999 	tcph = pctx->p.tcph;
1000 
1001 	/* if it is a new TCP stream from client */
1002 	if (tcph->syn && !tcph->ack && cur_stream->state <= TCP_ST_SYN_SENT) {
1003 		TRACE_STATE("Stream %d: %s\n",
1004 			    cur_stream->id, TCPStateToString(cur_stream));
1005 		UpdatePassiveSendTCPContext_SynSent(cur_stream, pctx);
1006 		AddtoTimeoutList(mtcp, cur_stream);
1007 		return;
1008 	}
1009 
1010 	if (tcph->ack) {
1011 		cur_stream->sndvar->ts_lastack_sent = pctx->p.cur_ts;
1012 		cur_stream->last_active_ts = pctx->p.cur_ts;
1013 	}
1014 
1015 	cur_stream->snd_nxt = pctx->p.seq + pctx->p.payloadlen;
1016 
1017 	/* test for reset packet */
1018 	if (tcph->rst) {
1019 		cur_stream->have_reset = TRUE;
1020 		/* test for reset packet */
1021 		cur_stream->state = TCP_ST_CLOSED_RSVD;
1022 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1023 		TRACE_STATE("Stream %d: %s\n",
1024 				cur_stream->id,
1025 				TCPStateToString(cur_stream));
1026 		return;
1027 	}
1028 
1029 	/*
1030 	 * for all others, state transitioning is based on
1031 	 * current tcp_stream state
1032 	 */
1033 	switch (cur_stream->state) {
1034 	case TCP_ST_SYN_SENT:
1035 		/* control should not come here */
1036 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1037 #ifdef BE_RESILIENT_TO_PACKET_DROP
1038 		if (tcph->ack && TCP_SEQ_GT(pctx->p.seq, cur_stream->sndvar->iss)) {
1039 			cur_stream->state = TCP_ST_ESTABLISHED;
1040 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1041 			cur_stream->snd_nxt = pctx->p.seq;
1042 			cur_stream->rcv_nxt = pctx->p.ack_seq;
1043 			goto __Handle_TCP_ST_ESTABLISHED;
1044 		}
1045 #endif
1046 		break;
1047 	case TCP_ST_SYN_RCVD:
1048 		if (!tcph->ack)
1049 			break;
1050 
1051 		if (tcph->syn) {
1052 			cur_stream->sndvar->iss = pctx->p.seq;
1053 			cur_stream->snd_nxt = cur_stream->sndvar->iss + 1;
1054 			TRACE_DBG("Stream %d (TCP_ST_SYN_RCVD): "
1055 				  "setting seq: %u = iss\n",
1056 				  cur_stream->id, pctx->p.seq);
1057 		}
1058 #ifdef BE_RESILIENT_TO_PACKET_DROP
1059 		else {
1060 			cur_stream->state = TCP_ST_ESTABLISHED;
1061 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1062 			cur_stream->snd_nxt = pctx->p.seq;
1063 			cur_stream->rcv_nxt = pctx->p.ack_seq;
1064 			goto __Handle_TCP_ST_ESTABLISHED;
1065 		}
1066 #endif
1067 		TRACE_STATE("Stream %d: %s\n",
1068 			    cur_stream->id,
1069 			    TCPStateToString(cur_stream));
1070 		break;
1071 	case TCP_ST_ESTABLISHED:
1072 #ifdef BE_RESILIENT_TO_PACKET_DROP
1073 __Handle_TCP_ST_ESTABLISHED:
1074 #endif
1075 		/* if application decides to close, fin pkt is sent */
1076 #ifdef BE_RESILIENT_TO_PACKET_DROP
1077 		if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
1078 		{
1079 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
1080 					"Move rcv_nxt from %u to %u.\n",
1081 					cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
1082 			cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
1083 		}
1084 #endif
1085 		if (tcph->fin) {
1086 			cur_stream->state = TCP_ST_FIN_WAIT_1;
1087 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1088 			cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
1089 			cur_stream->sndvar->is_fin_sent = TRUE;
1090 			cur_stream->snd_nxt++;
1091 			TRACE_STATE("Stream %d: %s\n",
1092 				    cur_stream->id,
1093 				    TCPStateToString(cur_stream));
1094 		} else {
1095 			/* creating tcp send buffer still pending.. */
1096 			/* do we need peek for send buffer? */
1097 		}
1098 		break;
1099 	case TCP_ST_CLOSE_WAIT:
1100 		/* if application decides to close, fin pkt is sent */
1101 #ifdef BE_RESILIENT_TO_PACKET_DROP
1102 		if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
1103 		{
1104 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
1105 					"Move rcv_nxt from %u to %u.\n",
1106 					cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
1107 			cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
1108 		}
1109 #endif
1110 		if (tcph->fin) {
1111 			cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
1112 			cur_stream->sndvar->is_fin_sent = TRUE;
1113 			cur_stream->snd_nxt++;
1114 			cur_stream->state = TCP_ST_LAST_ACK;
1115 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1116 			TRACE_STATE("Stream %d: %s\n",
1117 				    cur_stream->id,
1118 				    TCPStateToString(cur_stream));
1119 		} else if (tcph->ack) {
1120 			TRACE_STATE("Stream %d: %s\n",
1121 				    cur_stream->id,
1122 				    TCPStateToString(cur_stream));
1123 		}
1124 		break;
1125 	case TCP_ST_LAST_ACK:
1126 		/* control should not come here */
1127 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1128 		break;
1129 	case TCP_ST_FIN_WAIT_1:
1130 		/* control should not come here */
1131 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1132 		break;
1133 	case TCP_ST_FIN_WAIT_2:
1134 		/* control should not come here */
1135 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1136 		break;
1137 	case TCP_ST_CLOSING:
1138 		/* control should not come here */
1139 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1140 		break;
1141 	case TCP_ST_TIME_WAIT:
1142 		/* control may come here but... */
1143 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1144 		if (tcph->ack) {
1145 			TRACE_STATE("Stream %d: %s\n",
1146 				    cur_stream->id,
1147 				    TCPStateToString(cur_stream));
1148 		}
1149 		break;
1150 	case TCP_ST_CLOSED:
1151 	case TCP_ST_CLOSED_RSVD:
1152 		/* Waiting to be destroyed */
1153 		break;
1154 	default:
1155 		TRACE_DBG("This should not happen.. Error state: %s reached!\n"
1156 			  "tcph->syn: %d, tcph->ack: %d\n",
1157 			  TCPStateToString(cur_stream), pctx->p.tcph->syn,
1158 			  pctx->p.tcph->ack);
1159 		assert(0);
1160 		/* This will be enabled once passiverecvcontext is completed */
1161 		/*exit(EXIT_FAILURE);*/
1162 	}
1163 
1164 	UNUSED(mtcp);
1165 	return;
1166 }
1167 /*----------------------------------------------------------------------------*/
1168 void
1169 PostSendTCPAction(mtcp_manager_t mtcp, struct pkt_ctx *pctx,
1170 		  struct tcp_stream *recvside_stream,
1171 		  struct tcp_stream *sendside_stream)
1172 {
1173 	uint32_t snd_seq_drift, rcv_seq_drift;
1174 
1175 	snd_seq_drift = FetchSeqDrift(sendside_stream, pctx->p.seq);
1176 	rcv_seq_drift = FetchSeqDrift(recvside_stream, pctx->p.ack_seq);
1177 
1178 	if (snd_seq_drift != 0 || rcv_seq_drift != 0) {
1179 		pctx->p.tcph->seq = htonl(pctx->p.seq +
1180 					  snd_seq_drift);
1181 		pctx->p.tcph->ack_seq = htonl(pctx->p.ack_seq -
1182 					      rcv_seq_drift);
1183 		pctx->p.seq += snd_seq_drift;
1184 		pctx->p.ack_seq -= rcv_seq_drift;
1185 
1186 		/* Recompute checksums */
1187 		pctx->p.iph->check = 0;
1188 		pctx->p.iph->check = ip_fast_csum(pctx->p.iph, pctx->p.iph->ihl);
1189 
1190 		pctx->p.tcph->check = 0;
1191 		pctx->p.tcph->check = TCPCalcChecksum((uint16_t *)pctx->p.tcph,
1192 			ntohs(pctx->p.iph->tot_len) - (pctx->p.iph->ihl<<2),
1193 			pctx->p.iph->saddr, pctx->p.iph->daddr);
1194 	}
1195 #if 0
1196 	/* This block of code will go away in future revisions */
1197 	/* update sequence no. if seq_drift > 0 */
1198 	if (sendside_stream->sndvar->seq_drift != 0 ||
1199 	    recvside_stream->sndvar->seq_drift != 0) {
1200 
1201 		pctx->p.tcph->seq = htonl(pctx->p.seq +
1202 					  sendside_stream->sndvar->seq_drift);
1203 		pctx->p.tcph->ack_seq = htonl(pctx->p.ack_seq -
1204 					      recvside_stream->sndvar->seq_drift);
1205 		pctx->p.seq += sendside_stream->sndvar->seq_drift;
1206 		pctx->p.ack_seq -= recvside_stream->sndvar->seq_drift;
1207 
1208 		/* Recompute checksums */
1209 		pctx->p.iph->check = 0;
1210 		pctx->p.iph->check = ip_fast_csum(pctx->p.iph, pctx->p.iph->ihl);
1211 
1212 		pctx->p.tcph->check = 0;
1213 		pctx->p.tcph->check = TCPCalcChecksum((uint16_t *)pctx->p.tcph,
1214 			ntohs(pctx->p.iph->tot_len) - (pctx->p.iph->ihl<<2),
1215 			pctx->p.iph->saddr, pctx->p.iph->daddr);
1216 	}
1217 #endif
1218 }
1219 /*----------------------------------------------------------------------------*/
1220