xref: /mOS-networking-stack/core/src/tcp_out.c (revision e160edca)
1 #include <unistd.h>
2 #include <string.h>
3 
4 #include "tcp_out.h"
5 #include "mtcp.h"
6 #include "ip_in.h"
7 #include "ip_out.h"
8 #include "tcp_in.h"
9 #include "tcp.h"
10 #include "tcp_stream.h"
11 #include "eventpoll.h"
12 #include "timer.h"
13 #include "debug.h"
14 #include "config.h"
15 
/* Build-time switches for the TCP output path. */
/* When set, the send routines fill in the TCP checksum (offload or S/W). */
#define TCP_CALCULATE_CHECKSUM		TRUE
/* When set, WriteTCPDataList() deducts sent data packets from ack_cnt. */
#define ACK_PIGGYBACK			TRUE
/* Enable this for higher concurrency rate experiments */
#define TRY_SEND_BEFORE_QUEUE		TRUE

/* Upper bound applied to the value stored in the TCP window field. */
#define TCP_MAX_WINDOW 65535

/* NOTE: both macros evaluate their arguments more than once. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

/* Prototype for a helper implemented elsewhere. */
extern struct pkt_info *ClonePacketCtx(struct pkt_info *to,
		unsigned char *frame, struct pkt_ctx *from);
28 
29 /*----------------------------------------------------------------------------*/
30 static inline uint16_t
31 CalculateOptionLength(uint8_t flags)
32 {
33 	uint16_t optlen = 0;
34 
35 	if (flags & TCP_FLAG_SYN) {
36 		optlen += TCP_OPT_MSS_LEN;
37 #if TCP_OPT_SACK_ENABLED
38 		optlen += TCP_OPT_SACK_PERMIT_LEN;
39 #if !TCP_OPT_TIMESTAMP_ENABLED
40 		optlen += 2;	// insert NOP padding
41 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
42 #endif /* TCP_OPT_SACK_ENABLED */
43 
44 #if TCP_OPT_TIMESTAMP_ENABLED
45 		optlen += TCP_OPT_TIMESTAMP_LEN;
46 #if !TCP_OPT_SACK_ENABLED
47 		optlen += 2;	// insert NOP padding
48 #endif /* TCP_OPT_SACK_ENABLED */
49 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
50 
51 		optlen += TCP_OPT_WSCALE_LEN + 1;
52 
53 	} else {
54 
55 #if TCP_OPT_TIMESTAMP_ENABLED
56 		optlen += TCP_OPT_TIMESTAMP_LEN + 2;
57 #endif
58 
59 #if TCP_OPT_SACK_ENABLED
60 		if (flags & TCP_FLAG_SACK) {
61 			optlen += TCP_OPT_SACK_LEN + 2;
62 		}
63 #endif
64 	}
65 
66 	assert(optlen % 4 == 0);
67 
68 	return optlen;
69 }
70 /*----------------------------------------------------------------------------*/
71 static inline void
72 GenerateTCPTimestamp(tcp_stream *cur_stream, uint8_t *tcpopt, uint32_t cur_ts)
73 {
74 	uint32_t *ts = (uint32_t *)(tcpopt + 2);
75 
76 	tcpopt[0] = TCP_OPT_TIMESTAMP;
77 	tcpopt[1] = TCP_OPT_TIMESTAMP_LEN;
78 	ts[0] = htonl(cur_ts);
79 	ts[1] = htonl(cur_stream->rcvvar->ts_recent);
80 }
81 /*----------------------------------------------------------------------------*/
82 static inline void
83 GenerateTCPOptions(tcp_stream *cur_stream, uint32_t cur_ts,
84 		uint8_t flags, uint8_t *tcpopt, uint16_t optlen)
85 {
86 	int i = 0;
87 
88 	if (flags & TCP_FLAG_SYN) {
89 		uint16_t mss;
90 
91 		/* MSS option */
92 		mss = cur_stream->sndvar->mss;
93 		tcpopt[i++] = TCP_OPT_MSS;
94 		tcpopt[i++] = TCP_OPT_MSS_LEN;
95 		tcpopt[i++] = mss >> 8;
96 		tcpopt[i++] = mss % 256;
97 
98 		/* SACK permit */
99 #if TCP_OPT_SACK_ENABLED
100 #if !TCP_OPT_TIMESTAMP_ENABLED
101 		tcpopt[i++] = TCP_OPT_NOP;
102 		tcpopt[i++] = TCP_OPT_NOP;
103 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
104 		tcpopt[i++] = TCP_OPT_SACK_PERMIT;
105 		tcpopt[i++] = TCP_OPT_SACK_PERMIT_LEN;
106 		TRACE_SACK("Local SACK permited.\n");
107 #endif /* TCP_OPT_SACK_ENABLED */
108 
109 		/* Timestamp */
110 #if TCP_OPT_TIMESTAMP_ENABLED
111 #if !TCP_OPT_SACK_ENABLED
112 		tcpopt[i++] = TCP_OPT_NOP;
113 		tcpopt[i++] = TCP_OPT_NOP;
114 #endif /* TCP_OPT_SACK_ENABLED */
115 		GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
116 		i += TCP_OPT_TIMESTAMP_LEN;
117 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
118 
119 		/* Window scale */
120 		tcpopt[i++] = TCP_OPT_NOP;
121 		tcpopt[i++] = TCP_OPT_WSCALE;
122 		tcpopt[i++] = TCP_OPT_WSCALE_LEN;
123 		tcpopt[i++] = cur_stream->sndvar->wscale_mine;
124 
125 	} else {
126 
127 #if TCP_OPT_TIMESTAMP_ENABLED
128 		tcpopt[i++] = TCP_OPT_NOP;
129 		tcpopt[i++] = TCP_OPT_NOP;
130 		GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
131 		i += TCP_OPT_TIMESTAMP_LEN;
132 #endif
133 
134 #if TCP_OPT_SACK_ENABLED
135 		if (flags & TCP_OPT_SACK) {
136 			// TODO: implement SACK support
137 		}
138 #endif
139 	}
140 
141 	assert (i == optlen);
142 }
143 /*----------------------------------------------------------------------------*/
144 int
145 SendTCPPacketStandalone(struct mtcp_manager *mtcp,
146 		uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport,
147 		uint32_t seq, uint32_t ack_seq, uint16_t window, uint8_t flags,
148 		uint8_t *payload, uint16_t payloadlen,
149 		uint32_t cur_ts, uint32_t echo_ts)
150 {
151 	struct tcphdr *tcph;
152 	uint8_t *tcpopt;
153 	uint32_t *ts;
154 	uint16_t optlen;
155 	struct pkt_ctx pctx;
156 	int rc = -1;
157 
158 	memset(&pctx, 0, sizeof(pctx));
159 	optlen = CalculateOptionLength(flags);
160 	if (payloadlen > TCP_DEFAULT_MSS + optlen) {
161 		TRACE_ERROR("Payload size exceeds MSS.\n");
162 		assert(0);
163 		return ERROR;
164 	}
165 
166 	tcph = (struct tcphdr *)IPOutputStandalone(mtcp, 0,
167 			saddr, daddr, TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
168 	if (tcph == NULL) {
169 		return ERROR;
170 	}
171 	memset(tcph, 0, TCP_HEADER_LEN + optlen);
172 
173 	tcph->source = sport;
174 	tcph->dest = dport;
175 
176 	if (flags & TCP_FLAG_SYN)
177 		tcph->syn = TRUE;
178 	if (flags & TCP_FLAG_FIN)
179 		tcph->fin = TRUE;
180 	if (flags & TCP_FLAG_RST)
181 		tcph->rst = TRUE;
182 	if (flags & TCP_FLAG_PSH)
183 		tcph->psh = TRUE;
184 
185 	tcph->seq = htonl(seq);
186 	if (flags & TCP_FLAG_ACK) {
187 		tcph->ack = TRUE;
188 		tcph->ack_seq = htonl(ack_seq);
189 	}
190 
191 	tcph->window = htons(MIN(window, TCP_MAX_WINDOW));
192 
193 	tcpopt = (uint8_t *)tcph + TCP_HEADER_LEN;
194 	ts = (uint32_t *)(tcpopt + 4);
195 
196 	tcpopt[0] = TCP_OPT_NOP;
197 	tcpopt[1] = TCP_OPT_NOP;
198 	tcpopt[2] = TCP_OPT_TIMESTAMP;
199 	tcpopt[3] = TCP_OPT_TIMESTAMP_LEN;
200 	ts[0] = htonl(cur_ts);
201 	ts[1] = htonl(echo_ts);
202 
203 	tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
204 	// copy payload if exist
205 	if (payloadlen > 0) {
206 		memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
207 	}
208 
209 #if TCP_CALCULATE_CHECKSUM
210 	/* offload TCP checkum if possible */
211 	if (likely(mtcp->iom->dev_ioctl != NULL))
212 		rc = mtcp->iom->dev_ioctl(mtcp->ctx,
213 					  pctx.out_ifidx,
214 					  PKT_TX_TCP_CSUM,
215 					  pctx.p.iph);
216 	/* otherwise calculate TCP checksum in S/W */
217 	if (rc == -1)
218 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
219 					      TCP_HEADER_LEN +
220 					      optlen + payloadlen,
221 					      saddr, daddr);
222 #endif
223 
224 	if (tcph->syn || tcph->fin) {
225 		payloadlen++;
226 	}
227 
228 	struct mon_listener *walk;
229 	/* callback for monitor raw socket */
230 	TAILQ_FOREACH(walk, &mtcp->monitors, link)
231 		if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
232 			HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
233 				       &pctx, MOS_ON_PKT_IN);
234 	return payloadlen;
235 }
236 /*----------------------------------------------------------------------------*/
237 int
238 SendTCPPacket(struct mtcp_manager *mtcp, tcp_stream *cur_stream,
239 		uint32_t cur_ts, uint8_t flags, uint8_t *payload, uint16_t payloadlen)
240 {
241 	struct tcphdr *tcph;
242 	uint16_t optlen;
243 	uint8_t wscale = 0;
244 	uint32_t window32 = 0;
245 	struct pkt_ctx pctx;
246 	int rc = -1;
247 
248 	memset(&pctx, 0, sizeof(pctx));
249 	optlen = CalculateOptionLength(flags);
250 	if (payloadlen > cur_stream->sndvar->mss + optlen) {
251 		TRACE_ERROR("Payload size exceeds MSS\n");
252 		return ERROR;
253 	}
254 
255 	tcph = (struct tcphdr *)IPOutput(mtcp, cur_stream,
256 			TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
257 	if (tcph == NULL) {
258 		return -2;
259 	}
260 	memset(tcph, 0, TCP_HEADER_LEN + optlen);
261 
262 	tcph->source = cur_stream->sport;
263 	tcph->dest = cur_stream->dport;
264 
265 	if (flags & TCP_FLAG_SYN) {
266 		tcph->syn = TRUE;
267 		if (cur_stream->snd_nxt != cur_stream->sndvar->iss) {
268 			TRACE_DBG("Stream %d: weird SYN sequence. "
269 					"snd_nxt: %u, iss: %u\n", cur_stream->id,
270 					cur_stream->snd_nxt, cur_stream->sndvar->iss);
271 		}
272 		TRACE_DBG("Stream %d: Sending SYN. seq: %u, ack_seq: %u\n",
273 			  cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
274 	}
275 	if (flags & TCP_FLAG_RST) {
276 		TRACE_FIN("Stream %d: Sending RST.\n", cur_stream->id);
277 		tcph->rst = TRUE;
278 	}
279 	if (flags & TCP_FLAG_PSH)
280 		tcph->psh = TRUE;
281 
282 	if (flags & TCP_FLAG_WACK) {
283 		tcph->seq = htonl(cur_stream->snd_nxt - 1);
284 		TRACE_CLWND("%u Sending ACK to get new window advertisement. "
285 				"seq: %u, peer_wnd: %u, snd_nxt - snd_una: %u\n",
286 				cur_stream->id,
287 				cur_stream->snd_nxt - 1, cur_stream->sndvar->peer_wnd,
288 				cur_stream->snd_nxt - cur_stream->sndvar->snd_una);
289 	} else if (flags & TCP_FLAG_FIN) {
290 		tcph->fin = TRUE;
291 
292 		if (cur_stream->sndvar->fss == 0) {
293 			TRACE_ERROR("Stream %u: not fss set. closed: %u\n",
294 					cur_stream->id, cur_stream->closed);
295 		}
296 		tcph->seq = htonl(cur_stream->sndvar->fss);
297 		cur_stream->sndvar->is_fin_sent = TRUE;
298 		TRACE_FIN("Stream %d: Sending FIN. seq: %u, ack_seq: %u\n",
299 				cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
300 	} else {
301 		tcph->seq = htonl(cur_stream->snd_nxt);
302 	}
303 
304 	if (flags & TCP_FLAG_ACK) {
305 		tcph->ack = TRUE;
306 		tcph->ack_seq = htonl(cur_stream->rcv_nxt);
307 		cur_stream->sndvar->ts_lastack_sent = cur_ts;
308 		cur_stream->last_active_ts = cur_ts;
309 		UpdateTimeoutList(mtcp, cur_stream);
310 	}
311 
312 	if (flags & TCP_FLAG_SYN) {
313 		wscale = 0;
314 	} else {
315 		wscale = cur_stream->sndvar->wscale_mine;
316 	}
317 
318 	window32 = cur_stream->rcvvar->rcv_wnd >> wscale;
319 	tcph->window = htons((uint16_t)MIN(window32, TCP_MAX_WINDOW));
320 	/* if the advertised window is 0, we need to advertise again later */
321 	if (window32 == 0) {
322 		cur_stream->need_wnd_adv = TRUE;
323 	}
324 
325 	GenerateTCPOptions(cur_stream, cur_ts, flags,
326 			(uint8_t *)tcph + TCP_HEADER_LEN, optlen);
327 
328 	tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
329 	// copy payload if exist
330 	if (payloadlen > 0) {
331 		memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
332 	}
333 
334 #if TCP_CALCULATE_CHECKSUM
335 	if (likely(mtcp->iom->dev_ioctl != NULL))
336 		rc = mtcp->iom->dev_ioctl(mtcp->ctx,
337 					  pctx.out_ifidx,
338 					  PKT_TX_TCP_CSUM,
339 					  pctx.p.iph);
340 	if (rc == -1)
341 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
342 					      TCP_HEADER_LEN +
343 					      optlen + payloadlen,
344 					      cur_stream->saddr,
345 					      cur_stream->daddr);
346 #endif
347 	cur_stream->snd_nxt += payloadlen;
348 
349 	if (tcph->syn || tcph->fin) {
350 		cur_stream->snd_nxt++;
351 		payloadlen++;
352 	}
353 
354 	if (payloadlen > 0) {
355 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
356 			TRACE_FIN("Payload after ESTABLISHED: length: %d, snd_nxt: %u\n",
357 				  payloadlen, cur_stream->snd_nxt);
358 		}
359 
360 		/* update retransmission timer if have payload */
361 		cur_stream->sndvar->ts_rto = cur_ts + cur_stream->sndvar->rto;
362 		TRACE_RTO("Updating retransmission timer. "
363 				"cur_ts: %u, rto: %u, ts_rto: %u\n",
364 				cur_ts, cur_stream->sndvar->rto, cur_stream->sndvar->ts_rto);
365 		AddtoRTOList(mtcp, cur_stream);
366 	}
367 
368 	struct mon_listener *walk;
369 	/* callback for monitor raw socket */
370 	TAILQ_FOREACH(walk, &mtcp->monitors, link)
371 		if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
372 			HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
373 				       &pctx, MOS_ON_PKT_IN);
374 
375 	if (mtcp->num_msp /* this means that stream monitor is on */) {
376 		FillPacketContextTCPInfo(&pctx, tcph);
377 
378 		/* New abstraction for monitor stream */
379 		struct tcp_stream *recvside_stream = cur_stream->pair_stream;
380 		struct tcp_stream *sendside_stream = cur_stream;
381 
382 #ifdef NEWPPEEK
383 		if (recvside_stream->rcvvar && recvside_stream->rcvvar->rcvbuf)
384 			pctx.p.offset = (uint64_t)seq2loff(recvside_stream->rcvvar->rcvbuf,
385 					pctx.p.seq, recvside_stream->rcvvar->irs + 1);
386 #endif
387 
388 		UpdateMonitor(mtcp, sendside_stream, recvside_stream, &pctx, false);
389 	}
390 
391 #ifdef PKTDUMP
392 	DumpPacket(mtcp,
393 			(char *)tcph - sizeof(struct iphdr) - sizeof(struct ethhdr),
394 			payloadlen + sizeof(struct iphdr) + sizeof(struct ethhdr),
395 			"OUT", -1);
396 #endif
397 
398 
399 	return payloadlen;
400 }
401 /*----------------------------------------------------------------------------*/
402 static int
403 FlushTCPSendingBuffer(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
404 {
405 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
406 	const uint32_t maxlen = sndvar->mss - CalculateOptionLength(TCP_FLAG_ACK);
407 	uint8_t *data;
408 	uint32_t buffered_len;
409 	uint32_t seq;
410 	uint16_t len;
411 	int16_t sndlen;
412 	uint32_t window;
413 	int packets = 0;
414 
415 	if (!sndvar->sndbuf) {
416 		TRACE_ERROR("Stream %d: No send buffer available.\n", cur_stream->id);
417 		assert(0);
418 		return 0;
419 	}
420 
421 	SBUF_LOCK(&sndvar->write_lock);
422 
423 	if (sndvar->sndbuf->len == 0) {
424 		packets = 0;
425 		goto out;
426 	}
427 
428 	window = MIN(sndvar->cwnd, sndvar->peer_wnd);
429 
430 	while (1) {
431 		seq = cur_stream->snd_nxt;
432 
433 		if (TCP_SEQ_LT(seq, sndvar->sndbuf->head_seq)) {
434 			TRACE_ERROR("Stream %d: Invalid sequence to send. "
435 					"state: %s, seq: %u, head_seq: %u.\n",
436 					cur_stream->id, TCPStateToString(cur_stream),
437 					seq, sndvar->sndbuf->head_seq);
438 			assert(0);
439 			break;
440 		}
441 		buffered_len = sndvar->sndbuf->head_seq + sndvar->sndbuf->len - seq;
442 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
443 			TRACE_FIN("head_seq: %u, len: %u, seq: %u, "
444 					"buffered_len: %u\n", sndvar->sndbuf->head_seq,
445 					sndvar->sndbuf->len, seq, buffered_len);
446 		}
447 		if (buffered_len == 0)
448 			break;
449 
450 		data = sndvar->sndbuf->head +
451 				(seq - sndvar->sndbuf->head_seq);
452 
453 		if (buffered_len > maxlen) {
454 			len = maxlen;
455 		} else {
456 			len = buffered_len;
457 		}
458 
459 		if (len <= 0)
460 			break;
461 
462 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
463 			TRACE_FIN("Flushing after ESTABLISHED: seq: %u, len: %u, "
464 					"buffered_len: %u\n", seq, len, buffered_len);
465 		}
466 
467 		if (seq - sndvar->snd_una + len > window) {
468 			/* Ask for new window advertisement to peer */
469 			if (seq - sndvar->snd_una + len > sndvar->peer_wnd) {
470 				TRACE_DBG("Full peer window. "
471 					  "peer_wnd: %u, (snd_nxt-snd_una): %u\n",
472 					  sndvar->peer_wnd, seq - sndvar->snd_una);
473 				if (TS_TO_MSEC(cur_ts - sndvar->ts_lastack_sent) > 500) {
474 					EnqueueACK(mtcp, cur_stream, cur_ts, ACK_OPT_WACK);
475 				}
476 			}
477 			packets = -3;
478 			goto out;
479 		}
480 
481 		sndlen = SendTCPPacket(mtcp, cur_stream, cur_ts,
482 				TCP_FLAG_ACK, data, len);
483 		if (sndlen < 0) {
484 			packets = sndlen;
485 			goto out;
486 		}
487 		packets++;
488 	}
489 
490  out:
491 	SBUF_UNLOCK(&sndvar->write_lock);
492 	return packets;
493 }
494 /*----------------------------------------------------------------------------*/
495 static inline int
496 SendControlPacket(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
497 {
498 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
499 	int ret = 0;
500     int flag = 0;
501 
502     switch (cur_stream->state) {
503        case TCP_ST_SYN_SENT: 		/* Send SYN here */
504           flag = TCP_FLAG_SYN;
505           break;
506        case TCP_ST_SYN_RCVD:        /* Send SYN/ACK here */
507           cur_stream->snd_nxt = sndvar->iss;
508           flag = TCP_FLAG_SYN | TCP_FLAG_ACK;
509           break;
510        case TCP_ST_ESTABLISHED:     /* Send ACK here */
511        case TCP_ST_CLOSE_WAIT:	    /* Send ACK for the FIN here */
512        case TCP_ST_FIN_WAIT_2:      /* Send ACK here */
513        case TCP_ST_TIME_WAIT:       /* Send ACK here */
514           flag = TCP_FLAG_ACK;
515           break;
516        case TCP_ST_LAST_ACK:
517        case TCP_ST_FIN_WAIT_1:
518           /* if it is on ack_list, send it after sending ack */
519           if (sndvar->on_send_list || sndvar->on_ack_list)
520              return (-1);
521           flag = TCP_FLAG_FIN | TCP_FLAG_ACK; /* Send FIN/ACK here */
522           break;
523        case TCP_ST_CLOSING:
524           if (sndvar->is_fin_sent) {
525              /* if the sequence is for FIN, send FIN */
526              flag = (cur_stream->snd_nxt == sndvar->fss) ?
527                 (TCP_FLAG_FIN | TCP_FLAG_ACK) : TCP_FLAG_ACK;
528           } else {
529              /* if FIN is not sent, send fin with ack */
530              flag = TCP_FLAG_FIN | TCP_FLAG_ACK;
531           }
532        case TCP_ST_CLOSED_RSVD: /* Send RST here */
533           TRACE_DBG("Stream %d: Try sending RST (TCP_ST_CLOSED_RSVD)\n",
534                     cur_stream->id);
535           /* first flush the data and ack */
536           if (sndvar->on_send_list || sndvar->on_ack_list)
537              return (-1);
538           ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_RST, NULL, 0);
539           if (ret >= 0)
540              DestroyTCPStream(mtcp, cur_stream);
541           return (ret);
542        default:
543           TRACE_ERROR("Stream %d: shouldn't send a control packet\n",
544                       cur_stream->id);
545           assert(0); /* can't reach here! */
546           return (0);
547     }
548 
549     return SendTCPPacket(mtcp, cur_stream, cur_ts, flag, NULL, 0);
550 }
551 /*----------------------------------------------------------------------------*/
552 inline int
553 WriteTCPControlList(mtcp_manager_t mtcp,
554 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
555 {
556 	tcp_stream *cur_stream;
557 	tcp_stream *next, *last;
558 	int cnt = 0;
559 	int ret;
560 
561 	thresh = MIN(thresh, sender->control_list_cnt);
562 
563 	/* Send TCP control messages */
564 	cnt = 0;
565 	cur_stream = TAILQ_FIRST(&sender->control_list);
566 	last = TAILQ_LAST(&sender->control_list, control_head);
567 	while (cur_stream) {
568 		if (++cnt > thresh)
569 			break;
570 
571 		TRACE_LOOP("Inside control loop. cnt: %u, stream: %d\n",
572 				cnt, cur_stream->id);
573 		next = TAILQ_NEXT(cur_stream, sndvar->control_link);
574 
575 		TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
576 		sender->control_list_cnt--;
577 
578 		if (cur_stream->sndvar->on_control_list) {
579 			cur_stream->sndvar->on_control_list = FALSE;
580 			//TRACE_DBG("Stream %u: Sending control packet\n", cur_stream->id);
581 			ret = SendControlPacket(mtcp, cur_stream, cur_ts);
582 			if (ret < 0) {
583 				TAILQ_INSERT_HEAD(&sender->control_list,
584 						cur_stream, sndvar->control_link);
585 				cur_stream->sndvar->on_control_list = TRUE;
586 				sender->control_list_cnt++;
587 				/* since there is no available write buffer, break */
588 				break;
589 			}
590 		} else {
591 			TRACE_ERROR("Stream %d: not on control list.\n", cur_stream->id);
592 		}
593 
594 		if (cur_stream == last)
595 			break;
596 		cur_stream = next;
597 	}
598 
599 	return cnt;
600 }
601 /*----------------------------------------------------------------------------*/
602 inline int
603 WriteTCPDataList(mtcp_manager_t mtcp,
604 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
605 {
606 	tcp_stream *cur_stream;
607 	tcp_stream *next, *last;
608 	int cnt = 0;
609 	int ret;
610 
611 	/* Send data */
612 	cnt = 0;
613 	cur_stream = TAILQ_FIRST(&sender->send_list);
614 	last = TAILQ_LAST(&sender->send_list, send_head);
615 	while (cur_stream) {
616 		if (++cnt > thresh)
617 			break;
618 
619 		TRACE_LOOP("Inside send loop. cnt: %u, stream: %d\n",
620 				cnt, cur_stream->id);
621 		next = TAILQ_NEXT(cur_stream, sndvar->send_link);
622 
623 		TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
624 		if (cur_stream->sndvar->on_send_list) {
625 			ret = 0;
626 
627 			/* Send data here */
628 			/* Only can send data when ESTABLISHED or CLOSE_WAIT */
629 			if (cur_stream->state == TCP_ST_ESTABLISHED) {
630 				if (cur_stream->sndvar->on_control_list) {
631 					/* delay sending data after until on_control_list becomes off */
632 					//TRACE_DBG("Stream %u: delay sending data.\n", cur_stream->id);
633 					ret = -1;
634 				} else {
635 					ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
636 				}
637 			} else if (cur_stream->state == TCP_ST_CLOSE_WAIT ||
638 					cur_stream->state == TCP_ST_FIN_WAIT_1 ||
639 					cur_stream->state == TCP_ST_LAST_ACK) {
640 				ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
641 			} else {
642 				TRACE_DBG("Stream %d: on_send_list at state %s\n",
643 						cur_stream->id, TCPStateToString(cur_stream));
644 #if DUMP_STREAM
645 				DumpStream(mtcp, cur_stream);
646 #endif
647 			}
648 
649 			if (ret < 0) {
650 				TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
651 				/* since there is no available write buffer, break */
652 				break;
653 
654 			} else {
655 				cur_stream->sndvar->on_send_list = FALSE;
656 				sender->send_list_cnt--;
657 				/* the ret value is the number of packets sent. */
658 				/* decrease ack_cnt for the piggybacked acks */
659 #if ACK_PIGGYBACK
660 				if (cur_stream->sndvar->ack_cnt > 0) {
661 					if (cur_stream->sndvar->ack_cnt > ret) {
662 						cur_stream->sndvar->ack_cnt -= ret;
663 					} else {
664 						cur_stream->sndvar->ack_cnt = 0;
665 					}
666 				}
667 #endif
668 #if 1
669 				if (cur_stream->control_list_waiting) {
670 					if (!cur_stream->sndvar->on_ack_list) {
671 						cur_stream->control_list_waiting = FALSE;
672 						AddtoControlList(mtcp, cur_stream, cur_ts);
673 					}
674 				}
675 #endif
676 			}
677 		} else {
678 			TRACE_ERROR("Stream %d: not on send list.\n", cur_stream->id);
679 #ifdef DUMP_STREAM
680 			DumpStream(mtcp, cur_stream);
681 #endif
682 		}
683 
684 		if (cur_stream == last)
685 			break;
686 		cur_stream = next;
687 	}
688 
689 	return cnt;
690 }
691 /*----------------------------------------------------------------------------*/
692 inline int
693 WriteTCPACKList(mtcp_manager_t mtcp,
694 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
695 {
696 	tcp_stream *cur_stream;
697 	tcp_stream *next, *last;
698 	int to_ack;
699 	int cnt = 0;
700 	int ret;
701 
702 	/* Send aggregated acks */
703 	cnt = 0;
704 	cur_stream = TAILQ_FIRST(&sender->ack_list);
705 	last = TAILQ_LAST(&sender->ack_list, ack_head);
706 	while (cur_stream) {
707 		if (++cnt > thresh)
708 			break;
709 
710 		TRACE_LOOP("Inside ack loop. cnt: %u\n", cnt);
711 		next = TAILQ_NEXT(cur_stream, sndvar->ack_link);
712 
713 		if (cur_stream->sndvar->on_ack_list) {
714 			/* this list is only to ack the data packets */
715 			/* if the ack is not data ack, then it will not process here */
716 			to_ack = FALSE;
717 			if (cur_stream->state == TCP_ST_ESTABLISHED ||
718 					cur_stream->state == TCP_ST_CLOSE_WAIT ||
719 					cur_stream->state == TCP_ST_FIN_WAIT_1 ||
720 					cur_stream->state == TCP_ST_FIN_WAIT_2 ||
721 					cur_stream->state == TCP_ST_TIME_WAIT) {
722 				/* TIMEWAIT is possible since the ack is queued
723 				   at FIN_WAIT_2 */
724 #ifdef NEWRB
725 				tcprb_t *rb;
726 				if ((rb = cur_stream->rcvvar->rcvbuf) &&
727 					TCP_SEQ_LEQ(cur_stream->rcv_nxt,
728 						(cur_stream->rcvvar->irs + 1) + rb->pile
729 						+ tcprb_cflen(rb))) {
730 					to_ack = TRUE;
731 				}
732 #else
733 				if (cur_stream->rcvvar->rcvbuf) {
734 					if (TCP_SEQ_LEQ(cur_stream->rcv_nxt,
735 								cur_stream->rcvvar->rcvbuf->head_seq +
736 								cur_stream->rcvvar->rcvbuf->merged_len)) {
737 						to_ack = TRUE;
738 					}
739 				}
740 #endif
741 			} else {
742 				TRACE_DBG("Stream %u (%s): "
743 						"Try sending ack at not proper state. "
744 						"seq: %u, ack_seq: %u, on_control_list: %u\n",
745 						cur_stream->id, TCPStateToString(cur_stream),
746 						cur_stream->snd_nxt, cur_stream->rcv_nxt,
747 						cur_stream->sndvar->on_control_list);
748 #ifdef DUMP_STREAM
749 				DumpStream(mtcp, cur_stream);
750 #endif
751 			}
752 
753 			if (to_ack) {
754 				/* send the queued ack packets */
755 				while (cur_stream->sndvar->ack_cnt > 0) {
756 					ret = SendTCPPacket(mtcp, cur_stream,
757 							cur_ts, TCP_FLAG_ACK, NULL, 0);
758 					if (ret < 0) {
759 						/* since there is no available write buffer, break */
760 						break;
761 					}
762 					cur_stream->sndvar->ack_cnt--;
763 				}
764 
765 				/* if is_wack is set, send packet to get window advertisement */
766 				if (cur_stream->sndvar->is_wack) {
767 					cur_stream->sndvar->is_wack = FALSE;
768 					ret = SendTCPPacket(mtcp, cur_stream,
769 							cur_ts, TCP_FLAG_ACK | TCP_FLAG_WACK, NULL, 0);
770 					if (ret < 0) {
771 						/* since there is no available write buffer, break */
772 						cur_stream->sndvar->is_wack = TRUE;
773 					}
774 				}
775 
776 				if (!(cur_stream->sndvar->ack_cnt || cur_stream->sndvar->is_wack)) {
777 					cur_stream->sndvar->on_ack_list = FALSE;
778 					TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
779 					sender->ack_list_cnt--;
780 				}
781 			} else {
782 				cur_stream->sndvar->on_ack_list = FALSE;
783 				cur_stream->sndvar->ack_cnt = 0;
784 				cur_stream->sndvar->is_wack = 0;
785 				TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
786 				sender->ack_list_cnt--;
787 			}
788 
789 			if (cur_stream->control_list_waiting) {
790 				if (!cur_stream->sndvar->on_send_list) {
791 					cur_stream->control_list_waiting = FALSE;
792 					AddtoControlList(mtcp, cur_stream, cur_ts);
793 				}
794 			}
795 		} else {
796 			TRACE_ERROR("Stream %d: not on ack list.\n", cur_stream->id);
797 			TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
798 			sender->ack_list_cnt--;
799 #ifdef DUMP_STREAM
800 			thread_printf(mtcp, mtcp->log_fp,
801 					"Stream %u: not on ack list.\n", cur_stream->id);
802 			DumpStream(mtcp, cur_stream);
803 #endif
804 		}
805 
806 		if (cur_stream == last)
807 			break;
808 		cur_stream = next;
809 	}
810 
811 	return cnt;
812 }
813 /*----------------------------------------------------------------------------*/
814 inline struct mtcp_sender *
815 GetSender(mtcp_manager_t mtcp, tcp_stream *cur_stream)
816 {
817 	if (cur_stream->sndvar->nif_out < 0) {
818 		return mtcp->g_sender;
819 
820 	} else if (cur_stream->sndvar->nif_out >= g_config.mos->netdev_table->num) {
821 		TRACE_ERROR("(NEVER HAPPEN) Failed to find appropriate sender.\n");
822 		return NULL;
823 
824 	} else {
825 		return mtcp->n_sender[cur_stream->sndvar->nif_out];
826 	}
827 }
828 /*----------------------------------------------------------------------------*/
829 inline void
830 AddtoControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
831 {
832 #if TRY_SEND_BEFORE_QUEUE
833 	int ret;
834 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
835 	assert(sender != NULL);
836 
837 	ret = SendControlPacket(mtcp, cur_stream, cur_ts);
838 	if (ret < 0) {
839 #endif
840 		if (!cur_stream->sndvar->on_control_list) {
841 			struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
842 			assert(sender != NULL);
843 
844 			cur_stream->sndvar->on_control_list = TRUE;
845 			TAILQ_INSERT_TAIL(&sender->control_list, cur_stream, sndvar->control_link);
846 			sender->control_list_cnt++;
847 			//TRACE_DBG("Stream %u: added to control list (cnt: %d)\n",
848 			//		cur_stream->id, sender->control_list_cnt);
849 		}
850 #if TRY_SEND_BEFORE_QUEUE
851 	} else {
852 		if (cur_stream->sndvar->on_control_list) {
853 			cur_stream->sndvar->on_control_list = FALSE;
854 			TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
855 			sender->control_list_cnt--;
856 		}
857 	}
858 #endif
859 }
860 /*----------------------------------------------------------------------------*/
861 inline void
862 AddtoSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
863 {
864 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
865 	assert(sender != NULL);
866 
867 	if(!cur_stream->sndvar->sndbuf) {
868 		TRACE_ERROR("[%d] Stream %d: No send buffer available.\n",
869 				mtcp->ctx->cpu,
870 				cur_stream->id);
871 		assert(0);
872 		return;
873 	}
874 
875 	if (!cur_stream->sndvar->on_send_list) {
876 		cur_stream->sndvar->on_send_list = TRUE;
877 		TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
878 		sender->send_list_cnt++;
879 	}
880 }
881 /*----------------------------------------------------------------------------*/
882 inline void
883 AddtoACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
884 {
885 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
886 	assert(sender != NULL);
887 
888 	if (!cur_stream->sndvar->on_ack_list) {
889 		cur_stream->sndvar->on_ack_list = TRUE;
890 		TAILQ_INSERT_TAIL(&sender->ack_list, cur_stream, sndvar->ack_link);
891 		sender->ack_list_cnt++;
892 	}
893 }
894 /*----------------------------------------------------------------------------*/
895 inline void
896 RemoveFromControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
897 {
898 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
899 	assert(sender != NULL);
900 
901 	if (cur_stream->sndvar->on_control_list) {
902 		cur_stream->sndvar->on_control_list = FALSE;
903 		TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
904 		sender->control_list_cnt--;
905 		//TRACE_DBG("Stream %u: Removed from control list (cnt: %d)\n",
906 		//		cur_stream->id, sender->control_list_cnt);
907 	}
908 }
909 /*----------------------------------------------------------------------------*/
910 inline void
911 RemoveFromSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
912 {
913 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
914 	assert(sender != NULL);
915 
916 	if (cur_stream->sndvar->on_send_list) {
917 		cur_stream->sndvar->on_send_list = FALSE;
918 		TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
919 		sender->send_list_cnt--;
920 	}
921 }
922 /*----------------------------------------------------------------------------*/
923 inline void
924 RemoveFromACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
925 {
926 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
927 	assert(sender != NULL);
928 
929 	if (cur_stream->sndvar->on_ack_list) {
930 		cur_stream->sndvar->on_ack_list = FALSE;
931 		TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
932 		sender->ack_list_cnt--;
933 	}
934 }
935 /*----------------------------------------------------------------------------*/
936 inline void
937 EnqueueACK(mtcp_manager_t mtcp,
938 		tcp_stream *cur_stream, uint32_t cur_ts, uint8_t opt)
939 {
940 	if (!(cur_stream->state == TCP_ST_ESTABLISHED ||
941 			cur_stream->state == TCP_ST_CLOSE_WAIT ||
942 			cur_stream->state == TCP_ST_FIN_WAIT_1 ||
943 			cur_stream->state == TCP_ST_FIN_WAIT_2)) {
944 		TRACE_DBG("Stream %u: Enqueueing ack at state %s\n",
945 				cur_stream->id, TCPStateToString(cur_stream));
946 	}
947 
948 	if (opt == ACK_OPT_NOW) {
949 		if (cur_stream->sndvar->ack_cnt < cur_stream->sndvar->ack_cnt + 1) {
950 			cur_stream->sndvar->ack_cnt++;
951 		}
952 	} else if (opt == ACK_OPT_AGGREGATE) {
953 		if (cur_stream->sndvar->ack_cnt == 0) {
954 			cur_stream->sndvar->ack_cnt = 1;
955 		}
956 	} else if (opt == ACK_OPT_WACK) {
957 		cur_stream->sndvar->is_wack = TRUE;
958 	}
959 	AddtoACKList(mtcp, cur_stream);
960 }
961 /*----------------------------------------------------------------------------*/
962 inline void
963 DumpControlList(mtcp_manager_t mtcp, struct mtcp_sender *sender)
964 {
965 	tcp_stream *stream;
966 
967 	TRACE_DBG("Dumping control list (count: %d):\n", sender->control_list_cnt);
968 	TAILQ_FOREACH(stream, &sender->control_list, sndvar->control_link) {
969 		TRACE_DBG("Stream id: %u in control list\n", stream->id);
970 	}
971 }
972 /*----------------------------------------------------------------------------*/
973 static inline void
974 UpdatePassiveSendTCPContext_SynSent(struct tcp_stream *cur_stream,
975 				    struct pkt_ctx *pctx)
976 {
977 	assert(cur_stream);
978 	assert(pctx);
979 
980 	/* add event */
981 	if (cur_stream->state < TCP_ST_SYN_SENT) {
982 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
983 		cur_stream->cb_events |= MOS_ON_CONN_START;
984 	}
985 	/* initialize TCP send variables of send-side stream */
986 	cur_stream->sndvar->cwnd = 1;
987 	cur_stream->sndvar->ssthresh = cur_stream->sndvar->mss * 10;
988 	cur_stream->sndvar->ip_id = htons(pctx->p.iph->id);
989 	cur_stream->sndvar->iss = pctx->p.seq;
990 	cur_stream->snd_nxt = pctx->p.seq + 1;
991 	cur_stream->state = TCP_ST_SYN_SENT;
992 	cur_stream->last_active_ts = pctx->p.cur_ts;
993 
994 	/* receive-side conn start event can also be tagged here */
995 	/* blocked since tcp_in.c takes care of this.. */
996 	/* cur_stream->pair_stream->cb_events |= MOS_ON_CONN_START; */
997 }
998 /*----------------------------------------------------------------------------*/
999 /**
1000  * Called (when monitoring mode is enabled).. for every incoming packet from the
1001  * NIC.
1002  */
1003 void
1004 UpdatePassiveSendTCPContext(mtcp_manager_t mtcp, struct tcp_stream *cur_stream,
1005 			    struct pkt_ctx *pctx)
1006 {
1007 	struct tcphdr *tcph;
1008 
1009 	assert(cur_stream);
1010 	tcph = pctx->p.tcph;
1011 
1012 	/* if it is a new TCP stream from client */
1013 	if (tcph->syn && !tcph->ack && cur_stream->state <= TCP_ST_SYN_SENT) {
1014 		TRACE_STATE("Stream %d: %s\n",
1015 			    cur_stream->id, TCPStateToString(cur_stream));
1016 		UpdatePassiveSendTCPContext_SynSent(cur_stream, pctx);
1017 		AddtoTimeoutList(mtcp, cur_stream);
1018 		return;
1019 	}
1020 
1021 	if (tcph->ack) {
1022 		cur_stream->sndvar->ts_lastack_sent = pctx->p.cur_ts;
1023 		cur_stream->last_active_ts = pctx->p.cur_ts;
1024 	}
1025 
1026 	cur_stream->snd_nxt = pctx->p.seq + pctx->p.payloadlen;
1027 
1028 	/* test for reset packet */
1029 	if (tcph->rst) {
1030 		cur_stream->have_reset = TRUE;
1031 		/* test for reset packet */
1032 		cur_stream->state = TCP_ST_CLOSED_RSVD;
1033 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1034 		TRACE_STATE("Stream %d: %s\n",
1035 				cur_stream->id,
1036 				TCPStateToString(cur_stream));
1037 		return;
1038 	}
1039 
1040 	/*
1041 	 * for all others, state transitioning is based on
1042 	 * current tcp_stream state
1043 	 */
1044 	switch (cur_stream->state) {
1045 	case TCP_ST_SYN_SENT:
1046 		/* control should not come here */
1047 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1048 #ifdef BE_RESILIENT_TO_PACKET_DROP
1049 		if (tcph->ack && TCP_SEQ_GT(pctx->p.seq, cur_stream->sndvar->iss)) {
1050 			cur_stream->state = TCP_ST_ESTABLISHED;
1051 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1052 			cur_stream->snd_nxt = pctx->p.seq;
1053 			cur_stream->rcv_nxt = pctx->p.ack_seq;
1054 			goto __Handle_TCP_ST_ESTABLISHED;
1055 		}
1056 #endif
1057 		break;
1058 	case TCP_ST_SYN_RCVD:
1059 		if (!tcph->ack)
1060 			break;
1061 
1062 		if (tcph->syn) {
1063 			cur_stream->sndvar->iss = pctx->p.seq;
1064 			cur_stream->snd_nxt = cur_stream->sndvar->iss + 1;
1065 			TRACE_DBG("Stream %d (TCP_ST_SYN_RCVD): "
1066 				  "setting seq: %u = iss\n",
1067 				  cur_stream->id, pctx->p.seq);
1068 		}
1069 #ifdef BE_RESILIENT_TO_PACKET_DROP
1070 		else {
1071 			cur_stream->state = TCP_ST_ESTABLISHED;
1072 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1073 			cur_stream->snd_nxt = pctx->p.seq;
1074 			cur_stream->rcv_nxt = pctx->p.ack_seq;
1075 			goto __Handle_TCP_ST_ESTABLISHED;
1076 		}
1077 #endif
1078 		TRACE_STATE("Stream %d: %s\n",
1079 			    cur_stream->id,
1080 			    TCPStateToString(cur_stream));
1081 		break;
1082 	case TCP_ST_ESTABLISHED:
1083 #ifdef BE_RESILIENT_TO_PACKET_DROP
1084 __Handle_TCP_ST_ESTABLISHED:
1085 #endif
1086 		/* if application decides to close, fin pkt is sent */
1087 #ifdef BE_RESILIENT_TO_PACKET_DROP
1088 		if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
1089 		{
1090 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
1091 					"Move rcv_nxt from %u to %u.\n",
1092 					cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
1093 			cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
1094 		}
1095 #endif
1096 		if (tcph->fin) {
1097 			cur_stream->state = TCP_ST_FIN_WAIT_1;
1098 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1099 			cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
1100 			cur_stream->sndvar->is_fin_sent = TRUE;
1101 			cur_stream->snd_nxt++;
1102 			TRACE_STATE("Stream %d: %s\n",
1103 				    cur_stream->id,
1104 				    TCPStateToString(cur_stream));
1105 		} else {
1106 			/* creating tcp send buffer still pending.. */
1107 			/* do we need peek for send buffer? */
1108 		}
1109 		break;
1110 	case TCP_ST_CLOSE_WAIT:
1111 		/* if application decides to close, fin pkt is sent */
1112 #ifdef BE_RESILIENT_TO_PACKET_DROP
1113 		if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
1114 		{
1115 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
1116 					"Move rcv_nxt from %u to %u.\n",
1117 					cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
1118 			cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
1119 		}
1120 #endif
1121 		if (tcph->fin) {
1122 			cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
1123 			cur_stream->sndvar->is_fin_sent = TRUE;
1124 			cur_stream->snd_nxt++;
1125 			cur_stream->state = TCP_ST_LAST_ACK;
1126 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1127 			TRACE_STATE("Stream %d: %s\n",
1128 				    cur_stream->id,
1129 				    TCPStateToString(cur_stream));
1130 		} else if (tcph->ack) {
1131 			TRACE_STATE("Stream %d: %s\n",
1132 				    cur_stream->id,
1133 				    TCPStateToString(cur_stream));
1134 		}
1135 		break;
1136 	case TCP_ST_LAST_ACK:
1137 		/* control should not come here */
1138 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1139 		break;
1140 	case TCP_ST_FIN_WAIT_1:
1141 		/* control should not come here */
1142 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1143 		break;
1144 	case TCP_ST_FIN_WAIT_2:
1145 		/* control should not come here */
1146 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1147 		break;
1148 	case TCP_ST_CLOSING:
1149 		/* control should not come here */
1150 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1151 		break;
1152 	case TCP_ST_TIME_WAIT:
1153 		/* control may come here but... */
1154 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1155 		if (tcph->ack) {
1156 			TRACE_STATE("Stream %d: %s\n",
1157 				    cur_stream->id,
1158 				    TCPStateToString(cur_stream));
1159 		}
1160 		break;
1161 	case TCP_ST_CLOSED:
1162 	case TCP_ST_CLOSED_RSVD:
1163 		/* Waiting to be destroyed */
1164 		break;
1165 	default:
1166 		TRACE_DBG("This should not happen.. Error state: %s reached!\n"
1167 			  "tcph->syn: %d, tcph->ack: %d\n",
1168 			  TCPStateToString(cur_stream), pctx->p.tcph->syn,
1169 			  pctx->p.tcph->ack);
1170 		assert(0);
1171 		/* This will be enabled once passiverecvcontext is completed */
1172 		/*exit(EXIT_FAILURE);*/
1173 	}
1174 
1175 	UNUSED(mtcp);
1176 	return;
1177 }
1178 /*----------------------------------------------------------------------------*/
1179 void
1180 PostSendTCPAction(mtcp_manager_t mtcp, struct pkt_ctx *pctx,
1181 		  struct tcp_stream *recvside_stream,
1182 		  struct tcp_stream *sendside_stream)
1183 {
1184 	uint32_t snd_seq_drift, rcv_seq_drift;
1185 
1186 	snd_seq_drift = FetchSeqDrift(sendside_stream, pctx->p.seq);
1187 	rcv_seq_drift = FetchSeqDrift(recvside_stream, pctx->p.ack_seq);
1188 
1189 	if (snd_seq_drift != 0 || rcv_seq_drift != 0) {
1190 		pctx->p.tcph->seq = htonl(pctx->p.seq +
1191 					  snd_seq_drift);
1192 		pctx->p.tcph->ack_seq = htonl(pctx->p.ack_seq -
1193 					      rcv_seq_drift);
1194 		pctx->p.seq += snd_seq_drift;
1195 		pctx->p.ack_seq -= rcv_seq_drift;
1196 
1197 		/* Recompute checksums */
1198 		pctx->p.iph->check = 0;
1199 		pctx->p.iph->check = ip_fast_csum(pctx->p.iph, pctx->p.iph->ihl);
1200 
1201 		pctx->p.tcph->check = 0;
1202 		pctx->p.tcph->check = TCPCalcChecksum((uint16_t *)pctx->p.tcph,
1203 			ntohs(pctx->p.iph->tot_len) - (pctx->p.iph->ihl<<2),
1204 			pctx->p.iph->saddr, pctx->p.iph->daddr);
1205 	}
1206 #if 0
1207 	/* This block of code will go away in future revisions */
1208 	/* update sequence no. if seq_drift > 0 */
1209 	if (sendside_stream->sndvar->seq_drift != 0 ||
1210 	    recvside_stream->sndvar->seq_drift != 0) {
1211 
1212 		pctx->p.tcph->seq = htonl(pctx->p.seq +
1213 					  sendside_stream->sndvar->seq_drift);
1214 		pctx->p.tcph->ack_seq = htonl(pctx->p.ack_seq -
1215 					      recvside_stream->sndvar->seq_drift);
1216 		pctx->p.seq += sendside_stream->sndvar->seq_drift;
1217 		pctx->p.ack_seq -= recvside_stream->sndvar->seq_drift;
1218 
1219 		/* Recompute checksums */
1220 		pctx->p.iph->check = 0;
1221 		pctx->p.iph->check = ip_fast_csum(pctx->p.iph, pctx->p.iph->ihl);
1222 
1223 		pctx->p.tcph->check = 0;
1224 		pctx->p.tcph->check = TCPCalcChecksum((uint16_t *)pctx->p.tcph,
1225 			ntohs(pctx->p.iph->tot_len) - (pctx->p.iph->ihl<<2),
1226 			pctx->p.iph->saddr, pctx->p.iph->daddr);
1227 	}
1228 #endif
1229 }
1230 /*----------------------------------------------------------------------------*/
1231