xref: /mOS-networking-stack/core/src/tcp_out.c (revision 3f59ddac)
1 #include <unistd.h>
2 #include <string.h>
3 
4 #include "tcp_out.h"
5 #include "mtcp.h"
6 #include "ip_in.h"
7 #include "ip_out.h"
8 #include "tcp_in.h"
9 #include "tcp.h"
10 #include "tcp_stream.h"
11 #include "eventpoll.h"
12 #include "timer.h"
13 #include "debug.h"
14 #include "config.h"
15 
16 #define TCP_CALCULATE_CHECKSUM		TRUE
17 #define ACK_PIGGYBACK			TRUE
18 /* Enable this for higher concurrency rate experiments */
19 #define TRY_SEND_BEFORE_QUEUE		/*FALSE*/ TRUE
20 
21 #define TCP_MAX_WINDOW 65535
22 
23 #define MAX(a, b) ((a)>(b)?(a):(b))
24 #define MIN(a, b) ((a)<(b)?(a):(b))
25 
26 /*----------------------------------------------------------------------------*/
27 static inline uint16_t
28 CalculateOptionLength(uint8_t flags)
29 {
30 	uint16_t optlen = 0;
31 
32 	if (flags & TCP_FLAG_SYN) {
33 		optlen += TCP_OPT_MSS_LEN;
34 #if TCP_OPT_SACK_ENABLED
35 		optlen += TCP_OPT_SACK_PERMIT_LEN;
36 #if !TCP_OPT_TIMESTAMP_ENABLED
37 		optlen += 2;	// insert NOP padding
38 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
39 #endif /* TCP_OPT_SACK_ENABLED */
40 
41 #if TCP_OPT_TIMESTAMP_ENABLED
42 		optlen += TCP_OPT_TIMESTAMP_LEN;
43 #if !TCP_OPT_SACK_ENABLED
44 		optlen += 2;	// insert NOP padding
45 #endif /* TCP_OPT_SACK_ENABLED */
46 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
47 
48 		optlen += TCP_OPT_WSCALE_LEN + 1;
49 
50 	} else {
51 
52 #if TCP_OPT_TIMESTAMP_ENABLED
53 		optlen += TCP_OPT_TIMESTAMP_LEN + 2;
54 #endif
55 
56 #if TCP_OPT_SACK_ENABLED
57 		if (flags & TCP_FLAG_SACK) {
58 			optlen += TCP_OPT_SACK_LEN + 2;
59 		}
60 #endif
61 	}
62 
63 	assert(optlen % 4 == 0);
64 
65 	return optlen;
66 }
67 /*----------------------------------------------------------------------------*/
68 static inline void
69 GenerateTCPTimestamp(tcp_stream *cur_stream, uint8_t *tcpopt, uint32_t cur_ts)
70 {
71 	uint32_t *ts = (uint32_t *)(tcpopt + 2);
72 
73 	tcpopt[0] = TCP_OPT_TIMESTAMP;
74 	tcpopt[1] = TCP_OPT_TIMESTAMP_LEN;
75 	ts[0] = htonl(cur_ts);
76 	ts[1] = htonl(cur_stream->rcvvar->ts_recent);
77 }
78 /*----------------------------------------------------------------------------*/
79 static inline void
80 GenerateTCPOptions(tcp_stream *cur_stream, uint32_t cur_ts,
81 		uint8_t flags, uint8_t *tcpopt, uint16_t optlen)
82 {
83 	int i = 0;
84 
85 	if (flags & TCP_FLAG_SYN) {
86 		uint16_t mss;
87 
88 		/* MSS option */
89 		mss = cur_stream->sndvar->mss;
90 		tcpopt[i++] = TCP_OPT_MSS;
91 		tcpopt[i++] = TCP_OPT_MSS_LEN;
92 		tcpopt[i++] = mss >> 8;
93 		tcpopt[i++] = mss % 256;
94 
95 		/* SACK permit */
96 #if TCP_OPT_SACK_ENABLED
97 #if !TCP_OPT_TIMESTAMP_ENABLED
98 		tcpopt[i++] = TCP_OPT_NOP;
99 		tcpopt[i++] = TCP_OPT_NOP;
100 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
101 		tcpopt[i++] = TCP_OPT_SACK_PERMIT;
102 		tcpopt[i++] = TCP_OPT_SACK_PERMIT_LEN;
103 		TRACE_SACK("Local SACK permited.\n");
104 #endif /* TCP_OPT_SACK_ENABLED */
105 
106 		/* Timestamp */
107 #if TCP_OPT_TIMESTAMP_ENABLED
108 #if !TCP_OPT_SACK_ENABLED
109 		tcpopt[i++] = TCP_OPT_NOP;
110 		tcpopt[i++] = TCP_OPT_NOP;
111 #endif /* TCP_OPT_SACK_ENABLED */
112 		GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
113 		i += TCP_OPT_TIMESTAMP_LEN;
114 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
115 
116 		/* Window scale */
117 		tcpopt[i++] = TCP_OPT_NOP;
118 		tcpopt[i++] = TCP_OPT_WSCALE;
119 		tcpopt[i++] = TCP_OPT_WSCALE_LEN;
120 		tcpopt[i++] = cur_stream->sndvar->wscale_mine;
121 
122 	} else {
123 
124 #if TCP_OPT_TIMESTAMP_ENABLED
125 		tcpopt[i++] = TCP_OPT_NOP;
126 		tcpopt[i++] = TCP_OPT_NOP;
127 		GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
128 		i += TCP_OPT_TIMESTAMP_LEN;
129 #endif
130 
131 #if TCP_OPT_SACK_ENABLED
132 		if (flags & TCP_OPT_SACK) {
133 			// TODO: implement SACK support
134 		}
135 #endif
136 	}
137 
138 	assert (i == optlen);
139 }
140 /*----------------------------------------------------------------------------*/
141 int
142 SendTCPPacketStandalone(struct mtcp_manager *mtcp,
143 		uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport,
144 		uint32_t seq, uint32_t ack_seq, uint16_t window, uint8_t flags,
145 		uint8_t *payload, uint16_t payloadlen,
146 		uint32_t cur_ts, uint32_t echo_ts, uint16_t ip_id, int8_t in_ifidx)
147 {
148 	struct tcphdr *tcph;
149 	uint8_t *tcpopt;
150 	uint32_t *ts;
151 	uint16_t optlen;
152 	struct pkt_ctx pctx;
153 	int rc = -1;
154 
155 	memset(&pctx, 0, sizeof(pctx));
156 	pctx.p.in_ifidx = in_ifidx;
157 	optlen = CalculateOptionLength(flags);
158 	if (payloadlen > TCP_DEFAULT_MSS + optlen) {
159 		TRACE_ERROR("Payload size exceeds MSS.\n");
160 		assert(0);
161 		return ERROR;
162 	}
163 
164 	tcph = (struct tcphdr *)IPOutputStandalone(mtcp, htons(ip_id),
165 			saddr, daddr, TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
166 	if (tcph == NULL) {
167 		return ERROR;
168 	}
169 	memset(tcph, 0, TCP_HEADER_LEN + optlen);
170 
171 	tcph->source = sport;
172 	tcph->dest = dport;
173 
174 	if (flags & TCP_FLAG_SYN)
175 		tcph->syn = TRUE;
176 	if (flags & TCP_FLAG_FIN)
177 		tcph->fin = TRUE;
178 	if (flags & TCP_FLAG_RST)
179 		tcph->rst = TRUE;
180 	if (flags & TCP_FLAG_PSH)
181 		tcph->psh = TRUE;
182 
183 	tcph->seq = htonl(seq);
184 	if (flags & TCP_FLAG_ACK) {
185 		tcph->ack = TRUE;
186 		tcph->ack_seq = htonl(ack_seq);
187 	}
188 
189 	tcph->window = htons(MIN(window, TCP_MAX_WINDOW));
190 
191 	tcpopt = (uint8_t *)tcph + TCP_HEADER_LEN;
192 	ts = (uint32_t *)(tcpopt + 4);
193 
194 	tcpopt[0] = TCP_OPT_NOP;
195 	tcpopt[1] = TCP_OPT_NOP;
196 	tcpopt[2] = TCP_OPT_TIMESTAMP;
197 	tcpopt[3] = TCP_OPT_TIMESTAMP_LEN;
198 	ts[0] = htonl(cur_ts);
199 	ts[1] = htonl(echo_ts);
200 
201 	tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
202 	// copy payload if exist
203 	if (payloadlen > 0) {
204 		memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
205 	}
206 
207 #if TCP_CALCULATE_CHECKSUM
208 	/* offload TCP checkum if possible */
209 	if (likely(mtcp->iom->dev_ioctl != NULL))
210 		rc = mtcp->iom->dev_ioctl(mtcp->ctx,
211 					  pctx.out_ifidx,
212 					  PKT_TX_TCP_CSUM,
213 					  pctx.p.iph);
214 	/* otherwise calculate TCP checksum in S/W */
215 	if (rc == -1)
216 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
217 					      TCP_HEADER_LEN +
218 					      optlen + payloadlen,
219 					      saddr, daddr);
220 #endif
221 
222 	if (tcph->syn || tcph->fin) {
223 		payloadlen++;
224 	}
225 
226 #ifdef PKTDUMP
227 	DumpPacket(mtcp,
228 			(char *)tcph - sizeof(struct iphdr) - sizeof(struct ethhdr),
229 			payloadlen + sizeof(struct iphdr) + sizeof(struct ethhdr),
230 			"OUT", -1);
231 #endif
232 
233 	struct mon_listener *walk;
234 	/* callback for monitor raw socket */
235 	TAILQ_FOREACH(walk, &mtcp->monitors, link)
236 		if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
237 			HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
238 				       &pctx, MOS_ON_PKT_IN);
239 	return payloadlen;
240 }
241 /*----------------------------------------------------------------------------*/
242 int
243 SendTCPPacket(struct mtcp_manager *mtcp, tcp_stream *cur_stream,
244 		uint32_t cur_ts, uint8_t flags, uint8_t *payload, uint16_t payloadlen)
245 {
246 	struct tcphdr *tcph;
247 	uint16_t optlen;
248 	uint8_t wscale = 0;
249 	uint32_t window32 = 0;
250 	struct pkt_ctx pctx;
251 	int rc = -1;
252 
253 	memset(&pctx, 0, sizeof(pctx));
254 	optlen = CalculateOptionLength(flags);
255 	if (payloadlen > cur_stream->sndvar->mss + optlen) {
256 		TRACE_ERROR("Payload size exceeds MSS\n");
257 		return ERROR;
258 	}
259 
260 	tcph = (struct tcphdr *)IPOutput(mtcp, cur_stream,
261 			TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
262 	if (tcph == NULL) {
263 		return -2;
264 	}
265 	memset(tcph, 0, TCP_HEADER_LEN + optlen);
266 
267 	tcph->source = cur_stream->sport;
268 	tcph->dest = cur_stream->dport;
269 
270 	if (flags & TCP_FLAG_SYN) {
271 		tcph->syn = TRUE;
272 		if (cur_stream->snd_nxt != cur_stream->sndvar->iss) {
273 			TRACE_DBG("Stream %d: weird SYN sequence. "
274 					"snd_nxt: %u, iss: %u\n", cur_stream->id,
275 					cur_stream->snd_nxt, cur_stream->sndvar->iss);
276 		}
277 		TRACE_DBG("Stream %d: Sending SYN. seq: %u, ack_seq: %u\n",
278 			  cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
279 	}
280 	if (flags & TCP_FLAG_RST) {
281 		TRACE_FIN("Stream %d: Sending RST.\n", cur_stream->id);
282 		tcph->rst = TRUE;
283 	}
284 	if (flags & TCP_FLAG_PSH)
285 		tcph->psh = TRUE;
286 
287 	if (flags & TCP_FLAG_WACK) {
288 		tcph->seq = htonl(cur_stream->snd_nxt - 1);
289 		TRACE_CLWND("%u Sending ACK to get new window advertisement. "
290 				"seq: %u, peer_wnd: %u, snd_nxt - snd_una: %u\n",
291 				cur_stream->id,
292 				cur_stream->snd_nxt - 1, cur_stream->sndvar->peer_wnd,
293 				cur_stream->snd_nxt - cur_stream->sndvar->snd_una);
294 	} else if (flags & TCP_FLAG_FIN) {
295 		tcph->fin = TRUE;
296 
297 		if (cur_stream->sndvar->fss == 0) {
298 			TRACE_ERROR("Stream %u: not fss set. closed: %u\n",
299 					cur_stream->id, cur_stream->closed);
300 		}
301 		tcph->seq = htonl(cur_stream->sndvar->fss);
302 		cur_stream->sndvar->is_fin_sent = TRUE;
303 		TRACE_FIN("Stream %d: Sending FIN. seq: %u, ack_seq: %u\n",
304 				cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
305 	} else {
306 		tcph->seq = htonl(cur_stream->snd_nxt);
307 	}
308 
309 	if (flags & TCP_FLAG_ACK) {
310 		tcph->ack = TRUE;
311 		tcph->ack_seq = htonl(cur_stream->rcv_nxt);
312 		cur_stream->sndvar->ts_lastack_sent = cur_ts;
313 		cur_stream->last_active_ts = cur_ts;
314 		UpdateTimeoutList(mtcp, cur_stream);
315 	}
316 
317 	if (flags & TCP_FLAG_SYN) {
318 		wscale = 0;
319 	} else {
320 		wscale = cur_stream->sndvar->wscale_mine;
321 	}
322 
323 	window32 = cur_stream->rcvvar->rcv_wnd >> wscale;
324 	tcph->window = htons((uint16_t)MIN(window32, TCP_MAX_WINDOW));
325 	/* if the advertised window is 0, we need to advertise again later */
326 	if (window32 == 0) {
327 		cur_stream->need_wnd_adv = TRUE;
328 	}
329 
330 	GenerateTCPOptions(cur_stream, cur_ts, flags,
331 			(uint8_t *)tcph + TCP_HEADER_LEN, optlen);
332 
333 	tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
334 	// copy payload if exist
335 	if (payloadlen > 0) {
336 		memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
337 	}
338 
339 #if TCP_CALCULATE_CHECKSUM
340 	if (likely(mtcp->iom->dev_ioctl != NULL))
341 		rc = mtcp->iom->dev_ioctl(mtcp->ctx,
342 					  pctx.out_ifidx,
343 					  PKT_TX_TCP_CSUM,
344 					  pctx.p.iph);
345 	if (rc == -1)
346 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
347 					      TCP_HEADER_LEN +
348 					      optlen + payloadlen,
349 					      cur_stream->saddr,
350 					      cur_stream->daddr);
351 #endif
352 	cur_stream->snd_nxt += payloadlen;
353 
354 	if (tcph->syn || tcph->fin) {
355 		cur_stream->snd_nxt++;
356 		payloadlen++;
357 	}
358 
359 	if (payloadlen > 0) {
360 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
361 			TRACE_FIN("Payload after ESTABLISHED: length: %d, snd_nxt: %u\n",
362 				  payloadlen, cur_stream->snd_nxt);
363 		}
364 
365 		/* update retransmission timer if have payload */
366 		cur_stream->sndvar->ts_rto = cur_ts + cur_stream->sndvar->rto;
367 		TRACE_RTO("Updating retransmission timer. "
368 				"cur_ts: %u, rto: %u, ts_rto: %u\n",
369 				cur_ts, cur_stream->sndvar->rto, cur_stream->sndvar->ts_rto);
370 		AddtoRTOList(mtcp, cur_stream);
371 	}
372 
373 	struct mon_listener *walk;
374 	/* callback for monitor raw socket */
375 	TAILQ_FOREACH(walk, &mtcp->monitors, link)
376 		if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
377 			HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
378 				       &pctx, MOS_ON_PKT_IN);
379 
380 	if (mtcp->num_msp /* this means that stream monitor is on */) {
381 		FillPacketContextTCPInfo(&pctx, tcph);
382 
383 		/* New abstraction for monitor stream */
384 		struct tcp_stream *recvside_stream = cur_stream->pair_stream;
385 		struct tcp_stream *sendside_stream = cur_stream;
386 
387 		if (recvside_stream) {
388 			if (recvside_stream->rcvvar && recvside_stream->rcvvar->rcvbuf)
389 				pctx.p.offset = (uint64_t)seq2loff(recvside_stream->rcvvar->rcvbuf,
390 												   pctx.p.seq,
391 												   recvside_stream->rcvvar->irs + 1);
392 			UpdateMonitor(mtcp, sendside_stream, recvside_stream, &pctx, false);
393 		}
394 	}
395 
396 #ifdef PKTDUMP
397 	DumpPacket(mtcp,
398 			(char *)tcph - sizeof(struct iphdr) - sizeof(struct ethhdr),
399 			payloadlen + sizeof(struct iphdr) + sizeof(struct ethhdr),
400 			"OUT", -1);
401 #endif
402 
403 
404 	return payloadlen;
405 }
406 /*----------------------------------------------------------------------------*/
407 static int
408 FlushTCPSendingBuffer(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
409 {
410 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
411 	const uint32_t maxlen = sndvar->mss - CalculateOptionLength(TCP_FLAG_ACK);
412 	uint8_t *data;
413 	uint32_t buffered_len;
414 	uint32_t seq;
415 	uint16_t len;
416 	int16_t sndlen;
417 	uint32_t window;
418 	int packets = 0;
419 	uint8_t wack_sent = 0;
420 
421 	if (!sndvar->sndbuf) {
422 		TRACE_ERROR("Stream %d: No send buffer available.\n", cur_stream->id);
423 		assert(0);
424 		return 0;
425 	}
426 
427 	SBUF_LOCK(&sndvar->write_lock);
428 
429 	if (sndvar->sndbuf->len == 0) {
430 		packets = 0;
431 		goto out;
432 	}
433 
434 	window = MIN(sndvar->cwnd, sndvar->peer_wnd);
435 
436 	while (1) {
437 		seq = cur_stream->snd_nxt;
438 
439 		if (TCP_SEQ_LT(seq, sndvar->sndbuf->head_seq)) {
440 			TRACE_ERROR("Stream %d: Invalid sequence to send. "
441 					"state: %s, seq: %u, head_seq: %u.\n",
442 					cur_stream->id, TCPStateToString(cur_stream),
443 					seq, sndvar->sndbuf->head_seq);
444 			assert(0);
445 			break;
446 		}
447 		buffered_len = sndvar->sndbuf->head_seq + sndvar->sndbuf->len - seq;
448 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
449 			TRACE_FIN("head_seq: %u, len: %u, seq: %u, "
450 					"buffered_len: %u\n", sndvar->sndbuf->head_seq,
451 					sndvar->sndbuf->len, seq, buffered_len);
452 		}
453 		if (buffered_len == 0)
454 			break;
455 
456 		data = sndvar->sndbuf->head +
457 				(seq - sndvar->sndbuf->head_seq);
458 
459 		if (buffered_len > maxlen) {
460 			len = maxlen;
461 		} else {
462 			len = buffered_len;
463 		}
464 
465 		if (len > window)
466 			len = window;
467 
468 		if (len <= 0)
469 			break;
470 
471 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
472 			TRACE_FIN("Flushing after ESTABLISHED: seq: %u, len: %u, "
473 					"buffered_len: %u\n", seq, len, buffered_len);
474 		}
475 
476 		if (seq - sndvar->snd_una + len > window) {
477 			/* Ask for new window advertisement to peer */
478 			if (seq - sndvar->snd_una + len > sndvar->peer_wnd) {
479 				TRACE_DBG("Full peer window. "
480 					  "peer_wnd: %u, (snd_nxt-snd_una): %u\n",
481 					  sndvar->peer_wnd, seq - sndvar->snd_una);
482 				if (!wack_sent && TS_TO_MSEC(cur_ts - sndvar->ts_lastack_sent) > 500) {
483 					EnqueueACK(mtcp, cur_stream, cur_ts, ACK_OPT_WACK);
484 				} else
485 					wack_sent = 1;
486 			}
487 			packets = -3;
488 			goto out;
489 		}
490 
491 		sndlen = SendTCPPacket(mtcp, cur_stream, cur_ts,
492 				TCP_FLAG_ACK, data, len);
493 		if (sndlen < 0) {
494 			packets = sndlen;
495 			goto out;
496 		}
497 		packets++;
498 
499 		window -= len;
500 	}
501 
502  out:
503 	SBUF_UNLOCK(&sndvar->write_lock);
504 	return packets;
505 }
506 /*----------------------------------------------------------------------------*/
507 static inline int
508 SendControlPacket(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
509 {
510 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
511 	int ret = 0;
512     int flag = 0;
513 
514     switch (cur_stream->state) {
515        case TCP_ST_SYN_SENT: 		/* Send SYN here */
516           flag = TCP_FLAG_SYN;
517           break;
518        case TCP_ST_SYN_RCVD:        /* Send SYN/ACK here */
519           cur_stream->snd_nxt = sndvar->iss;
520           flag = TCP_FLAG_SYN | TCP_FLAG_ACK;
521           break;
522        case TCP_ST_ESTABLISHED:     /* Send ACK here */
523        case TCP_ST_CLOSE_WAIT:	    /* Send ACK for the FIN here */
524        case TCP_ST_FIN_WAIT_2:      /* Send ACK here */
525        case TCP_ST_TIME_WAIT:       /* Send ACK here */
526           flag = TCP_FLAG_ACK;
527           break;
528        case TCP_ST_LAST_ACK:
529        case TCP_ST_FIN_WAIT_1:
530           /* if it is on ack_list, send it after sending ack */
531           if (sndvar->on_send_list || sndvar->on_ack_list)
532              return (-1);
533           flag = TCP_FLAG_FIN | TCP_FLAG_ACK; /* Send FIN/ACK here */
534           break;
535        case TCP_ST_CLOSING:
536           if (sndvar->is_fin_sent) {
537              /* if the sequence is for FIN, send FIN */
538              flag = (cur_stream->snd_nxt == sndvar->fss) ?
539                 (TCP_FLAG_FIN | TCP_FLAG_ACK) : TCP_FLAG_ACK;
540           } else {
541              /* if FIN is not sent, send fin with ack */
542              flag = TCP_FLAG_FIN | TCP_FLAG_ACK;
543           }
544        case TCP_ST_CLOSED_RSVD: /* Send RST here */
545           TRACE_DBG("Stream %d: Try sending RST (TCP_ST_CLOSED_RSVD)\n",
546                     cur_stream->id);
547           /* first flush the data and ack */
548           if (sndvar->on_send_list || sndvar->on_ack_list)
549              return (-1);
550           ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_RST, NULL, 0);
551           if (ret >= 0)
552              DestroyTCPStream(mtcp, cur_stream);
553           return (ret);
554        default:
555           TRACE_ERROR("Stream %d: shouldn't send a control packet\n",
556                       cur_stream->id);
557           assert(0); /* can't reach here! */
558           return (0);
559     }
560 
561     return SendTCPPacket(mtcp, cur_stream, cur_ts, flag, NULL, 0);
562 }
563 /*----------------------------------------------------------------------------*/
564 inline int
565 WriteTCPControlList(mtcp_manager_t mtcp,
566 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
567 {
568 	tcp_stream *cur_stream;
569 	tcp_stream *next, *last;
570 	int cnt = 0;
571 	int ret;
572 
573 	thresh = MIN(thresh, sender->control_list_cnt);
574 
575 	/* Send TCP control messages */
576 	cnt = 0;
577 	cur_stream = TAILQ_FIRST(&sender->control_list);
578 	last = TAILQ_LAST(&sender->control_list, control_head);
579 	while (cur_stream) {
580 		if (++cnt > thresh)
581 			break;
582 
583 		TRACE_LOOP("Inside control loop. cnt: %u, stream: %d\n",
584 				cnt, cur_stream->id);
585 		next = TAILQ_NEXT(cur_stream, sndvar->control_link);
586 
587 		TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
588 		sender->control_list_cnt--;
589 
590 		if (cur_stream->sndvar->on_control_list) {
591 			cur_stream->sndvar->on_control_list = FALSE;
592 			//TRACE_DBG("Stream %u: Sending control packet\n", cur_stream->id);
593 			ret = SendControlPacket(mtcp, cur_stream, cur_ts);
594 			if (ret < 0) {
595 				TAILQ_INSERT_HEAD(&sender->control_list,
596 						cur_stream, sndvar->control_link);
597 				cur_stream->sndvar->on_control_list = TRUE;
598 				sender->control_list_cnt++;
599 				/* since there is no available write buffer, break */
600 				break;
601 			}
602 		} else {
603 			TRACE_ERROR("Stream %d: not on control list.\n", cur_stream->id);
604 		}
605 
606 		if (cur_stream == last)
607 			break;
608 		cur_stream = next;
609 	}
610 
611 	return cnt;
612 }
613 /*----------------------------------------------------------------------------*/
614 inline int
615 WriteTCPDataList(mtcp_manager_t mtcp,
616 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
617 {
618 	tcp_stream *cur_stream;
619 	tcp_stream *next, *last;
620 	int cnt = 0;
621 	int ret;
622 
623 	/* Send data */
624 	cnt = 0;
625 	cur_stream = TAILQ_FIRST(&sender->send_list);
626 	last = TAILQ_LAST(&sender->send_list, send_head);
627 	while (cur_stream) {
628 		if (++cnt > thresh)
629 			break;
630 
631 		TRACE_LOOP("Inside send loop. cnt: %u, stream: %d\n",
632 				cnt, cur_stream->id);
633 		next = TAILQ_NEXT(cur_stream, sndvar->send_link);
634 
635 		TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
636 		if (cur_stream->sndvar->on_send_list) {
637 			ret = 0;
638 
639 			/* Send data here */
640 			/* Only can send data when ESTABLISHED or CLOSE_WAIT */
641 			if (cur_stream->state == TCP_ST_ESTABLISHED) {
642 				if (cur_stream->sndvar->on_control_list) {
643 					/* delay sending data after until on_control_list becomes off */
644 					//TRACE_DBG("Stream %u: delay sending data.\n", cur_stream->id);
645 					ret = -1;
646 				} else {
647 					ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
648 				}
649 			} else if (cur_stream->state == TCP_ST_CLOSE_WAIT ||
650 					cur_stream->state == TCP_ST_FIN_WAIT_1 ||
651 					cur_stream->state == TCP_ST_LAST_ACK) {
652 				ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
653 			} else {
654 				TRACE_DBG("Stream %d: on_send_list at state %s\n",
655 						cur_stream->id, TCPStateToString(cur_stream));
656 #if DUMP_STREAM
657 				DumpStream(mtcp, cur_stream);
658 #endif
659 			}
660 
661 			if (ret < 0) {
662 				TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
663 				/* since there is no available write buffer, break */
664 				break;
665 
666 			} else {
667 				cur_stream->sndvar->on_send_list = FALSE;
668 				sender->send_list_cnt--;
669 				/* the ret value is the number of packets sent. */
670 				/* decrease ack_cnt for the piggybacked acks */
671 #if ACK_PIGGYBACK
672 				if (cur_stream->sndvar->ack_cnt > 0) {
673 					if (cur_stream->sndvar->ack_cnt > ret) {
674 						cur_stream->sndvar->ack_cnt -= ret;
675 					} else {
676 						cur_stream->sndvar->ack_cnt = 0;
677 					}
678 				}
679 #endif
680 #if 1
681 				if (cur_stream->control_list_waiting) {
682 					if (!cur_stream->sndvar->on_ack_list) {
683 						cur_stream->control_list_waiting = FALSE;
684 						AddtoControlList(mtcp, cur_stream, cur_ts);
685 					}
686 				}
687 #endif
688 			}
689 		} else {
690 			TRACE_ERROR("Stream %d: not on send list.\n", cur_stream->id);
691 #ifdef DUMP_STREAM
692 			DumpStream(mtcp, cur_stream);
693 #endif
694 		}
695 
696 		if (cur_stream == last)
697 			break;
698 		cur_stream = next;
699 	}
700 
701 	return cnt;
702 }
703 /*----------------------------------------------------------------------------*/
704 inline int
705 WriteTCPACKList(mtcp_manager_t mtcp,
706 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
707 {
708 	tcp_stream *cur_stream;
709 	tcp_stream *next, *last;
710 	int to_ack;
711 	int cnt = 0;
712 	int ret;
713 
714 	/* Send aggregated acks */
715 	cnt = 0;
716 	cur_stream = TAILQ_FIRST(&sender->ack_list);
717 	last = TAILQ_LAST(&sender->ack_list, ack_head);
718 	while (cur_stream) {
719 		if (++cnt > thresh)
720 			break;
721 
722 		TRACE_LOOP("Inside ack loop. cnt: %u\n", cnt);
723 		next = TAILQ_NEXT(cur_stream, sndvar->ack_link);
724 
725 		if (cur_stream->sndvar->on_ack_list) {
726 			/* this list is only to ack the data packets */
727 			/* if the ack is not data ack, then it will not process here */
728 			to_ack = FALSE;
729 			if (cur_stream->state == TCP_ST_ESTABLISHED ||
730 					cur_stream->state == TCP_ST_CLOSE_WAIT ||
731 					cur_stream->state == TCP_ST_FIN_WAIT_1 ||
732 					cur_stream->state == TCP_ST_FIN_WAIT_2 ||
733 					cur_stream->state == TCP_ST_TIME_WAIT) {
734 				/* TIMEWAIT is possible since the ack is queued
735 				   at FIN_WAIT_2 */
736 				tcprb_t *rb;
737 				if ((rb = cur_stream->rcvvar->rcvbuf) &&
738 					TCP_SEQ_LEQ(cur_stream->rcv_nxt,
739 						(cur_stream->rcvvar->irs + 1) + rb->pile
740 						+ tcprb_cflen(rb))) {
741 					to_ack = TRUE;
742 				}
743 			} else {
744 				TRACE_DBG("Stream %u (%s): "
745 						"Try sending ack at not proper state. "
746 						"seq: %u, ack_seq: %u, on_control_list: %u\n",
747 						cur_stream->id, TCPStateToString(cur_stream),
748 						cur_stream->snd_nxt, cur_stream->rcv_nxt,
749 						cur_stream->sndvar->on_control_list);
750 #ifdef DUMP_STREAM
751 				DumpStream(mtcp, cur_stream);
752 #endif
753 			}
754 
755 			if (to_ack) {
756 				/* send the queued ack packets */
757 				while (cur_stream->sndvar->ack_cnt > 0) {
758 					ret = SendTCPPacket(mtcp, cur_stream,
759 							cur_ts, TCP_FLAG_ACK, NULL, 0);
760 					if (ret < 0) {
761 						/* since there is no available write buffer, break */
762 						break;
763 					}
764 					cur_stream->sndvar->ack_cnt--;
765 				}
766 
767 				/* if is_wack is set, send packet to get window advertisement */
768 				if (cur_stream->sndvar->is_wack) {
769 					cur_stream->sndvar->is_wack = FALSE;
770 					ret = SendTCPPacket(mtcp, cur_stream,
771 							cur_ts, TCP_FLAG_ACK | TCP_FLAG_WACK, NULL, 0);
772 					if (ret < 0) {
773 						/* since there is no available write buffer, break */
774 						cur_stream->sndvar->is_wack = TRUE;
775 					}
776 				}
777 
778 				if (!(cur_stream->sndvar->ack_cnt || cur_stream->sndvar->is_wack)) {
779 					cur_stream->sndvar->on_ack_list = FALSE;
780 					TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
781 					sender->ack_list_cnt--;
782 				}
783 			} else {
784 				cur_stream->sndvar->on_ack_list = FALSE;
785 				cur_stream->sndvar->ack_cnt = 0;
786 				cur_stream->sndvar->is_wack = 0;
787 				TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
788 				sender->ack_list_cnt--;
789 			}
790 
791 			if (cur_stream->control_list_waiting) {
792 				if (!cur_stream->sndvar->on_send_list) {
793 					cur_stream->control_list_waiting = FALSE;
794 					AddtoControlList(mtcp, cur_stream, cur_ts);
795 				}
796 			}
797 		} else {
798 			TRACE_ERROR("Stream %d: not on ack list.\n", cur_stream->id);
799 			TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
800 			sender->ack_list_cnt--;
801 #ifdef DUMP_STREAM
802 			thread_printf(mtcp, mtcp->log_fp,
803 					"Stream %u: not on ack list.\n", cur_stream->id);
804 			DumpStream(mtcp, cur_stream);
805 #endif
806 		}
807 
808 		if (cur_stream == last)
809 			break;
810 		cur_stream = next;
811 	}
812 
813 	return cnt;
814 }
815 /*----------------------------------------------------------------------------*/
816 inline struct mtcp_sender *
817 GetSender(mtcp_manager_t mtcp, tcp_stream *cur_stream)
818 {
819 	if (cur_stream->sndvar->nif_out < 0) {
820 		return mtcp->g_sender;
821 
822 	} else if (cur_stream->sndvar->nif_out >= g_config.mos->netdev_table->num) {
823 		TRACE_ERROR("(NEVER HAPPEN) Failed to find appropriate sender.\n");
824 		return NULL;
825 
826 	} else {
827 		return mtcp->n_sender[cur_stream->sndvar->nif_out];
828 	}
829 }
830 /*----------------------------------------------------------------------------*/
831 inline void
832 AddtoControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
833 {
834 #if TRY_SEND_BEFORE_QUEUE
835 	int ret;
836 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
837 	assert(sender != NULL);
838 
839 	ret = SendControlPacket(mtcp, cur_stream, cur_ts);
840 	if (ret < 0) {
841 #endif
842 		if (!cur_stream->sndvar->on_control_list) {
843 			struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
844 			assert(sender != NULL);
845 
846 			cur_stream->sndvar->on_control_list = TRUE;
847 			TAILQ_INSERT_TAIL(&sender->control_list, cur_stream, sndvar->control_link);
848 			sender->control_list_cnt++;
849 			//TRACE_DBG("Stream %u: added to control list (cnt: %d)\n",
850 			//		cur_stream->id, sender->control_list_cnt);
851 		}
852 #if TRY_SEND_BEFORE_QUEUE
853 	} else {
854 		if (cur_stream->sndvar->on_control_list) {
855 			cur_stream->sndvar->on_control_list = FALSE;
856 			TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
857 			sender->control_list_cnt--;
858 		}
859 	}
860 #endif
861 }
862 /*----------------------------------------------------------------------------*/
863 inline void
864 AddtoSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
865 {
866 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
867 	assert(sender != NULL);
868 
869 	if(!cur_stream->sndvar->sndbuf) {
870 		TRACE_ERROR("[%d] Stream %d: No send buffer available.\n",
871 				mtcp->ctx->cpu,
872 				cur_stream->id);
873 		assert(0);
874 		return;
875 	}
876 
877 	if (!cur_stream->sndvar->on_send_list) {
878 		cur_stream->sndvar->on_send_list = TRUE;
879 		TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
880 		sender->send_list_cnt++;
881 	}
882 }
883 /*----------------------------------------------------------------------------*/
884 inline void
885 AddtoACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
886 {
887 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
888 	assert(sender != NULL);
889 
890 	if (!cur_stream->sndvar->on_ack_list) {
891 		cur_stream->sndvar->on_ack_list = TRUE;
892 		TAILQ_INSERT_TAIL(&sender->ack_list, cur_stream, sndvar->ack_link);
893 		sender->ack_list_cnt++;
894 	}
895 }
896 /*----------------------------------------------------------------------------*/
897 inline void
898 RemoveFromControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
899 {
900 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
901 	assert(sender != NULL);
902 
903 	if (cur_stream->sndvar->on_control_list) {
904 		cur_stream->sndvar->on_control_list = FALSE;
905 		TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
906 		sender->control_list_cnt--;
907 		//TRACE_DBG("Stream %u: Removed from control list (cnt: %d)\n",
908 		//		cur_stream->id, sender->control_list_cnt);
909 	}
910 }
911 /*----------------------------------------------------------------------------*/
912 inline void
913 RemoveFromSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
914 {
915 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
916 	assert(sender != NULL);
917 
918 	if (cur_stream->sndvar->on_send_list) {
919 		cur_stream->sndvar->on_send_list = FALSE;
920 		TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
921 		sender->send_list_cnt--;
922 	}
923 }
924 /*----------------------------------------------------------------------------*/
925 inline void
926 RemoveFromACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
927 {
928 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
929 	assert(sender != NULL);
930 
931 	if (cur_stream->sndvar->on_ack_list) {
932 		cur_stream->sndvar->on_ack_list = FALSE;
933 		TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
934 		sender->ack_list_cnt--;
935 	}
936 }
937 /*----------------------------------------------------------------------------*/
938 inline void
939 EnqueueACK(mtcp_manager_t mtcp,
940 		tcp_stream *cur_stream, uint32_t cur_ts, uint8_t opt)
941 {
942 	if (!(cur_stream->state == TCP_ST_ESTABLISHED ||
943 			cur_stream->state == TCP_ST_CLOSE_WAIT ||
944 			cur_stream->state == TCP_ST_FIN_WAIT_1 ||
945 			cur_stream->state == TCP_ST_FIN_WAIT_2)) {
946 		TRACE_DBG("Stream %u: Enqueueing ack at state %s\n",
947 				cur_stream->id, TCPStateToString(cur_stream));
948 	}
949 
950 	if (opt == ACK_OPT_NOW) {
951 		if (cur_stream->sndvar->ack_cnt < cur_stream->sndvar->ack_cnt + 1) {
952 			cur_stream->sndvar->ack_cnt++;
953 		}
954 	} else if (opt == ACK_OPT_AGGREGATE) {
955 		if (cur_stream->sndvar->ack_cnt == 0) {
956 			cur_stream->sndvar->ack_cnt = 1;
957 		}
958 	} else if (opt == ACK_OPT_WACK) {
959 		cur_stream->sndvar->is_wack = TRUE;
960 	}
961 	AddtoACKList(mtcp, cur_stream);
962 }
963 /*----------------------------------------------------------------------------*/
964 inline void
965 DumpControlList(mtcp_manager_t mtcp, struct mtcp_sender *sender)
966 {
967 	tcp_stream *stream;
968 
969 	TRACE_DBG("Dumping control list (count: %d):\n", sender->control_list_cnt);
970 	TAILQ_FOREACH(stream, &sender->control_list, sndvar->control_link) {
971 		TRACE_DBG("Stream id: %u in control list\n", stream->id);
972 	}
973 }
974 /*----------------------------------------------------------------------------*/
975 static inline void
976 UpdatePassiveSendTCPContext_SynSent(struct tcp_stream *cur_stream,
977 				    struct pkt_ctx *pctx)
978 {
979 	assert(cur_stream);
980 	assert(pctx);
981 
982 	/* add event */
983 	if (cur_stream->state < TCP_ST_SYN_SENT) {
984 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
985 		cur_stream->cb_events |= MOS_ON_CONN_START;
986 	}
987 	/* initialize TCP send variables of send-side stream */
988 	cur_stream->sndvar->cwnd = 1;
989 	cur_stream->sndvar->ssthresh = cur_stream->sndvar->mss * 10;
990 	cur_stream->sndvar->ip_id = htons(pctx->p.iph->id);
991 	cur_stream->sndvar->iss = pctx->p.seq;
992 	cur_stream->snd_nxt = pctx->p.seq + 1;
993 	cur_stream->state = TCP_ST_SYN_SENT;
994 	cur_stream->last_active_ts = pctx->p.cur_ts;
995 
996 	/* receive-side conn start event can also be tagged here */
997 	/* blocked since tcp_in.c takes care of this.. */
998 	/* cur_stream->pair_stream->cb_events |= MOS_ON_CONN_START; */
999 }
1000 /*----------------------------------------------------------------------------*/
1001 /**
1002  * Called (when monitoring mode is enabled).. for every incoming packet from the
1003  * NIC.
1004  */
1005 void
1006 UpdatePassiveSendTCPContext(mtcp_manager_t mtcp, struct tcp_stream *cur_stream,
1007 			    struct pkt_ctx *pctx)
1008 {
1009 	struct tcphdr *tcph;
1010 
1011 	assert(cur_stream);
1012 	tcph = pctx->p.tcph;
1013 
1014 	/* if it is a new TCP stream from client */
1015 	if (tcph->syn && !tcph->ack && cur_stream->state <= TCP_ST_SYN_SENT) {
1016 		TRACE_STATE("Stream %d: %s\n",
1017 			    cur_stream->id, TCPStateToString(cur_stream));
1018 		UpdatePassiveSendTCPContext_SynSent(cur_stream, pctx);
1019 		AddtoTimeoutList(mtcp, cur_stream);
1020 		return;
1021 	}
1022 
1023 	if (tcph->ack) {
1024 		cur_stream->sndvar->ts_lastack_sent = pctx->p.cur_ts;
1025 		cur_stream->last_active_ts = pctx->p.cur_ts;
1026 	}
1027 
1028 	cur_stream->snd_nxt = pctx->p.seq + pctx->p.payloadlen;
1029 
1030 	/* test for reset packet */
1031 	if (tcph->rst) {
1032 		cur_stream->have_reset = TRUE;
1033 		/* test for reset packet */
1034 		cur_stream->state = TCP_ST_CLOSED_RSVD;
1035 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1036 		TRACE_STATE("Stream %d: %s\n",
1037 				cur_stream->id,
1038 				TCPStateToString(cur_stream));
1039 		return;
1040 	}
1041 
1042 	/*
1043 	 * for all others, state transitioning is based on
1044 	 * current tcp_stream state
1045 	 */
1046 	switch (cur_stream->state) {
1047 	case TCP_ST_SYN_SENT:
1048 		/* control should not come here */
1049 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1050 #ifdef BE_RESILIENT_TO_PACKET_DROP
1051 		if (tcph->ack && TCP_SEQ_GT(pctx->p.seq, cur_stream->sndvar->iss)) {
1052 			cur_stream->state = TCP_ST_ESTABLISHED;
1053 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1054 			cur_stream->snd_nxt = pctx->p.seq;
1055 			cur_stream->rcv_nxt = pctx->p.ack_seq;
1056 			goto __Handle_TCP_ST_ESTABLISHED;
1057 		}
1058 #endif
1059 		break;
1060 	case TCP_ST_SYN_RCVD:
1061 		if (!tcph->ack)
1062 			break;
1063 
1064 		if (tcph->syn) {
1065 			cur_stream->sndvar->iss = pctx->p.seq;
1066 			cur_stream->snd_nxt = cur_stream->sndvar->iss + 1;
1067 			TRACE_DBG("Stream %d (TCP_ST_SYN_RCVD): "
1068 				  "setting seq: %u = iss\n",
1069 				  cur_stream->id, pctx->p.seq);
1070 		}
1071 #ifdef BE_RESILIENT_TO_PACKET_DROP
1072 		else {
1073 			cur_stream->state = TCP_ST_ESTABLISHED;
1074 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1075 			cur_stream->snd_nxt = pctx->p.seq;
1076 			cur_stream->rcv_nxt = pctx->p.ack_seq;
1077 			goto __Handle_TCP_ST_ESTABLISHED;
1078 		}
1079 #endif
1080 		TRACE_STATE("Stream %d: %s\n",
1081 			    cur_stream->id,
1082 			    TCPStateToString(cur_stream));
1083 		break;
1084 	case TCP_ST_ESTABLISHED:
1085 #ifdef BE_RESILIENT_TO_PACKET_DROP
1086 __Handle_TCP_ST_ESTABLISHED:
1087 #endif
1088 		/* if application decides to close, fin pkt is sent */
1089 #ifdef BE_RESILIENT_TO_PACKET_DROP
1090 		if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
1091 		{
1092 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
1093 					"Move rcv_nxt from %u to %u.\n",
1094 					cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
1095 			cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
1096 		}
1097 #endif
1098 		if (tcph->fin) {
1099 			cur_stream->state = TCP_ST_FIN_WAIT_1;
1100 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1101 			cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
1102 			cur_stream->sndvar->is_fin_sent = TRUE;
1103 			cur_stream->snd_nxt++;
1104 			TRACE_STATE("Stream %d: %s\n",
1105 				    cur_stream->id,
1106 				    TCPStateToString(cur_stream));
1107 		} else {
1108 			/* creating tcp send buffer still pending.. */
1109 			/* do we need peek for send buffer? */
1110 		}
1111 		break;
1112 	case TCP_ST_CLOSE_WAIT:
1113 		/* if application decides to close, fin pkt is sent */
1114 #ifdef BE_RESILIENT_TO_PACKET_DROP
1115 		if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
1116 		{
1117 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
1118 					"Move rcv_nxt from %u to %u.\n",
1119 					cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
1120 			cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
1121 		}
1122 #endif
1123 		if (tcph->fin) {
1124 			cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
1125 			cur_stream->sndvar->is_fin_sent = TRUE;
1126 			cur_stream->snd_nxt++;
1127 
1128 			/* verify whether the FIN from the other end is acked */
1129 			if ((tcph->ack) && (ntohl(tcph->ack_seq) == cur_stream->rcv_nxt))
1130 				cur_stream->state = TCP_ST_LAST_ACK;
1131 			else
1132 				cur_stream->state = TCP_ST_CLOSING;
1133 
1134 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1135 			TRACE_STATE("Stream %d: %s\n",
1136 				    cur_stream->id,
1137 				    TCPStateToString(cur_stream));
1138 		} else if (tcph->ack) {
1139 			TRACE_STATE("Stream %d: %s\n",
1140 				    cur_stream->id,
1141 				    TCPStateToString(cur_stream));
1142 		}
1143 		break;
1144 	case TCP_ST_LAST_ACK:
1145 		/* control should not come here */
1146 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1147 		break;
1148 	case TCP_ST_FIN_WAIT_1:
1149 		/* control should not come here */
1150 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1151 		break;
1152 	case TCP_ST_FIN_WAIT_2:
1153 		/* control should not come here */
1154 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1155 		break;
1156 	case TCP_ST_CLOSING:
1157 		/* control should not come here */
1158 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1159 		break;
1160 	case TCP_ST_TIME_WAIT:
1161 		/* control may come here but... */
1162 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1163 		if (tcph->ack) {
1164 			TRACE_STATE("Stream %d: %s\n",
1165 				    cur_stream->id,
1166 				    TCPStateToString(cur_stream));
1167 		}
1168 		break;
1169 	case TCP_ST_CLOSED:
1170 	case TCP_ST_CLOSED_RSVD:
1171 		/* Waiting to be destroyed */
1172 		break;
1173 	default:
1174 		TRACE_DBG("This should not happen.. Error state: %s reached!\n"
1175 			  "tcph->syn: %d, tcph->ack: %d\n",
1176 			  TCPStateToString(cur_stream), pctx->p.tcph->syn,
1177 			  pctx->p.tcph->ack);
1178 		assert(0);
1179 		/* This will be enabled once passiverecvcontext is completed */
1180 		/*exit(EXIT_FAILURE);*/
1181 	}
1182 
1183 	UNUSED(mtcp);
1184 	return;
1185 }
1186 /*----------------------------------------------------------------------------*/
1187 void
1188 PostSendTCPAction(mtcp_manager_t mtcp, struct pkt_ctx *pctx,
1189 		  struct tcp_stream *recvside_stream,
1190 		  struct tcp_stream *sendside_stream)
1191 {
1192 	/* this is empty for the time being */
1193 }
1194 /*----------------------------------------------------------------------------*/
1195