xref: /mOS-networking-stack/core/src/tcp_out.c (revision dcdbbb98)
1 #include <unistd.h>
2 #include <string.h>
3 
4 #include "tcp_out.h"
5 #include "mtcp.h"
6 #include "ip_in.h"
7 #include "ip_out.h"
8 #include "tcp_in.h"
9 #include "tcp.h"
10 #include "tcp_stream.h"
11 #include "eventpoll.h"
12 #include "timer.h"
13 #include "debug.h"
14 #include "config.h"
15 
16 #define TCP_CALCULATE_CHECKSUM		TRUE
17 #define ACK_PIGGYBACK			TRUE
18 /* Enable this for higher concurrency rate experiments */
19 #define TRY_SEND_BEFORE_QUEUE		/*FALSE*/ TRUE
20 
21 #define TCP_MAX_WINDOW 65535
22 
23 #define MAX(a, b) ((a)>(b)?(a):(b))
24 #define MIN(a, b) ((a)<(b)?(a):(b))
25 
26 /*----------------------------------------------------------------------------*/
27 static inline uint16_t
28 CalculateOptionLength(uint8_t flags)
29 {
30 	uint16_t optlen = 0;
31 
32 	if (flags & TCP_FLAG_SYN) {
33 		optlen += TCP_OPT_MSS_LEN;
34 #if TCP_OPT_SACK_ENABLED
35 		optlen += TCP_OPT_SACK_PERMIT_LEN;
36 #if !TCP_OPT_TIMESTAMP_ENABLED
37 		optlen += 2;	// insert NOP padding
38 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
39 #endif /* TCP_OPT_SACK_ENABLED */
40 
41 #if TCP_OPT_TIMESTAMP_ENABLED
42 		optlen += TCP_OPT_TIMESTAMP_LEN;
43 #if !TCP_OPT_SACK_ENABLED
44 		optlen += 2;	// insert NOP padding
45 #endif /* TCP_OPT_SACK_ENABLED */
46 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
47 
48 		optlen += TCP_OPT_WSCALE_LEN + 1;
49 
50 	} else {
51 
52 #if TCP_OPT_TIMESTAMP_ENABLED
53 		optlen += TCP_OPT_TIMESTAMP_LEN + 2;
54 #endif
55 
56 #if TCP_OPT_SACK_ENABLED
57 		if (flags & TCP_FLAG_SACK) {
58 			optlen += TCP_OPT_SACK_LEN + 2;
59 		}
60 #endif
61 	}
62 
63 	assert(optlen % 4 == 0);
64 
65 	return optlen;
66 }
67 /*----------------------------------------------------------------------------*/
68 static inline void
69 GenerateTCPTimestamp(tcp_stream *cur_stream, uint8_t *tcpopt, uint32_t cur_ts)
70 {
71 	uint32_t *ts = (uint32_t *)(tcpopt + 2);
72 
73 	tcpopt[0] = TCP_OPT_TIMESTAMP;
74 	tcpopt[1] = TCP_OPT_TIMESTAMP_LEN;
75 	ts[0] = htonl(cur_ts);
76 	ts[1] = htonl(cur_stream->rcvvar->ts_recent);
77 }
78 /*----------------------------------------------------------------------------*/
79 static inline void
80 GenerateTCPOptions(tcp_stream *cur_stream, uint32_t cur_ts,
81 		uint8_t flags, uint8_t *tcpopt, uint16_t optlen)
82 {
83 	int i = 0;
84 
85 	if (flags & TCP_FLAG_SYN) {
86 		uint16_t mss;
87 
88 		/* MSS option */
89 		mss = cur_stream->sndvar->mss;
90 		tcpopt[i++] = TCP_OPT_MSS;
91 		tcpopt[i++] = TCP_OPT_MSS_LEN;
92 		tcpopt[i++] = mss >> 8;
93 		tcpopt[i++] = mss % 256;
94 
95 		/* SACK permit */
96 #if TCP_OPT_SACK_ENABLED
97 #if !TCP_OPT_TIMESTAMP_ENABLED
98 		tcpopt[i++] = TCP_OPT_NOP;
99 		tcpopt[i++] = TCP_OPT_NOP;
100 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
101 		tcpopt[i++] = TCP_OPT_SACK_PERMIT;
102 		tcpopt[i++] = TCP_OPT_SACK_PERMIT_LEN;
103 		TRACE_SACK("Local SACK permited.\n");
104 #endif /* TCP_OPT_SACK_ENABLED */
105 
106 		/* Timestamp */
107 #if TCP_OPT_TIMESTAMP_ENABLED
108 #if !TCP_OPT_SACK_ENABLED
109 		tcpopt[i++] = TCP_OPT_NOP;
110 		tcpopt[i++] = TCP_OPT_NOP;
111 #endif /* TCP_OPT_SACK_ENABLED */
112 		GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
113 		i += TCP_OPT_TIMESTAMP_LEN;
114 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
115 
116 		/* Window scale */
117 		tcpopt[i++] = TCP_OPT_NOP;
118 		tcpopt[i++] = TCP_OPT_WSCALE;
119 		tcpopt[i++] = TCP_OPT_WSCALE_LEN;
120 		tcpopt[i++] = cur_stream->sndvar->wscale_mine;
121 
122 	} else {
123 
124 #if TCP_OPT_TIMESTAMP_ENABLED
125 		tcpopt[i++] = TCP_OPT_NOP;
126 		tcpopt[i++] = TCP_OPT_NOP;
127 		GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
128 		i += TCP_OPT_TIMESTAMP_LEN;
129 #endif
130 
131 #if TCP_OPT_SACK_ENABLED
132 		if (flags & TCP_OPT_SACK) {
133 			// TODO: implement SACK support
134 		}
135 #endif
136 	}
137 
138 	assert (i == optlen);
139 }
140 /*----------------------------------------------------------------------------*/
141 int
142 SendTCPPacketStandalone(struct mtcp_manager *mtcp,
143 		uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport,
144 		uint32_t seq, uint32_t ack_seq, uint16_t window, uint8_t flags,
145 		uint8_t *payload, uint16_t payloadlen,
146 		uint32_t cur_ts, uint32_t echo_ts, uint16_t ip_id, int8_t in_ifidx)
147 {
148 	struct tcphdr *tcph;
149 	uint8_t *tcpopt;
150 	uint32_t *ts;
151 	uint16_t optlen;
152 	struct pkt_ctx pctx;
153 	int rc = -1;
154 
155 	memset(&pctx, 0, sizeof(pctx));
156 	pctx.p.in_ifidx = in_ifidx;
157 	optlen = CalculateOptionLength(flags);
158 	if (payloadlen > TCP_DEFAULT_MSS + optlen) {
159 		TRACE_ERROR("Payload size exceeds MSS.\n");
160 		assert(0);
161 		return ERROR;
162 	}
163 
164 	tcph = (struct tcphdr *)IPOutputStandalone(mtcp, htons(ip_id),
165 			saddr, daddr, TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
166 	if (tcph == NULL) {
167 		return ERROR;
168 	}
169 	memset(tcph, 0, TCP_HEADER_LEN + optlen);
170 
171 	tcph->source = sport;
172 	tcph->dest = dport;
173 
174 	if (flags & TCP_FLAG_SYN)
175 		tcph->syn = TRUE;
176 	if (flags & TCP_FLAG_FIN)
177 		tcph->fin = TRUE;
178 	if (flags & TCP_FLAG_RST)
179 		tcph->rst = TRUE;
180 	if (flags & TCP_FLAG_PSH)
181 		tcph->psh = TRUE;
182 
183 	tcph->seq = htonl(seq);
184 	if (flags & TCP_FLAG_ACK) {
185 		tcph->ack = TRUE;
186 		tcph->ack_seq = htonl(ack_seq);
187 	}
188 
189 	tcph->window = htons(MIN(window, TCP_MAX_WINDOW));
190 
191 	tcpopt = (uint8_t *)tcph + TCP_HEADER_LEN;
192 	ts = (uint32_t *)(tcpopt + 4);
193 
194 	tcpopt[0] = TCP_OPT_NOP;
195 	tcpopt[1] = TCP_OPT_NOP;
196 	tcpopt[2] = TCP_OPT_TIMESTAMP;
197 	tcpopt[3] = TCP_OPT_TIMESTAMP_LEN;
198 	ts[0] = htonl(cur_ts);
199 	ts[1] = htonl(echo_ts);
200 
201 	tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
202 	// copy payload if exist
203 	if (payloadlen > 0) {
204 		memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
205 	}
206 
207 #if TCP_CALCULATE_CHECKSUM
208 	/* offload TCP checkum if possible */
209 	if (likely(mtcp->iom->dev_ioctl != NULL))
210 		rc = mtcp->iom->dev_ioctl(mtcp->ctx,
211 					  pctx.out_ifidx,
212 					  PKT_TX_TCP_CSUM,
213 					  pctx.p.iph);
214 	/* otherwise calculate TCP checksum in S/W */
215 	if (rc == -1)
216 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
217 					      TCP_HEADER_LEN +
218 					      optlen + payloadlen,
219 					      saddr, daddr);
220 #endif
221 
222 	if (tcph->syn || tcph->fin) {
223 		payloadlen++;
224 	}
225 
226 	struct mon_listener *walk;
227 	/* callback for monitor raw socket */
228 	TAILQ_FOREACH(walk, &mtcp->monitors, link)
229 		if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
230 			HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
231 				       &pctx, MOS_ON_PKT_IN);
232 	return payloadlen;
233 }
234 /*----------------------------------------------------------------------------*/
235 int
236 SendTCPPacket(struct mtcp_manager *mtcp, tcp_stream *cur_stream,
237 		uint32_t cur_ts, uint8_t flags, uint8_t *payload, uint16_t payloadlen)
238 {
239 	struct tcphdr *tcph;
240 	uint16_t optlen;
241 	uint8_t wscale = 0;
242 	uint32_t window32 = 0;
243 	struct pkt_ctx pctx;
244 	int rc = -1;
245 
246 	memset(&pctx, 0, sizeof(pctx));
247 	optlen = CalculateOptionLength(flags);
248 	if (payloadlen > cur_stream->sndvar->mss + optlen) {
249 		TRACE_ERROR("Payload size exceeds MSS\n");
250 		return ERROR;
251 	}
252 
253 	tcph = (struct tcphdr *)IPOutput(mtcp, cur_stream,
254 			TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
255 	if (tcph == NULL) {
256 		return -2;
257 	}
258 	memset(tcph, 0, TCP_HEADER_LEN + optlen);
259 
260 	tcph->source = cur_stream->sport;
261 	tcph->dest = cur_stream->dport;
262 
263 	if (flags & TCP_FLAG_SYN) {
264 		tcph->syn = TRUE;
265 		if (cur_stream->snd_nxt != cur_stream->sndvar->iss) {
266 			TRACE_DBG("Stream %d: weird SYN sequence. "
267 					"snd_nxt: %u, iss: %u\n", cur_stream->id,
268 					cur_stream->snd_nxt, cur_stream->sndvar->iss);
269 		}
270 		TRACE_DBG("Stream %d: Sending SYN. seq: %u, ack_seq: %u\n",
271 			  cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
272 	}
273 	if (flags & TCP_FLAG_RST) {
274 		TRACE_FIN("Stream %d: Sending RST.\n", cur_stream->id);
275 		tcph->rst = TRUE;
276 	}
277 	if (flags & TCP_FLAG_PSH)
278 		tcph->psh = TRUE;
279 
280 	if (flags & TCP_FLAG_WACK) {
281 		tcph->seq = htonl(cur_stream->snd_nxt - 1);
282 		TRACE_CLWND("%u Sending ACK to get new window advertisement. "
283 				"seq: %u, peer_wnd: %u, snd_nxt - snd_una: %u\n",
284 				cur_stream->id,
285 				cur_stream->snd_nxt - 1, cur_stream->sndvar->peer_wnd,
286 				cur_stream->snd_nxt - cur_stream->sndvar->snd_una);
287 	} else if (flags & TCP_FLAG_FIN) {
288 		tcph->fin = TRUE;
289 
290 		if (cur_stream->sndvar->fss == 0) {
291 			TRACE_ERROR("Stream %u: not fss set. closed: %u\n",
292 					cur_stream->id, cur_stream->closed);
293 		}
294 		tcph->seq = htonl(cur_stream->sndvar->fss);
295 		cur_stream->sndvar->is_fin_sent = TRUE;
296 		TRACE_FIN("Stream %d: Sending FIN. seq: %u, ack_seq: %u\n",
297 				cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
298 	} else {
299 		tcph->seq = htonl(cur_stream->snd_nxt);
300 	}
301 
302 	if (flags & TCP_FLAG_ACK) {
303 		tcph->ack = TRUE;
304 		tcph->ack_seq = htonl(cur_stream->rcv_nxt);
305 		cur_stream->sndvar->ts_lastack_sent = cur_ts;
306 		cur_stream->last_active_ts = cur_ts;
307 		UpdateTimeoutList(mtcp, cur_stream);
308 	}
309 
310 	if (flags & TCP_FLAG_SYN) {
311 		wscale = 0;
312 	} else {
313 		wscale = cur_stream->sndvar->wscale_mine;
314 	}
315 
316 	window32 = cur_stream->rcvvar->rcv_wnd >> wscale;
317 	tcph->window = htons((uint16_t)MIN(window32, TCP_MAX_WINDOW));
318 	/* if the advertised window is 0, we need to advertise again later */
319 	if (window32 == 0) {
320 		cur_stream->need_wnd_adv = TRUE;
321 	}
322 
323 	GenerateTCPOptions(cur_stream, cur_ts, flags,
324 			(uint8_t *)tcph + TCP_HEADER_LEN, optlen);
325 
326 	tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
327 	// copy payload if exist
328 	if (payloadlen > 0) {
329 		memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
330 	}
331 
332 #if TCP_CALCULATE_CHECKSUM
333 	if (likely(mtcp->iom->dev_ioctl != NULL))
334 		rc = mtcp->iom->dev_ioctl(mtcp->ctx,
335 					  pctx.out_ifidx,
336 					  PKT_TX_TCP_CSUM,
337 					  pctx.p.iph);
338 	if (rc == -1)
339 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
340 					      TCP_HEADER_LEN +
341 					      optlen + payloadlen,
342 					      cur_stream->saddr,
343 					      cur_stream->daddr);
344 #endif
345 	cur_stream->snd_nxt += payloadlen;
346 
347 	if (tcph->syn || tcph->fin) {
348 		cur_stream->snd_nxt++;
349 		payloadlen++;
350 	}
351 
352 	if (payloadlen > 0) {
353 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
354 			TRACE_FIN("Payload after ESTABLISHED: length: %d, snd_nxt: %u\n",
355 				  payloadlen, cur_stream->snd_nxt);
356 		}
357 
358 		/* update retransmission timer if have payload */
359 		cur_stream->sndvar->ts_rto = cur_ts + cur_stream->sndvar->rto;
360 		TRACE_RTO("Updating retransmission timer. "
361 				"cur_ts: %u, rto: %u, ts_rto: %u\n",
362 				cur_ts, cur_stream->sndvar->rto, cur_stream->sndvar->ts_rto);
363 		AddtoRTOList(mtcp, cur_stream);
364 	}
365 
366 	struct mon_listener *walk;
367 	/* callback for monitor raw socket */
368 	TAILQ_FOREACH(walk, &mtcp->monitors, link)
369 		if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
370 			HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
371 				       &pctx, MOS_ON_PKT_IN);
372 
373 	if (mtcp->num_msp /* this means that stream monitor is on */) {
374 		FillPacketContextTCPInfo(&pctx, tcph);
375 
376 		/* New abstraction for monitor stream */
377 		struct tcp_stream *recvside_stream = cur_stream->pair_stream;
378 		struct tcp_stream *sendside_stream = cur_stream;
379 
380 		if (recvside_stream->rcvvar && recvside_stream->rcvvar->rcvbuf)
381 			pctx.p.offset = (uint64_t)seq2loff(recvside_stream->rcvvar->rcvbuf,
382 					pctx.p.seq, recvside_stream->rcvvar->irs + 1);
383 
384 		UpdateMonitor(mtcp, sendside_stream, recvside_stream, &pctx, false);
385 	}
386 
387 #ifdef PKTDUMP
388 	DumpPacket(mtcp,
389 			(char *)tcph - sizeof(struct iphdr) - sizeof(struct ethhdr),
390 			payloadlen + sizeof(struct iphdr) + sizeof(struct ethhdr),
391 			"OUT", -1);
392 #endif
393 
394 
395 	return payloadlen;
396 }
397 /*----------------------------------------------------------------------------*/
398 static int
399 FlushTCPSendingBuffer(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
400 {
401 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
402 	const uint32_t maxlen = sndvar->mss - CalculateOptionLength(TCP_FLAG_ACK);
403 	uint8_t *data;
404 	uint32_t buffered_len;
405 	uint32_t seq;
406 	uint16_t len;
407 	int16_t sndlen;
408 	uint32_t window;
409 	int packets = 0;
410 
411 	if (!sndvar->sndbuf) {
412 		TRACE_ERROR("Stream %d: No send buffer available.\n", cur_stream->id);
413 		assert(0);
414 		return 0;
415 	}
416 
417 	SBUF_LOCK(&sndvar->write_lock);
418 
419 	if (sndvar->sndbuf->len == 0) {
420 		packets = 0;
421 		goto out;
422 	}
423 
424 	window = MIN(sndvar->cwnd, sndvar->peer_wnd);
425 
426 	while (1) {
427 		seq = cur_stream->snd_nxt;
428 
429 		if (TCP_SEQ_LT(seq, sndvar->sndbuf->head_seq)) {
430 			TRACE_ERROR("Stream %d: Invalid sequence to send. "
431 					"state: %s, seq: %u, head_seq: %u.\n",
432 					cur_stream->id, TCPStateToString(cur_stream),
433 					seq, sndvar->sndbuf->head_seq);
434 			assert(0);
435 			break;
436 		}
437 		buffered_len = sndvar->sndbuf->head_seq + sndvar->sndbuf->len - seq;
438 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
439 			TRACE_FIN("head_seq: %u, len: %u, seq: %u, "
440 					"buffered_len: %u\n", sndvar->sndbuf->head_seq,
441 					sndvar->sndbuf->len, seq, buffered_len);
442 		}
443 		if (buffered_len == 0)
444 			break;
445 
446 		data = sndvar->sndbuf->head +
447 				(seq - sndvar->sndbuf->head_seq);
448 
449 		if (buffered_len > maxlen) {
450 			len = maxlen;
451 		} else {
452 			len = buffered_len;
453 		}
454 
455 		if (len <= 0)
456 			break;
457 
458 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
459 			TRACE_FIN("Flushing after ESTABLISHED: seq: %u, len: %u, "
460 					"buffered_len: %u\n", seq, len, buffered_len);
461 		}
462 
463 		if (seq - sndvar->snd_una + len > window) {
464 			/* Ask for new window advertisement to peer */
465 			if (seq - sndvar->snd_una + len > sndvar->peer_wnd) {
466 				TRACE_DBG("Full peer window. "
467 					  "peer_wnd: %u, (snd_nxt-snd_una): %u\n",
468 					  sndvar->peer_wnd, seq - sndvar->snd_una);
469 				if (TS_TO_MSEC(cur_ts - sndvar->ts_lastack_sent) > 500) {
470 					EnqueueACK(mtcp, cur_stream, cur_ts, ACK_OPT_WACK);
471 				}
472 			}
473 			packets = -3;
474 			goto out;
475 		}
476 
477 		sndlen = SendTCPPacket(mtcp, cur_stream, cur_ts,
478 				TCP_FLAG_ACK, data, len);
479 		if (sndlen < 0) {
480 			packets = sndlen;
481 			goto out;
482 		}
483 		packets++;
484 	}
485 
486  out:
487 	SBUF_UNLOCK(&sndvar->write_lock);
488 	return packets;
489 }
490 /*----------------------------------------------------------------------------*/
491 static inline int
492 SendControlPacket(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
493 {
494 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
495 	int ret = 0;
496     int flag = 0;
497 
498     switch (cur_stream->state) {
499        case TCP_ST_SYN_SENT: 		/* Send SYN here */
500           flag = TCP_FLAG_SYN;
501           break;
502        case TCP_ST_SYN_RCVD:        /* Send SYN/ACK here */
503           cur_stream->snd_nxt = sndvar->iss;
504           flag = TCP_FLAG_SYN | TCP_FLAG_ACK;
505           break;
506        case TCP_ST_ESTABLISHED:     /* Send ACK here */
507        case TCP_ST_CLOSE_WAIT:	    /* Send ACK for the FIN here */
508        case TCP_ST_FIN_WAIT_2:      /* Send ACK here */
509        case TCP_ST_TIME_WAIT:       /* Send ACK here */
510           flag = TCP_FLAG_ACK;
511           break;
512        case TCP_ST_LAST_ACK:
513        case TCP_ST_FIN_WAIT_1:
514           /* if it is on ack_list, send it after sending ack */
515           if (sndvar->on_send_list || sndvar->on_ack_list)
516              return (-1);
517           flag = TCP_FLAG_FIN | TCP_FLAG_ACK; /* Send FIN/ACK here */
518           break;
519        case TCP_ST_CLOSING:
520           if (sndvar->is_fin_sent) {
521              /* if the sequence is for FIN, send FIN */
522              flag = (cur_stream->snd_nxt == sndvar->fss) ?
523                 (TCP_FLAG_FIN | TCP_FLAG_ACK) : TCP_FLAG_ACK;
524           } else {
525              /* if FIN is not sent, send fin with ack */
526              flag = TCP_FLAG_FIN | TCP_FLAG_ACK;
527           }
528        case TCP_ST_CLOSED_RSVD: /* Send RST here */
529           TRACE_DBG("Stream %d: Try sending RST (TCP_ST_CLOSED_RSVD)\n",
530                     cur_stream->id);
531           /* first flush the data and ack */
532           if (sndvar->on_send_list || sndvar->on_ack_list)
533              return (-1);
534           ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_RST, NULL, 0);
535           if (ret >= 0)
536              DestroyTCPStream(mtcp, cur_stream);
537           return (ret);
538        default:
539           TRACE_ERROR("Stream %d: shouldn't send a control packet\n",
540                       cur_stream->id);
541           assert(0); /* can't reach here! */
542           return (0);
543     }
544 
545     return SendTCPPacket(mtcp, cur_stream, cur_ts, flag, NULL, 0);
546 }
547 /*----------------------------------------------------------------------------*/
548 inline int
549 WriteTCPControlList(mtcp_manager_t mtcp,
550 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
551 {
552 	tcp_stream *cur_stream;
553 	tcp_stream *next, *last;
554 	int cnt = 0;
555 	int ret;
556 
557 	thresh = MIN(thresh, sender->control_list_cnt);
558 
559 	/* Send TCP control messages */
560 	cnt = 0;
561 	cur_stream = TAILQ_FIRST(&sender->control_list);
562 	last = TAILQ_LAST(&sender->control_list, control_head);
563 	while (cur_stream) {
564 		if (++cnt > thresh)
565 			break;
566 
567 		TRACE_LOOP("Inside control loop. cnt: %u, stream: %d\n",
568 				cnt, cur_stream->id);
569 		next = TAILQ_NEXT(cur_stream, sndvar->control_link);
570 
571 		TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
572 		sender->control_list_cnt--;
573 
574 		if (cur_stream->sndvar->on_control_list) {
575 			cur_stream->sndvar->on_control_list = FALSE;
576 			//TRACE_DBG("Stream %u: Sending control packet\n", cur_stream->id);
577 			ret = SendControlPacket(mtcp, cur_stream, cur_ts);
578 			if (ret < 0) {
579 				TAILQ_INSERT_HEAD(&sender->control_list,
580 						cur_stream, sndvar->control_link);
581 				cur_stream->sndvar->on_control_list = TRUE;
582 				sender->control_list_cnt++;
583 				/* since there is no available write buffer, break */
584 				break;
585 			}
586 		} else {
587 			TRACE_ERROR("Stream %d: not on control list.\n", cur_stream->id);
588 		}
589 
590 		if (cur_stream == last)
591 			break;
592 		cur_stream = next;
593 	}
594 
595 	return cnt;
596 }
597 /*----------------------------------------------------------------------------*/
598 inline int
599 WriteTCPDataList(mtcp_manager_t mtcp,
600 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
601 {
602 	tcp_stream *cur_stream;
603 	tcp_stream *next, *last;
604 	int cnt = 0;
605 	int ret;
606 
607 	/* Send data */
608 	cnt = 0;
609 	cur_stream = TAILQ_FIRST(&sender->send_list);
610 	last = TAILQ_LAST(&sender->send_list, send_head);
611 	while (cur_stream) {
612 		if (++cnt > thresh)
613 			break;
614 
615 		TRACE_LOOP("Inside send loop. cnt: %u, stream: %d\n",
616 				cnt, cur_stream->id);
617 		next = TAILQ_NEXT(cur_stream, sndvar->send_link);
618 
619 		TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
620 		if (cur_stream->sndvar->on_send_list) {
621 			ret = 0;
622 
623 			/* Send data here */
624 			/* Only can send data when ESTABLISHED or CLOSE_WAIT */
625 			if (cur_stream->state == TCP_ST_ESTABLISHED) {
626 				if (cur_stream->sndvar->on_control_list) {
627 					/* delay sending data after until on_control_list becomes off */
628 					//TRACE_DBG("Stream %u: delay sending data.\n", cur_stream->id);
629 					ret = -1;
630 				} else {
631 					ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
632 				}
633 			} else if (cur_stream->state == TCP_ST_CLOSE_WAIT ||
634 					cur_stream->state == TCP_ST_FIN_WAIT_1 ||
635 					cur_stream->state == TCP_ST_LAST_ACK) {
636 				ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
637 			} else {
638 				TRACE_DBG("Stream %d: on_send_list at state %s\n",
639 						cur_stream->id, TCPStateToString(cur_stream));
640 #if DUMP_STREAM
641 				DumpStream(mtcp, cur_stream);
642 #endif
643 			}
644 
645 			if (ret < 0) {
646 				TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
647 				/* since there is no available write buffer, break */
648 				break;
649 
650 			} else {
651 				cur_stream->sndvar->on_send_list = FALSE;
652 				sender->send_list_cnt--;
653 				/* the ret value is the number of packets sent. */
654 				/* decrease ack_cnt for the piggybacked acks */
655 #if ACK_PIGGYBACK
656 				if (cur_stream->sndvar->ack_cnt > 0) {
657 					if (cur_stream->sndvar->ack_cnt > ret) {
658 						cur_stream->sndvar->ack_cnt -= ret;
659 					} else {
660 						cur_stream->sndvar->ack_cnt = 0;
661 					}
662 				}
663 #endif
664 #if 1
665 				if (cur_stream->control_list_waiting) {
666 					if (!cur_stream->sndvar->on_ack_list) {
667 						cur_stream->control_list_waiting = FALSE;
668 						AddtoControlList(mtcp, cur_stream, cur_ts);
669 					}
670 				}
671 #endif
672 			}
673 		} else {
674 			TRACE_ERROR("Stream %d: not on send list.\n", cur_stream->id);
675 #ifdef DUMP_STREAM
676 			DumpStream(mtcp, cur_stream);
677 #endif
678 		}
679 
680 		if (cur_stream == last)
681 			break;
682 		cur_stream = next;
683 	}
684 
685 	return cnt;
686 }
687 /*----------------------------------------------------------------------------*/
688 inline int
689 WriteTCPACKList(mtcp_manager_t mtcp,
690 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
691 {
692 	tcp_stream *cur_stream;
693 	tcp_stream *next, *last;
694 	int to_ack;
695 	int cnt = 0;
696 	int ret;
697 
698 	/* Send aggregated acks */
699 	cnt = 0;
700 	cur_stream = TAILQ_FIRST(&sender->ack_list);
701 	last = TAILQ_LAST(&sender->ack_list, ack_head);
702 	while (cur_stream) {
703 		if (++cnt > thresh)
704 			break;
705 
706 		TRACE_LOOP("Inside ack loop. cnt: %u\n", cnt);
707 		next = TAILQ_NEXT(cur_stream, sndvar->ack_link);
708 
709 		if (cur_stream->sndvar->on_ack_list) {
710 			/* this list is only to ack the data packets */
711 			/* if the ack is not data ack, then it will not process here */
712 			to_ack = FALSE;
713 			if (cur_stream->state == TCP_ST_ESTABLISHED ||
714 					cur_stream->state == TCP_ST_CLOSE_WAIT ||
715 					cur_stream->state == TCP_ST_FIN_WAIT_1 ||
716 					cur_stream->state == TCP_ST_FIN_WAIT_2 ||
717 					cur_stream->state == TCP_ST_TIME_WAIT) {
718 				/* TIMEWAIT is possible since the ack is queued
719 				   at FIN_WAIT_2 */
720 				tcprb_t *rb;
721 				if ((rb = cur_stream->rcvvar->rcvbuf) &&
722 					TCP_SEQ_LEQ(cur_stream->rcv_nxt,
723 						(cur_stream->rcvvar->irs + 1) + rb->pile
724 						+ tcprb_cflen(rb))) {
725 					to_ack = TRUE;
726 				}
727 			} else {
728 				TRACE_DBG("Stream %u (%s): "
729 						"Try sending ack at not proper state. "
730 						"seq: %u, ack_seq: %u, on_control_list: %u\n",
731 						cur_stream->id, TCPStateToString(cur_stream),
732 						cur_stream->snd_nxt, cur_stream->rcv_nxt,
733 						cur_stream->sndvar->on_control_list);
734 #ifdef DUMP_STREAM
735 				DumpStream(mtcp, cur_stream);
736 #endif
737 			}
738 
739 			if (to_ack) {
740 				/* send the queued ack packets */
741 				while (cur_stream->sndvar->ack_cnt > 0) {
742 					ret = SendTCPPacket(mtcp, cur_stream,
743 							cur_ts, TCP_FLAG_ACK, NULL, 0);
744 					if (ret < 0) {
745 						/* since there is no available write buffer, break */
746 						break;
747 					}
748 					cur_stream->sndvar->ack_cnt--;
749 				}
750 
751 				/* if is_wack is set, send packet to get window advertisement */
752 				if (cur_stream->sndvar->is_wack) {
753 					cur_stream->sndvar->is_wack = FALSE;
754 					ret = SendTCPPacket(mtcp, cur_stream,
755 							cur_ts, TCP_FLAG_ACK | TCP_FLAG_WACK, NULL, 0);
756 					if (ret < 0) {
757 						/* since there is no available write buffer, break */
758 						cur_stream->sndvar->is_wack = TRUE;
759 					}
760 				}
761 
762 				if (!(cur_stream->sndvar->ack_cnt || cur_stream->sndvar->is_wack)) {
763 					cur_stream->sndvar->on_ack_list = FALSE;
764 					TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
765 					sender->ack_list_cnt--;
766 				}
767 			} else {
768 				cur_stream->sndvar->on_ack_list = FALSE;
769 				cur_stream->sndvar->ack_cnt = 0;
770 				cur_stream->sndvar->is_wack = 0;
771 				TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
772 				sender->ack_list_cnt--;
773 			}
774 
775 			if (cur_stream->control_list_waiting) {
776 				if (!cur_stream->sndvar->on_send_list) {
777 					cur_stream->control_list_waiting = FALSE;
778 					AddtoControlList(mtcp, cur_stream, cur_ts);
779 				}
780 			}
781 		} else {
782 			TRACE_ERROR("Stream %d: not on ack list.\n", cur_stream->id);
783 			TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
784 			sender->ack_list_cnt--;
785 #ifdef DUMP_STREAM
786 			thread_printf(mtcp, mtcp->log_fp,
787 					"Stream %u: not on ack list.\n", cur_stream->id);
788 			DumpStream(mtcp, cur_stream);
789 #endif
790 		}
791 
792 		if (cur_stream == last)
793 			break;
794 		cur_stream = next;
795 	}
796 
797 	return cnt;
798 }
799 /*----------------------------------------------------------------------------*/
800 inline struct mtcp_sender *
801 GetSender(mtcp_manager_t mtcp, tcp_stream *cur_stream)
802 {
803 	if (cur_stream->sndvar->nif_out < 0) {
804 		return mtcp->g_sender;
805 
806 	} else if (cur_stream->sndvar->nif_out >= g_config.mos->netdev_table->num) {
807 		TRACE_ERROR("(NEVER HAPPEN) Failed to find appropriate sender.\n");
808 		return NULL;
809 
810 	} else {
811 		return mtcp->n_sender[cur_stream->sndvar->nif_out];
812 	}
813 }
814 /*----------------------------------------------------------------------------*/
815 inline void
816 AddtoControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
817 {
818 #if TRY_SEND_BEFORE_QUEUE
819 	int ret;
820 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
821 	assert(sender != NULL);
822 
823 	ret = SendControlPacket(mtcp, cur_stream, cur_ts);
824 	if (ret < 0) {
825 #endif
826 		if (!cur_stream->sndvar->on_control_list) {
827 			struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
828 			assert(sender != NULL);
829 
830 			cur_stream->sndvar->on_control_list = TRUE;
831 			TAILQ_INSERT_TAIL(&sender->control_list, cur_stream, sndvar->control_link);
832 			sender->control_list_cnt++;
833 			//TRACE_DBG("Stream %u: added to control list (cnt: %d)\n",
834 			//		cur_stream->id, sender->control_list_cnt);
835 		}
836 #if TRY_SEND_BEFORE_QUEUE
837 	} else {
838 		if (cur_stream->sndvar->on_control_list) {
839 			cur_stream->sndvar->on_control_list = FALSE;
840 			TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
841 			sender->control_list_cnt--;
842 		}
843 	}
844 #endif
845 }
846 /*----------------------------------------------------------------------------*/
847 inline void
848 AddtoSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
849 {
850 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
851 	assert(sender != NULL);
852 
853 	if(!cur_stream->sndvar->sndbuf) {
854 		TRACE_ERROR("[%d] Stream %d: No send buffer available.\n",
855 				mtcp->ctx->cpu,
856 				cur_stream->id);
857 		assert(0);
858 		return;
859 	}
860 
861 	if (!cur_stream->sndvar->on_send_list) {
862 		cur_stream->sndvar->on_send_list = TRUE;
863 		TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
864 		sender->send_list_cnt++;
865 	}
866 }
867 /*----------------------------------------------------------------------------*/
868 inline void
869 AddtoACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
870 {
871 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
872 	assert(sender != NULL);
873 
874 	if (!cur_stream->sndvar->on_ack_list) {
875 		cur_stream->sndvar->on_ack_list = TRUE;
876 		TAILQ_INSERT_TAIL(&sender->ack_list, cur_stream, sndvar->ack_link);
877 		sender->ack_list_cnt++;
878 	}
879 }
880 /*----------------------------------------------------------------------------*/
881 inline void
882 RemoveFromControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
883 {
884 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
885 	assert(sender != NULL);
886 
887 	if (cur_stream->sndvar->on_control_list) {
888 		cur_stream->sndvar->on_control_list = FALSE;
889 		TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
890 		sender->control_list_cnt--;
891 		//TRACE_DBG("Stream %u: Removed from control list (cnt: %d)\n",
892 		//		cur_stream->id, sender->control_list_cnt);
893 	}
894 }
895 /*----------------------------------------------------------------------------*/
896 inline void
897 RemoveFromSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
898 {
899 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
900 	assert(sender != NULL);
901 
902 	if (cur_stream->sndvar->on_send_list) {
903 		cur_stream->sndvar->on_send_list = FALSE;
904 		TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
905 		sender->send_list_cnt--;
906 	}
907 }
908 /*----------------------------------------------------------------------------*/
909 inline void
910 RemoveFromACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
911 {
912 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
913 	assert(sender != NULL);
914 
915 	if (cur_stream->sndvar->on_ack_list) {
916 		cur_stream->sndvar->on_ack_list = FALSE;
917 		TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
918 		sender->ack_list_cnt--;
919 	}
920 }
921 /*----------------------------------------------------------------------------*/
922 inline void
923 EnqueueACK(mtcp_manager_t mtcp,
924 		tcp_stream *cur_stream, uint32_t cur_ts, uint8_t opt)
925 {
926 	if (!(cur_stream->state == TCP_ST_ESTABLISHED ||
927 			cur_stream->state == TCP_ST_CLOSE_WAIT ||
928 			cur_stream->state == TCP_ST_FIN_WAIT_1 ||
929 			cur_stream->state == TCP_ST_FIN_WAIT_2)) {
930 		TRACE_DBG("Stream %u: Enqueueing ack at state %s\n",
931 				cur_stream->id, TCPStateToString(cur_stream));
932 	}
933 
934 	if (opt == ACK_OPT_NOW) {
935 		if (cur_stream->sndvar->ack_cnt < cur_stream->sndvar->ack_cnt + 1) {
936 			cur_stream->sndvar->ack_cnt++;
937 		}
938 	} else if (opt == ACK_OPT_AGGREGATE) {
939 		if (cur_stream->sndvar->ack_cnt == 0) {
940 			cur_stream->sndvar->ack_cnt = 1;
941 		}
942 	} else if (opt == ACK_OPT_WACK) {
943 		cur_stream->sndvar->is_wack = TRUE;
944 	}
945 	AddtoACKList(mtcp, cur_stream);
946 }
947 /*----------------------------------------------------------------------------*/
948 inline void
949 DumpControlList(mtcp_manager_t mtcp, struct mtcp_sender *sender)
950 {
951 	tcp_stream *stream;
952 
953 	TRACE_DBG("Dumping control list (count: %d):\n", sender->control_list_cnt);
954 	TAILQ_FOREACH(stream, &sender->control_list, sndvar->control_link) {
955 		TRACE_DBG("Stream id: %u in control list\n", stream->id);
956 	}
957 }
958 /*----------------------------------------------------------------------------*/
959 static inline void
960 UpdatePassiveSendTCPContext_SynSent(struct tcp_stream *cur_stream,
961 				    struct pkt_ctx *pctx)
962 {
963 	assert(cur_stream);
964 	assert(pctx);
965 
966 	/* add event */
967 	if (cur_stream->state < TCP_ST_SYN_SENT) {
968 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
969 		cur_stream->cb_events |= MOS_ON_CONN_START;
970 	}
971 	/* initialize TCP send variables of send-side stream */
972 	cur_stream->sndvar->cwnd = 1;
973 	cur_stream->sndvar->ssthresh = cur_stream->sndvar->mss * 10;
974 	cur_stream->sndvar->ip_id = htons(pctx->p.iph->id);
975 	cur_stream->sndvar->iss = pctx->p.seq;
976 	cur_stream->snd_nxt = pctx->p.seq + 1;
977 	cur_stream->state = TCP_ST_SYN_SENT;
978 	cur_stream->last_active_ts = pctx->p.cur_ts;
979 
980 	/* receive-side conn start event can also be tagged here */
981 	/* blocked since tcp_in.c takes care of this.. */
982 	/* cur_stream->pair_stream->cb_events |= MOS_ON_CONN_START; */
983 }
984 /*----------------------------------------------------------------------------*/
985 /**
986  * Called (when monitoring mode is enabled).. for every incoming packet from the
987  * NIC.
988  */
989 void
990 UpdatePassiveSendTCPContext(mtcp_manager_t mtcp, struct tcp_stream *cur_stream,
991 			    struct pkt_ctx *pctx)
992 {
993 	struct tcphdr *tcph;
994 
995 	assert(cur_stream);
996 	tcph = pctx->p.tcph;
997 
998 	/* if it is a new TCP stream from client */
999 	if (tcph->syn && !tcph->ack && cur_stream->state <= TCP_ST_SYN_SENT) {
1000 		TRACE_STATE("Stream %d: %s\n",
1001 			    cur_stream->id, TCPStateToString(cur_stream));
1002 		UpdatePassiveSendTCPContext_SynSent(cur_stream, pctx);
1003 		AddtoTimeoutList(mtcp, cur_stream);
1004 		return;
1005 	}
1006 
1007 	if (tcph->ack) {
1008 		cur_stream->sndvar->ts_lastack_sent = pctx->p.cur_ts;
1009 		cur_stream->last_active_ts = pctx->p.cur_ts;
1010 	}
1011 
1012 	cur_stream->snd_nxt = pctx->p.seq + pctx->p.payloadlen;
1013 
1014 	/* test for reset packet */
1015 	if (tcph->rst) {
1016 		cur_stream->have_reset = TRUE;
1017 		/* test for reset packet */
1018 		cur_stream->state = TCP_ST_CLOSED_RSVD;
1019 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1020 		TRACE_STATE("Stream %d: %s\n",
1021 				cur_stream->id,
1022 				TCPStateToString(cur_stream));
1023 		return;
1024 	}
1025 
1026 	/*
1027 	 * for all others, state transitioning is based on
1028 	 * current tcp_stream state
1029 	 */
1030 	switch (cur_stream->state) {
1031 	case TCP_ST_SYN_SENT:
1032 		/* control should not come here */
1033 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1034 #ifdef BE_RESILIENT_TO_PACKET_DROP
1035 		if (tcph->ack && TCP_SEQ_GT(pctx->p.seq, cur_stream->sndvar->iss)) {
1036 			cur_stream->state = TCP_ST_ESTABLISHED;
1037 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1038 			cur_stream->snd_nxt = pctx->p.seq;
1039 			cur_stream->rcv_nxt = pctx->p.ack_seq;
1040 			goto __Handle_TCP_ST_ESTABLISHED;
1041 		}
1042 #endif
1043 		break;
1044 	case TCP_ST_SYN_RCVD:
1045 		if (!tcph->ack)
1046 			break;
1047 
1048 		if (tcph->syn) {
1049 			cur_stream->sndvar->iss = pctx->p.seq;
1050 			cur_stream->snd_nxt = cur_stream->sndvar->iss + 1;
1051 			TRACE_DBG("Stream %d (TCP_ST_SYN_RCVD): "
1052 				  "setting seq: %u = iss\n",
1053 				  cur_stream->id, pctx->p.seq);
1054 		}
1055 #ifdef BE_RESILIENT_TO_PACKET_DROP
1056 		else {
1057 			cur_stream->state = TCP_ST_ESTABLISHED;
1058 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1059 			cur_stream->snd_nxt = pctx->p.seq;
1060 			cur_stream->rcv_nxt = pctx->p.ack_seq;
1061 			goto __Handle_TCP_ST_ESTABLISHED;
1062 		}
1063 #endif
1064 		TRACE_STATE("Stream %d: %s\n",
1065 			    cur_stream->id,
1066 			    TCPStateToString(cur_stream));
1067 		break;
1068 	case TCP_ST_ESTABLISHED:
1069 #ifdef BE_RESILIENT_TO_PACKET_DROP
1070 __Handle_TCP_ST_ESTABLISHED:
1071 #endif
1072 		/* if application decides to close, fin pkt is sent */
1073 #ifdef BE_RESILIENT_TO_PACKET_DROP
1074 		if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
1075 		{
1076 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
1077 					"Move rcv_nxt from %u to %u.\n",
1078 					cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
1079 			cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
1080 		}
1081 #endif
1082 		if (tcph->fin) {
1083 			cur_stream->state = TCP_ST_FIN_WAIT_1;
1084 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1085 			cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
1086 			cur_stream->sndvar->is_fin_sent = TRUE;
1087 			cur_stream->snd_nxt++;
1088 			TRACE_STATE("Stream %d: %s\n",
1089 				    cur_stream->id,
1090 				    TCPStateToString(cur_stream));
1091 		} else {
1092 			/* creating tcp send buffer still pending.. */
1093 			/* do we need peek for send buffer? */
1094 		}
1095 		break;
1096 	case TCP_ST_CLOSE_WAIT:
1097 		/* if application decides to close, fin pkt is sent */
1098 #ifdef BE_RESILIENT_TO_PACKET_DROP
1099 		if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
1100 		{
1101 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
1102 					"Move rcv_nxt from %u to %u.\n",
1103 					cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
1104 			cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
1105 		}
1106 #endif
1107 		if (tcph->fin) {
1108 			cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
1109 			cur_stream->sndvar->is_fin_sent = TRUE;
1110 			cur_stream->snd_nxt++;
1111 			cur_stream->state = TCP_ST_LAST_ACK;
1112 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1113 			TRACE_STATE("Stream %d: %s\n",
1114 				    cur_stream->id,
1115 				    TCPStateToString(cur_stream));
1116 		} else if (tcph->ack) {
1117 			TRACE_STATE("Stream %d: %s\n",
1118 				    cur_stream->id,
1119 				    TCPStateToString(cur_stream));
1120 		}
1121 		break;
1122 	case TCP_ST_LAST_ACK:
1123 		/* control should not come here */
1124 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1125 		break;
1126 	case TCP_ST_FIN_WAIT_1:
1127 		/* control should not come here */
1128 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1129 		break;
1130 	case TCP_ST_FIN_WAIT_2:
1131 		/* control should not come here */
1132 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1133 		break;
1134 	case TCP_ST_CLOSING:
1135 		/* control should not come here */
1136 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1137 		break;
1138 	case TCP_ST_TIME_WAIT:
1139 		/* control may come here but... */
1140 		/* UpdatePassiveReceiveTCPContext() should take care of this */
1141 		if (tcph->ack) {
1142 			TRACE_STATE("Stream %d: %s\n",
1143 				    cur_stream->id,
1144 				    TCPStateToString(cur_stream));
1145 		}
1146 		break;
1147 	case TCP_ST_CLOSED:
1148 	case TCP_ST_CLOSED_RSVD:
1149 		/* Waiting to be destroyed */
1150 		break;
1151 	default:
1152 		TRACE_DBG("This should not happen.. Error state: %s reached!\n"
1153 			  "tcph->syn: %d, tcph->ack: %d\n",
1154 			  TCPStateToString(cur_stream), pctx->p.tcph->syn,
1155 			  pctx->p.tcph->ack);
1156 		assert(0);
1157 		/* This will be enabled once passiverecvcontext is completed */
1158 		/*exit(EXIT_FAILURE);*/
1159 	}
1160 
1161 	UNUSED(mtcp);
1162 	return;
1163 }
1164 /*----------------------------------------------------------------------------*/
1165 void
1166 PostSendTCPAction(mtcp_manager_t mtcp, struct pkt_ctx *pctx,
1167 		  struct tcp_stream *recvside_stream,
1168 		  struct tcp_stream *sendside_stream)
1169 {
1170 	uint32_t snd_seq_drift, rcv_seq_drift;
1171 
1172 	snd_seq_drift = FetchSeqDrift(sendside_stream, pctx->p.seq);
1173 	rcv_seq_drift = FetchSeqDrift(recvside_stream, pctx->p.ack_seq);
1174 
1175 	if (snd_seq_drift != 0 || rcv_seq_drift != 0) {
1176 		pctx->p.tcph->seq = htonl(pctx->p.seq +
1177 					  snd_seq_drift);
1178 		pctx->p.tcph->ack_seq = htonl(pctx->p.ack_seq -
1179 					      rcv_seq_drift);
1180 		pctx->p.seq += snd_seq_drift;
1181 		pctx->p.ack_seq -= rcv_seq_drift;
1182 
1183 		/* Recompute checksums */
1184 		pctx->p.iph->check = 0;
1185 		pctx->p.iph->check = ip_fast_csum(pctx->p.iph, pctx->p.iph->ihl);
1186 
1187 		pctx->p.tcph->check = 0;
1188 		pctx->p.tcph->check = TCPCalcChecksum((uint16_t *)pctx->p.tcph,
1189 			ntohs(pctx->p.iph->tot_len) - (pctx->p.iph->ihl<<2),
1190 			pctx->p.iph->saddr, pctx->p.iph->daddr);
1191 	}
1192 #if 0
1193 	/* This block of code will go away in future revisions */
1194 	/* update sequence no. if seq_drift > 0 */
1195 	if (sendside_stream->sndvar->seq_drift != 0 ||
1196 	    recvside_stream->sndvar->seq_drift != 0) {
1197 
1198 		pctx->p.tcph->seq = htonl(pctx->p.seq +
1199 					  sendside_stream->sndvar->seq_drift);
1200 		pctx->p.tcph->ack_seq = htonl(pctx->p.ack_seq -
1201 					      recvside_stream->sndvar->seq_drift);
1202 		pctx->p.seq += sendside_stream->sndvar->seq_drift;
1203 		pctx->p.ack_seq -= recvside_stream->sndvar->seq_drift;
1204 
1205 		/* Recompute checksums */
1206 		pctx->p.iph->check = 0;
1207 		pctx->p.iph->check = ip_fast_csum(pctx->p.iph, pctx->p.iph->ihl);
1208 
1209 		pctx->p.tcph->check = 0;
1210 		pctx->p.tcph->check = TCPCalcChecksum((uint16_t *)pctx->p.tcph,
1211 			ntohs(pctx->p.iph->tot_len) - (pctx->p.iph->ihl<<2),
1212 			pctx->p.iph->saddr, pctx->p.iph->daddr);
1213 	}
1214 #endif
1215 }
1216 /*----------------------------------------------------------------------------*/
1217