xref: /mOS-networking-stack/core/src/tcp_out.c (revision 30883c59)
176404edcSAsim Jamshed #include <unistd.h>
276404edcSAsim Jamshed #include <string.h>
376404edcSAsim Jamshed 
476404edcSAsim Jamshed #include "tcp_out.h"
576404edcSAsim Jamshed #include "mtcp.h"
676404edcSAsim Jamshed #include "ip_in.h"
776404edcSAsim Jamshed #include "ip_out.h"
876404edcSAsim Jamshed #include "tcp_in.h"
976404edcSAsim Jamshed #include "tcp.h"
1076404edcSAsim Jamshed #include "tcp_stream.h"
1176404edcSAsim Jamshed #include "eventpoll.h"
1276404edcSAsim Jamshed #include "timer.h"
1376404edcSAsim Jamshed #include "debug.h"
1476404edcSAsim Jamshed #include "config.h"
1576404edcSAsim Jamshed 
/* Gates the TCP checksum code in the send paths of this file
 * (checksum offload attempt via dev_ioctl, then software fallback). */
#define TCP_CALCULATE_CHECKSUM		TRUE
/* NOTE(review): not referenced in this part of the file; presumably enables
 * piggybacking ACKs on outgoing data -- confirm against its users. */
#define ACK_PIGGYBACK			TRUE
/* Enable this for higher concurrency rate experiments */
#define TRY_SEND_BEFORE_QUEUE		/*FALSE*/ TRUE

/* Upper bound applied to the 16-bit window field of outgoing TCP headers. */
#define TCP_MAX_WINDOW 65535

/* Caution: both macros evaluate their arguments more than once, so do not
 * pass expressions with side effects. */
#define MAX(a, b) ((a)>(b)?(a):(b))
#define MIN(a, b) ((a)<(b)?(a):(b))
2576404edcSAsim Jamshed 
2676404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
2776404edcSAsim Jamshed static inline uint16_t
CalculateOptionLength(uint8_t flags)2876404edcSAsim Jamshed CalculateOptionLength(uint8_t flags)
2976404edcSAsim Jamshed {
3076404edcSAsim Jamshed 	uint16_t optlen = 0;
3176404edcSAsim Jamshed 
3276404edcSAsim Jamshed 	if (flags & TCP_FLAG_SYN) {
3376404edcSAsim Jamshed 		optlen += TCP_OPT_MSS_LEN;
3476404edcSAsim Jamshed #if TCP_OPT_SACK_ENABLED
3576404edcSAsim Jamshed 		optlen += TCP_OPT_SACK_PERMIT_LEN;
3676404edcSAsim Jamshed #if !TCP_OPT_TIMESTAMP_ENABLED
3776404edcSAsim Jamshed 		optlen += 2;	// insert NOP padding
3876404edcSAsim Jamshed #endif /* TCP_OPT_TIMESTAMP_ENABLED */
3976404edcSAsim Jamshed #endif /* TCP_OPT_SACK_ENABLED */
4076404edcSAsim Jamshed 
4176404edcSAsim Jamshed #if TCP_OPT_TIMESTAMP_ENABLED
4276404edcSAsim Jamshed 		optlen += TCP_OPT_TIMESTAMP_LEN;
4376404edcSAsim Jamshed #if !TCP_OPT_SACK_ENABLED
4476404edcSAsim Jamshed 		optlen += 2;	// insert NOP padding
4576404edcSAsim Jamshed #endif /* TCP_OPT_SACK_ENABLED */
4676404edcSAsim Jamshed #endif /* TCP_OPT_TIMESTAMP_ENABLED */
4776404edcSAsim Jamshed 
4876404edcSAsim Jamshed 		optlen += TCP_OPT_WSCALE_LEN + 1;
4976404edcSAsim Jamshed 
5076404edcSAsim Jamshed 	} else {
5176404edcSAsim Jamshed 
5276404edcSAsim Jamshed #if TCP_OPT_TIMESTAMP_ENABLED
5376404edcSAsim Jamshed 		optlen += TCP_OPT_TIMESTAMP_LEN + 2;
5476404edcSAsim Jamshed #endif
5576404edcSAsim Jamshed 
5676404edcSAsim Jamshed #if TCP_OPT_SACK_ENABLED
5776404edcSAsim Jamshed 		if (flags & TCP_FLAG_SACK) {
5876404edcSAsim Jamshed 			optlen += TCP_OPT_SACK_LEN + 2;
5976404edcSAsim Jamshed 		}
6076404edcSAsim Jamshed #endif
6176404edcSAsim Jamshed 	}
6276404edcSAsim Jamshed 
6376404edcSAsim Jamshed 	assert(optlen % 4 == 0);
6476404edcSAsim Jamshed 
6576404edcSAsim Jamshed 	return optlen;
6676404edcSAsim Jamshed }
6776404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
6876404edcSAsim Jamshed static inline void
GenerateTCPTimestamp(tcp_stream * cur_stream,uint8_t * tcpopt,uint32_t cur_ts)6976404edcSAsim Jamshed GenerateTCPTimestamp(tcp_stream *cur_stream, uint8_t *tcpopt, uint32_t cur_ts)
7076404edcSAsim Jamshed {
7176404edcSAsim Jamshed 	uint32_t *ts = (uint32_t *)(tcpopt + 2);
7276404edcSAsim Jamshed 
7376404edcSAsim Jamshed 	tcpopt[0] = TCP_OPT_TIMESTAMP;
7476404edcSAsim Jamshed 	tcpopt[1] = TCP_OPT_TIMESTAMP_LEN;
7576404edcSAsim Jamshed 	ts[0] = htonl(cur_ts);
7676404edcSAsim Jamshed 	ts[1] = htonl(cur_stream->rcvvar->ts_recent);
7776404edcSAsim Jamshed }
7876404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
7976404edcSAsim Jamshed static inline void
GenerateTCPOptions(tcp_stream * cur_stream,uint32_t cur_ts,uint8_t flags,uint8_t * tcpopt,uint16_t optlen)8076404edcSAsim Jamshed GenerateTCPOptions(tcp_stream *cur_stream, uint32_t cur_ts,
8176404edcSAsim Jamshed 		uint8_t flags, uint8_t *tcpopt, uint16_t optlen)
8276404edcSAsim Jamshed {
8376404edcSAsim Jamshed 	int i = 0;
8476404edcSAsim Jamshed 
8576404edcSAsim Jamshed 	if (flags & TCP_FLAG_SYN) {
8676404edcSAsim Jamshed 		uint16_t mss;
8776404edcSAsim Jamshed 
8876404edcSAsim Jamshed 		/* MSS option */
8976404edcSAsim Jamshed 		mss = cur_stream->sndvar->mss;
9076404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_MSS;
9176404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_MSS_LEN;
9276404edcSAsim Jamshed 		tcpopt[i++] = mss >> 8;
9376404edcSAsim Jamshed 		tcpopt[i++] = mss % 256;
9476404edcSAsim Jamshed 
9576404edcSAsim Jamshed 		/* SACK permit */
9676404edcSAsim Jamshed #if TCP_OPT_SACK_ENABLED
9776404edcSAsim Jamshed #if !TCP_OPT_TIMESTAMP_ENABLED
9876404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_NOP;
9976404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_NOP;
10076404edcSAsim Jamshed #endif /* TCP_OPT_TIMESTAMP_ENABLED */
10176404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_SACK_PERMIT;
10276404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_SACK_PERMIT_LEN;
10376404edcSAsim Jamshed 		TRACE_SACK("Local SACK permited.\n");
10476404edcSAsim Jamshed #endif /* TCP_OPT_SACK_ENABLED */
10576404edcSAsim Jamshed 
10676404edcSAsim Jamshed 		/* Timestamp */
10776404edcSAsim Jamshed #if TCP_OPT_TIMESTAMP_ENABLED
10876404edcSAsim Jamshed #if !TCP_OPT_SACK_ENABLED
10976404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_NOP;
11076404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_NOP;
11176404edcSAsim Jamshed #endif /* TCP_OPT_SACK_ENABLED */
11276404edcSAsim Jamshed 		GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
11376404edcSAsim Jamshed 		i += TCP_OPT_TIMESTAMP_LEN;
11476404edcSAsim Jamshed #endif /* TCP_OPT_TIMESTAMP_ENABLED */
11576404edcSAsim Jamshed 
11676404edcSAsim Jamshed 		/* Window scale */
11776404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_NOP;
11876404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_WSCALE;
11976404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_WSCALE_LEN;
12076404edcSAsim Jamshed 		tcpopt[i++] = cur_stream->sndvar->wscale_mine;
12176404edcSAsim Jamshed 
12276404edcSAsim Jamshed 	} else {
12376404edcSAsim Jamshed 
12476404edcSAsim Jamshed #if TCP_OPT_TIMESTAMP_ENABLED
12576404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_NOP;
12676404edcSAsim Jamshed 		tcpopt[i++] = TCP_OPT_NOP;
12776404edcSAsim Jamshed 		GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
12876404edcSAsim Jamshed 		i += TCP_OPT_TIMESTAMP_LEN;
12976404edcSAsim Jamshed #endif
13076404edcSAsim Jamshed 
13176404edcSAsim Jamshed #if TCP_OPT_SACK_ENABLED
13276404edcSAsim Jamshed 		if (flags & TCP_OPT_SACK) {
13376404edcSAsim Jamshed 			// TODO: implement SACK support
13476404edcSAsim Jamshed 		}
13576404edcSAsim Jamshed #endif
13676404edcSAsim Jamshed 	}
13776404edcSAsim Jamshed 
13876404edcSAsim Jamshed 	assert (i == optlen);
13976404edcSAsim Jamshed }
14076404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
14176404edcSAsim Jamshed int
SendTCPPacketStandalone(struct mtcp_manager * mtcp,uint32_t saddr,uint16_t sport,uint32_t daddr,uint16_t dport,uint32_t seq,uint32_t ack_seq,uint16_t window,uint8_t flags,uint8_t * payload,uint16_t payloadlen,uint32_t cur_ts,uint32_t echo_ts,uint16_t ip_id,int8_t in_ifidx)14276404edcSAsim Jamshed SendTCPPacketStandalone(struct mtcp_manager *mtcp,
14376404edcSAsim Jamshed 		uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport,
14476404edcSAsim Jamshed 		uint32_t seq, uint32_t ack_seq, uint16_t window, uint8_t flags,
14576404edcSAsim Jamshed 		uint8_t *payload, uint16_t payloadlen,
146a834ea89SAsim Jamshed 		uint32_t cur_ts, uint32_t echo_ts, uint16_t ip_id, int8_t in_ifidx)
14776404edcSAsim Jamshed {
14876404edcSAsim Jamshed 	struct tcphdr *tcph;
14976404edcSAsim Jamshed 	uint8_t *tcpopt;
15076404edcSAsim Jamshed 	uint32_t *ts;
15176404edcSAsim Jamshed 	uint16_t optlen;
15276404edcSAsim Jamshed 	struct pkt_ctx pctx;
15376404edcSAsim Jamshed 	int rc = -1;
15476404edcSAsim Jamshed 
15576404edcSAsim Jamshed 	memset(&pctx, 0, sizeof(pctx));
156a834ea89SAsim Jamshed 	pctx.p.in_ifidx = in_ifidx;
15776404edcSAsim Jamshed 	optlen = CalculateOptionLength(flags);
15876404edcSAsim Jamshed 	if (payloadlen > TCP_DEFAULT_MSS + optlen) {
15976404edcSAsim Jamshed 		TRACE_ERROR("Payload size exceeds MSS.\n");
16076404edcSAsim Jamshed 		assert(0);
16176404edcSAsim Jamshed 		return ERROR;
16276404edcSAsim Jamshed 	}
16376404edcSAsim Jamshed 
164a834ea89SAsim Jamshed 	tcph = (struct tcphdr *)IPOutputStandalone(mtcp, htons(ip_id),
16576404edcSAsim Jamshed 			saddr, daddr, TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
16676404edcSAsim Jamshed 	if (tcph == NULL) {
16776404edcSAsim Jamshed 		return ERROR;
16876404edcSAsim Jamshed 	}
16976404edcSAsim Jamshed 	memset(tcph, 0, TCP_HEADER_LEN + optlen);
17076404edcSAsim Jamshed 
17176404edcSAsim Jamshed 	tcph->source = sport;
17276404edcSAsim Jamshed 	tcph->dest = dport;
17376404edcSAsim Jamshed 
17476404edcSAsim Jamshed 	if (flags & TCP_FLAG_SYN)
17576404edcSAsim Jamshed 		tcph->syn = TRUE;
17676404edcSAsim Jamshed 	if (flags & TCP_FLAG_FIN)
17776404edcSAsim Jamshed 		tcph->fin = TRUE;
17876404edcSAsim Jamshed 	if (flags & TCP_FLAG_RST)
17976404edcSAsim Jamshed 		tcph->rst = TRUE;
18076404edcSAsim Jamshed 	if (flags & TCP_FLAG_PSH)
18176404edcSAsim Jamshed 		tcph->psh = TRUE;
18276404edcSAsim Jamshed 
18376404edcSAsim Jamshed 	tcph->seq = htonl(seq);
18476404edcSAsim Jamshed 	if (flags & TCP_FLAG_ACK) {
18576404edcSAsim Jamshed 		tcph->ack = TRUE;
18676404edcSAsim Jamshed 		tcph->ack_seq = htonl(ack_seq);
18776404edcSAsim Jamshed 	}
18876404edcSAsim Jamshed 
18976404edcSAsim Jamshed 	tcph->window = htons(MIN(window, TCP_MAX_WINDOW));
19076404edcSAsim Jamshed 
19176404edcSAsim Jamshed 	tcpopt = (uint8_t *)tcph + TCP_HEADER_LEN;
19276404edcSAsim Jamshed 	ts = (uint32_t *)(tcpopt + 4);
19376404edcSAsim Jamshed 
19476404edcSAsim Jamshed 	tcpopt[0] = TCP_OPT_NOP;
19576404edcSAsim Jamshed 	tcpopt[1] = TCP_OPT_NOP;
19676404edcSAsim Jamshed 	tcpopt[2] = TCP_OPT_TIMESTAMP;
19776404edcSAsim Jamshed 	tcpopt[3] = TCP_OPT_TIMESTAMP_LEN;
19876404edcSAsim Jamshed 	ts[0] = htonl(cur_ts);
19976404edcSAsim Jamshed 	ts[1] = htonl(echo_ts);
20076404edcSAsim Jamshed 
20176404edcSAsim Jamshed 	tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
20276404edcSAsim Jamshed 	// copy payload if exist
20376404edcSAsim Jamshed 	if (payloadlen > 0) {
20476404edcSAsim Jamshed 		memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
20576404edcSAsim Jamshed 	}
20676404edcSAsim Jamshed 
20776404edcSAsim Jamshed #if TCP_CALCULATE_CHECKSUM
20876404edcSAsim Jamshed 	/* offload TCP checkum if possible */
20976404edcSAsim Jamshed 	if (likely(mtcp->iom->dev_ioctl != NULL))
21076404edcSAsim Jamshed 		rc = mtcp->iom->dev_ioctl(mtcp->ctx,
21176404edcSAsim Jamshed 					  pctx.out_ifidx,
21276404edcSAsim Jamshed 					  PKT_TX_TCP_CSUM,
21376404edcSAsim Jamshed 					  pctx.p.iph);
21476404edcSAsim Jamshed 	/* otherwise calculate TCP checksum in S/W */
21576404edcSAsim Jamshed 	if (rc == -1)
21676404edcSAsim Jamshed 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
21776404edcSAsim Jamshed 					      TCP_HEADER_LEN +
21876404edcSAsim Jamshed 					      optlen + payloadlen,
21976404edcSAsim Jamshed 					      saddr, daddr);
22076404edcSAsim Jamshed #endif
22176404edcSAsim Jamshed 
22276404edcSAsim Jamshed 	if (tcph->syn || tcph->fin) {
22376404edcSAsim Jamshed 		payloadlen++;
22476404edcSAsim Jamshed 	}
22576404edcSAsim Jamshed 
2268c9e1184SAsim Jamshed #ifdef PKTDUMP
2278c9e1184SAsim Jamshed 	DumpPacket(mtcp,
2288c9e1184SAsim Jamshed 			(char *)tcph - sizeof(struct iphdr) - sizeof(struct ethhdr),
2298c9e1184SAsim Jamshed 			payloadlen + sizeof(struct iphdr) + sizeof(struct ethhdr),
2308c9e1184SAsim Jamshed 			"OUT", -1);
2318c9e1184SAsim Jamshed #endif
2328c9e1184SAsim Jamshed 
23376404edcSAsim Jamshed 	struct mon_listener *walk;
23476404edcSAsim Jamshed 	/* callback for monitor raw socket */
23576404edcSAsim Jamshed 	TAILQ_FOREACH(walk, &mtcp->monitors, link)
23676404edcSAsim Jamshed 		if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
23705e3289cSYoungGyoun 			if (ISSET_BPFFILTER(walk->raw_pkt_fcode) &&
23805e3289cSYoungGyoun 				EVAL_BPFFILTER(walk->raw_pkt_fcode, (uint8_t *)pctx.p.ethh,
23905e3289cSYoungGyoun 							   pctx.p.eth_len))
24076404edcSAsim Jamshed 				HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
24176404edcSAsim Jamshed 							   &pctx, MOS_ON_PKT_IN);
24205e3289cSYoungGyoun 
24376404edcSAsim Jamshed 	return payloadlen;
24476404edcSAsim Jamshed }
24576404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
24676404edcSAsim Jamshed int
SendTCPPacket(struct mtcp_manager * mtcp,tcp_stream * cur_stream,uint32_t cur_ts,uint8_t flags,uint8_t * payload,uint16_t payloadlen)24776404edcSAsim Jamshed SendTCPPacket(struct mtcp_manager *mtcp, tcp_stream *cur_stream,
24876404edcSAsim Jamshed 		uint32_t cur_ts, uint8_t flags, uint8_t *payload, uint16_t payloadlen)
24976404edcSAsim Jamshed {
25076404edcSAsim Jamshed 	struct tcphdr *tcph;
25176404edcSAsim Jamshed 	uint16_t optlen;
25276404edcSAsim Jamshed 	uint8_t wscale = 0;
25376404edcSAsim Jamshed 	uint32_t window32 = 0;
25476404edcSAsim Jamshed 	struct pkt_ctx pctx;
25576404edcSAsim Jamshed 	int rc = -1;
25676404edcSAsim Jamshed 
25776404edcSAsim Jamshed 	memset(&pctx, 0, sizeof(pctx));
25876404edcSAsim Jamshed 	optlen = CalculateOptionLength(flags);
25976404edcSAsim Jamshed 	if (payloadlen > cur_stream->sndvar->mss + optlen) {
26076404edcSAsim Jamshed 		TRACE_ERROR("Payload size exceeds MSS\n");
26176404edcSAsim Jamshed 		return ERROR;
26276404edcSAsim Jamshed 	}
26376404edcSAsim Jamshed 
26476404edcSAsim Jamshed 	tcph = (struct tcphdr *)IPOutput(mtcp, cur_stream,
26576404edcSAsim Jamshed 			TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
26676404edcSAsim Jamshed 	if (tcph == NULL) {
26776404edcSAsim Jamshed 		return -2;
26876404edcSAsim Jamshed 	}
26976404edcSAsim Jamshed 	memset(tcph, 0, TCP_HEADER_LEN + optlen);
27076404edcSAsim Jamshed 
27176404edcSAsim Jamshed 	tcph->source = cur_stream->sport;
27276404edcSAsim Jamshed 	tcph->dest = cur_stream->dport;
27376404edcSAsim Jamshed 
27476404edcSAsim Jamshed 	if (flags & TCP_FLAG_SYN) {
27576404edcSAsim Jamshed 		tcph->syn = TRUE;
27676404edcSAsim Jamshed 		if (cur_stream->snd_nxt != cur_stream->sndvar->iss) {
27776404edcSAsim Jamshed 			TRACE_DBG("Stream %d: weird SYN sequence. "
27876404edcSAsim Jamshed 					"snd_nxt: %u, iss: %u\n", cur_stream->id,
27976404edcSAsim Jamshed 					cur_stream->snd_nxt, cur_stream->sndvar->iss);
28076404edcSAsim Jamshed 		}
28176404edcSAsim Jamshed 		TRACE_DBG("Stream %d: Sending SYN. seq: %u, ack_seq: %u\n",
28276404edcSAsim Jamshed 			  cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
28376404edcSAsim Jamshed 	}
28476404edcSAsim Jamshed 	if (flags & TCP_FLAG_RST) {
28576404edcSAsim Jamshed 		TRACE_FIN("Stream %d: Sending RST.\n", cur_stream->id);
28676404edcSAsim Jamshed 		tcph->rst = TRUE;
28776404edcSAsim Jamshed 	}
28876404edcSAsim Jamshed 	if (flags & TCP_FLAG_PSH)
28976404edcSAsim Jamshed 		tcph->psh = TRUE;
29076404edcSAsim Jamshed 
29176404edcSAsim Jamshed 	if (flags & TCP_FLAG_WACK) {
29276404edcSAsim Jamshed 		tcph->seq = htonl(cur_stream->snd_nxt - 1);
29376404edcSAsim Jamshed 		TRACE_CLWND("%u Sending ACK to get new window advertisement. "
29476404edcSAsim Jamshed 				"seq: %u, peer_wnd: %u, snd_nxt - snd_una: %u\n",
29576404edcSAsim Jamshed 				cur_stream->id,
29676404edcSAsim Jamshed 				cur_stream->snd_nxt - 1, cur_stream->sndvar->peer_wnd,
29776404edcSAsim Jamshed 				cur_stream->snd_nxt - cur_stream->sndvar->snd_una);
29876404edcSAsim Jamshed 	} else if (flags & TCP_FLAG_FIN) {
29976404edcSAsim Jamshed 		tcph->fin = TRUE;
30076404edcSAsim Jamshed 
30176404edcSAsim Jamshed 		if (cur_stream->sndvar->fss == 0) {
30276404edcSAsim Jamshed 			TRACE_ERROR("Stream %u: not fss set. closed: %u\n",
30376404edcSAsim Jamshed 					cur_stream->id, cur_stream->closed);
30476404edcSAsim Jamshed 		}
30576404edcSAsim Jamshed 		tcph->seq = htonl(cur_stream->sndvar->fss);
30676404edcSAsim Jamshed 		cur_stream->sndvar->is_fin_sent = TRUE;
30776404edcSAsim Jamshed 		TRACE_FIN("Stream %d: Sending FIN. seq: %u, ack_seq: %u\n",
30876404edcSAsim Jamshed 				cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
30976404edcSAsim Jamshed 	} else {
31076404edcSAsim Jamshed 		tcph->seq = htonl(cur_stream->snd_nxt);
31176404edcSAsim Jamshed 	}
31276404edcSAsim Jamshed 
31376404edcSAsim Jamshed 	if (flags & TCP_FLAG_ACK) {
31476404edcSAsim Jamshed 		tcph->ack = TRUE;
31576404edcSAsim Jamshed 		tcph->ack_seq = htonl(cur_stream->rcv_nxt);
31676404edcSAsim Jamshed 		cur_stream->sndvar->ts_lastack_sent = cur_ts;
31776404edcSAsim Jamshed 		cur_stream->last_active_ts = cur_ts;
31876404edcSAsim Jamshed 		UpdateTimeoutList(mtcp, cur_stream);
31976404edcSAsim Jamshed 	}
32076404edcSAsim Jamshed 
32176404edcSAsim Jamshed 	if (flags & TCP_FLAG_SYN) {
32276404edcSAsim Jamshed 		wscale = 0;
32376404edcSAsim Jamshed 	} else {
32476404edcSAsim Jamshed 		wscale = cur_stream->sndvar->wscale_mine;
32576404edcSAsim Jamshed 	}
32676404edcSAsim Jamshed 
32776404edcSAsim Jamshed 	window32 = cur_stream->rcvvar->rcv_wnd >> wscale;
32876404edcSAsim Jamshed 	tcph->window = htons((uint16_t)MIN(window32, TCP_MAX_WINDOW));
32976404edcSAsim Jamshed 	/* if the advertised window is 0, we need to advertise again later */
33076404edcSAsim Jamshed 	if (window32 == 0) {
33176404edcSAsim Jamshed 		cur_stream->need_wnd_adv = TRUE;
33276404edcSAsim Jamshed 	}
33376404edcSAsim Jamshed 
33476404edcSAsim Jamshed 	GenerateTCPOptions(cur_stream, cur_ts, flags,
33576404edcSAsim Jamshed 			(uint8_t *)tcph + TCP_HEADER_LEN, optlen);
33676404edcSAsim Jamshed 
33776404edcSAsim Jamshed 	tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
33876404edcSAsim Jamshed 	// copy payload if exist
33976404edcSAsim Jamshed 	if (payloadlen > 0) {
34076404edcSAsim Jamshed 		memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
34176404edcSAsim Jamshed 	}
34276404edcSAsim Jamshed 
34376404edcSAsim Jamshed #if TCP_CALCULATE_CHECKSUM
34476404edcSAsim Jamshed 	if (likely(mtcp->iom->dev_ioctl != NULL))
34576404edcSAsim Jamshed 		rc = mtcp->iom->dev_ioctl(mtcp->ctx,
34676404edcSAsim Jamshed 					  pctx.out_ifidx,
34776404edcSAsim Jamshed 					  PKT_TX_TCP_CSUM,
34876404edcSAsim Jamshed 					  pctx.p.iph);
34976404edcSAsim Jamshed 	if (rc == -1)
35076404edcSAsim Jamshed 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
35176404edcSAsim Jamshed 					      TCP_HEADER_LEN +
35276404edcSAsim Jamshed 					      optlen + payloadlen,
35376404edcSAsim Jamshed 					      cur_stream->saddr,
35476404edcSAsim Jamshed 					      cur_stream->daddr);
35576404edcSAsim Jamshed #endif
35676404edcSAsim Jamshed 	cur_stream->snd_nxt += payloadlen;
35776404edcSAsim Jamshed 
35876404edcSAsim Jamshed 	if (tcph->syn || tcph->fin) {
35976404edcSAsim Jamshed 		cur_stream->snd_nxt++;
36076404edcSAsim Jamshed 		payloadlen++;
36176404edcSAsim Jamshed 	}
36276404edcSAsim Jamshed 
36376404edcSAsim Jamshed 	if (payloadlen > 0) {
36476404edcSAsim Jamshed 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
36576404edcSAsim Jamshed 			TRACE_FIN("Payload after ESTABLISHED: length: %d, snd_nxt: %u\n",
36676404edcSAsim Jamshed 				  payloadlen, cur_stream->snd_nxt);
36776404edcSAsim Jamshed 		}
36876404edcSAsim Jamshed 
36976404edcSAsim Jamshed 		/* update retransmission timer if have payload */
37076404edcSAsim Jamshed 		cur_stream->sndvar->ts_rto = cur_ts + cur_stream->sndvar->rto;
37176404edcSAsim Jamshed 		TRACE_RTO("Updating retransmission timer. "
37276404edcSAsim Jamshed 				"cur_ts: %u, rto: %u, ts_rto: %u\n",
37376404edcSAsim Jamshed 				cur_ts, cur_stream->sndvar->rto, cur_stream->sndvar->ts_rto);
37476404edcSAsim Jamshed 		AddtoRTOList(mtcp, cur_stream);
37576404edcSAsim Jamshed 	}
37676404edcSAsim Jamshed 
37776404edcSAsim Jamshed 	struct mon_listener *walk;
37876404edcSAsim Jamshed 	/* callback for monitor raw socket */
37976404edcSAsim Jamshed 	TAILQ_FOREACH(walk, &mtcp->monitors, link)
38076404edcSAsim Jamshed 		if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
38105e3289cSYoungGyoun 			if (ISSET_BPFFILTER(walk->raw_pkt_fcode) &&
38205e3289cSYoungGyoun 				EVAL_BPFFILTER(walk->raw_pkt_fcode, (uint8_t *)pctx.p.ethh,
38305e3289cSYoungGyoun 							   pctx.p.eth_len))
38476404edcSAsim Jamshed 				HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
38576404edcSAsim Jamshed 							   &pctx, MOS_ON_PKT_IN);
38676404edcSAsim Jamshed 
38776404edcSAsim Jamshed 	if (mtcp->num_msp /* this means that stream monitor is on */) {
38876404edcSAsim Jamshed 		FillPacketContextTCPInfo(&pctx, tcph);
38976404edcSAsim Jamshed 
39076404edcSAsim Jamshed 		/* New abstraction for monitor stream */
39176404edcSAsim Jamshed 		struct tcp_stream *recvside_stream = cur_stream->pair_stream;
39276404edcSAsim Jamshed 		struct tcp_stream *sendside_stream = cur_stream;
39376404edcSAsim Jamshed 
394d8823779SAsim Jamshed 		if (recvside_stream) {
39576404edcSAsim Jamshed 			if (recvside_stream->rcvvar && recvside_stream->rcvvar->rcvbuf)
39676404edcSAsim Jamshed 				pctx.p.offset = (uint64_t)seq2loff(recvside_stream->rcvvar->rcvbuf,
3978c9e1184SAsim Jamshed 												   pctx.p.seq,
3988c9e1184SAsim Jamshed 												   recvside_stream->rcvvar->irs + 1);
39976404edcSAsim Jamshed 			UpdateMonitor(mtcp, sendside_stream, recvside_stream, &pctx, false);
40076404edcSAsim Jamshed 		}
401d8823779SAsim Jamshed 	}
40276404edcSAsim Jamshed 
40376404edcSAsim Jamshed #ifdef PKTDUMP
40476404edcSAsim Jamshed 	DumpPacket(mtcp,
40576404edcSAsim Jamshed 			(char *)tcph - sizeof(struct iphdr) - sizeof(struct ethhdr),
40676404edcSAsim Jamshed 			payloadlen + sizeof(struct iphdr) + sizeof(struct ethhdr),
40776404edcSAsim Jamshed 			"OUT", -1);
40876404edcSAsim Jamshed #endif
40976404edcSAsim Jamshed 
41076404edcSAsim Jamshed 
41176404edcSAsim Jamshed 	return payloadlen;
41276404edcSAsim Jamshed }
41376404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
41476404edcSAsim Jamshed static int
FlushTCPSendingBuffer(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t cur_ts)41576404edcSAsim Jamshed FlushTCPSendingBuffer(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
41676404edcSAsim Jamshed {
41776404edcSAsim Jamshed 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
41876404edcSAsim Jamshed 	const uint32_t maxlen = sndvar->mss - CalculateOptionLength(TCP_FLAG_ACK);
41976404edcSAsim Jamshed 	uint8_t *data;
42076404edcSAsim Jamshed 	uint32_t buffered_len;
42176404edcSAsim Jamshed 	uint32_t seq;
42276404edcSAsim Jamshed 	uint16_t len;
42376404edcSAsim Jamshed 	int16_t sndlen;
42476404edcSAsim Jamshed 	uint32_t window;
42576404edcSAsim Jamshed 	int packets = 0;
4268c9e1184SAsim Jamshed 	uint8_t wack_sent = 0;
42776404edcSAsim Jamshed 
42876404edcSAsim Jamshed 	if (!sndvar->sndbuf) {
42976404edcSAsim Jamshed 		TRACE_ERROR("Stream %d: No send buffer available.\n", cur_stream->id);
43076404edcSAsim Jamshed 		assert(0);
43176404edcSAsim Jamshed 		return 0;
43276404edcSAsim Jamshed 	}
43376404edcSAsim Jamshed 
43403d1e42cSAsim Jamshed 	SBUF_LOCK(&sndvar->write_lock);
43503d1e42cSAsim Jamshed 
43676404edcSAsim Jamshed 	if (sndvar->sndbuf->len == 0) {
43703d1e42cSAsim Jamshed 		packets = 0;
43803d1e42cSAsim Jamshed 		goto out;
43976404edcSAsim Jamshed 	}
44076404edcSAsim Jamshed 
44176404edcSAsim Jamshed 	window = MIN(sndvar->cwnd, sndvar->peer_wnd);
44276404edcSAsim Jamshed 
44376404edcSAsim Jamshed 	while (1) {
44476404edcSAsim Jamshed 		seq = cur_stream->snd_nxt;
44576404edcSAsim Jamshed 
44676404edcSAsim Jamshed 		if (TCP_SEQ_LT(seq, sndvar->sndbuf->head_seq)) {
44776404edcSAsim Jamshed 			TRACE_ERROR("Stream %d: Invalid sequence to send. "
44876404edcSAsim Jamshed 					"state: %s, seq: %u, head_seq: %u.\n",
44976404edcSAsim Jamshed 					cur_stream->id, TCPStateToString(cur_stream),
45076404edcSAsim Jamshed 					seq, sndvar->sndbuf->head_seq);
45176404edcSAsim Jamshed 			assert(0);
45276404edcSAsim Jamshed 			break;
45376404edcSAsim Jamshed 		}
45476404edcSAsim Jamshed 		buffered_len = sndvar->sndbuf->head_seq + sndvar->sndbuf->len - seq;
45576404edcSAsim Jamshed 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
45676404edcSAsim Jamshed 			TRACE_FIN("head_seq: %u, len: %u, seq: %u, "
45776404edcSAsim Jamshed 					"buffered_len: %u\n", sndvar->sndbuf->head_seq,
45876404edcSAsim Jamshed 					sndvar->sndbuf->len, seq, buffered_len);
45976404edcSAsim Jamshed 		}
46076404edcSAsim Jamshed 		if (buffered_len == 0)
46176404edcSAsim Jamshed 			break;
46276404edcSAsim Jamshed 
46376404edcSAsim Jamshed 		data = sndvar->sndbuf->head +
46476404edcSAsim Jamshed 				(seq - sndvar->sndbuf->head_seq);
46576404edcSAsim Jamshed 
46676404edcSAsim Jamshed 		if (buffered_len > maxlen) {
46776404edcSAsim Jamshed 			len = maxlen;
46876404edcSAsim Jamshed 		} else {
46976404edcSAsim Jamshed 			len = buffered_len;
47076404edcSAsim Jamshed 		}
47176404edcSAsim Jamshed 
4728c9e1184SAsim Jamshed 		if (len > window)
4738c9e1184SAsim Jamshed 			len = window;
4748c9e1184SAsim Jamshed 
47576404edcSAsim Jamshed 		if (len <= 0)
47676404edcSAsim Jamshed 			break;
47776404edcSAsim Jamshed 
47876404edcSAsim Jamshed 		if (cur_stream->state > TCP_ST_ESTABLISHED) {
47976404edcSAsim Jamshed 			TRACE_FIN("Flushing after ESTABLISHED: seq: %u, len: %u, "
48076404edcSAsim Jamshed 					"buffered_len: %u\n", seq, len, buffered_len);
48176404edcSAsim Jamshed 		}
48276404edcSAsim Jamshed 
48376404edcSAsim Jamshed 		if (seq - sndvar->snd_una + len > window) {
48476404edcSAsim Jamshed 			/* Ask for new window advertisement to peer */
48576404edcSAsim Jamshed 			if (seq - sndvar->snd_una + len > sndvar->peer_wnd) {
48676404edcSAsim Jamshed 				TRACE_DBG("Full peer window. "
48776404edcSAsim Jamshed 					  "peer_wnd: %u, (snd_nxt-snd_una): %u\n",
48876404edcSAsim Jamshed 					  sndvar->peer_wnd, seq - sndvar->snd_una);
4898c9e1184SAsim Jamshed 				if (!wack_sent && TS_TO_MSEC(cur_ts - sndvar->ts_lastack_sent) > 500) {
49076404edcSAsim Jamshed 					EnqueueACK(mtcp, cur_stream, cur_ts, ACK_OPT_WACK);
4918c9e1184SAsim Jamshed 				} else
4928c9e1184SAsim Jamshed 					wack_sent = 1;
49376404edcSAsim Jamshed 			}
494*30883c59SAsim Jamshed 			packets = -3;
495*30883c59SAsim Jamshed 			goto out;
49676404edcSAsim Jamshed 		}
49776404edcSAsim Jamshed 
49876404edcSAsim Jamshed 		sndlen = SendTCPPacket(mtcp, cur_stream, cur_ts,
49976404edcSAsim Jamshed 				TCP_FLAG_ACK, data, len);
50076404edcSAsim Jamshed 		if (sndlen < 0) {
501c789f6daSAsim Jamshed 			packets = sndlen;
50203d1e42cSAsim Jamshed 			goto out;
50376404edcSAsim Jamshed 		}
50476404edcSAsim Jamshed 		packets++;
5058c9e1184SAsim Jamshed 
5068c9e1184SAsim Jamshed 		window -= len;
50776404edcSAsim Jamshed 	}
50876404edcSAsim Jamshed 
50903d1e42cSAsim Jamshed  out:
51003d1e42cSAsim Jamshed 	SBUF_UNLOCK(&sndvar->write_lock);
51176404edcSAsim Jamshed 	return packets;
51276404edcSAsim Jamshed }
51376404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
51476404edcSAsim Jamshed static inline int
SendControlPacket(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t cur_ts)51576404edcSAsim Jamshed SendControlPacket(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
51676404edcSAsim Jamshed {
51776404edcSAsim Jamshed 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
51876404edcSAsim Jamshed 	int ret = 0;
51976404edcSAsim Jamshed     int flag = 0;
52076404edcSAsim Jamshed 
52176404edcSAsim Jamshed     switch (cur_stream->state) {
52276404edcSAsim Jamshed        case TCP_ST_SYN_SENT: 		/* Send SYN here */
52376404edcSAsim Jamshed           flag = TCP_FLAG_SYN;
52476404edcSAsim Jamshed           break;
52576404edcSAsim Jamshed        case TCP_ST_SYN_RCVD:        /* Send SYN/ACK here */
52676404edcSAsim Jamshed           cur_stream->snd_nxt = sndvar->iss;
52776404edcSAsim Jamshed           flag = TCP_FLAG_SYN | TCP_FLAG_ACK;
52876404edcSAsim Jamshed           break;
52976404edcSAsim Jamshed        case TCP_ST_ESTABLISHED:     /* Send ACK here */
53076404edcSAsim Jamshed        case TCP_ST_CLOSE_WAIT:	    /* Send ACK for the FIN here */
53176404edcSAsim Jamshed        case TCP_ST_FIN_WAIT_2:      /* Send ACK here */
53276404edcSAsim Jamshed        case TCP_ST_TIME_WAIT:       /* Send ACK here */
53376404edcSAsim Jamshed           flag = TCP_FLAG_ACK;
53476404edcSAsim Jamshed           break;
53576404edcSAsim Jamshed        case TCP_ST_LAST_ACK:
53676404edcSAsim Jamshed        case TCP_ST_FIN_WAIT_1:
53776404edcSAsim Jamshed           /* if it is on ack_list, send it after sending ack */
53876404edcSAsim Jamshed           if (sndvar->on_send_list || sndvar->on_ack_list)
53976404edcSAsim Jamshed              return (-1);
54076404edcSAsim Jamshed           flag = TCP_FLAG_FIN | TCP_FLAG_ACK; /* Send FIN/ACK here */
54176404edcSAsim Jamshed           break;
54276404edcSAsim Jamshed        case TCP_ST_CLOSING:
54376404edcSAsim Jamshed           if (sndvar->is_fin_sent) {
54476404edcSAsim Jamshed              /* if the sequence is for FIN, send FIN */
54576404edcSAsim Jamshed              flag = (cur_stream->snd_nxt == sndvar->fss) ?
54676404edcSAsim Jamshed                 (TCP_FLAG_FIN | TCP_FLAG_ACK) : TCP_FLAG_ACK;
54776404edcSAsim Jamshed           } else {
54876404edcSAsim Jamshed              /* if FIN is not sent, send fin with ack */
54976404edcSAsim Jamshed              flag = TCP_FLAG_FIN | TCP_FLAG_ACK;
55076404edcSAsim Jamshed           }
55176404edcSAsim Jamshed        case TCP_ST_CLOSED_RSVD: /* Send RST here */
55276404edcSAsim Jamshed           TRACE_DBG("Stream %d: Try sending RST (TCP_ST_CLOSED_RSVD)\n",
55376404edcSAsim Jamshed                     cur_stream->id);
55476404edcSAsim Jamshed           /* first flush the data and ack */
55576404edcSAsim Jamshed           if (sndvar->on_send_list || sndvar->on_ack_list)
55676404edcSAsim Jamshed              return (-1);
55776404edcSAsim Jamshed           ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_RST, NULL, 0);
55876404edcSAsim Jamshed           if (ret >= 0)
55976404edcSAsim Jamshed              DestroyTCPStream(mtcp, cur_stream);
56076404edcSAsim Jamshed           return (ret);
56176404edcSAsim Jamshed        default:
56276404edcSAsim Jamshed           TRACE_ERROR("Stream %d: shouldn't send a control packet\n",
56376404edcSAsim Jamshed                       cur_stream->id);
56476404edcSAsim Jamshed           assert(0); /* can't reach here! */
56576404edcSAsim Jamshed           return (0);
56676404edcSAsim Jamshed     }
56776404edcSAsim Jamshed 
56876404edcSAsim Jamshed     return SendTCPPacket(mtcp, cur_stream, cur_ts, flag, NULL, 0);
56976404edcSAsim Jamshed }
57076404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
57176404edcSAsim Jamshed inline int
WriteTCPControlList(mtcp_manager_t mtcp,struct mtcp_sender * sender,uint32_t cur_ts,int thresh)57276404edcSAsim Jamshed WriteTCPControlList(mtcp_manager_t mtcp,
57376404edcSAsim Jamshed 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
57476404edcSAsim Jamshed {
57576404edcSAsim Jamshed 	tcp_stream *cur_stream;
57676404edcSAsim Jamshed 	tcp_stream *next, *last;
57776404edcSAsim Jamshed 	int cnt = 0;
57876404edcSAsim Jamshed 	int ret;
57976404edcSAsim Jamshed 
58076404edcSAsim Jamshed 	thresh = MIN(thresh, sender->control_list_cnt);
58176404edcSAsim Jamshed 
58276404edcSAsim Jamshed 	/* Send TCP control messages */
58376404edcSAsim Jamshed 	cnt = 0;
58476404edcSAsim Jamshed 	cur_stream = TAILQ_FIRST(&sender->control_list);
58576404edcSAsim Jamshed 	last = TAILQ_LAST(&sender->control_list, control_head);
58676404edcSAsim Jamshed 	while (cur_stream) {
58776404edcSAsim Jamshed 		if (++cnt > thresh)
58876404edcSAsim Jamshed 			break;
58976404edcSAsim Jamshed 
59076404edcSAsim Jamshed 		TRACE_LOOP("Inside control loop. cnt: %u, stream: %d\n",
59176404edcSAsim Jamshed 				cnt, cur_stream->id);
59276404edcSAsim Jamshed 		next = TAILQ_NEXT(cur_stream, sndvar->control_link);
59376404edcSAsim Jamshed 
59476404edcSAsim Jamshed 		TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
59576404edcSAsim Jamshed 		sender->control_list_cnt--;
59676404edcSAsim Jamshed 
59776404edcSAsim Jamshed 		if (cur_stream->sndvar->on_control_list) {
59876404edcSAsim Jamshed 			cur_stream->sndvar->on_control_list = FALSE;
59976404edcSAsim Jamshed 			//TRACE_DBG("Stream %u: Sending control packet\n", cur_stream->id);
60076404edcSAsim Jamshed 			ret = SendControlPacket(mtcp, cur_stream, cur_ts);
60176404edcSAsim Jamshed 			if (ret < 0) {
60276404edcSAsim Jamshed 				TAILQ_INSERT_HEAD(&sender->control_list,
60376404edcSAsim Jamshed 						cur_stream, sndvar->control_link);
60476404edcSAsim Jamshed 				cur_stream->sndvar->on_control_list = TRUE;
60576404edcSAsim Jamshed 				sender->control_list_cnt++;
60676404edcSAsim Jamshed 				/* since there is no available write buffer, break */
60776404edcSAsim Jamshed 				break;
60876404edcSAsim Jamshed 			}
60976404edcSAsim Jamshed 		} else {
61076404edcSAsim Jamshed 			TRACE_ERROR("Stream %d: not on control list.\n", cur_stream->id);
61176404edcSAsim Jamshed 		}
61276404edcSAsim Jamshed 
61376404edcSAsim Jamshed 		if (cur_stream == last)
61476404edcSAsim Jamshed 			break;
61576404edcSAsim Jamshed 		cur_stream = next;
61676404edcSAsim Jamshed 	}
61776404edcSAsim Jamshed 
61876404edcSAsim Jamshed 	return cnt;
61976404edcSAsim Jamshed }
62076404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
62176404edcSAsim Jamshed inline int
WriteTCPDataList(mtcp_manager_t mtcp,struct mtcp_sender * sender,uint32_t cur_ts,int thresh)62276404edcSAsim Jamshed WriteTCPDataList(mtcp_manager_t mtcp,
62376404edcSAsim Jamshed 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
62476404edcSAsim Jamshed {
62576404edcSAsim Jamshed 	tcp_stream *cur_stream;
62676404edcSAsim Jamshed 	tcp_stream *next, *last;
62776404edcSAsim Jamshed 	int cnt = 0;
62876404edcSAsim Jamshed 	int ret;
62976404edcSAsim Jamshed 
63076404edcSAsim Jamshed 	/* Send data */
63176404edcSAsim Jamshed 	cnt = 0;
63276404edcSAsim Jamshed 	cur_stream = TAILQ_FIRST(&sender->send_list);
63376404edcSAsim Jamshed 	last = TAILQ_LAST(&sender->send_list, send_head);
63476404edcSAsim Jamshed 	while (cur_stream) {
63576404edcSAsim Jamshed 		if (++cnt > thresh)
63676404edcSAsim Jamshed 			break;
63776404edcSAsim Jamshed 
63876404edcSAsim Jamshed 		TRACE_LOOP("Inside send loop. cnt: %u, stream: %d\n",
63976404edcSAsim Jamshed 				cnt, cur_stream->id);
64076404edcSAsim Jamshed 		next = TAILQ_NEXT(cur_stream, sndvar->send_link);
64176404edcSAsim Jamshed 
64276404edcSAsim Jamshed 		TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
64376404edcSAsim Jamshed 		if (cur_stream->sndvar->on_send_list) {
64476404edcSAsim Jamshed 			ret = 0;
64576404edcSAsim Jamshed 
64676404edcSAsim Jamshed 			/* Send data here */
64776404edcSAsim Jamshed 			/* Only can send data when ESTABLISHED or CLOSE_WAIT */
64876404edcSAsim Jamshed 			if (cur_stream->state == TCP_ST_ESTABLISHED) {
64976404edcSAsim Jamshed 				if (cur_stream->sndvar->on_control_list) {
65076404edcSAsim Jamshed 					/* delay sending data after until on_control_list becomes off */
65176404edcSAsim Jamshed 					//TRACE_DBG("Stream %u: delay sending data.\n", cur_stream->id);
65276404edcSAsim Jamshed 					ret = -1;
65376404edcSAsim Jamshed 				} else {
65476404edcSAsim Jamshed 					ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
65576404edcSAsim Jamshed 				}
65676404edcSAsim Jamshed 			} else if (cur_stream->state == TCP_ST_CLOSE_WAIT ||
65776404edcSAsim Jamshed 					cur_stream->state == TCP_ST_FIN_WAIT_1 ||
65876404edcSAsim Jamshed 					cur_stream->state == TCP_ST_LAST_ACK) {
65976404edcSAsim Jamshed 				ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
66076404edcSAsim Jamshed 			} else {
66176404edcSAsim Jamshed 				TRACE_DBG("Stream %d: on_send_list at state %s\n",
66276404edcSAsim Jamshed 						cur_stream->id, TCPStateToString(cur_stream));
66376404edcSAsim Jamshed #if DUMP_STREAM
66476404edcSAsim Jamshed 				DumpStream(mtcp, cur_stream);
66576404edcSAsim Jamshed #endif
66676404edcSAsim Jamshed 			}
66776404edcSAsim Jamshed 
66876404edcSAsim Jamshed 			if (ret < 0) {
66976404edcSAsim Jamshed 				TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
67076404edcSAsim Jamshed 				/* since there is no available write buffer, break */
67176404edcSAsim Jamshed 				break;
67276404edcSAsim Jamshed 
67376404edcSAsim Jamshed 			} else {
67476404edcSAsim Jamshed 				cur_stream->sndvar->on_send_list = FALSE;
67576404edcSAsim Jamshed 				sender->send_list_cnt--;
67676404edcSAsim Jamshed 				/* the ret value is the number of packets sent. */
67776404edcSAsim Jamshed 				/* decrease ack_cnt for the piggybacked acks */
67876404edcSAsim Jamshed #if ACK_PIGGYBACK
67976404edcSAsim Jamshed 				if (cur_stream->sndvar->ack_cnt > 0) {
68076404edcSAsim Jamshed 					if (cur_stream->sndvar->ack_cnt > ret) {
68176404edcSAsim Jamshed 						cur_stream->sndvar->ack_cnt -= ret;
68276404edcSAsim Jamshed 					} else {
68376404edcSAsim Jamshed 						cur_stream->sndvar->ack_cnt = 0;
68476404edcSAsim Jamshed 					}
68576404edcSAsim Jamshed 				}
68676404edcSAsim Jamshed #endif
68776404edcSAsim Jamshed #if 1
68876404edcSAsim Jamshed 				if (cur_stream->control_list_waiting) {
68976404edcSAsim Jamshed 					if (!cur_stream->sndvar->on_ack_list) {
69076404edcSAsim Jamshed 						cur_stream->control_list_waiting = FALSE;
69176404edcSAsim Jamshed 						AddtoControlList(mtcp, cur_stream, cur_ts);
69276404edcSAsim Jamshed 					}
69376404edcSAsim Jamshed 				}
69476404edcSAsim Jamshed #endif
69576404edcSAsim Jamshed 			}
69676404edcSAsim Jamshed 		} else {
69776404edcSAsim Jamshed 			TRACE_ERROR("Stream %d: not on send list.\n", cur_stream->id);
69876404edcSAsim Jamshed #ifdef DUMP_STREAM
69976404edcSAsim Jamshed 			DumpStream(mtcp, cur_stream);
70076404edcSAsim Jamshed #endif
70176404edcSAsim Jamshed 		}
70276404edcSAsim Jamshed 
70376404edcSAsim Jamshed 		if (cur_stream == last)
70476404edcSAsim Jamshed 			break;
70576404edcSAsim Jamshed 		cur_stream = next;
70676404edcSAsim Jamshed 	}
70776404edcSAsim Jamshed 
70876404edcSAsim Jamshed 	return cnt;
70976404edcSAsim Jamshed }
71076404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
71176404edcSAsim Jamshed inline int
WriteTCPACKList(mtcp_manager_t mtcp,struct mtcp_sender * sender,uint32_t cur_ts,int thresh)71276404edcSAsim Jamshed WriteTCPACKList(mtcp_manager_t mtcp,
71376404edcSAsim Jamshed 		struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
71476404edcSAsim Jamshed {
71576404edcSAsim Jamshed 	tcp_stream *cur_stream;
71676404edcSAsim Jamshed 	tcp_stream *next, *last;
71776404edcSAsim Jamshed 	int to_ack;
71876404edcSAsim Jamshed 	int cnt = 0;
71976404edcSAsim Jamshed 	int ret;
72076404edcSAsim Jamshed 
72176404edcSAsim Jamshed 	/* Send aggregated acks */
72276404edcSAsim Jamshed 	cnt = 0;
72376404edcSAsim Jamshed 	cur_stream = TAILQ_FIRST(&sender->ack_list);
72476404edcSAsim Jamshed 	last = TAILQ_LAST(&sender->ack_list, ack_head);
72576404edcSAsim Jamshed 	while (cur_stream) {
72676404edcSAsim Jamshed 		if (++cnt > thresh)
72776404edcSAsim Jamshed 			break;
72876404edcSAsim Jamshed 
72976404edcSAsim Jamshed 		TRACE_LOOP("Inside ack loop. cnt: %u\n", cnt);
73076404edcSAsim Jamshed 		next = TAILQ_NEXT(cur_stream, sndvar->ack_link);
73176404edcSAsim Jamshed 
73276404edcSAsim Jamshed 		if (cur_stream->sndvar->on_ack_list) {
73376404edcSAsim Jamshed 			/* this list is only to ack the data packets */
73476404edcSAsim Jamshed 			/* if the ack is not data ack, then it will not process here */
73576404edcSAsim Jamshed 			to_ack = FALSE;
73676404edcSAsim Jamshed 			if (cur_stream->state == TCP_ST_ESTABLISHED ||
73776404edcSAsim Jamshed 					cur_stream->state == TCP_ST_CLOSE_WAIT ||
73876404edcSAsim Jamshed 					cur_stream->state == TCP_ST_FIN_WAIT_1 ||
73976404edcSAsim Jamshed 					cur_stream->state == TCP_ST_FIN_WAIT_2 ||
74076404edcSAsim Jamshed 					cur_stream->state == TCP_ST_TIME_WAIT) {
74176404edcSAsim Jamshed 				/* TIMEWAIT is possible since the ack is queued
74276404edcSAsim Jamshed 				   at FIN_WAIT_2 */
74376404edcSAsim Jamshed 				tcprb_t *rb;
74476404edcSAsim Jamshed 				if ((rb = cur_stream->rcvvar->rcvbuf) &&
74576404edcSAsim Jamshed 					TCP_SEQ_LEQ(cur_stream->rcv_nxt,
74676404edcSAsim Jamshed 						(cur_stream->rcvvar->irs + 1) + rb->pile
74776404edcSAsim Jamshed 						+ tcprb_cflen(rb))) {
74876404edcSAsim Jamshed 					to_ack = TRUE;
74976404edcSAsim Jamshed 				}
75076404edcSAsim Jamshed 			} else {
75176404edcSAsim Jamshed 				TRACE_DBG("Stream %u (%s): "
75276404edcSAsim Jamshed 						"Try sending ack at not proper state. "
75376404edcSAsim Jamshed 						"seq: %u, ack_seq: %u, on_control_list: %u\n",
75476404edcSAsim Jamshed 						cur_stream->id, TCPStateToString(cur_stream),
75576404edcSAsim Jamshed 						cur_stream->snd_nxt, cur_stream->rcv_nxt,
75676404edcSAsim Jamshed 						cur_stream->sndvar->on_control_list);
75776404edcSAsim Jamshed #ifdef DUMP_STREAM
75876404edcSAsim Jamshed 				DumpStream(mtcp, cur_stream);
75976404edcSAsim Jamshed #endif
76076404edcSAsim Jamshed 			}
76176404edcSAsim Jamshed 
76276404edcSAsim Jamshed 			if (to_ack) {
76376404edcSAsim Jamshed 				/* send the queued ack packets */
76476404edcSAsim Jamshed 				while (cur_stream->sndvar->ack_cnt > 0) {
76576404edcSAsim Jamshed 					ret = SendTCPPacket(mtcp, cur_stream,
76676404edcSAsim Jamshed 							cur_ts, TCP_FLAG_ACK, NULL, 0);
76776404edcSAsim Jamshed 					if (ret < 0) {
76876404edcSAsim Jamshed 						/* since there is no available write buffer, break */
76976404edcSAsim Jamshed 						break;
77076404edcSAsim Jamshed 					}
77176404edcSAsim Jamshed 					cur_stream->sndvar->ack_cnt--;
77276404edcSAsim Jamshed 				}
77376404edcSAsim Jamshed 
77476404edcSAsim Jamshed 				/* if is_wack is set, send packet to get window advertisement */
77576404edcSAsim Jamshed 				if (cur_stream->sndvar->is_wack) {
77676404edcSAsim Jamshed 					cur_stream->sndvar->is_wack = FALSE;
77776404edcSAsim Jamshed 					ret = SendTCPPacket(mtcp, cur_stream,
77876404edcSAsim Jamshed 							cur_ts, TCP_FLAG_ACK | TCP_FLAG_WACK, NULL, 0);
77976404edcSAsim Jamshed 					if (ret < 0) {
78076404edcSAsim Jamshed 						/* since there is no available write buffer, break */
78176404edcSAsim Jamshed 						cur_stream->sndvar->is_wack = TRUE;
78276404edcSAsim Jamshed 					}
78376404edcSAsim Jamshed 				}
78476404edcSAsim Jamshed 
78576404edcSAsim Jamshed 				if (!(cur_stream->sndvar->ack_cnt || cur_stream->sndvar->is_wack)) {
78676404edcSAsim Jamshed 					cur_stream->sndvar->on_ack_list = FALSE;
78776404edcSAsim Jamshed 					TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
78876404edcSAsim Jamshed 					sender->ack_list_cnt--;
78976404edcSAsim Jamshed 				}
79076404edcSAsim Jamshed 			} else {
79176404edcSAsim Jamshed 				cur_stream->sndvar->on_ack_list = FALSE;
79276404edcSAsim Jamshed 				cur_stream->sndvar->ack_cnt = 0;
79376404edcSAsim Jamshed 				cur_stream->sndvar->is_wack = 0;
79476404edcSAsim Jamshed 				TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
79576404edcSAsim Jamshed 				sender->ack_list_cnt--;
79676404edcSAsim Jamshed 			}
79776404edcSAsim Jamshed 
79876404edcSAsim Jamshed 			if (cur_stream->control_list_waiting) {
79976404edcSAsim Jamshed 				if (!cur_stream->sndvar->on_send_list) {
80076404edcSAsim Jamshed 					cur_stream->control_list_waiting = FALSE;
80176404edcSAsim Jamshed 					AddtoControlList(mtcp, cur_stream, cur_ts);
80276404edcSAsim Jamshed 				}
80376404edcSAsim Jamshed 			}
80476404edcSAsim Jamshed 		} else {
80576404edcSAsim Jamshed 			TRACE_ERROR("Stream %d: not on ack list.\n", cur_stream->id);
80676404edcSAsim Jamshed 			TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
80776404edcSAsim Jamshed 			sender->ack_list_cnt--;
80876404edcSAsim Jamshed #ifdef DUMP_STREAM
80976404edcSAsim Jamshed 			thread_printf(mtcp, mtcp->log_fp,
81076404edcSAsim Jamshed 					"Stream %u: not on ack list.\n", cur_stream->id);
81176404edcSAsim Jamshed 			DumpStream(mtcp, cur_stream);
81276404edcSAsim Jamshed #endif
81376404edcSAsim Jamshed 		}
81476404edcSAsim Jamshed 
81576404edcSAsim Jamshed 		if (cur_stream == last)
81676404edcSAsim Jamshed 			break;
81776404edcSAsim Jamshed 		cur_stream = next;
81876404edcSAsim Jamshed 	}
81976404edcSAsim Jamshed 
82076404edcSAsim Jamshed 	return cnt;
82176404edcSAsim Jamshed }
82276404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
82376404edcSAsim Jamshed inline struct mtcp_sender *
GetSender(mtcp_manager_t mtcp,tcp_stream * cur_stream)82476404edcSAsim Jamshed GetSender(mtcp_manager_t mtcp, tcp_stream *cur_stream)
82576404edcSAsim Jamshed {
82676404edcSAsim Jamshed 	if (cur_stream->sndvar->nif_out < 0) {
82776404edcSAsim Jamshed 		return mtcp->g_sender;
82876404edcSAsim Jamshed 
82976404edcSAsim Jamshed 	} else if (cur_stream->sndvar->nif_out >= g_config.mos->netdev_table->num) {
83076404edcSAsim Jamshed 		TRACE_ERROR("(NEVER HAPPEN) Failed to find appropriate sender.\n");
83176404edcSAsim Jamshed 		return NULL;
83276404edcSAsim Jamshed 
83376404edcSAsim Jamshed 	} else {
83476404edcSAsim Jamshed 		return mtcp->n_sender[cur_stream->sndvar->nif_out];
83576404edcSAsim Jamshed 	}
83676404edcSAsim Jamshed }
83776404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
83876404edcSAsim Jamshed inline void
AddtoControlList(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t cur_ts)83976404edcSAsim Jamshed AddtoControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
84076404edcSAsim Jamshed {
84176404edcSAsim Jamshed #if TRY_SEND_BEFORE_QUEUE
84276404edcSAsim Jamshed 	int ret;
84376404edcSAsim Jamshed 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
84476404edcSAsim Jamshed 	assert(sender != NULL);
84576404edcSAsim Jamshed 
84676404edcSAsim Jamshed 	ret = SendControlPacket(mtcp, cur_stream, cur_ts);
84776404edcSAsim Jamshed 	if (ret < 0) {
84876404edcSAsim Jamshed #endif
84976404edcSAsim Jamshed 		if (!cur_stream->sndvar->on_control_list) {
85076404edcSAsim Jamshed 			struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
85176404edcSAsim Jamshed 			assert(sender != NULL);
85276404edcSAsim Jamshed 
85376404edcSAsim Jamshed 			cur_stream->sndvar->on_control_list = TRUE;
85476404edcSAsim Jamshed 			TAILQ_INSERT_TAIL(&sender->control_list, cur_stream, sndvar->control_link);
85576404edcSAsim Jamshed 			sender->control_list_cnt++;
85676404edcSAsim Jamshed 			//TRACE_DBG("Stream %u: added to control list (cnt: %d)\n",
85776404edcSAsim Jamshed 			//		cur_stream->id, sender->control_list_cnt);
85876404edcSAsim Jamshed 		}
85976404edcSAsim Jamshed #if TRY_SEND_BEFORE_QUEUE
86076404edcSAsim Jamshed 	} else {
86176404edcSAsim Jamshed 		if (cur_stream->sndvar->on_control_list) {
86276404edcSAsim Jamshed 			cur_stream->sndvar->on_control_list = FALSE;
86376404edcSAsim Jamshed 			TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
86476404edcSAsim Jamshed 			sender->control_list_cnt--;
86576404edcSAsim Jamshed 		}
86676404edcSAsim Jamshed 	}
86776404edcSAsim Jamshed #endif
86876404edcSAsim Jamshed }
86976404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
87076404edcSAsim Jamshed inline void
AddtoSendList(mtcp_manager_t mtcp,tcp_stream * cur_stream)87176404edcSAsim Jamshed AddtoSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
87276404edcSAsim Jamshed {
87376404edcSAsim Jamshed 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
87476404edcSAsim Jamshed 	assert(sender != NULL);
87576404edcSAsim Jamshed 
87676404edcSAsim Jamshed 	if(!cur_stream->sndvar->sndbuf) {
87776404edcSAsim Jamshed 		TRACE_ERROR("[%d] Stream %d: No send buffer available.\n",
87876404edcSAsim Jamshed 				mtcp->ctx->cpu,
87976404edcSAsim Jamshed 				cur_stream->id);
88076404edcSAsim Jamshed 		assert(0);
88176404edcSAsim Jamshed 		return;
88276404edcSAsim Jamshed 	}
88376404edcSAsim Jamshed 
88476404edcSAsim Jamshed 	if (!cur_stream->sndvar->on_send_list) {
88576404edcSAsim Jamshed 		cur_stream->sndvar->on_send_list = TRUE;
88676404edcSAsim Jamshed 		TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
88776404edcSAsim Jamshed 		sender->send_list_cnt++;
88876404edcSAsim Jamshed 	}
88976404edcSAsim Jamshed }
89076404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
89176404edcSAsim Jamshed inline void
AddtoACKList(mtcp_manager_t mtcp,tcp_stream * cur_stream)89276404edcSAsim Jamshed AddtoACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
89376404edcSAsim Jamshed {
89476404edcSAsim Jamshed 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
89576404edcSAsim Jamshed 	assert(sender != NULL);
89676404edcSAsim Jamshed 
89776404edcSAsim Jamshed 	if (!cur_stream->sndvar->on_ack_list) {
89876404edcSAsim Jamshed 		cur_stream->sndvar->on_ack_list = TRUE;
89976404edcSAsim Jamshed 		TAILQ_INSERT_TAIL(&sender->ack_list, cur_stream, sndvar->ack_link);
90076404edcSAsim Jamshed 		sender->ack_list_cnt++;
90176404edcSAsim Jamshed 	}
90276404edcSAsim Jamshed }
90376404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
90476404edcSAsim Jamshed inline void
RemoveFromControlList(mtcp_manager_t mtcp,tcp_stream * cur_stream)90576404edcSAsim Jamshed RemoveFromControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
90676404edcSAsim Jamshed {
90776404edcSAsim Jamshed 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
90876404edcSAsim Jamshed 	assert(sender != NULL);
90976404edcSAsim Jamshed 
91076404edcSAsim Jamshed 	if (cur_stream->sndvar->on_control_list) {
91176404edcSAsim Jamshed 		cur_stream->sndvar->on_control_list = FALSE;
91276404edcSAsim Jamshed 		TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
91376404edcSAsim Jamshed 		sender->control_list_cnt--;
91476404edcSAsim Jamshed 		//TRACE_DBG("Stream %u: Removed from control list (cnt: %d)\n",
91576404edcSAsim Jamshed 		//		cur_stream->id, sender->control_list_cnt);
91676404edcSAsim Jamshed 	}
91776404edcSAsim Jamshed }
91876404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
91976404edcSAsim Jamshed inline void
RemoveFromSendList(mtcp_manager_t mtcp,tcp_stream * cur_stream)92076404edcSAsim Jamshed RemoveFromSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
92176404edcSAsim Jamshed {
92276404edcSAsim Jamshed 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
92376404edcSAsim Jamshed 	assert(sender != NULL);
92476404edcSAsim Jamshed 
92576404edcSAsim Jamshed 	if (cur_stream->sndvar->on_send_list) {
92676404edcSAsim Jamshed 		cur_stream->sndvar->on_send_list = FALSE;
92776404edcSAsim Jamshed 		TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
92876404edcSAsim Jamshed 		sender->send_list_cnt--;
92976404edcSAsim Jamshed 	}
93076404edcSAsim Jamshed }
93176404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
93276404edcSAsim Jamshed inline void
RemoveFromACKList(mtcp_manager_t mtcp,tcp_stream * cur_stream)93376404edcSAsim Jamshed RemoveFromACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
93476404edcSAsim Jamshed {
93576404edcSAsim Jamshed 	struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
93676404edcSAsim Jamshed 	assert(sender != NULL);
93776404edcSAsim Jamshed 
93876404edcSAsim Jamshed 	if (cur_stream->sndvar->on_ack_list) {
93976404edcSAsim Jamshed 		cur_stream->sndvar->on_ack_list = FALSE;
94076404edcSAsim Jamshed 		TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
94176404edcSAsim Jamshed 		sender->ack_list_cnt--;
94276404edcSAsim Jamshed 	}
94376404edcSAsim Jamshed }
94476404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
94576404edcSAsim Jamshed inline void
EnqueueACK(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t cur_ts,uint8_t opt)94676404edcSAsim Jamshed EnqueueACK(mtcp_manager_t mtcp,
94776404edcSAsim Jamshed 		tcp_stream *cur_stream, uint32_t cur_ts, uint8_t opt)
94876404edcSAsim Jamshed {
94976404edcSAsim Jamshed 	if (!(cur_stream->state == TCP_ST_ESTABLISHED ||
95076404edcSAsim Jamshed 			cur_stream->state == TCP_ST_CLOSE_WAIT ||
95176404edcSAsim Jamshed 			cur_stream->state == TCP_ST_FIN_WAIT_1 ||
95276404edcSAsim Jamshed 			cur_stream->state == TCP_ST_FIN_WAIT_2)) {
95376404edcSAsim Jamshed 		TRACE_DBG("Stream %u: Enqueueing ack at state %s\n",
95476404edcSAsim Jamshed 				cur_stream->id, TCPStateToString(cur_stream));
95576404edcSAsim Jamshed 	}
95676404edcSAsim Jamshed 
95776404edcSAsim Jamshed 	if (opt == ACK_OPT_NOW) {
95876404edcSAsim Jamshed 		if (cur_stream->sndvar->ack_cnt < cur_stream->sndvar->ack_cnt + 1) {
95976404edcSAsim Jamshed 			cur_stream->sndvar->ack_cnt++;
96076404edcSAsim Jamshed 		}
96176404edcSAsim Jamshed 	} else if (opt == ACK_OPT_AGGREGATE) {
96276404edcSAsim Jamshed 		if (cur_stream->sndvar->ack_cnt == 0) {
96376404edcSAsim Jamshed 			cur_stream->sndvar->ack_cnt = 1;
96476404edcSAsim Jamshed 		}
96576404edcSAsim Jamshed 	} else if (opt == ACK_OPT_WACK) {
96676404edcSAsim Jamshed 		cur_stream->sndvar->is_wack = TRUE;
96776404edcSAsim Jamshed 	}
96876404edcSAsim Jamshed 	AddtoACKList(mtcp, cur_stream);
96976404edcSAsim Jamshed }
97076404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
97176404edcSAsim Jamshed inline void
DumpControlList(mtcp_manager_t mtcp,struct mtcp_sender * sender)97276404edcSAsim Jamshed DumpControlList(mtcp_manager_t mtcp, struct mtcp_sender *sender)
97376404edcSAsim Jamshed {
97476404edcSAsim Jamshed 	tcp_stream *stream;
97576404edcSAsim Jamshed 
97676404edcSAsim Jamshed 	TRACE_DBG("Dumping control list (count: %d):\n", sender->control_list_cnt);
97776404edcSAsim Jamshed 	TAILQ_FOREACH(stream, &sender->control_list, sndvar->control_link) {
97876404edcSAsim Jamshed 		TRACE_DBG("Stream id: %u in control list\n", stream->id);
97976404edcSAsim Jamshed 	}
98076404edcSAsim Jamshed }
98176404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
98276404edcSAsim Jamshed static inline void
UpdatePassiveSendTCPContext_SynSent(struct tcp_stream * cur_stream,struct pkt_ctx * pctx)98376404edcSAsim Jamshed UpdatePassiveSendTCPContext_SynSent(struct tcp_stream *cur_stream,
98476404edcSAsim Jamshed 				    struct pkt_ctx *pctx)
98576404edcSAsim Jamshed {
98676404edcSAsim Jamshed 	assert(cur_stream);
98776404edcSAsim Jamshed 	assert(pctx);
98876404edcSAsim Jamshed 
98976404edcSAsim Jamshed 	/* add event */
99076404edcSAsim Jamshed 	if (cur_stream->state < TCP_ST_SYN_SENT) {
99176404edcSAsim Jamshed 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
99276404edcSAsim Jamshed 		cur_stream->cb_events |= MOS_ON_CONN_START;
99376404edcSAsim Jamshed 	}
99476404edcSAsim Jamshed 	/* initialize TCP send variables of send-side stream */
99576404edcSAsim Jamshed 	cur_stream->sndvar->cwnd = 1;
99676404edcSAsim Jamshed 	cur_stream->sndvar->ssthresh = cur_stream->sndvar->mss * 10;
99776404edcSAsim Jamshed 	cur_stream->sndvar->ip_id = htons(pctx->p.iph->id);
99876404edcSAsim Jamshed 	cur_stream->sndvar->iss = pctx->p.seq;
99976404edcSAsim Jamshed 	cur_stream->snd_nxt = pctx->p.seq + 1;
100076404edcSAsim Jamshed 	cur_stream->state = TCP_ST_SYN_SENT;
100176404edcSAsim Jamshed 	cur_stream->last_active_ts = pctx->p.cur_ts;
100276404edcSAsim Jamshed 
100376404edcSAsim Jamshed 	/* receive-side conn start event can also be tagged here */
100476404edcSAsim Jamshed 	/* blocked since tcp_in.c takes care of this.. */
100576404edcSAsim Jamshed 	/* cur_stream->pair_stream->cb_events |= MOS_ON_CONN_START; */
100676404edcSAsim Jamshed }
100776404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
100876404edcSAsim Jamshed /**
100976404edcSAsim Jamshed  * Called (when monitoring mode is enabled).. for every incoming packet from the
101076404edcSAsim Jamshed  * NIC.
101176404edcSAsim Jamshed  */
101276404edcSAsim Jamshed void
UpdatePassiveSendTCPContext(mtcp_manager_t mtcp,struct tcp_stream * cur_stream,struct pkt_ctx * pctx)101376404edcSAsim Jamshed UpdatePassiveSendTCPContext(mtcp_manager_t mtcp, struct tcp_stream *cur_stream,
101476404edcSAsim Jamshed 			    struct pkt_ctx *pctx)
101576404edcSAsim Jamshed {
101676404edcSAsim Jamshed 	struct tcphdr *tcph;
101776404edcSAsim Jamshed 
101876404edcSAsim Jamshed 	assert(cur_stream);
101976404edcSAsim Jamshed 	tcph = pctx->p.tcph;
102076404edcSAsim Jamshed 
102176404edcSAsim Jamshed 	/* if it is a new TCP stream from client */
102276404edcSAsim Jamshed 	if (tcph->syn && !tcph->ack && cur_stream->state <= TCP_ST_SYN_SENT) {
102376404edcSAsim Jamshed 		TRACE_STATE("Stream %d: %s\n",
102476404edcSAsim Jamshed 			    cur_stream->id, TCPStateToString(cur_stream));
102576404edcSAsim Jamshed 		UpdatePassiveSendTCPContext_SynSent(cur_stream, pctx);
102676404edcSAsim Jamshed 		AddtoTimeoutList(mtcp, cur_stream);
102776404edcSAsim Jamshed 		return;
102876404edcSAsim Jamshed 	}
102976404edcSAsim Jamshed 
103076404edcSAsim Jamshed 	if (tcph->ack) {
103176404edcSAsim Jamshed 		cur_stream->sndvar->ts_lastack_sent = pctx->p.cur_ts;
103276404edcSAsim Jamshed 		cur_stream->last_active_ts = pctx->p.cur_ts;
103376404edcSAsim Jamshed 	}
103476404edcSAsim Jamshed 
103576404edcSAsim Jamshed 	cur_stream->snd_nxt = pctx->p.seq + pctx->p.payloadlen;
103676404edcSAsim Jamshed 
103776404edcSAsim Jamshed 	/* test for reset packet */
103876404edcSAsim Jamshed 	if (tcph->rst) {
103976404edcSAsim Jamshed 		cur_stream->have_reset = TRUE;
104076404edcSAsim Jamshed 		/* test for reset packet */
104176404edcSAsim Jamshed 		cur_stream->state = TCP_ST_CLOSED_RSVD;
104276404edcSAsim Jamshed 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
104376404edcSAsim Jamshed 		TRACE_STATE("Stream %d: %s\n",
104476404edcSAsim Jamshed 				cur_stream->id,
104576404edcSAsim Jamshed 				TCPStateToString(cur_stream));
104676404edcSAsim Jamshed 		return;
104776404edcSAsim Jamshed 	}
104876404edcSAsim Jamshed 
104976404edcSAsim Jamshed 	/*
105076404edcSAsim Jamshed 	 * for all others, state transitioning is based on
105176404edcSAsim Jamshed 	 * current tcp_stream state
105276404edcSAsim Jamshed 	 */
105376404edcSAsim Jamshed 	switch (cur_stream->state) {
105476404edcSAsim Jamshed 	case TCP_ST_SYN_SENT:
105576404edcSAsim Jamshed 		/* control should not come here */
105676404edcSAsim Jamshed 		/* UpdatePassiveReceiveTCPContext() should take care of this */
105776404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
105876404edcSAsim Jamshed 		if (tcph->ack && TCP_SEQ_GT(pctx->p.seq, cur_stream->sndvar->iss)) {
105976404edcSAsim Jamshed 			cur_stream->state = TCP_ST_ESTABLISHED;
106076404edcSAsim Jamshed 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
106176404edcSAsim Jamshed 			cur_stream->snd_nxt = pctx->p.seq;
106276404edcSAsim Jamshed 			cur_stream->rcv_nxt = pctx->p.ack_seq;
106376404edcSAsim Jamshed 			goto __Handle_TCP_ST_ESTABLISHED;
106476404edcSAsim Jamshed 		}
106576404edcSAsim Jamshed #endif
106676404edcSAsim Jamshed 		break;
106776404edcSAsim Jamshed 	case TCP_ST_SYN_RCVD:
106876404edcSAsim Jamshed 		if (!tcph->ack)
106976404edcSAsim Jamshed 			break;
107076404edcSAsim Jamshed 
107176404edcSAsim Jamshed 		if (tcph->syn) {
107276404edcSAsim Jamshed 			cur_stream->sndvar->iss = pctx->p.seq;
107376404edcSAsim Jamshed 			cur_stream->snd_nxt = cur_stream->sndvar->iss + 1;
107476404edcSAsim Jamshed 			TRACE_DBG("Stream %d (TCP_ST_SYN_RCVD): "
107576404edcSAsim Jamshed 				  "setting seq: %u = iss\n",
107676404edcSAsim Jamshed 				  cur_stream->id, pctx->p.seq);
107776404edcSAsim Jamshed 		}
107876404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
107976404edcSAsim Jamshed 		else {
108076404edcSAsim Jamshed 			cur_stream->state = TCP_ST_ESTABLISHED;
108176404edcSAsim Jamshed 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
108276404edcSAsim Jamshed 			cur_stream->snd_nxt = pctx->p.seq;
108376404edcSAsim Jamshed 			cur_stream->rcv_nxt = pctx->p.ack_seq;
108476404edcSAsim Jamshed 			goto __Handle_TCP_ST_ESTABLISHED;
108576404edcSAsim Jamshed 		}
108676404edcSAsim Jamshed #endif
108776404edcSAsim Jamshed 		TRACE_STATE("Stream %d: %s\n",
108876404edcSAsim Jamshed 			    cur_stream->id,
108976404edcSAsim Jamshed 			    TCPStateToString(cur_stream));
109076404edcSAsim Jamshed 		break;
109176404edcSAsim Jamshed 	case TCP_ST_ESTABLISHED:
109276404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
109376404edcSAsim Jamshed __Handle_TCP_ST_ESTABLISHED:
109476404edcSAsim Jamshed #endif
109576404edcSAsim Jamshed 		/* if application decides to close, fin pkt is sent */
109676404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
109776404edcSAsim Jamshed 		if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
109876404edcSAsim Jamshed 		{
109976404edcSAsim Jamshed 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
110076404edcSAsim Jamshed 					"Move rcv_nxt from %u to %u.\n",
110176404edcSAsim Jamshed 					cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
110276404edcSAsim Jamshed 			cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
110376404edcSAsim Jamshed 		}
110476404edcSAsim Jamshed #endif
110576404edcSAsim Jamshed 		if (tcph->fin) {
110676404edcSAsim Jamshed 			cur_stream->state = TCP_ST_FIN_WAIT_1;
110776404edcSAsim Jamshed 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
110876404edcSAsim Jamshed 			cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
110976404edcSAsim Jamshed 			cur_stream->sndvar->is_fin_sent = TRUE;
111076404edcSAsim Jamshed 			cur_stream->snd_nxt++;
111176404edcSAsim Jamshed 			TRACE_STATE("Stream %d: %s\n",
111276404edcSAsim Jamshed 				    cur_stream->id,
111376404edcSAsim Jamshed 				    TCPStateToString(cur_stream));
111476404edcSAsim Jamshed 		} else {
111576404edcSAsim Jamshed 			/* creating tcp send buffer still pending.. */
111676404edcSAsim Jamshed 			/* do we need peek for send buffer? */
111776404edcSAsim Jamshed 		}
111876404edcSAsim Jamshed 		break;
111976404edcSAsim Jamshed 	case TCP_ST_CLOSE_WAIT:
112076404edcSAsim Jamshed 		/* if application decides to close, fin pkt is sent */
112176404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
112276404edcSAsim Jamshed 		if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
112376404edcSAsim Jamshed 		{
112476404edcSAsim Jamshed 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
112576404edcSAsim Jamshed 					"Move rcv_nxt from %u to %u.\n",
112676404edcSAsim Jamshed 					cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
112776404edcSAsim Jamshed 			cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
112876404edcSAsim Jamshed 		}
112976404edcSAsim Jamshed #endif
113076404edcSAsim Jamshed 		if (tcph->fin) {
113176404edcSAsim Jamshed 			cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
113276404edcSAsim Jamshed 			cur_stream->sndvar->is_fin_sent = TRUE;
113376404edcSAsim Jamshed 			cur_stream->snd_nxt++;
11348c9e1184SAsim Jamshed 
11358c9e1184SAsim Jamshed 			/* verify whether the FIN from the other end is acked */
11368c9e1184SAsim Jamshed 			if ((tcph->ack) && (ntohl(tcph->ack_seq) == cur_stream->rcv_nxt))
113776404edcSAsim Jamshed 				cur_stream->state = TCP_ST_LAST_ACK;
11388c9e1184SAsim Jamshed 			else
11398c9e1184SAsim Jamshed 				cur_stream->state = TCP_ST_CLOSING;
11408c9e1184SAsim Jamshed 
114176404edcSAsim Jamshed 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
114276404edcSAsim Jamshed 			TRACE_STATE("Stream %d: %s\n",
114376404edcSAsim Jamshed 				    cur_stream->id,
114476404edcSAsim Jamshed 				    TCPStateToString(cur_stream));
114576404edcSAsim Jamshed 		} else if (tcph->ack) {
114676404edcSAsim Jamshed 			TRACE_STATE("Stream %d: %s\n",
114776404edcSAsim Jamshed 				    cur_stream->id,
114876404edcSAsim Jamshed 				    TCPStateToString(cur_stream));
114976404edcSAsim Jamshed 		}
115076404edcSAsim Jamshed 		break;
115176404edcSAsim Jamshed 	case TCP_ST_LAST_ACK:
115276404edcSAsim Jamshed 		/* control should not come here */
115376404edcSAsim Jamshed 		/* UpdatePassiveReceiveTCPContext() should take care of this */
115476404edcSAsim Jamshed 		break;
115576404edcSAsim Jamshed 	case TCP_ST_FIN_WAIT_1:
115676404edcSAsim Jamshed 		/* control should not come here */
115776404edcSAsim Jamshed 		/* UpdatePassiveReceiveTCPContext() should take care of this */
115876404edcSAsim Jamshed 		break;
115976404edcSAsim Jamshed 	case TCP_ST_FIN_WAIT_2:
116076404edcSAsim Jamshed 		/* control should not come here */
116176404edcSAsim Jamshed 		/* UpdatePassiveReceiveTCPContext() should take care of this */
116276404edcSAsim Jamshed 		break;
116376404edcSAsim Jamshed 	case TCP_ST_CLOSING:
116476404edcSAsim Jamshed 		/* control should not come here */
116576404edcSAsim Jamshed 		/* UpdatePassiveReceiveTCPContext() should take care of this */
116676404edcSAsim Jamshed 		break;
116776404edcSAsim Jamshed 	case TCP_ST_TIME_WAIT:
116876404edcSAsim Jamshed 		/* control may come here but... */
116976404edcSAsim Jamshed 		/* UpdatePassiveReceiveTCPContext() should take care of this */
117076404edcSAsim Jamshed 		if (tcph->ack) {
117176404edcSAsim Jamshed 			TRACE_STATE("Stream %d: %s\n",
117276404edcSAsim Jamshed 				    cur_stream->id,
117376404edcSAsim Jamshed 				    TCPStateToString(cur_stream));
117476404edcSAsim Jamshed 		}
117576404edcSAsim Jamshed 		break;
117676404edcSAsim Jamshed 	case TCP_ST_CLOSED:
117776404edcSAsim Jamshed 	case TCP_ST_CLOSED_RSVD:
117876404edcSAsim Jamshed 		/* Waiting to be destroyed */
117976404edcSAsim Jamshed 		break;
118076404edcSAsim Jamshed 	default:
118176404edcSAsim Jamshed 		TRACE_DBG("This should not happen.. Error state: %s reached!\n"
118276404edcSAsim Jamshed 			  "tcph->syn: %d, tcph->ack: %d\n",
118376404edcSAsim Jamshed 			  TCPStateToString(cur_stream), pctx->p.tcph->syn,
118476404edcSAsim Jamshed 			  pctx->p.tcph->ack);
118576404edcSAsim Jamshed 		assert(0);
118676404edcSAsim Jamshed 		/* This will be enabled once passiverecvcontext is completed */
118776404edcSAsim Jamshed 		/*exit(EXIT_FAILURE);*/
118876404edcSAsim Jamshed 	}
118976404edcSAsim Jamshed 
119076404edcSAsim Jamshed 	UNUSED(mtcp);
119176404edcSAsim Jamshed 	return;
119276404edcSAsim Jamshed }
119376404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
119476404edcSAsim Jamshed void
PostSendTCPAction(mtcp_manager_t mtcp,struct pkt_ctx * pctx,struct tcp_stream * recvside_stream,struct tcp_stream * sendside_stream)119576404edcSAsim Jamshed PostSendTCPAction(mtcp_manager_t mtcp, struct pkt_ctx *pctx,
119676404edcSAsim Jamshed 		  struct tcp_stream *recvside_stream,
119776404edcSAsim Jamshed 		  struct tcp_stream *sendside_stream)
119876404edcSAsim Jamshed {
1199861ea7dfSAsim Jamshed 	/* this is empty for the time being */
120076404edcSAsim Jamshed }
120176404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
1202