/* xref: /mOS-networking-stack/core/src/tcp_in.c (revision 05e3289c) */
176404edcSAsim Jamshed #include <assert.h>
276404edcSAsim Jamshed 
376404edcSAsim Jamshed #include "mos_api.h"
476404edcSAsim Jamshed #include "tcp_util.h"
576404edcSAsim Jamshed #include "tcp_in.h"
676404edcSAsim Jamshed #include "tcp_out.h"
776404edcSAsim Jamshed #include "tcp_ring_buffer.h"
876404edcSAsim Jamshed #include "eventpoll.h"
976404edcSAsim Jamshed #include "debug.h"
1076404edcSAsim Jamshed #include "timer.h"
1176404edcSAsim Jamshed #include "ip_in.h"
1276404edcSAsim Jamshed #include "tcp_rb.h"
1376404edcSAsim Jamshed #include "config.h"
1476404edcSAsim Jamshed #include "scalable_event.h"
1576404edcSAsim Jamshed 
/*
 * Generic two-value maximum / minimum.
 * NOTE: these are plain function-like macros, so each argument may be
 * evaluated twice; do not pass expressions with side effects (e.g. i++).
 */
#define MAX(a, b) ((a)>(b)?(a):(b))
#define MIN(a, b) ((a)<(b)?(a):(b))

/* Compile-time switches tested by the #if blocks in ProcessACK():
 * RECOVERY_AFTER_LOSS guards the "advance snd_nxt to ack_seq" step and
 * SELECTIVE_WRITE_EVENT_NOTIFY guards the "only raise a write event when the
 * previous send window was empty" condition. */
#define RECOVERY_AFTER_LOSS TRUE
#define SELECTIVE_WRITE_EVENT_NOTIFY TRUE
2176404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
2276404edcSAsim Jamshed static inline void
2376404edcSAsim Jamshed Handle_TCP_ST_ESTABLISHED (mtcp_manager_t mtcp, tcp_stream* cur_stream,
2476404edcSAsim Jamshed 		struct pkt_ctx *pctx);
2576404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
2676404edcSAsim Jamshed static inline int
FilterSYNPacket(mtcp_manager_t mtcp,uint32_t ip,uint16_t port)2776404edcSAsim Jamshed FilterSYNPacket(mtcp_manager_t mtcp, uint32_t ip, uint16_t port)
2876404edcSAsim Jamshed {
2976404edcSAsim Jamshed 	struct sockaddr_in *addr;
3076404edcSAsim Jamshed 
3176404edcSAsim Jamshed 	/* TODO: This listening logic should be revised */
3276404edcSAsim Jamshed 
3376404edcSAsim Jamshed 	/* if not listening, drop */
3476404edcSAsim Jamshed 	if (!mtcp->listener) {
3576404edcSAsim Jamshed 		return FALSE;
3676404edcSAsim Jamshed 	}
3776404edcSAsim Jamshed 
3876404edcSAsim Jamshed 	/* if not the address we want, drop */
3976404edcSAsim Jamshed 	addr = &mtcp->listener->socket->saddr;
4076404edcSAsim Jamshed 	if (addr->sin_port == port) {
4176404edcSAsim Jamshed 		if (addr->sin_addr.s_addr != INADDR_ANY) {
4276404edcSAsim Jamshed 			if (ip == addr->sin_addr.s_addr) {
4376404edcSAsim Jamshed 				return TRUE;
4476404edcSAsim Jamshed 			}
4576404edcSAsim Jamshed 			return FALSE;
4676404edcSAsim Jamshed 		} else {
4776404edcSAsim Jamshed 			int i;
4876404edcSAsim Jamshed 
4976404edcSAsim Jamshed 			for (i = 0; i < g_config.mos->netdev_table->num; i++) {
5076404edcSAsim Jamshed 				if (ip == g_config.mos->netdev_table->ent[i]->ip_addr) {
5176404edcSAsim Jamshed 					return TRUE;
5276404edcSAsim Jamshed 				}
5376404edcSAsim Jamshed 			}
5476404edcSAsim Jamshed 			return FALSE;
5576404edcSAsim Jamshed 		}
5676404edcSAsim Jamshed 	}
5776404edcSAsim Jamshed 
5876404edcSAsim Jamshed 	return FALSE;
5976404edcSAsim Jamshed }
6076404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
6176404edcSAsim Jamshed static inline int
HandleActiveOpen(mtcp_manager_t mtcp,tcp_stream * cur_stream,struct pkt_ctx * pctx)6276404edcSAsim Jamshed HandleActiveOpen(mtcp_manager_t mtcp, tcp_stream *cur_stream,
6376404edcSAsim Jamshed 		struct pkt_ctx *pctx)
6476404edcSAsim Jamshed {
6576404edcSAsim Jamshed 	const struct tcphdr* tcph = pctx->p.tcph;
6676404edcSAsim Jamshed 
6776404edcSAsim Jamshed 	cur_stream->rcvvar->irs = pctx->p.seq;
6876404edcSAsim Jamshed 	cur_stream->snd_nxt = pctx->p.ack_seq;
6976404edcSAsim Jamshed 	cur_stream->sndvar->peer_wnd = pctx->p.window;
7076404edcSAsim Jamshed 	cur_stream->rcvvar->snd_wl1 = cur_stream->rcvvar->irs - 1;
7176404edcSAsim Jamshed 	cur_stream->rcv_nxt = cur_stream->rcvvar->irs + 1;
7276404edcSAsim Jamshed 	cur_stream->rcvvar->last_ack_seq = pctx->p.ack_seq;
7376404edcSAsim Jamshed 	ParseTCPOptions(cur_stream, pctx->p.cur_ts, (uint8_t *)tcph + TCP_HEADER_LEN,
7476404edcSAsim Jamshed 			(tcph->doff << 2) - TCP_HEADER_LEN);
7576404edcSAsim Jamshed 	cur_stream->sndvar->cwnd = ((cur_stream->sndvar->cwnd == 1)?
7676404edcSAsim Jamshed 			(cur_stream->sndvar->mss * 2): cur_stream->sndvar->mss);
7776404edcSAsim Jamshed 	cur_stream->sndvar->ssthresh = cur_stream->sndvar->mss * 10;
7876404edcSAsim Jamshed 	if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
7976404edcSAsim Jamshed 		UpdateRetransmissionTimer(mtcp, cur_stream, pctx->p.cur_ts);
8076404edcSAsim Jamshed 
8176404edcSAsim Jamshed 	return TRUE;
8276404edcSAsim Jamshed }
8376404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
8476404edcSAsim Jamshed /* ValidateSequence: validates sequence number of the segment                 */
8576404edcSAsim Jamshed /* Return: TRUE if acceptable, FALSE if not acceptable                        */
8676404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
8776404edcSAsim Jamshed static inline int
ValidateSequence(mtcp_manager_t mtcp,tcp_stream * cur_stream,struct pkt_ctx * pctx)8876404edcSAsim Jamshed ValidateSequence(mtcp_manager_t mtcp, tcp_stream *cur_stream,
8976404edcSAsim Jamshed 		struct pkt_ctx *pctx)
9076404edcSAsim Jamshed {
9176404edcSAsim Jamshed 	const struct tcphdr* tcph = pctx->p.tcph;
9276404edcSAsim Jamshed 
9376404edcSAsim Jamshed 	/* Protect Against Wrapped Sequence number (PAWS) */
9476404edcSAsim Jamshed 	if (!tcph->rst && cur_stream->saw_timestamp) {
9576404edcSAsim Jamshed 		struct tcp_timestamp ts;
9676404edcSAsim Jamshed 
9776404edcSAsim Jamshed 		if (!ParseTCPTimestamp(cur_stream, &ts,
9876404edcSAsim Jamshed 				(uint8_t *)tcph + TCP_HEADER_LEN,
9976404edcSAsim Jamshed 				(tcph->doff << 2) - TCP_HEADER_LEN)) {
10076404edcSAsim Jamshed 			/* if there is no timestamp */
10176404edcSAsim Jamshed 			/* TODO: implement here */
10276404edcSAsim Jamshed 			TRACE_DBG("No timestamp found.\n");
10376404edcSAsim Jamshed 			return FALSE;
10476404edcSAsim Jamshed 		}
10576404edcSAsim Jamshed 
10676404edcSAsim Jamshed 		/* RFC1323: if SEG.TSval < TS.Recent, drop and send ack */
10776404edcSAsim Jamshed 		if (TCP_SEQ_LT(ts.ts_val, cur_stream->rcvvar->ts_recent)) {
10876404edcSAsim Jamshed 			/* TODO: ts_recent should be invalidated
10976404edcSAsim Jamshed 					 before timestamp wraparound for long idle flow */
11076404edcSAsim Jamshed 			TRACE_DBG("PAWS Detect wrong timestamp. "
11176404edcSAsim Jamshed 					"seq: %u, ts_val: %u, prev: %u\n",
11276404edcSAsim Jamshed 					pctx->p.seq, ts.ts_val, cur_stream->rcvvar->ts_recent);
11376404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_ACK_NOW;
11476404edcSAsim Jamshed 			return FALSE;
11576404edcSAsim Jamshed 		} else {
11676404edcSAsim Jamshed 			/* valid timestamp */
11776404edcSAsim Jamshed 			if (TCP_SEQ_GT(ts.ts_val, cur_stream->rcvvar->ts_recent)) {
11876404edcSAsim Jamshed 				TRACE_TSTAMP("Timestamp update. cur: %u, prior: %u "
11976404edcSAsim Jamshed 					"(time diff: %uus)\n",
12076404edcSAsim Jamshed 					ts.ts_val, cur_stream->rcvvar->ts_recent,
12176404edcSAsim Jamshed 					TS_TO_USEC(pctx->p.cur_ts - cur_stream->rcvvar->ts_last_ts_upd));
12276404edcSAsim Jamshed 				cur_stream->rcvvar->ts_last_ts_upd = pctx->p.cur_ts;
12376404edcSAsim Jamshed 			}
12476404edcSAsim Jamshed 
12576404edcSAsim Jamshed 			cur_stream->rcvvar->ts_recent = ts.ts_val;
12676404edcSAsim Jamshed 			cur_stream->rcvvar->ts_lastack_rcvd = ts.ts_ref;
12776404edcSAsim Jamshed 		}
12876404edcSAsim Jamshed 	}
12976404edcSAsim Jamshed 
13076404edcSAsim Jamshed 	/* TCP sequence validation */
13176404edcSAsim Jamshed 	if (!TCP_SEQ_BETWEEN(pctx->p.seq + pctx->p.payloadlen, cur_stream->rcv_nxt,
13276404edcSAsim Jamshed 				cur_stream->rcv_nxt + cur_stream->rcvvar->rcv_wnd)) {
13376404edcSAsim Jamshed 
13476404edcSAsim Jamshed 		/* if RST bit is set, ignore the segment */
13576404edcSAsim Jamshed 		if (tcph->rst)
13676404edcSAsim Jamshed 			return FALSE;
13776404edcSAsim Jamshed 
13876404edcSAsim Jamshed 		if (cur_stream->state == TCP_ST_ESTABLISHED) {
13976404edcSAsim Jamshed 			/* check if it is to get window advertisement */
14076404edcSAsim Jamshed 			if (pctx->p.seq + 1 == cur_stream->rcv_nxt) {
14176404edcSAsim Jamshed 				TRACE_DBG("Window update request. (seq: %u, rcv_wnd: %u)\n",
142cafe7743SAsim Jamshed 						pctx->p.seq, cur_stream->rcvvar->rcv_wnd);
14376404edcSAsim Jamshed 				cur_stream->actions |= MOS_ACT_SEND_ACK_AGG;
14476404edcSAsim Jamshed 				return FALSE;
14576404edcSAsim Jamshed 
14676404edcSAsim Jamshed 			}
14776404edcSAsim Jamshed 
14876404edcSAsim Jamshed 			if (TCP_SEQ_LEQ(pctx->p.seq, cur_stream->rcv_nxt)) {
14976404edcSAsim Jamshed 				cur_stream->actions |= MOS_ACT_SEND_ACK_AGG;
15076404edcSAsim Jamshed 			} else {
15176404edcSAsim Jamshed 				cur_stream->actions |= MOS_ACT_SEND_ACK_NOW;
15276404edcSAsim Jamshed 			}
15376404edcSAsim Jamshed 		} else {
15476404edcSAsim Jamshed 			if (cur_stream->state == TCP_ST_TIME_WAIT) {
15576404edcSAsim Jamshed 				TRACE_DBG("Stream %d: tw expire update to %u\n",
15676404edcSAsim Jamshed 						cur_stream->id, cur_stream->rcvvar->ts_tw_expire);
15776404edcSAsim Jamshed 				AddtoTimewaitList(mtcp, cur_stream, pctx->p.cur_ts);
15876404edcSAsim Jamshed 			}
15976404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_CONTROL;
16076404edcSAsim Jamshed 		}
16176404edcSAsim Jamshed 		return FALSE;
16276404edcSAsim Jamshed 	}
16376404edcSAsim Jamshed 
16476404edcSAsim Jamshed 	return TRUE;
16576404edcSAsim Jamshed }
16676404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
16776404edcSAsim Jamshed static inline void
NotifyConnectionReset(mtcp_manager_t mtcp,tcp_stream * cur_stream)16876404edcSAsim Jamshed NotifyConnectionReset(mtcp_manager_t mtcp, tcp_stream *cur_stream)
16976404edcSAsim Jamshed {
17076404edcSAsim Jamshed 	TRACE_DBG("Stream %d: Notifying connection reset.\n", cur_stream->id);
17176404edcSAsim Jamshed 	/* TODO: implement this function */
17276404edcSAsim Jamshed 	/* signal to user "connection reset" */
17376404edcSAsim Jamshed }
17476404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
17576404edcSAsim Jamshed static inline int
ProcessRST(mtcp_manager_t mtcp,tcp_stream * cur_stream,struct pkt_ctx * pctx)17676404edcSAsim Jamshed ProcessRST(mtcp_manager_t mtcp, tcp_stream *cur_stream,
17776404edcSAsim Jamshed 		struct pkt_ctx *pctx)
17876404edcSAsim Jamshed {
17976404edcSAsim Jamshed 	/* TODO: we need reset validation logic */
18076404edcSAsim Jamshed 	/* the sequence number of a RST should be inside window */
18176404edcSAsim Jamshed 	/* (in SYN_SENT state, it should ack the previous SYN */
18276404edcSAsim Jamshed 
18376404edcSAsim Jamshed 	TRACE_DBG("Stream %d: TCP RESET (%s)\n",
18476404edcSAsim Jamshed 			cur_stream->id, TCPStateToString(cur_stream));
18576404edcSAsim Jamshed #if DUMP_STREAM
18676404edcSAsim Jamshed 	DumpStream(mtcp, cur_stream);
18776404edcSAsim Jamshed #endif
18876404edcSAsim Jamshed 
18976404edcSAsim Jamshed 	if (cur_stream->state <= TCP_ST_SYN_SENT) {
19076404edcSAsim Jamshed 		/* not handled here */
19176404edcSAsim Jamshed 		return FALSE;
19276404edcSAsim Jamshed 	}
19376404edcSAsim Jamshed 
19476404edcSAsim Jamshed 	if (cur_stream->state == TCP_ST_SYN_RCVD) {
19576404edcSAsim Jamshed 		/* ACK number of last sent ACK packet == rcv_nxt + 1*/
19676404edcSAsim Jamshed 		if (pctx->p.seq == 0 ||
19776404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
19876404edcSAsim Jamshed 			pctx->p.seq == cur_stream->rcv_nxt + 1 ||
19976404edcSAsim Jamshed #endif
200a5e1a556SAsim Jamshed 			pctx->p.ack_seq == cur_stream->snd_nxt)
20176404edcSAsim Jamshed 		{
20276404edcSAsim Jamshed 			cur_stream->state = TCP_ST_CLOSED_RSVD;
20376404edcSAsim Jamshed 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
20476404edcSAsim Jamshed 			cur_stream->close_reason = TCP_RESET;
20576404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_DESTROY;
20676404edcSAsim Jamshed 		} else {
20776404edcSAsim Jamshed 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
20876404edcSAsim Jamshed 				"(SYN_RCVD): Ignore invalid RST. "
20976404edcSAsim Jamshed 				"ack_seq expected: %u, ack_seq rcvd: %u\n",
21076404edcSAsim Jamshed 				cur_stream->rcv_nxt + 1, pctx->p.ack_seq);
21176404edcSAsim Jamshed 		}
21276404edcSAsim Jamshed 		return TRUE;
21376404edcSAsim Jamshed 	}
21476404edcSAsim Jamshed 
21576404edcSAsim Jamshed 	/* if the application is already closed the connection,
21676404edcSAsim Jamshed 	   just destroy the it */
21776404edcSAsim Jamshed 	if (cur_stream->state == TCP_ST_FIN_WAIT_1 ||
21876404edcSAsim Jamshed 			cur_stream->state == TCP_ST_FIN_WAIT_2 ||
21976404edcSAsim Jamshed 			cur_stream->state == TCP_ST_LAST_ACK ||
22076404edcSAsim Jamshed 			cur_stream->state == TCP_ST_CLOSING ||
22176404edcSAsim Jamshed 			cur_stream->state == TCP_ST_TIME_WAIT) {
22276404edcSAsim Jamshed 		cur_stream->state = TCP_ST_CLOSED_RSVD;
22376404edcSAsim Jamshed 		cur_stream->close_reason = TCP_ACTIVE_CLOSE;
22476404edcSAsim Jamshed 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
22576404edcSAsim Jamshed 		cur_stream->actions |= MOS_ACT_DESTROY;
22676404edcSAsim Jamshed 		return TRUE;
22776404edcSAsim Jamshed 	}
22876404edcSAsim Jamshed 
22976404edcSAsim Jamshed 	if (cur_stream->state >= TCP_ST_ESTABLISHED &&
23076404edcSAsim Jamshed 			cur_stream->state <= TCP_ST_CLOSE_WAIT) {
23176404edcSAsim Jamshed 		/* ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT */
23276404edcSAsim Jamshed 		/* TODO: flush all the segment queues */
23376404edcSAsim Jamshed 		//NotifyConnectionReset(mtcp, cur_stream);
23476404edcSAsim Jamshed 	}
23576404edcSAsim Jamshed 
23676404edcSAsim Jamshed 	if (!(cur_stream->sndvar->on_closeq || cur_stream->sndvar->on_closeq_int ||
23776404edcSAsim Jamshed 		  cur_stream->sndvar->on_resetq || cur_stream->sndvar->on_resetq_int)) {
23876404edcSAsim Jamshed 		//cur_stream->state = TCP_ST_CLOSED_RSVD;
23976404edcSAsim Jamshed 		//cur_stream->actions |= MOS_ACT_DESTROY;
24076404edcSAsim Jamshed 		cur_stream->state = TCP_ST_CLOSED_RSVD;
24176404edcSAsim Jamshed 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
24276404edcSAsim Jamshed 		cur_stream->close_reason = TCP_RESET;
24376404edcSAsim Jamshed 		if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
24476404edcSAsim Jamshed 			RaiseCloseEvent(mtcp, cur_stream);
24576404edcSAsim Jamshed 	}
24676404edcSAsim Jamshed 
24776404edcSAsim Jamshed 	return TRUE;
24876404edcSAsim Jamshed }
24976404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
25076404edcSAsim Jamshed inline void
EstimateRTT(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t mrtt)25176404edcSAsim Jamshed EstimateRTT(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t mrtt)
25276404edcSAsim Jamshed {
25376404edcSAsim Jamshed 	/* This function should be called for not retransmitted packets */
25476404edcSAsim Jamshed 	/* TODO: determine tcp_rto_min */
25576404edcSAsim Jamshed #define TCP_RTO_MIN 0
25676404edcSAsim Jamshed 	long m = mrtt;
25776404edcSAsim Jamshed 	uint32_t tcp_rto_min = TCP_RTO_MIN;
25876404edcSAsim Jamshed 	struct tcp_recv_vars *rcvvar = cur_stream->rcvvar;
25976404edcSAsim Jamshed 
26076404edcSAsim Jamshed 	if (m == 0) {
26176404edcSAsim Jamshed 		m = 1;
26276404edcSAsim Jamshed 	}
26376404edcSAsim Jamshed 	if (rcvvar->srtt != 0) {
26476404edcSAsim Jamshed 		/* rtt = 7/8 rtt + 1/8 new */
26576404edcSAsim Jamshed 		m -= (rcvvar->srtt >> 3);
26676404edcSAsim Jamshed 		rcvvar->srtt += m;
26776404edcSAsim Jamshed 		if (m < 0) {
26876404edcSAsim Jamshed 			m = -m;
26976404edcSAsim Jamshed 			m -= (rcvvar->mdev >> 2);
27076404edcSAsim Jamshed 			if (m > 0) {
27176404edcSAsim Jamshed 				m >>= 3;
27276404edcSAsim Jamshed 			}
27376404edcSAsim Jamshed 		} else {
27476404edcSAsim Jamshed 			m -= (rcvvar->mdev >> 2);
27576404edcSAsim Jamshed 		}
27676404edcSAsim Jamshed 		rcvvar->mdev += m;
27776404edcSAsim Jamshed 		if (rcvvar->mdev > rcvvar->mdev_max) {
27876404edcSAsim Jamshed 			rcvvar->mdev_max = rcvvar->mdev;
27976404edcSAsim Jamshed 			if (rcvvar->mdev_max > rcvvar->rttvar) {
28076404edcSAsim Jamshed 				rcvvar->rttvar = rcvvar->mdev_max;
28176404edcSAsim Jamshed 			}
28276404edcSAsim Jamshed 		}
28376404edcSAsim Jamshed 		if (TCP_SEQ_GT(cur_stream->sndvar->snd_una, rcvvar->rtt_seq)) {
28476404edcSAsim Jamshed 			if (rcvvar->mdev_max < rcvvar->rttvar) {
28576404edcSAsim Jamshed 				rcvvar->rttvar -= (rcvvar->rttvar - rcvvar->mdev_max) >> 2;
28676404edcSAsim Jamshed 			}
28776404edcSAsim Jamshed 			rcvvar->rtt_seq = cur_stream->snd_nxt;
28876404edcSAsim Jamshed 			rcvvar->mdev_max = tcp_rto_min;
28976404edcSAsim Jamshed 		}
29076404edcSAsim Jamshed 	} else {
29176404edcSAsim Jamshed 		/* fresh measurement */
29276404edcSAsim Jamshed 		rcvvar->srtt = m << 3;
29376404edcSAsim Jamshed 		rcvvar->mdev = m << 1;
29476404edcSAsim Jamshed 		rcvvar->mdev_max = rcvvar->rttvar = MAX(rcvvar->mdev, tcp_rto_min);
29576404edcSAsim Jamshed 		rcvvar->rtt_seq = cur_stream->snd_nxt;
29676404edcSAsim Jamshed 	}
29776404edcSAsim Jamshed 
29876404edcSAsim Jamshed 	TRACE_RTT("mrtt: %u (%uus), srtt: %u (%ums), mdev: %u, mdev_max: %u, "
29976404edcSAsim Jamshed 			"rttvar: %u, rtt_seq: %u\n", mrtt, mrtt * TIME_TICK,
30076404edcSAsim Jamshed 			rcvvar->srtt, TS_TO_MSEC((rcvvar->srtt) >> 3), rcvvar->mdev,
30176404edcSAsim Jamshed 			rcvvar->mdev_max, rcvvar->rttvar, rcvvar->rtt_seq);
30276404edcSAsim Jamshed }
30376404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
30476404edcSAsim Jamshed static inline void
ProcessACK(mtcp_manager_t mtcp,tcp_stream * cur_stream,struct pkt_ctx * pctx)30576404edcSAsim Jamshed ProcessACK(mtcp_manager_t mtcp, tcp_stream *cur_stream,
30676404edcSAsim Jamshed 		struct pkt_ctx *pctx)
30776404edcSAsim Jamshed {
30876404edcSAsim Jamshed 	const struct tcphdr* tcph = pctx->p.tcph;
30976404edcSAsim Jamshed 	uint32_t seq = pctx->p.seq;
31076404edcSAsim Jamshed 	uint32_t ack_seq = pctx->p.ack_seq;
31176404edcSAsim Jamshed 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
31276404edcSAsim Jamshed 	uint32_t cwindow, cwindow_prev;
31376404edcSAsim Jamshed 	uint32_t rmlen;
31476404edcSAsim Jamshed 	uint32_t snd_wnd_prev;
31576404edcSAsim Jamshed 	uint32_t right_wnd_edge;
31676404edcSAsim Jamshed 	uint8_t dup;
31776404edcSAsim Jamshed 
31876404edcSAsim Jamshed 	cwindow = pctx->p.window;
31976404edcSAsim Jamshed 	if (!tcph->syn) {
32076404edcSAsim Jamshed 		cwindow = cwindow << sndvar->wscale_peer;
32176404edcSAsim Jamshed 	}
32276404edcSAsim Jamshed 	right_wnd_edge = sndvar->peer_wnd + cur_stream->rcvvar->snd_wl2;
32376404edcSAsim Jamshed 
32476404edcSAsim Jamshed 	if (cur_stream->state == TCP_ST_FIN_WAIT_1 ||
32576404edcSAsim Jamshed 			cur_stream->state == TCP_ST_FIN_WAIT_2 ||
32676404edcSAsim Jamshed 			cur_stream->state == TCP_ST_CLOSING ||
32776404edcSAsim Jamshed 			cur_stream->state == TCP_ST_CLOSE_WAIT ||
32876404edcSAsim Jamshed 			cur_stream->state == TCP_ST_LAST_ACK) {
32976404edcSAsim Jamshed 		if (sndvar->is_fin_sent && ack_seq == sndvar->fss + 1) {
33076404edcSAsim Jamshed 			ack_seq--;
33176404edcSAsim Jamshed 		}
33276404edcSAsim Jamshed 	}
33376404edcSAsim Jamshed 
334*05e3289cSYoungGyoun 	/* If ack overs the sending buffer, return */
33576404edcSAsim Jamshed 	if (TCP_SEQ_GT(ack_seq, sndvar->sndbuf->head_seq + sndvar->sndbuf->len)) {
33676404edcSAsim Jamshed 		TRACE_DBG("Stream %d (%s): invalid acknologement. "
33776404edcSAsim Jamshed 				"ack_seq: %u, possible max_ack_seq: %u\n", cur_stream->id,
33876404edcSAsim Jamshed 				TCPStateToString(cur_stream), ack_seq,
33976404edcSAsim Jamshed 				sndvar->sndbuf->head_seq + sndvar->sndbuf->len);
34076404edcSAsim Jamshed 		return;
34176404edcSAsim Jamshed 	}
34276404edcSAsim Jamshed 
34376404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
34476404edcSAsim Jamshed 	if (TCP_SEQ_GT(seq + pctx->p.payloadlen, cur_stream->rcv_nxt))
34576404edcSAsim Jamshed 		cur_stream->rcv_nxt = seq + pctx->p.payloadlen;
34676404edcSAsim Jamshed #endif
34776404edcSAsim Jamshed 
34876404edcSAsim Jamshed 	/* Update window */
34976404edcSAsim Jamshed 	if (TCP_SEQ_LT(cur_stream->rcvvar->snd_wl1, seq) ||
35076404edcSAsim Jamshed 			(cur_stream->rcvvar->snd_wl1 == seq &&
35176404edcSAsim Jamshed 			TCP_SEQ_LT(cur_stream->rcvvar->snd_wl2, ack_seq)) ||
35276404edcSAsim Jamshed 			(cur_stream->rcvvar->snd_wl2 == ack_seq &&
35376404edcSAsim Jamshed 			cwindow > sndvar->peer_wnd)) {
35476404edcSAsim Jamshed 		cwindow_prev = sndvar->peer_wnd;
35576404edcSAsim Jamshed 		sndvar->peer_wnd = cwindow;
35676404edcSAsim Jamshed 		cur_stream->rcvvar->snd_wl1 = seq;
35776404edcSAsim Jamshed 		cur_stream->rcvvar->snd_wl2 = ack_seq;
35876404edcSAsim Jamshed 		TRACE_CLWND("Window update. "
35976404edcSAsim Jamshed 				"ack: %u, peer_wnd: %u, snd_nxt-snd_una: %u\n",
36076404edcSAsim Jamshed 				ack_seq, cwindow, cur_stream->snd_nxt - sndvar->snd_una);
36176404edcSAsim Jamshed 		if (cwindow_prev < cur_stream->snd_nxt - sndvar->snd_una &&
36276404edcSAsim Jamshed 				sndvar->peer_wnd >= cur_stream->snd_nxt - sndvar->snd_una) {
36376404edcSAsim Jamshed 			TRACE_CLWND("%u Broadcasting client window update! "
36476404edcSAsim Jamshed 					"ack_seq: %u, peer_wnd: %u (before: %u), "
36576404edcSAsim Jamshed 					"(snd_nxt - snd_una: %u)\n",
36676404edcSAsim Jamshed 					cur_stream->id, ack_seq, sndvar->peer_wnd, cwindow_prev,
36776404edcSAsim Jamshed 					cur_stream->snd_nxt - sndvar->snd_una);
36876404edcSAsim Jamshed 			if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
36976404edcSAsim Jamshed 				RaiseWriteEvent(mtcp, cur_stream);
37076404edcSAsim Jamshed 		}
37176404edcSAsim Jamshed 	}
37276404edcSAsim Jamshed 
37376404edcSAsim Jamshed 	/* Check duplicated ack count */
37476404edcSAsim Jamshed 	/* Duplicated ack if
37576404edcSAsim Jamshed 	   1) ack_seq is old
37676404edcSAsim Jamshed 	   2) payload length is 0.
37776404edcSAsim Jamshed 	   3) advertised window not changed.
37876404edcSAsim Jamshed 	   4) there is outstanding unacknowledged data
37976404edcSAsim Jamshed 	   5) ack_seq == snd_una
38076404edcSAsim Jamshed 	 */
38176404edcSAsim Jamshed 
38276404edcSAsim Jamshed 	dup = FALSE;
38376404edcSAsim Jamshed 	if (TCP_SEQ_LT(ack_seq, cur_stream->snd_nxt)) {
38476404edcSAsim Jamshed 		if (ack_seq == cur_stream->rcvvar->last_ack_seq && pctx->p.payloadlen == 0) {
38576404edcSAsim Jamshed 			if (cur_stream->rcvvar->snd_wl2 + sndvar->peer_wnd == right_wnd_edge) {
38676404edcSAsim Jamshed 				if (cur_stream->rcvvar->dup_acks + 1 > cur_stream->rcvvar->dup_acks) {
38776404edcSAsim Jamshed 					cur_stream->rcvvar->dup_acks++;
38876404edcSAsim Jamshed 				}
38976404edcSAsim Jamshed 				dup = TRUE;
39076404edcSAsim Jamshed 			}
39176404edcSAsim Jamshed 		}
39276404edcSAsim Jamshed 	}
39376404edcSAsim Jamshed 	if (!dup) {
39476404edcSAsim Jamshed 		cur_stream->rcvvar->dup_acks = 0;
39576404edcSAsim Jamshed 		cur_stream->rcvvar->last_ack_seq = ack_seq;
39676404edcSAsim Jamshed 	}
39776404edcSAsim Jamshed 
39876404edcSAsim Jamshed 	/* Fast retransmission */
39976404edcSAsim Jamshed 	if (dup && cur_stream->rcvvar->dup_acks == 3) {
40076404edcSAsim Jamshed 		TRACE_LOSS("Triple duplicated ACKs!! ack_seq: %u\n", ack_seq);
40176404edcSAsim Jamshed 		if (TCP_SEQ_LT(ack_seq, cur_stream->snd_nxt)) {
40276404edcSAsim Jamshed 			TRACE_LOSS("Reducing snd_nxt from %u to %u\n",
40376404edcSAsim Jamshed 					cur_stream->snd_nxt, ack_seq);
40476404edcSAsim Jamshed #if RTM_STAT
40576404edcSAsim Jamshed 			sndvar->rstat.tdp_ack_cnt++;
40676404edcSAsim Jamshed 			sndvar->rstat.tdp_ack_bytes += (cur_stream->snd_nxt - ack_seq);
40776404edcSAsim Jamshed #endif
40876404edcSAsim Jamshed 			if (ack_seq != sndvar->snd_una) {
40976404edcSAsim Jamshed 				TRACE_DBG("ack_seq and snd_una mismatch on tdp ack. "
41076404edcSAsim Jamshed 						"ack_seq: %u, snd_una: %u\n",
41176404edcSAsim Jamshed 						ack_seq, sndvar->snd_una);
41276404edcSAsim Jamshed 			}
41376404edcSAsim Jamshed 			cur_stream->snd_nxt = ack_seq;
41476404edcSAsim Jamshed 		}
41576404edcSAsim Jamshed 
41676404edcSAsim Jamshed 		/* update congestion control variables */
41776404edcSAsim Jamshed 		/* ssthresh to half of min of cwnd and peer wnd */
41876404edcSAsim Jamshed 		sndvar->ssthresh = MIN(sndvar->cwnd, sndvar->peer_wnd) / 2;
41976404edcSAsim Jamshed 		if (sndvar->ssthresh < 2 * sndvar->mss) {
42076404edcSAsim Jamshed 			sndvar->ssthresh = 2 * sndvar->mss;
42176404edcSAsim Jamshed 		}
42276404edcSAsim Jamshed 		sndvar->cwnd = sndvar->ssthresh + 3 * sndvar->mss;
42376404edcSAsim Jamshed 		TRACE_CONG("Fast retransmission. cwnd: %u, ssthresh: %u\n",
42476404edcSAsim Jamshed 				sndvar->cwnd, sndvar->ssthresh);
42576404edcSAsim Jamshed 
42676404edcSAsim Jamshed 		/* count number of retransmissions */
42776404edcSAsim Jamshed 		if (sndvar->nrtx < TCP_MAX_RTX) {
42876404edcSAsim Jamshed 			sndvar->nrtx++;
42976404edcSAsim Jamshed 		} else {
43076404edcSAsim Jamshed 			TRACE_DBG("Exceed MAX_RTX.\n");
43176404edcSAsim Jamshed 		}
43276404edcSAsim Jamshed 
43376404edcSAsim Jamshed 		cur_stream->actions |= MOS_ACT_SEND_DATA;
43476404edcSAsim Jamshed 
43576404edcSAsim Jamshed 	} else if (cur_stream->rcvvar->dup_acks > 3) {
43676404edcSAsim Jamshed 		/* Inflate congestion window until before overflow */
43776404edcSAsim Jamshed 		if ((uint32_t)(sndvar->cwnd + sndvar->mss) > sndvar->cwnd) {
43876404edcSAsim Jamshed 			sndvar->cwnd += sndvar->mss;
43976404edcSAsim Jamshed 			TRACE_CONG("Dupack cwnd inflate. cwnd: %u, ssthresh: %u\n",
44076404edcSAsim Jamshed 					sndvar->cwnd, sndvar->ssthresh);
44176404edcSAsim Jamshed 		}
44276404edcSAsim Jamshed 	}
44376404edcSAsim Jamshed 
44476404edcSAsim Jamshed #if TCP_OPT_SACK_ENABLED
44576404edcSAsim Jamshed 	ParseSACKOption(cur_stream, ack_seq, (uint8_t *)tcph + TCP_HEADER_LEN,
44676404edcSAsim Jamshed 			(tcph->doff << 2) - TCP_HEADER_LEN);
44776404edcSAsim Jamshed #endif /* TCP_OPT_SACK_ENABLED */
44876404edcSAsim Jamshed 
44976404edcSAsim Jamshed #if RECOVERY_AFTER_LOSS
45076404edcSAsim Jamshed 	/* updating snd_nxt (when recovered from loss) */
45176404edcSAsim Jamshed 	if (TCP_SEQ_GT(ack_seq, cur_stream->snd_nxt)) {
45276404edcSAsim Jamshed #if RTM_STAT
45376404edcSAsim Jamshed 		sndvar->rstat.ack_upd_cnt++;
45476404edcSAsim Jamshed 		sndvar->rstat.ack_upd_bytes += (ack_seq - cur_stream->snd_nxt);
45576404edcSAsim Jamshed #endif
45676404edcSAsim Jamshed 		TRACE_LOSS("Updating snd_nxt from %u to %u\n",
45776404edcSAsim Jamshed 				cur_stream->snd_nxt, ack_seq);
45876404edcSAsim Jamshed 		cur_stream->snd_nxt = ack_seq;
45976404edcSAsim Jamshed 		if (sndvar->sndbuf->len == 0) {
46076404edcSAsim Jamshed 			if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
46176404edcSAsim Jamshed 				RemoveFromSendList(mtcp, cur_stream);
46276404edcSAsim Jamshed 		}
46376404edcSAsim Jamshed 	}
46476404edcSAsim Jamshed #endif
46576404edcSAsim Jamshed 
46676404edcSAsim Jamshed 	/* If ack_seq is previously acked, return */
46776404edcSAsim Jamshed 	if (TCP_SEQ_GEQ(sndvar->sndbuf->head_seq, ack_seq)) {
46876404edcSAsim Jamshed 		return;
46976404edcSAsim Jamshed 	}
47076404edcSAsim Jamshed 
47176404edcSAsim Jamshed 	/* Remove acked sequence from send buffer */
47276404edcSAsim Jamshed 	rmlen = ack_seq - sndvar->sndbuf->head_seq;
47376404edcSAsim Jamshed 	if (rmlen > 0) {
47476404edcSAsim Jamshed 		/* Routine goes here only if there is new payload (not retransmitted) */
47576404edcSAsim Jamshed 		uint16_t packets;
47676404edcSAsim Jamshed 
47776404edcSAsim Jamshed 		/* If acks new data */
47876404edcSAsim Jamshed 		packets = rmlen / sndvar->eff_mss;
47976404edcSAsim Jamshed 		if ((rmlen / sndvar->eff_mss) * sndvar->eff_mss > rmlen) {
48076404edcSAsim Jamshed 			packets++;
48176404edcSAsim Jamshed 		}
48276404edcSAsim Jamshed 
48376404edcSAsim Jamshed 		/* Estimate RTT and calculate rto */
48476404edcSAsim Jamshed 		if (cur_stream->saw_timestamp) {
48576404edcSAsim Jamshed 			EstimateRTT(mtcp, cur_stream,
48676404edcSAsim Jamshed 					pctx->p.cur_ts - cur_stream->rcvvar->ts_lastack_rcvd);
48776404edcSAsim Jamshed 			sndvar->rto = (cur_stream->rcvvar->srtt >> 3) + cur_stream->rcvvar->rttvar;
48876404edcSAsim Jamshed 			assert(sndvar->rto > 0);
48976404edcSAsim Jamshed 		} else {
49076404edcSAsim Jamshed 			//TODO: Need to implement timestamp estimation without timestamp
49176404edcSAsim Jamshed 			TRACE_RTT("NOT IMPLEMENTED.\n");
49276404edcSAsim Jamshed 		}
49376404edcSAsim Jamshed 
49476404edcSAsim Jamshed 		/* Update congestion control variables */
49576404edcSAsim Jamshed 		if (cur_stream->state >= TCP_ST_ESTABLISHED) {
49676404edcSAsim Jamshed 			if (sndvar->cwnd < sndvar->ssthresh) {
49776404edcSAsim Jamshed 				if ((sndvar->cwnd + sndvar->mss) > sndvar->cwnd) {
49876404edcSAsim Jamshed 					sndvar->cwnd += (sndvar->mss * packets);
49976404edcSAsim Jamshed 				}
50076404edcSAsim Jamshed 				TRACE_CONG("slow start cwnd: %u, ssthresh: %u\n",
50176404edcSAsim Jamshed 						sndvar->cwnd, sndvar->ssthresh);
50276404edcSAsim Jamshed 			} else {
50376404edcSAsim Jamshed 				uint32_t new_cwnd = sndvar->cwnd +
50476404edcSAsim Jamshed 						packets * sndvar->mss * sndvar->mss /
50576404edcSAsim Jamshed 						sndvar->cwnd;
50676404edcSAsim Jamshed 				if (new_cwnd > sndvar->cwnd) {
50776404edcSAsim Jamshed 					sndvar->cwnd = new_cwnd;
50876404edcSAsim Jamshed 				}
50976404edcSAsim Jamshed 				//TRACE_CONG("congestion avoidance cwnd: %u, ssthresh: %u\n",
51076404edcSAsim Jamshed 				//		sndvar->cwnd, sndvar->ssthresh);
51176404edcSAsim Jamshed 			}
51276404edcSAsim Jamshed 		}
51376404edcSAsim Jamshed 
51476404edcSAsim Jamshed 		if (SBUF_LOCK(&sndvar->write_lock)) {
51576404edcSAsim Jamshed 			if (errno == EDEADLK)
51676404edcSAsim Jamshed 				perror("ProcessACK: write_lock blocked\n");
51776404edcSAsim Jamshed 			assert(0);
51876404edcSAsim Jamshed 		}
51976404edcSAsim Jamshed 		if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
52076404edcSAsim Jamshed 			SBRemove(mtcp->rbm_snd, sndvar->sndbuf, rmlen);
52176404edcSAsim Jamshed 		sndvar->snd_una = ack_seq;
52276404edcSAsim Jamshed 		snd_wnd_prev = sndvar->snd_wnd;
52376404edcSAsim Jamshed 		sndvar->snd_wnd = sndvar->sndbuf->size - sndvar->sndbuf->len;
52476404edcSAsim Jamshed 
52576404edcSAsim Jamshed 		/* If there was no available sending window */
52676404edcSAsim Jamshed 		/* notify the newly available window to application */
52776404edcSAsim Jamshed #if SELECTIVE_WRITE_EVENT_NOTIFY
52876404edcSAsim Jamshed 		if (snd_wnd_prev <= 0) {
52976404edcSAsim Jamshed #endif /* SELECTIVE_WRITE_EVENT_NOTIFY */
53076404edcSAsim Jamshed 			if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
53176404edcSAsim Jamshed 				RaiseWriteEvent(mtcp, cur_stream);
53276404edcSAsim Jamshed #if SELECTIVE_WRITE_EVENT_NOTIFY
53376404edcSAsim Jamshed 		}
53476404edcSAsim Jamshed #endif /* SELECTIVE_WRITE_EVENT_NOTIFY */
53576404edcSAsim Jamshed 
53676404edcSAsim Jamshed 		SBUF_UNLOCK(&sndvar->write_lock);
53776404edcSAsim Jamshed 		if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
53876404edcSAsim Jamshed 			UpdateRetransmissionTimer(mtcp, cur_stream, pctx->p.cur_ts);
53976404edcSAsim Jamshed 	}
54076404edcSAsim Jamshed }
54176404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
54276404edcSAsim Jamshed /* ProcessTCPPayload: merges TCP payload using receive ring buffer            */
54376404edcSAsim Jamshed /* Return: TRUE (1) in normal case, FALSE (0) if immediate ACK is required    */
54476404edcSAsim Jamshed /* CAUTION: should only be called at ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2      */
54576404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
54676404edcSAsim Jamshed static inline int
ProcessTCPPayload(mtcp_manager_t mtcp,tcp_stream * cur_stream,struct pkt_ctx * pctx)54776404edcSAsim Jamshed ProcessTCPPayload(mtcp_manager_t mtcp, tcp_stream *cur_stream,
54876404edcSAsim Jamshed 				struct pkt_ctx *pctx)
54976404edcSAsim Jamshed {
55076404edcSAsim Jamshed 	struct tcp_recv_vars *rcvvar = cur_stream->rcvvar;
55176404edcSAsim Jamshed 	uint32_t prev_rcv_nxt;
55276404edcSAsim Jamshed 	int ret = -1;
55376404edcSAsim Jamshed 	bool read_lock;
55476404edcSAsim Jamshed 	struct socket_map *walk;
55576404edcSAsim Jamshed 
55676404edcSAsim Jamshed 	if (!cur_stream->buffer_mgmt)
55776404edcSAsim Jamshed 		return FALSE;
55876404edcSAsim Jamshed 
55976404edcSAsim Jamshed 	/* if seq and segment length is lower than rcv_nxt, ignore and send ack */
560*05e3289cSYoungGyoun 	if (TCP_SEQ_LT(pctx->p.seq + pctx->p.payloadlen, cur_stream->rcv_nxt))
56176404edcSAsim Jamshed 		return FALSE;
562*05e3289cSYoungGyoun 
563*05e3289cSYoungGyoun 	/*
564*05e3289cSYoungGyoun 	TRACE_DEBUG("pctx->p.seq = %u, pctx->p.payloadlen = %d / cur_stream->rcv_nxt = %u, "
565*05e3289cSYoungGyoun 	            "rcvvar->rcv_wnd = %u\n",
566*05e3289cSYoungGyoun 				pctx->p.seq, pctx->p.payloadlen, cur_stream->rcv_nxt, rcvvar->rcv_wnd);
567*05e3289cSYoungGyoun 	*/
568*05e3289cSYoungGyoun 
56976404edcSAsim Jamshed 	/* if payload exceeds receiving buffer, drop and send ack */
57076404edcSAsim Jamshed 	if (TCP_SEQ_GT(pctx->p.seq + pctx->p.payloadlen, cur_stream->rcv_nxt + rcvvar->rcv_wnd)) {
571*05e3289cSYoungGyoun 		/* MOS_ON_ERROR: payload outside the window arrives */
572*05e3289cSYoungGyoun 		if (cur_stream->side == MOS_SIDE_CLI) {
573*05e3289cSYoungGyoun 			SOCKQ_FOREACH_REVERSE(walk, &cur_stream->msocks) {
574*05e3289cSYoungGyoun 				HandleCallback(mtcp, MOS_NULL, walk, cur_stream->side,
575*05e3289cSYoungGyoun 					       pctx, MOS_ON_ERROR);
576*05e3289cSYoungGyoun 			} SOCKQ_FOREACH_END;
577*05e3289cSYoungGyoun 		} else { /* cur_stream->side == MOS_SIDE_SVR */
57876404edcSAsim Jamshed 			SOCKQ_FOREACH_START(walk, &cur_stream->msocks) {
57976404edcSAsim Jamshed 				HandleCallback(mtcp, MOS_NULL, walk, cur_stream->side,
58076404edcSAsim Jamshed 					       pctx, MOS_ON_ERROR);
58176404edcSAsim Jamshed 			} SOCKQ_FOREACH_END;
582*05e3289cSYoungGyoun 		}
58376404edcSAsim Jamshed 		return FALSE;
58476404edcSAsim Jamshed 	}
58576404edcSAsim Jamshed 
58676404edcSAsim Jamshed 	/* allocate receive buffer if not exist */
58776404edcSAsim Jamshed 	if (!rcvvar->rcvbuf) {
58876404edcSAsim Jamshed 		rcvvar->rcvbuf = tcprb_new(mtcp->bufseg_pool, g_config.mos->rmem_size, cur_stream->buffer_mgmt);
58976404edcSAsim Jamshed 		if (!rcvvar->rcvbuf) {
59076404edcSAsim Jamshed 			TRACE_ERROR("Stream %d: Failed to allocate receive buffer.\n",
59176404edcSAsim Jamshed 				    cur_stream->id);
59276404edcSAsim Jamshed 			cur_stream->state = TCP_ST_CLOSED_RSVD;
59376404edcSAsim Jamshed 			cur_stream->close_reason = TCP_NO_MEM;
59476404edcSAsim Jamshed 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
59576404edcSAsim Jamshed 			if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
59676404edcSAsim Jamshed 				RaiseErrorEvent(mtcp, cur_stream);
59776404edcSAsim Jamshed 
59876404edcSAsim Jamshed 			return ERROR;
59976404edcSAsim Jamshed 		}
60076404edcSAsim Jamshed 	}
60176404edcSAsim Jamshed 
60276404edcSAsim Jamshed 	read_lock = HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM);
60376404edcSAsim Jamshed 
60476404edcSAsim Jamshed 	if (read_lock && SBUF_LOCK(&rcvvar->read_lock)) {
60576404edcSAsim Jamshed 		if (errno == EDEADLK)
60676404edcSAsim Jamshed 			perror("ProcessTCPPayload: read_lock blocked\n");
60776404edcSAsim Jamshed 		assert(0);
60876404edcSAsim Jamshed 	}
60976404edcSAsim Jamshed 
61076404edcSAsim Jamshed 	prev_rcv_nxt = cur_stream->rcv_nxt;
61176404edcSAsim Jamshed 
61276404edcSAsim Jamshed 	tcprb_t *rb = rcvvar->rcvbuf;
61376404edcSAsim Jamshed 	loff_t off = seq2loff(rb, pctx->p.seq, (rcvvar->irs + 1));
61476404edcSAsim Jamshed 	if (off >= 0) {
61576404edcSAsim Jamshed 		if (!HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM) &&
61676404edcSAsim Jamshed 			HAS_STREAM_TYPE(cur_stream, MOS_SOCK_MONITOR_STREAM_ACTIVE))
61776404edcSAsim Jamshed 			tcprb_setpile(rb, rb->pile + tcprb_cflen(rb));
618*05e3289cSYoungGyoun 
619*05e3289cSYoungGyoun 		/* policy on the buffer outrun case
620*05e3289cSYoungGyoun 		 * (1) has MOS_SOCK_MONITOR_STREAM_ACTIVE -> raise MOS_ON_ERROR first
621*05e3289cSYoungGyoun 		 *                                           and then overwrite payload
622*05e3289cSYoungGyoun 		 * (2) MOS_SOCK_STREAM only -> overwrite immediately
623*05e3289cSYoungGyoun 		 */
624*05e3289cSYoungGyoun 		if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_MONITOR_STREAM_ACTIVE)) {
625*05e3289cSYoungGyoun 			int fflen = tcprb_fflen(rb, pctx->p.payload, pctx->p.payloadlen, off);
626*05e3289cSYoungGyoun 			if (fflen > 0) {
627*05e3289cSYoungGyoun 				/* if we detect buffer outrun, raise a MOS_ON_ERROR event.
628*05e3289cSYoungGyoun 				   then, the application can either consume read buffer data,
629*05e3289cSYoungGyoun 				   or increase the read buffer size */
630*05e3289cSYoungGyoun 				if (cur_stream->side == MOS_SIDE_CLI) {
631*05e3289cSYoungGyoun 					SOCKQ_FOREACH_REVERSE(walk, &cur_stream->msocks) {
632*05e3289cSYoungGyoun 					if (walk->monitor_stream->peek_offset[cur_stream->side]
633*05e3289cSYoungGyoun 						< rb->head + fflen)
63476404edcSAsim Jamshed 						HandleCallback(mtcp, MOS_NULL, walk, cur_stream->side,
63576404edcSAsim Jamshed 									   pctx, MOS_ON_ERROR);
63676404edcSAsim Jamshed 					} SOCKQ_FOREACH_END;
637*05e3289cSYoungGyoun 				} else {
638*05e3289cSYoungGyoun 					SOCKQ_FOREACH_START(walk, &cur_stream->msocks) {
639*05e3289cSYoungGyoun 						if (walk->monitor_stream->peek_offset[cur_stream->side]
640*05e3289cSYoungGyoun 							< rb->head + fflen)
641*05e3289cSYoungGyoun 						HandleCallback(mtcp, MOS_NULL, walk, cur_stream->side,
642*05e3289cSYoungGyoun 									   pctx, MOS_ON_ERROR);
643*05e3289cSYoungGyoun 					} SOCKQ_FOREACH_END;
64476404edcSAsim Jamshed 				}
64576404edcSAsim Jamshed 			}
646*05e3289cSYoungGyoun 		}
647*05e3289cSYoungGyoun 		/* try writing packet payload to buffer */
648*05e3289cSYoungGyoun 		ret = tcprb_pwrite(rb, pctx->p.payload, pctx->p.payloadlen, off);
649*05e3289cSYoungGyoun 	}
65076404edcSAsim Jamshed 	/* TODO: update monitor vars */
6513b6b9ba6SAsim Jamshed 
65276404edcSAsim Jamshed 	/*
65376404edcSAsim Jamshed 	 * error can pop up due to disabled buffered management
65476404edcSAsim Jamshed 	 * (only in monitor mode). In that case, ignore the warning
65576404edcSAsim Jamshed 	 * message.
65676404edcSAsim Jamshed 	 */
65776404edcSAsim Jamshed 	if (ret < 0 && cur_stream->buffer_mgmt && mtcp->num_msp == 0)
658*05e3289cSYoungGyoun 		TRACE_ERROR("Cannot merge payload.\n");
65976404edcSAsim Jamshed 
66076404edcSAsim Jamshed 	/* discard the buffer if the state is FIN_WAIT_1 or FIN_WAIT_2,
66176404edcSAsim Jamshed 	   meaning that the connection is already closed by the application */
66276404edcSAsim Jamshed 	loff_t cftail = rb->pile + tcprb_cflen(rb);
66376404edcSAsim Jamshed 	if (cur_stream->state == TCP_ST_FIN_WAIT_1 ||
66476404edcSAsim Jamshed 	    cur_stream->state == TCP_ST_FIN_WAIT_2) {
66576404edcSAsim Jamshed 		/* XXX: Do we really need to update recv vars? */
66676404edcSAsim Jamshed 		tcprb_setpile(rb, cftail);
66776404edcSAsim Jamshed 	}
66876404edcSAsim Jamshed 	if (cftail > 0 && (rcvvar->irs + 1) + cftail > cur_stream->rcv_nxt) {
66976404edcSAsim Jamshed 		RAISE_DEBUG_EVENT(mtcp, cur_stream,
67076404edcSAsim Jamshed 				"Move rcv_nxt from %u to %u.\n",
67176404edcSAsim Jamshed 				cur_stream->rcv_nxt, (rcvvar->irs + 1) + cftail);
67276404edcSAsim Jamshed 		cur_stream->rcv_nxt = (rcvvar->irs + 1) + cftail;
67376404edcSAsim Jamshed 	}
67476404edcSAsim Jamshed 	assert(cftail - rb->pile >= 0);
67576404edcSAsim Jamshed 	rcvvar->rcv_wnd = rb->len - (cftail - rb->pile);
67676404edcSAsim Jamshed 
67776404edcSAsim Jamshed 	if (read_lock)
67876404edcSAsim Jamshed 		SBUF_UNLOCK(&rcvvar->read_lock);
67976404edcSAsim Jamshed 
68076404edcSAsim Jamshed 
68176404edcSAsim Jamshed 	if (TCP_SEQ_LEQ(cur_stream->rcv_nxt, prev_rcv_nxt)) {
68276404edcSAsim Jamshed 		/* There are some lost packets */
68376404edcSAsim Jamshed 		return FALSE;
68476404edcSAsim Jamshed 	}
68576404edcSAsim Jamshed 
68676404edcSAsim Jamshed 	TRACE_EPOLL("Stream %d data arrived. "
6874cb4e140SAsim Jamshed 		    "len: %d, ET: %llu, IN: %llu, OUT: %llu\n",
688cafe7743SAsim Jamshed 		    cur_stream->id, pctx->p.payloadlen,
6894cb4e140SAsim Jamshed 		    cur_stream->socket? (unsigned long long)cur_stream->socket->epoll & MOS_EPOLLET : 0,
6904cb4e140SAsim Jamshed 		    cur_stream->socket? (unsigned long long)cur_stream->socket->epoll & MOS_EPOLLIN : 0,
6914cb4e140SAsim Jamshed 		    cur_stream->socket? (unsigned long long)cur_stream->socket->epoll & MOS_EPOLLOUT : 0);
69276404edcSAsim Jamshed 
69376404edcSAsim Jamshed 	if (cur_stream->state == TCP_ST_ESTABLISHED)
69476404edcSAsim Jamshed 		RaiseReadEvent(mtcp, cur_stream);
69576404edcSAsim Jamshed 
69676404edcSAsim Jamshed 	return TRUE;
69776404edcSAsim Jamshed }
69876404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
69976404edcSAsim Jamshed static inline void
Handle_TCP_ST_LISTEN(mtcp_manager_t mtcp,tcp_stream * cur_stream,struct pkt_ctx * pctx)70076404edcSAsim Jamshed Handle_TCP_ST_LISTEN (mtcp_manager_t mtcp, tcp_stream* cur_stream,
70176404edcSAsim Jamshed 		struct pkt_ctx *pctx)
70276404edcSAsim Jamshed {
70376404edcSAsim Jamshed 
70476404edcSAsim Jamshed 	const struct tcphdr* tcph = pctx->p.tcph;
70576404edcSAsim Jamshed 
70676404edcSAsim Jamshed 	if (tcph->syn) {
70703d1e42cSAsim Jamshed 		if (cur_stream->state == TCP_ST_LISTEN)
70803d1e42cSAsim Jamshed 			cur_stream->rcv_nxt++;
70976404edcSAsim Jamshed 		cur_stream->state = TCP_ST_SYN_RCVD;
71076404edcSAsim Jamshed 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE | MOS_ON_CONN_START;
71176404edcSAsim Jamshed 		TRACE_STATE("Stream %d: TCP_ST_SYN_RCVD\n", cur_stream->id);
71276404edcSAsim Jamshed 		cur_stream->actions |= MOS_ACT_SEND_CONTROL;
71376404edcSAsim Jamshed 		if (IS_STREAM_TYPE(cur_stream, MOS_SOCK_MONITOR_STREAM_ACTIVE)) {
71476404edcSAsim Jamshed 			/**
71576404edcSAsim Jamshed 			 * Passive stream context needs to initialize irs and rcv_nxt
71676404edcSAsim Jamshed 			 * as it is not set neither during createserverstream or monitor
71776404edcSAsim Jamshed 			 * creation.
71876404edcSAsim Jamshed 			 */
71976404edcSAsim Jamshed 			cur_stream->rcvvar->irs =
72076404edcSAsim Jamshed 				cur_stream->rcv_nxt = pctx->p.seq;
72176404edcSAsim Jamshed 		}
72276404edcSAsim Jamshed 	} else {
72376404edcSAsim Jamshed 		CTRACE_ERROR("Stream %d (TCP_ST_LISTEN): "
72476404edcSAsim Jamshed 				"Packet without SYN.\n", cur_stream->id);
72576404edcSAsim Jamshed 	}
72676404edcSAsim Jamshed 
72776404edcSAsim Jamshed }
72876404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
72976404edcSAsim Jamshed static inline void
Handle_TCP_ST_SYN_SENT(mtcp_manager_t mtcp,tcp_stream * cur_stream,struct pkt_ctx * pctx)73076404edcSAsim Jamshed Handle_TCP_ST_SYN_SENT (mtcp_manager_t mtcp, tcp_stream* cur_stream,
73176404edcSAsim Jamshed 		struct pkt_ctx *pctx)
73276404edcSAsim Jamshed {
73376404edcSAsim Jamshed 	const struct tcphdr* tcph = pctx->p.tcph;
73476404edcSAsim Jamshed 
73576404edcSAsim Jamshed 	/* when active open */
73676404edcSAsim Jamshed 	if (tcph->ack) {
73776404edcSAsim Jamshed 		/* filter the unacceptable acks */
73876404edcSAsim Jamshed 		if (TCP_SEQ_LEQ(pctx->p.ack_seq, cur_stream->sndvar->iss)
73976404edcSAsim Jamshed #ifndef BE_RESILIENT_TO_PACKET_DROP
74076404edcSAsim Jamshed 			|| TCP_SEQ_GT(pctx->p.ack_seq, cur_stream->snd_nxt)
74176404edcSAsim Jamshed #endif
74276404edcSAsim Jamshed 				) {
74376404edcSAsim Jamshed 			if (!tcph->rst) {
74476404edcSAsim Jamshed 				cur_stream->actions |= MOS_ACT_SEND_RST;
74576404edcSAsim Jamshed 			}
74676404edcSAsim Jamshed 			return;
74776404edcSAsim Jamshed 		}
74876404edcSAsim Jamshed 		/* accept the ack */
74976404edcSAsim Jamshed 		cur_stream->sndvar->snd_una++;
75076404edcSAsim Jamshed 	}
75176404edcSAsim Jamshed 
75276404edcSAsim Jamshed 	if (tcph->rst) {
75376404edcSAsim Jamshed 		if (tcph->ack) {
75476404edcSAsim Jamshed 			cur_stream->state = TCP_ST_CLOSED_RSVD;
75576404edcSAsim Jamshed 			cur_stream->close_reason = TCP_RESET;
75676404edcSAsim Jamshed 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
75776404edcSAsim Jamshed 			if (cur_stream->socket) {
75876404edcSAsim Jamshed 				if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
75976404edcSAsim Jamshed 					RaiseErrorEvent(mtcp, cur_stream);
76076404edcSAsim Jamshed 			} else {
76176404edcSAsim Jamshed 				cur_stream->actions |= MOS_ACT_DESTROY;
76276404edcSAsim Jamshed 			}
76376404edcSAsim Jamshed 		}
76476404edcSAsim Jamshed 		return;
76576404edcSAsim Jamshed 	}
76676404edcSAsim Jamshed 
76776404edcSAsim Jamshed 	if (tcph->ack
76876404edcSAsim Jamshed #ifndef BE_RESILIENT_TO_PACKET_DROP
76976404edcSAsim Jamshed 		/* If we already lost SYNACK, let the ACK packet do the SYNACK's role */
77076404edcSAsim Jamshed 		&& tcph->syn
77176404edcSAsim Jamshed #endif
77276404edcSAsim Jamshed 	   ) {
77376404edcSAsim Jamshed 		int ret = HandleActiveOpen(mtcp, cur_stream, pctx);
77476404edcSAsim Jamshed 		if (!ret) {
77576404edcSAsim Jamshed 			return;
77676404edcSAsim Jamshed 		}
77776404edcSAsim Jamshed 
77876404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
77976404edcSAsim Jamshed 		if (!tcph->syn) {
78076404edcSAsim Jamshed 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
78176404edcSAsim Jamshed 					"We missed SYNACK. Replace it with an ACK packet.\n");
78276404edcSAsim Jamshed 			/* correct some variables */
78376404edcSAsim Jamshed 			cur_stream->rcv_nxt = pctx->p.seq;
78476404edcSAsim Jamshed 		}
78576404edcSAsim Jamshed #endif
78676404edcSAsim Jamshed 
78776404edcSAsim Jamshed 		cur_stream->sndvar->nrtx = 0;
78876404edcSAsim Jamshed 		if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
78976404edcSAsim Jamshed 			RemoveFromRTOList(mtcp, cur_stream);
79076404edcSAsim Jamshed 		cur_stream->state = TCP_ST_ESTABLISHED;
79176404edcSAsim Jamshed 		cur_stream->cb_events |= /*MOS_ON_CONN_SETUP |*/ MOS_ON_TCP_STATE_CHANGE;
79276404edcSAsim Jamshed 		TRACE_STATE("Stream %d: TCP_ST_ESTABLISHED\n", cur_stream->id);
79376404edcSAsim Jamshed 
79476404edcSAsim Jamshed 		if (cur_stream->socket) {
79576404edcSAsim Jamshed 			if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
79676404edcSAsim Jamshed 				RaiseWriteEvent(mtcp, cur_stream);
79776404edcSAsim Jamshed 		} else {
79876404edcSAsim Jamshed 			TRACE_STATE("Stream %d: ESTABLISHED, but no socket\n", cur_stream->id);
79976404edcSAsim Jamshed 			cur_stream->close_reason = TCP_ACTIVE_CLOSE;
80076404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_RST;
80176404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_DESTROY;
80276404edcSAsim Jamshed 		}
80376404edcSAsim Jamshed 		cur_stream->actions |= MOS_ACT_SEND_CONTROL;
80476404edcSAsim Jamshed 		if (g_config.mos->tcp_timeout > 0)
80576404edcSAsim Jamshed 			if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
80676404edcSAsim Jamshed 				AddtoTimeoutList(mtcp, cur_stream);
80776404edcSAsim Jamshed 
80876404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
80976404edcSAsim Jamshed 		/* Handle this ack packet */
81076404edcSAsim Jamshed 		if (!tcph->syn)
81176404edcSAsim Jamshed 			Handle_TCP_ST_ESTABLISHED(mtcp, cur_stream, pctx);
81276404edcSAsim Jamshed #endif
81376404edcSAsim Jamshed 
81476404edcSAsim Jamshed 	} else if (tcph->syn) {
81576404edcSAsim Jamshed 		cur_stream->state = TCP_ST_SYN_RCVD;
81676404edcSAsim Jamshed 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
81776404edcSAsim Jamshed 		TRACE_STATE("Stream %d: TCP_ST_SYN_RCVD\n", cur_stream->id);
81876404edcSAsim Jamshed 		cur_stream->snd_nxt = cur_stream->sndvar->iss;
81976404edcSAsim Jamshed 		cur_stream->actions |= MOS_ACT_SEND_CONTROL;
82076404edcSAsim Jamshed 	}
82176404edcSAsim Jamshed }
82276404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
82376404edcSAsim Jamshed static inline void
Handle_TCP_ST_SYN_RCVD(mtcp_manager_t mtcp,tcp_stream * cur_stream,struct pkt_ctx * pctx)82476404edcSAsim Jamshed Handle_TCP_ST_SYN_RCVD (mtcp_manager_t mtcp, tcp_stream* cur_stream,
82576404edcSAsim Jamshed 		struct pkt_ctx *pctx)
82676404edcSAsim Jamshed {
82776404edcSAsim Jamshed 	const struct tcphdr* tcph = pctx->p.tcph;
82876404edcSAsim Jamshed 	struct tcp_send_vars *sndvar = cur_stream->sndvar;
82976404edcSAsim Jamshed 	int ret;
83076404edcSAsim Jamshed 
83176404edcSAsim Jamshed 	if (tcph->ack) {
83276404edcSAsim Jamshed 		uint32_t prior_cwnd;
83376404edcSAsim Jamshed 		/* NOTE: We do not validate the ack number because first few packets
83476404edcSAsim Jamshed 		 * can also come out of order */
83576404edcSAsim Jamshed 
83676404edcSAsim Jamshed 		sndvar->snd_una++;
83776404edcSAsim Jamshed 		cur_stream->snd_nxt = pctx->p.ack_seq;
83876404edcSAsim Jamshed 		prior_cwnd = sndvar->cwnd;
83976404edcSAsim Jamshed 		sndvar->cwnd = ((prior_cwnd == 1)?
84076404edcSAsim Jamshed 				(sndvar->mss * 2): sndvar->mss);
84176404edcSAsim Jamshed 
84276404edcSAsim Jamshed 		//UpdateRetransmissionTimer(mtcp, cur_stream, cur_ts);
84376404edcSAsim Jamshed 		sndvar->nrtx = 0;
84476404edcSAsim Jamshed 		cur_stream->rcv_nxt = cur_stream->rcvvar->irs + 1;
84576404edcSAsim Jamshed 		if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
84676404edcSAsim Jamshed 			RemoveFromRTOList(mtcp, cur_stream);
84776404edcSAsim Jamshed 
84876404edcSAsim Jamshed 		cur_stream->state = TCP_ST_ESTABLISHED;
84976404edcSAsim Jamshed 		cur_stream->cb_events |= /*MOS_ON_CONN_SETUP |*/ MOS_ON_TCP_STATE_CHANGE;
85076404edcSAsim Jamshed 		TRACE_STATE("Stream %d: TCP_ST_ESTABLISHED\n", cur_stream->id);
85176404edcSAsim Jamshed 
85276404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
85376404edcSAsim Jamshed 		if (pctx->p.ack_seq != sndvar->iss + 1)
85476404edcSAsim Jamshed 			Handle_TCP_ST_ESTABLISHED(mtcp, cur_stream, pctx);
85576404edcSAsim Jamshed #endif
85676404edcSAsim Jamshed 
85776404edcSAsim Jamshed 		/* update listening socket */
85876404edcSAsim Jamshed 		if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM)) {
85976404edcSAsim Jamshed 
86076404edcSAsim Jamshed 			struct tcp_listener *listener = mtcp->listener;
86176404edcSAsim Jamshed 
86276404edcSAsim Jamshed 			ret = StreamEnqueue(listener->acceptq, cur_stream);
86376404edcSAsim Jamshed 			if (ret < 0) {
86476404edcSAsim Jamshed 				TRACE_ERROR("Stream %d: Failed to enqueue to "
86576404edcSAsim Jamshed 						"the listen backlog!\n", cur_stream->id);
86676404edcSAsim Jamshed 				cur_stream->close_reason = TCP_NOT_ACCEPTED;
86776404edcSAsim Jamshed 				cur_stream->state = TCP_ST_CLOSED_RSVD;
86876404edcSAsim Jamshed 				cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
86976404edcSAsim Jamshed 				TRACE_STATE("Stream %d: TCP_ST_CLOSED_RSVD\n", cur_stream->id);
87076404edcSAsim Jamshed 				cur_stream->actions |= MOS_ACT_SEND_CONTROL;
87176404edcSAsim Jamshed 			}
87276404edcSAsim Jamshed 
87376404edcSAsim Jamshed 			/* raise an event to the listening socket */
87476404edcSAsim Jamshed 			if (listener->socket && (listener->socket->epoll & MOS_EPOLLIN)) {
87576404edcSAsim Jamshed 				AddEpollEvent(mtcp->ep,
87676404edcSAsim Jamshed 						MOS_EVENT_QUEUE, listener->socket, MOS_EPOLLIN);
87776404edcSAsim Jamshed 			}
87876404edcSAsim Jamshed 		}
87976404edcSAsim Jamshed 
88076404edcSAsim Jamshed 		//TRACE_DBG("Stream %d inserted into acceptq.\n", cur_stream->id);
88176404edcSAsim Jamshed 		if (g_config.mos->tcp_timeout > 0)
88276404edcSAsim Jamshed 			if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
88376404edcSAsim Jamshed 				AddtoTimeoutList(mtcp, cur_stream);
88476404edcSAsim Jamshed 
88576404edcSAsim Jamshed 	} else {
88676404edcSAsim Jamshed 		/* Handle retransmitted SYN packet */
88776404edcSAsim Jamshed 		if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_MONITOR_STREAM_ACTIVE) &&
88876404edcSAsim Jamshed 			tcph->syn) {
88976404edcSAsim Jamshed 			if (pctx->p.seq == cur_stream->pair_stream->sndvar->iss) {
89076404edcSAsim Jamshed 				TRACE_DBG("syn retransmit! (p.seq = %u / iss = %u)\n",
89176404edcSAsim Jamshed 						  pctx->p.seq, cur_stream->pair_stream->sndvar->iss);
89276404edcSAsim Jamshed 				cur_stream->cb_events |= MOS_ON_REXMIT;
89376404edcSAsim Jamshed 			}
89476404edcSAsim Jamshed 		}
89576404edcSAsim Jamshed 
89676404edcSAsim Jamshed 		TRACE_DBG("Stream %d (TCP_ST_SYN_RCVD): No ACK.\n",
89776404edcSAsim Jamshed 				cur_stream->id);
89876404edcSAsim Jamshed 		/* retransmit SYN/ACK */
89976404edcSAsim Jamshed 		cur_stream->snd_nxt = sndvar->iss;
90076404edcSAsim Jamshed 		cur_stream->actions |= MOS_ACT_SEND_CONTROL;
90176404edcSAsim Jamshed 	}
90276404edcSAsim Jamshed }
90376404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
90476404edcSAsim Jamshed static inline void
Handle_TCP_ST_ESTABLISHED(mtcp_manager_t mtcp,tcp_stream * cur_stream,struct pkt_ctx * pctx)90576404edcSAsim Jamshed Handle_TCP_ST_ESTABLISHED (mtcp_manager_t mtcp, tcp_stream* cur_stream,
90676404edcSAsim Jamshed 		struct pkt_ctx *pctx)
90776404edcSAsim Jamshed {
90876404edcSAsim Jamshed 	const struct tcphdr* tcph = pctx->p.tcph;
90976404edcSAsim Jamshed 
91076404edcSAsim Jamshed 	if (tcph->syn) {
91176404edcSAsim Jamshed 		/* Handle retransmitted SYNACK packet */
91276404edcSAsim Jamshed 		if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_MONITOR_STREAM_ACTIVE) &&
91376404edcSAsim Jamshed 			tcph->ack) {
91476404edcSAsim Jamshed 			if (pctx->p.seq == cur_stream->pair_stream->sndvar->iss) {
91576404edcSAsim Jamshed 				TRACE_DBG("syn/ack retransmit! (p.seq = %u / iss = %u)\n",
91676404edcSAsim Jamshed 						pctx->p.seq, cur_stream->pair_stream->sndvar->iss);
91776404edcSAsim Jamshed 				cur_stream->cb_events |= MOS_ON_REXMIT;
91876404edcSAsim Jamshed 			}
91976404edcSAsim Jamshed 		}
92076404edcSAsim Jamshed 
92176404edcSAsim Jamshed 		TRACE_DBG("Stream %d (TCP_ST_ESTABLISHED): weird SYN. "
92276404edcSAsim Jamshed 				"seq: %u, expected: %u, ack_seq: %u, expected: %u\n",
92376404edcSAsim Jamshed 				cur_stream->id, pctx->p.seq, cur_stream->rcv_nxt,
92476404edcSAsim Jamshed 				pctx->p.ack_seq, cur_stream->snd_nxt);
92576404edcSAsim Jamshed 		cur_stream->snd_nxt = pctx->p.ack_seq;
92676404edcSAsim Jamshed 		cur_stream->actions |= MOS_ACT_SEND_CONTROL;
92776404edcSAsim Jamshed 		return;
92876404edcSAsim Jamshed 	}
92976404edcSAsim Jamshed 
93076404edcSAsim Jamshed 	if (pctx->p.payloadlen > 0) {
93176404edcSAsim Jamshed 		if (ProcessTCPPayload(mtcp, cur_stream, pctx)) {
93276404edcSAsim Jamshed 			/* if return is TRUE, send ACK */
93376404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_ACK_AGG;
93476404edcSAsim Jamshed 		} else {
93576404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_ACK_NOW;
93676404edcSAsim Jamshed 		}
93776404edcSAsim Jamshed 	}
93876404edcSAsim Jamshed 
93976404edcSAsim Jamshed 	if (tcph->ack) {
94076404edcSAsim Jamshed 		if (cur_stream->sndvar->sndbuf) {
94176404edcSAsim Jamshed 			ProcessACK(mtcp, cur_stream, pctx);
94276404edcSAsim Jamshed 		}
94376404edcSAsim Jamshed 	}
94476404edcSAsim Jamshed 
94576404edcSAsim Jamshed 	if (tcph->fin) {
94676404edcSAsim Jamshed 		/* process the FIN only if the sequence is valid */
94776404edcSAsim Jamshed 		/* FIN packet is allowed to push payload (should we check for PSH flag)? */
94876404edcSAsim Jamshed 		if (!cur_stream->buffer_mgmt ||
94976404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
95076404edcSAsim Jamshed 			TCP_SEQ_GEQ(pctx->p.seq + pctx->p.payloadlen, cur_stream->rcv_nxt)
95176404edcSAsim Jamshed #else
95276404edcSAsim Jamshed 			pctx->p.seq + pctx->p.payloadlen == cur_stream->rcv_nxt
95376404edcSAsim Jamshed #endif
95476404edcSAsim Jamshed 			) {
95576404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
95676404edcSAsim Jamshed 			cur_stream->rcv_nxt = pctx->p.seq + pctx->p.payloadlen;
95776404edcSAsim Jamshed #endif
95876404edcSAsim Jamshed 			cur_stream->state = TCP_ST_CLOSE_WAIT;
95976404edcSAsim Jamshed 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
96076404edcSAsim Jamshed 			TRACE_STATE("Stream %d: TCP_ST_CLOSE_WAIT\n", cur_stream->id);
96176404edcSAsim Jamshed 			cur_stream->rcv_nxt++;
96276404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_CONTROL;
96376404edcSAsim Jamshed 
96476404edcSAsim Jamshed 			/* notify FIN to application */
96576404edcSAsim Jamshed 			RaiseReadEvent(mtcp, cur_stream);
96676404edcSAsim Jamshed 		} else {
96776404edcSAsim Jamshed 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
96876404edcSAsim Jamshed 					"Expected %u, but received %u (= %u + %u)\n",
96976404edcSAsim Jamshed 					cur_stream->rcv_nxt, pctx->p.seq + pctx->p.payloadlen,
97076404edcSAsim Jamshed 					pctx->p.seq, pctx->p.payloadlen);
97176404edcSAsim Jamshed 
97276404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_ACK_NOW;
97376404edcSAsim Jamshed 			return;
97476404edcSAsim Jamshed 		}
97576404edcSAsim Jamshed 	}
97676404edcSAsim Jamshed }
97776404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
97876404edcSAsim Jamshed static inline void
Handle_TCP_ST_CLOSE_WAIT(mtcp_manager_t mtcp,tcp_stream * cur_stream,struct pkt_ctx * pctx)97976404edcSAsim Jamshed Handle_TCP_ST_CLOSE_WAIT (mtcp_manager_t mtcp, tcp_stream* cur_stream,
98076404edcSAsim Jamshed 		struct pkt_ctx *pctx)
98176404edcSAsim Jamshed {
98276404edcSAsim Jamshed 	if (TCP_SEQ_LT(pctx->p.seq, cur_stream->rcv_nxt)) {
98376404edcSAsim Jamshed 		TRACE_DBG("Stream %d (TCP_ST_CLOSE_WAIT): "
98476404edcSAsim Jamshed 				"weird seq: %u, expected: %u\n",
98576404edcSAsim Jamshed 				cur_stream->id, pctx->p.seq, cur_stream->rcv_nxt);
98676404edcSAsim Jamshed 		cur_stream->actions |= MOS_ACT_SEND_CONTROL;
98776404edcSAsim Jamshed 		return;
98876404edcSAsim Jamshed 	}
98976404edcSAsim Jamshed 
99076404edcSAsim Jamshed 	if (cur_stream->sndvar->sndbuf) {
99176404edcSAsim Jamshed 		ProcessACK(mtcp, cur_stream, pctx);
99276404edcSAsim Jamshed 	}
99376404edcSAsim Jamshed }
99476404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
99576404edcSAsim Jamshed static inline void
Handle_TCP_ST_LAST_ACK(mtcp_manager_t mtcp,tcp_stream * cur_stream,struct pkt_ctx * pctx)99676404edcSAsim Jamshed Handle_TCP_ST_LAST_ACK (mtcp_manager_t mtcp, tcp_stream* cur_stream,
99776404edcSAsim Jamshed 		struct pkt_ctx *pctx)
99876404edcSAsim Jamshed {
99976404edcSAsim Jamshed 	const struct tcphdr* tcph = pctx->p.tcph;
100076404edcSAsim Jamshed 
100176404edcSAsim Jamshed 	if (TCP_SEQ_LT(pctx->p.seq, cur_stream->rcv_nxt)) {
100276404edcSAsim Jamshed 		TRACE_DBG("Stream %d (TCP_ST_LAST_ACK): "
100376404edcSAsim Jamshed 				"weird seq: %u, expected: %u\n",
100476404edcSAsim Jamshed 				cur_stream->id, pctx->p.seq, cur_stream->rcv_nxt);
100576404edcSAsim Jamshed 		return;
100676404edcSAsim Jamshed 	}
100776404edcSAsim Jamshed 
100876404edcSAsim Jamshed 	if (tcph->ack) {
100976404edcSAsim Jamshed 		if (cur_stream->sndvar->sndbuf) {
101076404edcSAsim Jamshed 			ProcessACK(mtcp, cur_stream, pctx);
101176404edcSAsim Jamshed 		}
101276404edcSAsim Jamshed 
101376404edcSAsim Jamshed 		if (!cur_stream->sndvar->is_fin_sent) {
101476404edcSAsim Jamshed 			/* the case that FIN is not sent yet */
101576404edcSAsim Jamshed 			/* this is not ack for FIN, ignore */
101676404edcSAsim Jamshed 			TRACE_DBG("Stream %d (TCP_ST_LAST_ACK): "
101776404edcSAsim Jamshed 					"No FIN sent yet.\n", cur_stream->id);
101876404edcSAsim Jamshed #ifdef DBGMSG
101976404edcSAsim Jamshed 			DumpIPPacket(mtcp, pctx->p.iph, pctx->p.ip_len);
102076404edcSAsim Jamshed #endif
102176404edcSAsim Jamshed #if DUMP_STREAM
102276404edcSAsim Jamshed 			DumpStream(mtcp, cur_stream);
102376404edcSAsim Jamshed 			DumpControlList(mtcp, mtcp->n_sender[0]);
102476404edcSAsim Jamshed #endif
102576404edcSAsim Jamshed 			return;
102676404edcSAsim Jamshed 		}
102776404edcSAsim Jamshed 
102876404edcSAsim Jamshed 		/* check if ACK of FIN */
102976404edcSAsim Jamshed 		if (pctx->p.ack_seq == cur_stream->sndvar->fss + 1) {
103076404edcSAsim Jamshed 			cur_stream->sndvar->snd_una++;
103176404edcSAsim Jamshed 			if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
103276404edcSAsim Jamshed 				UpdateRetransmissionTimer(mtcp, cur_stream, pctx->p.cur_ts);
103376404edcSAsim Jamshed 			cur_stream->state = TCP_ST_CLOSED_RSVD;
103476404edcSAsim Jamshed 			cur_stream->close_reason = TCP_PASSIVE_CLOSE;
103576404edcSAsim Jamshed 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
103676404edcSAsim Jamshed 			TRACE_STATE("Stream %d: TCP_ST_CLOSED_RSVD\n",
103776404edcSAsim Jamshed 					cur_stream->id);
103876404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_DESTROY;
103976404edcSAsim Jamshed 		} else {
104076404edcSAsim Jamshed 			TRACE_DBG("Stream %d (TCP_ST_LAST_ACK): Not ACK of FIN. "
104176404edcSAsim Jamshed 					"ack_seq: %u, expected: %u\n",
104276404edcSAsim Jamshed 					cur_stream->id, pctx->p.ack_seq, cur_stream->sndvar->fss + 1);
104376404edcSAsim Jamshed 			//cur_stream->snd_nxt = cur_stream->sndvar->fss;
104476404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_CONTROL;
104576404edcSAsim Jamshed 		}
104676404edcSAsim Jamshed 	} else {
104776404edcSAsim Jamshed 		TRACE_DBG("Stream %d (TCP_ST_LAST_ACK): No ACK\n",
104876404edcSAsim Jamshed 			  cur_stream->id);
104976404edcSAsim Jamshed 		//cur_stream->snd_nxt = cur_stream->sndvar->fss;
105076404edcSAsim Jamshed 		cur_stream->actions |= MOS_ACT_SEND_CONTROL;
105176404edcSAsim Jamshed 	}
105276404edcSAsim Jamshed }
105376404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
105476404edcSAsim Jamshed static inline void
Handle_TCP_ST_FIN_WAIT_1(mtcp_manager_t mtcp,tcp_stream * cur_stream,struct pkt_ctx * pctx)105576404edcSAsim Jamshed Handle_TCP_ST_FIN_WAIT_1 (mtcp_manager_t mtcp, tcp_stream* cur_stream,
105676404edcSAsim Jamshed 		struct pkt_ctx *pctx)
105776404edcSAsim Jamshed {
105876404edcSAsim Jamshed 	const struct tcphdr* tcph = pctx->p.tcph;
105976404edcSAsim Jamshed 
106076404edcSAsim Jamshed 	if (TCP_SEQ_LT(pctx->p.seq, cur_stream->rcv_nxt)) {
106176404edcSAsim Jamshed 		RAISE_DEBUG_EVENT(mtcp, cur_stream,
106276404edcSAsim Jamshed 				"Stream %d (FIN_WAIT_1): "
106376404edcSAsim Jamshed 				"weird seq: %u, expected: %u\n",
106476404edcSAsim Jamshed 				cur_stream->id, pctx->p.seq, cur_stream->rcv_nxt);
106576404edcSAsim Jamshed 		cur_stream->actions |= MOS_ACT_SEND_CONTROL;
106676404edcSAsim Jamshed 		return;
106776404edcSAsim Jamshed 	}
106876404edcSAsim Jamshed 
106976404edcSAsim Jamshed 	if (tcph->ack) {
107076404edcSAsim Jamshed 		if (cur_stream->sndvar->sndbuf) {
107176404edcSAsim Jamshed 			ProcessACK(mtcp, cur_stream, pctx);
107276404edcSAsim Jamshed 		}
10738c9e1184SAsim Jamshed #if BE_RESILIENT_TO_PACKET_DROP
107476404edcSAsim Jamshed 		if (cur_stream->sndvar->is_fin_sent &&
107576404edcSAsim Jamshed 			((!HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM) &&
107676404edcSAsim Jamshed 			 pctx->p.ack_seq == cur_stream->sndvar->fss) ||
107776404edcSAsim Jamshed 			 pctx->p.ack_seq == cur_stream->sndvar->fss + 1)) {
10788c9e1184SAsim Jamshed 
10798c9e1184SAsim Jamshed #else
10808c9e1184SAsim Jamshed 		if (cur_stream->sndvar->is_fin_sent &&
10818c9e1184SAsim Jamshed 		    pctx->p.ack_seq == cur_stream->sndvar->fss + 1) {
10828c9e1184SAsim Jamshed #endif
108376404edcSAsim Jamshed 			cur_stream->sndvar->snd_una = pctx->p.ack_seq;
108476404edcSAsim Jamshed 			if (TCP_SEQ_GT(pctx->p.ack_seq, cur_stream->snd_nxt)) {
108576404edcSAsim Jamshed 				TRACE_DBG("Stream %d: update snd_nxt to %u\n",
108676404edcSAsim Jamshed 						cur_stream->id, pctx->p.ack_seq);
108776404edcSAsim Jamshed 				cur_stream->snd_nxt = pctx->p.ack_seq;
108876404edcSAsim Jamshed 			}
108976404edcSAsim Jamshed 			//cur_stream->sndvar->snd_una++;
109076404edcSAsim Jamshed 			//UpdateRetransmissionTimer(mtcp, cur_stream, cur_ts);
109176404edcSAsim Jamshed 			cur_stream->sndvar->nrtx = 0;
109276404edcSAsim Jamshed 			if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
109376404edcSAsim Jamshed 				RemoveFromRTOList(mtcp, cur_stream);
109476404edcSAsim Jamshed 			cur_stream->state = TCP_ST_FIN_WAIT_2;
109576404edcSAsim Jamshed 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
109676404edcSAsim Jamshed 			TRACE_STATE("Stream %d: TCP_ST_FIN_WAIT_2\n",
109776404edcSAsim Jamshed 					cur_stream->id);
109876404edcSAsim Jamshed 		} else {
109976404edcSAsim Jamshed 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
110076404edcSAsim Jamshed 					"Failed to transit to FIN_WAIT_2, "
110176404edcSAsim Jamshed 					"is_fin_sent: %s, ack_seq: %u, sndvar->fss: %u\n",
110276404edcSAsim Jamshed 					cur_stream->sndvar->is_fin_sent ? "true" : "false",
110376404edcSAsim Jamshed 					pctx->p.ack_seq, cur_stream->sndvar->fss);
110476404edcSAsim Jamshed 		}
110576404edcSAsim Jamshed 
110676404edcSAsim Jamshed 	} else {
110776404edcSAsim Jamshed 		RAISE_DEBUG_EVENT(mtcp, cur_stream,
110876404edcSAsim Jamshed 				"Failed to transit to FIN_WAIT_2, "
110976404edcSAsim Jamshed 				"We got a %s%s%s%s%s%s packet.",
111076404edcSAsim Jamshed 				pctx->p.tcph->syn ? "S" : "",
111176404edcSAsim Jamshed 				pctx->p.tcph->fin ? "F" : "",
111276404edcSAsim Jamshed 				pctx->p.tcph->rst ? "R" : "",
111376404edcSAsim Jamshed 				pctx->p.tcph->psh ? "P" : "",
111476404edcSAsim Jamshed 				pctx->p.tcph->urg ? "U" : "",
111576404edcSAsim Jamshed 				pctx->p.tcph->ack ? "A" : "");
111676404edcSAsim Jamshed 
111776404edcSAsim Jamshed 		TRACE_DBG("Stream %d: does not contain an ack!\n",
111876404edcSAsim Jamshed 				cur_stream->id);
111976404edcSAsim Jamshed 		return;
112076404edcSAsim Jamshed 	}
112176404edcSAsim Jamshed 
112276404edcSAsim Jamshed 	if (pctx->p.payloadlen > 0) {
112376404edcSAsim Jamshed 		if (ProcessTCPPayload(mtcp, cur_stream, pctx)) {
112476404edcSAsim Jamshed 			/* if return is TRUE, send ACK */
112576404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_ACK_AGG;
112676404edcSAsim Jamshed 		} else {
112776404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_ACK_NOW;
112876404edcSAsim Jamshed 		}
112976404edcSAsim Jamshed 	}
113076404edcSAsim Jamshed 
113176404edcSAsim Jamshed 	if (tcph->fin) {
113276404edcSAsim Jamshed 		/* process the FIN only if the sequence is valid */
113376404edcSAsim Jamshed 		/* FIN packet is allowed to push payload (should we check for PSH flag)? */
113476404edcSAsim Jamshed 		if (!cur_stream->buffer_mgmt ||
113576404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
113676404edcSAsim Jamshed 			TCP_SEQ_GEQ(pctx->p.seq + pctx->p.payloadlen, cur_stream->rcv_nxt)
113776404edcSAsim Jamshed #else
113876404edcSAsim Jamshed 			pctx->p.seq + pctx->p.payloadlen == cur_stream->rcv_nxt
113976404edcSAsim Jamshed #endif
114076404edcSAsim Jamshed 		   ) {
114176404edcSAsim Jamshed 			cur_stream->rcv_nxt++;
114276404edcSAsim Jamshed 
114376404edcSAsim Jamshed 			if (cur_stream->state == TCP_ST_FIN_WAIT_1) {
114476404edcSAsim Jamshed 				cur_stream->state = TCP_ST_CLOSING;
114576404edcSAsim Jamshed 				cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
114676404edcSAsim Jamshed 				TRACE_STATE("Stream %d: TCP_ST_CLOSING\n", cur_stream->id);
114776404edcSAsim Jamshed 
114876404edcSAsim Jamshed 			} else if (cur_stream->state == TCP_ST_FIN_WAIT_2) {
114976404edcSAsim Jamshed 				cur_stream->state = TCP_ST_TIME_WAIT;
115076404edcSAsim Jamshed 				cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
115176404edcSAsim Jamshed 				TRACE_STATE("Stream %d: TCP_ST_TIME_WAIT\n", cur_stream->id);
115276404edcSAsim Jamshed 				AddtoTimewaitList(mtcp, cur_stream, pctx->p.cur_ts);
115376404edcSAsim Jamshed 			}
115476404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_CONTROL;
115576404edcSAsim Jamshed 		} else {
115676404edcSAsim Jamshed 			RAISE_DEBUG_EVENT(mtcp, cur_stream,
115776404edcSAsim Jamshed 					"Expected %u, but received %u (= %u + %u)\n",
115876404edcSAsim Jamshed 					cur_stream->rcv_nxt, pctx->p.seq + pctx->p.payloadlen,
115976404edcSAsim Jamshed 					pctx->p.seq, pctx->p.payloadlen);
116076404edcSAsim Jamshed 
116176404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_ACK_NOW;
116276404edcSAsim Jamshed 			return;
116376404edcSAsim Jamshed 		}
116476404edcSAsim Jamshed 	}
116576404edcSAsim Jamshed }
116676404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
116776404edcSAsim Jamshed static inline void
116876404edcSAsim Jamshed Handle_TCP_ST_FIN_WAIT_2 (mtcp_manager_t mtcp, tcp_stream* cur_stream,
116976404edcSAsim Jamshed 		struct pkt_ctx *pctx)
117076404edcSAsim Jamshed {
117176404edcSAsim Jamshed 	const struct tcphdr* tcph = pctx->p.tcph;
117276404edcSAsim Jamshed 
117376404edcSAsim Jamshed 	if (tcph->ack) {
117476404edcSAsim Jamshed 		if (cur_stream->sndvar->sndbuf) {
117576404edcSAsim Jamshed 			ProcessACK(mtcp, cur_stream, pctx);
117676404edcSAsim Jamshed 		}
117776404edcSAsim Jamshed 	} else {
117876404edcSAsim Jamshed 		TRACE_DBG("Stream %d: does not contain an ack!\n",
117976404edcSAsim Jamshed 				cur_stream->id);
118076404edcSAsim Jamshed 		return;
118176404edcSAsim Jamshed 	}
118276404edcSAsim Jamshed 
118376404edcSAsim Jamshed 	if (pctx->p.payloadlen > 0) {
118476404edcSAsim Jamshed 		if (ProcessTCPPayload(mtcp, cur_stream, pctx)) {
118576404edcSAsim Jamshed 			/* if return is TRUE, send ACK */
118676404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_ACK_AGG;
118776404edcSAsim Jamshed 		} else {
118876404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_ACK_NOW;
118976404edcSAsim Jamshed 		}
119076404edcSAsim Jamshed 	}
119176404edcSAsim Jamshed 
119276404edcSAsim Jamshed 	if (tcph->fin) {
119376404edcSAsim Jamshed 		/* process the FIN only if the sequence is valid */
119476404edcSAsim Jamshed 		/* FIN packet is allowed to push payload (should we check for PSH flag)? */
119576404edcSAsim Jamshed 		if (!cur_stream->buffer_mgmt ||
119676404edcSAsim Jamshed #ifdef BE_RESILIENT_TO_PACKET_DROP
119776404edcSAsim Jamshed 			TCP_SEQ_GEQ(pctx->p.seq + pctx->p.payloadlen, cur_stream->rcv_nxt)
119876404edcSAsim Jamshed #else
119976404edcSAsim Jamshed 			pctx->p.seq + pctx->p.payloadlen == cur_stream->rcv_nxt
120076404edcSAsim Jamshed #endif
120176404edcSAsim Jamshed 			) {
120276404edcSAsim Jamshed 			cur_stream->state = TCP_ST_TIME_WAIT;
120376404edcSAsim Jamshed 			cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
120476404edcSAsim Jamshed 			cur_stream->rcv_nxt++;
120576404edcSAsim Jamshed 			TRACE_STATE("Stream %d: TCP_ST_TIME_WAIT\n", cur_stream->id);
120676404edcSAsim Jamshed 
120776404edcSAsim Jamshed 			AddtoTimewaitList(mtcp, cur_stream, pctx->p.cur_ts);
120876404edcSAsim Jamshed 			cur_stream->actions |= MOS_ACT_SEND_CONTROL;
120976404edcSAsim Jamshed 		}
121076404edcSAsim Jamshed 	} else {
121176404edcSAsim Jamshed 		TRACE_DBG("Stream %d (TCP_ST_FIN_WAIT_2): No FIN. "
121276404edcSAsim Jamshed 				"seq: %u, ack_seq: %u, snd_nxt: %u, snd_una: %u\n",
1213cafe7743SAsim Jamshed 				cur_stream->id, pctx->p.seq, pctx->p.ack_seq,
121476404edcSAsim Jamshed 				cur_stream->snd_nxt, cur_stream->sndvar->snd_una);
121576404edcSAsim Jamshed #if DBGMSG
1216cafe7743SAsim Jamshed 		DumpIPPacket(mtcp, pctx->p.iph, pctx->p.ip_len);
121776404edcSAsim Jamshed #endif
121876404edcSAsim Jamshed 	}
121976404edcSAsim Jamshed 
122076404edcSAsim Jamshed }
122176404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
122276404edcSAsim Jamshed static inline void
122376404edcSAsim Jamshed Handle_TCP_ST_CLOSING (mtcp_manager_t mtcp, tcp_stream* cur_stream,
122476404edcSAsim Jamshed 		struct pkt_ctx *pctx)
122576404edcSAsim Jamshed {
122676404edcSAsim Jamshed 	const struct tcphdr* tcph = pctx->p.tcph;
122776404edcSAsim Jamshed 
122876404edcSAsim Jamshed 	if (tcph->ack) {
122976404edcSAsim Jamshed 		if (cur_stream->sndvar->sndbuf) {
123076404edcSAsim Jamshed 			ProcessACK(mtcp, cur_stream, pctx);
123176404edcSAsim Jamshed 		}
123276404edcSAsim Jamshed 
123376404edcSAsim Jamshed 		if (!cur_stream->sndvar->is_fin_sent) {
123476404edcSAsim Jamshed 			TRACE_DBG("Stream %d (TCP_ST_CLOSING): "
123576404edcSAsim Jamshed 					"No FIN sent yet.\n", cur_stream->id);
123676404edcSAsim Jamshed 			return;
123776404edcSAsim Jamshed 		}
123876404edcSAsim Jamshed 
123976404edcSAsim Jamshed 		// check if ACK of FIN
124076404edcSAsim Jamshed 		if (pctx->p.ack_seq != cur_stream->sndvar->fss + 1) {
124176404edcSAsim Jamshed #if DBGMSG
124276404edcSAsim Jamshed 			TRACE_DBG("Stream %d (TCP_ST_CLOSING): Not ACK of FIN. "
124376404edcSAsim Jamshed 				  "ack_seq: %u, snd_nxt: %u, snd_una: %u, fss: %u\n",
1244cafe7743SAsim Jamshed 				  cur_stream->id, pctx->p.ack_seq, cur_stream->snd_nxt,
124576404edcSAsim Jamshed 				  cur_stream->sndvar->snd_una, cur_stream->sndvar->fss);
1246cafe7743SAsim Jamshed 			DumpIPPacketToFile(stderr, pctx->p.iph, pctx->p.ip_len);
124776404edcSAsim Jamshed 			DumpStream(mtcp, cur_stream);
124876404edcSAsim Jamshed #endif
124976404edcSAsim Jamshed 			//assert(0);
125076404edcSAsim Jamshed 			/* if the packet is not the ACK of FIN, ignore */
125176404edcSAsim Jamshed 			return;
125276404edcSAsim Jamshed 		}
125376404edcSAsim Jamshed 
125476404edcSAsim Jamshed 		cur_stream->sndvar->snd_una = pctx->p.ack_seq;
125576404edcSAsim Jamshed 		if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
125676404edcSAsim Jamshed 			UpdateRetransmissionTimer(mtcp, cur_stream, pctx->p.cur_ts);
125776404edcSAsim Jamshed 		cur_stream->state = TCP_ST_TIME_WAIT;
125876404edcSAsim Jamshed 		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
125976404edcSAsim Jamshed 		TRACE_STATE("Stream %d: TCP_ST_TIME_WAIT\n", cur_stream->id);
126076404edcSAsim Jamshed 
126176404edcSAsim Jamshed 		AddtoTimewaitList(mtcp, cur_stream, pctx->p.cur_ts);
126276404edcSAsim Jamshed 
126376404edcSAsim Jamshed 	} else {
126476404edcSAsim Jamshed 		TRACE_DBG("Stream %d (TCP_ST_CLOSING): Not ACK\n",
126576404edcSAsim Jamshed 			  cur_stream->id);
126676404edcSAsim Jamshed 		return;
126776404edcSAsim Jamshed 	}
126876404edcSAsim Jamshed }
126976404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
127076404edcSAsim Jamshed void
127176404edcSAsim Jamshed UpdateRecvTCPContext(mtcp_manager_t mtcp, struct tcp_stream *cur_stream,
127276404edcSAsim Jamshed 		struct pkt_ctx *pctx)
127376404edcSAsim Jamshed {
127476404edcSAsim Jamshed 	struct tcphdr* tcph = pctx->p.tcph;
127576404edcSAsim Jamshed 	int ret;
127676404edcSAsim Jamshed 
127776404edcSAsim Jamshed 	assert(cur_stream);
127876404edcSAsim Jamshed 
127976404edcSAsim Jamshed 	/* Validate sequence. if not valid, ignore the packet */
128003d1e42cSAsim Jamshed 	if (cur_stream->state > TCP_ST_SYN_RCVD) {
128176404edcSAsim Jamshed 
128276404edcSAsim Jamshed 		ret = ValidateSequence(mtcp, cur_stream, pctx);
128376404edcSAsim Jamshed 		if (!ret) {
128476404edcSAsim Jamshed 			TRACE_DBG("Stream %d: Unexpected sequence: %u, expected: %u\n",
128576404edcSAsim Jamshed 					cur_stream->id, pctx->p.seq, cur_stream->rcv_nxt);
128676404edcSAsim Jamshed #ifdef DBGMSG
128776404edcSAsim Jamshed 			DumpIPPacket(mtcp, pctx->p.iph, pctx->p.ip_len);
128876404edcSAsim Jamshed #endif
128976404edcSAsim Jamshed #if DUMP_STREAM
129076404edcSAsim Jamshed 			DumpStream(mtcp, cur_stream);
129176404edcSAsim Jamshed #endif
129276404edcSAsim Jamshed 			/* cur_stream->cb_events |= MOS_ON_ERROR; */
129376404edcSAsim Jamshed 		}
129476404edcSAsim Jamshed 	}
129576404edcSAsim Jamshed 	/* Update receive window size */
129676404edcSAsim Jamshed 	if (tcph->syn) {
129776404edcSAsim Jamshed 		cur_stream->sndvar->peer_wnd = pctx->p.window;
129876404edcSAsim Jamshed 	} else {
129976404edcSAsim Jamshed 		cur_stream->sndvar->peer_wnd =
130076404edcSAsim Jamshed 				(uint32_t)pctx->p.window << cur_stream->sndvar->wscale_peer;
130176404edcSAsim Jamshed 	}
130276404edcSAsim Jamshed 
130376404edcSAsim Jamshed 	cur_stream->last_active_ts = pctx->p.cur_ts;
130476404edcSAsim Jamshed 	if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM))
130576404edcSAsim Jamshed 		UpdateTimeoutList(mtcp, cur_stream);
130676404edcSAsim Jamshed 
130776404edcSAsim Jamshed 	/* Process RST: process here only if state > TCP_ST_SYN_SENT */
130876404edcSAsim Jamshed 	if (tcph->rst) {
130976404edcSAsim Jamshed 		cur_stream->have_reset = TRUE;
131076404edcSAsim Jamshed 		if (cur_stream->state > TCP_ST_SYN_SENT) {
131176404edcSAsim Jamshed 			if (ProcessRST(mtcp, cur_stream, pctx)) {
131276404edcSAsim Jamshed 				return;
131376404edcSAsim Jamshed 			}
131476404edcSAsim Jamshed 		}
131576404edcSAsim Jamshed 	}
131676404edcSAsim Jamshed 
131776404edcSAsim Jamshed 	if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_MONITOR_STREAM_ACTIVE) &&
131876404edcSAsim Jamshed 		pctx->p.tcph->fin) {
131976404edcSAsim Jamshed 		if (cur_stream->state == TCP_ST_CLOSE_WAIT ||
132076404edcSAsim Jamshed 			cur_stream->state == TCP_ST_LAST_ACK ||
132176404edcSAsim Jamshed 			cur_stream->state == TCP_ST_CLOSING ||
132276404edcSAsim Jamshed 			cur_stream->state == TCP_ST_TIME_WAIT) {
132376404edcSAsim Jamshed 			/* Handle retransmitted FIN packet */
132476404edcSAsim Jamshed 			if (pctx->p.seq == cur_stream->pair_stream->sndvar->fss) {
132576404edcSAsim Jamshed 				TRACE_DBG("FIN retransmit! (seq = %u / fss = %u)\n",
13264cb4e140SAsim Jamshed 						pctx->p.seq, cur_stream->pair_stream->sndvar->fss);
132776404edcSAsim Jamshed 				cur_stream->cb_events |= MOS_ON_REXMIT;
132876404edcSAsim Jamshed 			}
132976404edcSAsim Jamshed 		}
133076404edcSAsim Jamshed 	}
133176404edcSAsim Jamshed 
133276404edcSAsim Jamshed 	switch (cur_stream->state) {
133376404edcSAsim Jamshed 	case TCP_ST_LISTEN:
133476404edcSAsim Jamshed 		Handle_TCP_ST_LISTEN(mtcp, cur_stream, pctx);
133576404edcSAsim Jamshed 		break;
133676404edcSAsim Jamshed 
133776404edcSAsim Jamshed 	case TCP_ST_SYN_SENT:
133876404edcSAsim Jamshed 		Handle_TCP_ST_SYN_SENT(mtcp, cur_stream, pctx);
133976404edcSAsim Jamshed 		break;
134076404edcSAsim Jamshed 
134176404edcSAsim Jamshed 	case TCP_ST_SYN_RCVD:
134203d1e42cSAsim Jamshed 		/* SYN retransmit implies our SYN/ACK was lost. Resend */
134303d1e42cSAsim Jamshed 		if (tcph->syn && pctx->p.seq == cur_stream->rcvvar->irs)
134403d1e42cSAsim Jamshed 			Handle_TCP_ST_LISTEN(mtcp, cur_stream, pctx);
1345d8823779SAsim Jamshed 		else {
134676404edcSAsim Jamshed 			Handle_TCP_ST_SYN_RCVD(mtcp, cur_stream, pctx);
1347d8823779SAsim Jamshed 			if (pctx->p.payloadlen > 0 && cur_stream->state == TCP_ST_ESTABLISHED)
1348d8823779SAsim Jamshed 				Handle_TCP_ST_ESTABLISHED(mtcp, cur_stream, pctx);
1349d8823779SAsim Jamshed 		}
135076404edcSAsim Jamshed 		break;
135176404edcSAsim Jamshed 
135276404edcSAsim Jamshed 	case TCP_ST_ESTABLISHED:
135376404edcSAsim Jamshed 		Handle_TCP_ST_ESTABLISHED(mtcp, cur_stream, pctx);
135476404edcSAsim Jamshed 		break;
135576404edcSAsim Jamshed 
135676404edcSAsim Jamshed 	case TCP_ST_CLOSE_WAIT:
135776404edcSAsim Jamshed 		Handle_TCP_ST_CLOSE_WAIT(mtcp, cur_stream, pctx);
135876404edcSAsim Jamshed 		break;
135976404edcSAsim Jamshed 
136076404edcSAsim Jamshed 	case TCP_ST_LAST_ACK:
136176404edcSAsim Jamshed 		Handle_TCP_ST_LAST_ACK(mtcp, cur_stream, pctx);
136276404edcSAsim Jamshed 		break;
136376404edcSAsim Jamshed 
136476404edcSAsim Jamshed 	case TCP_ST_FIN_WAIT_1:
136576404edcSAsim Jamshed 		Handle_TCP_ST_FIN_WAIT_1(mtcp, cur_stream, pctx);
136676404edcSAsim Jamshed 		break;
136776404edcSAsim Jamshed 
136876404edcSAsim Jamshed 	case TCP_ST_FIN_WAIT_2:
136976404edcSAsim Jamshed 		Handle_TCP_ST_FIN_WAIT_2(mtcp, cur_stream, pctx);
137076404edcSAsim Jamshed 		break;
137176404edcSAsim Jamshed 
137276404edcSAsim Jamshed 	case TCP_ST_CLOSING:
137376404edcSAsim Jamshed 		Handle_TCP_ST_CLOSING(mtcp, cur_stream, pctx);
137476404edcSAsim Jamshed 		break;
137576404edcSAsim Jamshed 
137676404edcSAsim Jamshed 	case TCP_ST_TIME_WAIT:
137776404edcSAsim Jamshed 		/* the only thing that can arrive in this state is a retransmission
137876404edcSAsim Jamshed 		   of the remote FIN. Acknowledge it, and restart the 2 MSL timeout */
137976404edcSAsim Jamshed 		if (cur_stream->on_timewait_list) {
138076404edcSAsim Jamshed 			RemoveFromTimewaitList(mtcp, cur_stream);
138176404edcSAsim Jamshed 			AddtoTimewaitList(mtcp, cur_stream, pctx->p.cur_ts);
138276404edcSAsim Jamshed 		}
138376404edcSAsim Jamshed 		cur_stream->actions |= MOS_ACT_SEND_CONTROL;
138476404edcSAsim Jamshed 		break;
138576404edcSAsim Jamshed 
138676404edcSAsim Jamshed 	case TCP_ST_CLOSED:
138776404edcSAsim Jamshed 	case TCP_ST_CLOSED_RSVD:
138876404edcSAsim Jamshed 		break;
138976404edcSAsim Jamshed 
139076404edcSAsim Jamshed 	default:
139176404edcSAsim Jamshed 		break;
139276404edcSAsim Jamshed 	}
139376404edcSAsim Jamshed 
139476404edcSAsim Jamshed 	TRACE_STATE("Stream %d: Events: %0lx, Action: %0x\n",
139576404edcSAsim Jamshed 			cur_stream->id, cur_stream->cb_events, cur_stream->actions);
139676404edcSAsim Jamshed 	return;
139776404edcSAsim Jamshed }
139876404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
139976404edcSAsim Jamshed void
140076404edcSAsim Jamshed DoActionEndTCPPacket(mtcp_manager_t mtcp, struct tcp_stream *cur_stream,
140176404edcSAsim Jamshed 		struct pkt_ctx *pctx)
140276404edcSAsim Jamshed {
140376404edcSAsim Jamshed 	int i;
140476404edcSAsim Jamshed 
140576404edcSAsim Jamshed 	for (i = 1; i < MOS_ACT_CNT; i = i << 1) {
140676404edcSAsim Jamshed 
140776404edcSAsim Jamshed 		if (cur_stream->actions & i) {
140876404edcSAsim Jamshed 			switch(i) {
140976404edcSAsim Jamshed 			case MOS_ACT_SEND_DATA:
141076404edcSAsim Jamshed 				AddtoSendList(mtcp, cur_stream);
141176404edcSAsim Jamshed 				break;
141276404edcSAsim Jamshed 			case MOS_ACT_SEND_ACK_NOW:
141376404edcSAsim Jamshed 				EnqueueACK(mtcp, cur_stream, pctx->p.cur_ts, ACK_OPT_NOW);
141476404edcSAsim Jamshed 				break;
141576404edcSAsim Jamshed 			case MOS_ACT_SEND_ACK_AGG:
141676404edcSAsim Jamshed 				EnqueueACK(mtcp, cur_stream, pctx->p.cur_ts, ACK_OPT_AGGREGATE);
141776404edcSAsim Jamshed 				break;
141876404edcSAsim Jamshed 			case MOS_ACT_SEND_CONTROL:
141976404edcSAsim Jamshed 				AddtoControlList(mtcp, cur_stream, pctx->p.cur_ts);
142076404edcSAsim Jamshed 				break;
142176404edcSAsim Jamshed 			case MOS_ACT_SEND_RST:
142276404edcSAsim Jamshed 				if (cur_stream->state <= TCP_ST_SYN_SENT)
142376404edcSAsim Jamshed 					SendTCPPacketStandalone(mtcp,
142476404edcSAsim Jamshed 							pctx->p.iph->daddr, pctx->p.tcph->dest,
142576404edcSAsim Jamshed 							pctx->p.iph->saddr, pctx->p.tcph->source,
142676404edcSAsim Jamshed 							0, pctx->p.seq + 1, 0, TCP_FLAG_RST | TCP_FLAG_ACK,
1427a834ea89SAsim Jamshed 							NULL, 0, pctx->p.cur_ts, 0, 0, -1);
142876404edcSAsim Jamshed 				else
142976404edcSAsim Jamshed 					SendTCPPacketStandalone(mtcp,
143076404edcSAsim Jamshed 							pctx->p.iph->daddr, pctx->p.tcph->dest,
143176404edcSAsim Jamshed 							pctx->p.iph->saddr, pctx->p.tcph->source,
143276404edcSAsim Jamshed 							pctx->p.ack_seq, 0, 0, TCP_FLAG_RST | TCP_FLAG_ACK,
1433a834ea89SAsim Jamshed 							NULL, 0, pctx->p.cur_ts, 0, 0, -1);
143476404edcSAsim Jamshed 				break;
143576404edcSAsim Jamshed 			case MOS_ACT_DESTROY:
143676404edcSAsim Jamshed 				DestroyTCPStream(mtcp, cur_stream);
143776404edcSAsim Jamshed 				break;
143876404edcSAsim Jamshed 			default:
143976404edcSAsim Jamshed 				assert(1);
144076404edcSAsim Jamshed 				break;
144176404edcSAsim Jamshed 			}
144276404edcSAsim Jamshed 		}
144376404edcSAsim Jamshed 	}
144476404edcSAsim Jamshed 
144576404edcSAsim Jamshed 	cur_stream->actions = 0;
144676404edcSAsim Jamshed }
144776404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
144876404edcSAsim Jamshed /**
144976404edcSAsim Jamshed  * Called (when monitoring mode is enabled).. for every outgoing packet to the
145076404edcSAsim Jamshed  * NIC.
145176404edcSAsim Jamshed  */
145276404edcSAsim Jamshed inline void
145376404edcSAsim Jamshed UpdatePassiveRecvTCPContext(mtcp_manager_t mtcp, struct tcp_stream *cur_stream,
145476404edcSAsim Jamshed 			       struct pkt_ctx *pctx)
145576404edcSAsim Jamshed {
145676404edcSAsim Jamshed 	UpdateRecvTCPContext(mtcp, cur_stream, pctx);
145776404edcSAsim Jamshed }
145876404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
1459a834ea89SAsim Jamshed /* NOTE TODO: This event prediction is additional overhaed of HK_RCV hook.
146076404edcSAsim Jamshed  * We can transparently optimize this by disabling prediction of events which
146176404edcSAsim Jamshed  * are not monitored by anyone. */
146276404edcSAsim Jamshed inline void
146376404edcSAsim Jamshed PreRecvTCPEventPrediction(mtcp_manager_t mtcp, struct pkt_ctx *pctx,
146476404edcSAsim Jamshed 			  struct tcp_stream *recvside_stream)
146576404edcSAsim Jamshed {
1466a834ea89SAsim Jamshed 	tcp_rb_overlapchk(mtcp, pctx, recvside_stream);
146776404edcSAsim Jamshed }
146876404edcSAsim Jamshed /*----------------------------------------------------------------------------*/
1469