1 #include <unistd.h>
2 #include <string.h>
3
4 #include "tcp_out.h"
5 #include "mtcp.h"
6 #include "ip_in.h"
7 #include "ip_out.h"
8 #include "tcp_in.h"
9 #include "tcp.h"
10 #include "tcp_stream.h"
11 #include "eventpoll.h"
12 #include "timer.h"
13 #include "debug.h"
14 #include "config.h"
15
/* Fill in the TCP checksum of outgoing segments (device offload or software). */
#define TCP_CALCULATE_CHECKSUM	TRUE
/* Let data segments flushed from the send list count against pending ACKs. */
#define ACK_PIGGYBACK		TRUE
/* Enable this for higher concurrency rate experiments */
#define TRY_SEND_BEFORE_QUEUE	TRUE	/* alternative: FALSE */

/* Upper bound applied to the value written into the TCP header window field. */
#define TCP_MAX_WINDOW		65535

/* Note: both helpers may evaluate an argument more than once. */
#define MAX(a, b)	((a) > (b) ? (a) : (b))
#define MIN(a, b)	((a) < (b) ? (a) : (b))
25
26 /*----------------------------------------------------------------------------*/
27 static inline uint16_t
CalculateOptionLength(uint8_t flags)28 CalculateOptionLength(uint8_t flags)
29 {
30 uint16_t optlen = 0;
31
32 if (flags & TCP_FLAG_SYN) {
33 optlen += TCP_OPT_MSS_LEN;
34 #if TCP_OPT_SACK_ENABLED
35 optlen += TCP_OPT_SACK_PERMIT_LEN;
36 #if !TCP_OPT_TIMESTAMP_ENABLED
37 optlen += 2; // insert NOP padding
38 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
39 #endif /* TCP_OPT_SACK_ENABLED */
40
41 #if TCP_OPT_TIMESTAMP_ENABLED
42 optlen += TCP_OPT_TIMESTAMP_LEN;
43 #if !TCP_OPT_SACK_ENABLED
44 optlen += 2; // insert NOP padding
45 #endif /* TCP_OPT_SACK_ENABLED */
46 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
47
48 optlen += TCP_OPT_WSCALE_LEN + 1;
49
50 } else {
51
52 #if TCP_OPT_TIMESTAMP_ENABLED
53 optlen += TCP_OPT_TIMESTAMP_LEN + 2;
54 #endif
55
56 #if TCP_OPT_SACK_ENABLED
57 if (flags & TCP_FLAG_SACK) {
58 optlen += TCP_OPT_SACK_LEN + 2;
59 }
60 #endif
61 }
62
63 assert(optlen % 4 == 0);
64
65 return optlen;
66 }
67 /*----------------------------------------------------------------------------*/
68 static inline void
GenerateTCPTimestamp(tcp_stream * cur_stream,uint8_t * tcpopt,uint32_t cur_ts)69 GenerateTCPTimestamp(tcp_stream *cur_stream, uint8_t *tcpopt, uint32_t cur_ts)
70 {
71 uint32_t *ts = (uint32_t *)(tcpopt + 2);
72
73 tcpopt[0] = TCP_OPT_TIMESTAMP;
74 tcpopt[1] = TCP_OPT_TIMESTAMP_LEN;
75 ts[0] = htonl(cur_ts);
76 ts[1] = htonl(cur_stream->rcvvar->ts_recent);
77 }
78 /*----------------------------------------------------------------------------*/
79 static inline void
GenerateTCPOptions(tcp_stream * cur_stream,uint32_t cur_ts,uint8_t flags,uint8_t * tcpopt,uint16_t optlen)80 GenerateTCPOptions(tcp_stream *cur_stream, uint32_t cur_ts,
81 uint8_t flags, uint8_t *tcpopt, uint16_t optlen)
82 {
83 int i = 0;
84
85 if (flags & TCP_FLAG_SYN) {
86 uint16_t mss;
87
88 /* MSS option */
89 mss = cur_stream->sndvar->mss;
90 tcpopt[i++] = TCP_OPT_MSS;
91 tcpopt[i++] = TCP_OPT_MSS_LEN;
92 tcpopt[i++] = mss >> 8;
93 tcpopt[i++] = mss % 256;
94
95 /* SACK permit */
96 #if TCP_OPT_SACK_ENABLED
97 #if !TCP_OPT_TIMESTAMP_ENABLED
98 tcpopt[i++] = TCP_OPT_NOP;
99 tcpopt[i++] = TCP_OPT_NOP;
100 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
101 tcpopt[i++] = TCP_OPT_SACK_PERMIT;
102 tcpopt[i++] = TCP_OPT_SACK_PERMIT_LEN;
103 TRACE_SACK("Local SACK permited.\n");
104 #endif /* TCP_OPT_SACK_ENABLED */
105
106 /* Timestamp */
107 #if TCP_OPT_TIMESTAMP_ENABLED
108 #if !TCP_OPT_SACK_ENABLED
109 tcpopt[i++] = TCP_OPT_NOP;
110 tcpopt[i++] = TCP_OPT_NOP;
111 #endif /* TCP_OPT_SACK_ENABLED */
112 GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
113 i += TCP_OPT_TIMESTAMP_LEN;
114 #endif /* TCP_OPT_TIMESTAMP_ENABLED */
115
116 /* Window scale */
117 tcpopt[i++] = TCP_OPT_NOP;
118 tcpopt[i++] = TCP_OPT_WSCALE;
119 tcpopt[i++] = TCP_OPT_WSCALE_LEN;
120 tcpopt[i++] = cur_stream->sndvar->wscale_mine;
121
122 } else {
123
124 #if TCP_OPT_TIMESTAMP_ENABLED
125 tcpopt[i++] = TCP_OPT_NOP;
126 tcpopt[i++] = TCP_OPT_NOP;
127 GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
128 i += TCP_OPT_TIMESTAMP_LEN;
129 #endif
130
131 #if TCP_OPT_SACK_ENABLED
132 if (flags & TCP_OPT_SACK) {
133 // TODO: implement SACK support
134 }
135 #endif
136 }
137
138 assert (i == optlen);
139 }
140 /*----------------------------------------------------------------------------*/
141 int
SendTCPPacketStandalone(struct mtcp_manager * mtcp,uint32_t saddr,uint16_t sport,uint32_t daddr,uint16_t dport,uint32_t seq,uint32_t ack_seq,uint16_t window,uint8_t flags,uint8_t * payload,uint16_t payloadlen,uint32_t cur_ts,uint32_t echo_ts,uint16_t ip_id,int8_t in_ifidx)142 SendTCPPacketStandalone(struct mtcp_manager *mtcp,
143 uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport,
144 uint32_t seq, uint32_t ack_seq, uint16_t window, uint8_t flags,
145 uint8_t *payload, uint16_t payloadlen,
146 uint32_t cur_ts, uint32_t echo_ts, uint16_t ip_id, int8_t in_ifidx)
147 {
148 struct tcphdr *tcph;
149 uint8_t *tcpopt;
150 uint32_t *ts;
151 uint16_t optlen;
152 struct pkt_ctx pctx;
153 int rc = -1;
154
155 memset(&pctx, 0, sizeof(pctx));
156 pctx.p.in_ifidx = in_ifidx;
157 optlen = CalculateOptionLength(flags);
158 if (payloadlen > TCP_DEFAULT_MSS + optlen) {
159 TRACE_ERROR("Payload size exceeds MSS.\n");
160 assert(0);
161 return ERROR;
162 }
163
164 tcph = (struct tcphdr *)IPOutputStandalone(mtcp, htons(ip_id),
165 saddr, daddr, TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
166 if (tcph == NULL) {
167 return ERROR;
168 }
169 memset(tcph, 0, TCP_HEADER_LEN + optlen);
170
171 tcph->source = sport;
172 tcph->dest = dport;
173
174 if (flags & TCP_FLAG_SYN)
175 tcph->syn = TRUE;
176 if (flags & TCP_FLAG_FIN)
177 tcph->fin = TRUE;
178 if (flags & TCP_FLAG_RST)
179 tcph->rst = TRUE;
180 if (flags & TCP_FLAG_PSH)
181 tcph->psh = TRUE;
182
183 tcph->seq = htonl(seq);
184 if (flags & TCP_FLAG_ACK) {
185 tcph->ack = TRUE;
186 tcph->ack_seq = htonl(ack_seq);
187 }
188
189 tcph->window = htons(MIN(window, TCP_MAX_WINDOW));
190
191 tcpopt = (uint8_t *)tcph + TCP_HEADER_LEN;
192 ts = (uint32_t *)(tcpopt + 4);
193
194 tcpopt[0] = TCP_OPT_NOP;
195 tcpopt[1] = TCP_OPT_NOP;
196 tcpopt[2] = TCP_OPT_TIMESTAMP;
197 tcpopt[3] = TCP_OPT_TIMESTAMP_LEN;
198 ts[0] = htonl(cur_ts);
199 ts[1] = htonl(echo_ts);
200
201 tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
202 // copy payload if exist
203 if (payloadlen > 0) {
204 memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
205 }
206
207 #if TCP_CALCULATE_CHECKSUM
208 /* offload TCP checkum if possible */
209 if (likely(mtcp->iom->dev_ioctl != NULL))
210 rc = mtcp->iom->dev_ioctl(mtcp->ctx,
211 pctx.out_ifidx,
212 PKT_TX_TCP_CSUM,
213 pctx.p.iph);
214 /* otherwise calculate TCP checksum in S/W */
215 if (rc == -1)
216 tcph->check = TCPCalcChecksum((uint16_t *)tcph,
217 TCP_HEADER_LEN +
218 optlen + payloadlen,
219 saddr, daddr);
220 #endif
221
222 if (tcph->syn || tcph->fin) {
223 payloadlen++;
224 }
225
226 #ifdef PKTDUMP
227 DumpPacket(mtcp,
228 (char *)tcph - sizeof(struct iphdr) - sizeof(struct ethhdr),
229 payloadlen + sizeof(struct iphdr) + sizeof(struct ethhdr),
230 "OUT", -1);
231 #endif
232
233 struct mon_listener *walk;
234 /* callback for monitor raw socket */
235 TAILQ_FOREACH(walk, &mtcp->monitors, link)
236 if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
237 if (ISSET_BPFFILTER(walk->raw_pkt_fcode) &&
238 EVAL_BPFFILTER(walk->raw_pkt_fcode, (uint8_t *)pctx.p.ethh,
239 pctx.p.eth_len))
240 HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
241 &pctx, MOS_ON_PKT_IN);
242
243 return payloadlen;
244 }
245 /*----------------------------------------------------------------------------*/
246 int
SendTCPPacket(struct mtcp_manager * mtcp,tcp_stream * cur_stream,uint32_t cur_ts,uint8_t flags,uint8_t * payload,uint16_t payloadlen)247 SendTCPPacket(struct mtcp_manager *mtcp, tcp_stream *cur_stream,
248 uint32_t cur_ts, uint8_t flags, uint8_t *payload, uint16_t payloadlen)
249 {
250 struct tcphdr *tcph;
251 uint16_t optlen;
252 uint8_t wscale = 0;
253 uint32_t window32 = 0;
254 struct pkt_ctx pctx;
255 int rc = -1;
256
257 memset(&pctx, 0, sizeof(pctx));
258 optlen = CalculateOptionLength(flags);
259 if (payloadlen > cur_stream->sndvar->mss + optlen) {
260 TRACE_ERROR("Payload size exceeds MSS\n");
261 return ERROR;
262 }
263
264 tcph = (struct tcphdr *)IPOutput(mtcp, cur_stream,
265 TCP_HEADER_LEN + optlen + payloadlen, &pctx, cur_ts);
266 if (tcph == NULL) {
267 return -2;
268 }
269 memset(tcph, 0, TCP_HEADER_LEN + optlen);
270
271 tcph->source = cur_stream->sport;
272 tcph->dest = cur_stream->dport;
273
274 if (flags & TCP_FLAG_SYN) {
275 tcph->syn = TRUE;
276 if (cur_stream->snd_nxt != cur_stream->sndvar->iss) {
277 TRACE_DBG("Stream %d: weird SYN sequence. "
278 "snd_nxt: %u, iss: %u\n", cur_stream->id,
279 cur_stream->snd_nxt, cur_stream->sndvar->iss);
280 }
281 TRACE_DBG("Stream %d: Sending SYN. seq: %u, ack_seq: %u\n",
282 cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
283 }
284 if (flags & TCP_FLAG_RST) {
285 TRACE_FIN("Stream %d: Sending RST.\n", cur_stream->id);
286 tcph->rst = TRUE;
287 }
288 if (flags & TCP_FLAG_PSH)
289 tcph->psh = TRUE;
290
291 if (flags & TCP_FLAG_WACK) {
292 tcph->seq = htonl(cur_stream->snd_nxt - 1);
293 TRACE_CLWND("%u Sending ACK to get new window advertisement. "
294 "seq: %u, peer_wnd: %u, snd_nxt - snd_una: %u\n",
295 cur_stream->id,
296 cur_stream->snd_nxt - 1, cur_stream->sndvar->peer_wnd,
297 cur_stream->snd_nxt - cur_stream->sndvar->snd_una);
298 } else if (flags & TCP_FLAG_FIN) {
299 tcph->fin = TRUE;
300
301 if (cur_stream->sndvar->fss == 0) {
302 TRACE_ERROR("Stream %u: not fss set. closed: %u\n",
303 cur_stream->id, cur_stream->closed);
304 }
305 tcph->seq = htonl(cur_stream->sndvar->fss);
306 cur_stream->sndvar->is_fin_sent = TRUE;
307 TRACE_FIN("Stream %d: Sending FIN. seq: %u, ack_seq: %u\n",
308 cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
309 } else {
310 tcph->seq = htonl(cur_stream->snd_nxt);
311 }
312
313 if (flags & TCP_FLAG_ACK) {
314 tcph->ack = TRUE;
315 tcph->ack_seq = htonl(cur_stream->rcv_nxt);
316 cur_stream->sndvar->ts_lastack_sent = cur_ts;
317 cur_stream->last_active_ts = cur_ts;
318 UpdateTimeoutList(mtcp, cur_stream);
319 }
320
321 if (flags & TCP_FLAG_SYN) {
322 wscale = 0;
323 } else {
324 wscale = cur_stream->sndvar->wscale_mine;
325 }
326
327 window32 = cur_stream->rcvvar->rcv_wnd >> wscale;
328 tcph->window = htons((uint16_t)MIN(window32, TCP_MAX_WINDOW));
329 /* if the advertised window is 0, we need to advertise again later */
330 if (window32 == 0) {
331 cur_stream->need_wnd_adv = TRUE;
332 }
333
334 GenerateTCPOptions(cur_stream, cur_ts, flags,
335 (uint8_t *)tcph + TCP_HEADER_LEN, optlen);
336
337 tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
338 // copy payload if exist
339 if (payloadlen > 0) {
340 memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
341 }
342
343 #if TCP_CALCULATE_CHECKSUM
344 if (likely(mtcp->iom->dev_ioctl != NULL))
345 rc = mtcp->iom->dev_ioctl(mtcp->ctx,
346 pctx.out_ifidx,
347 PKT_TX_TCP_CSUM,
348 pctx.p.iph);
349 if (rc == -1)
350 tcph->check = TCPCalcChecksum((uint16_t *)tcph,
351 TCP_HEADER_LEN +
352 optlen + payloadlen,
353 cur_stream->saddr,
354 cur_stream->daddr);
355 #endif
356 cur_stream->snd_nxt += payloadlen;
357
358 if (tcph->syn || tcph->fin) {
359 cur_stream->snd_nxt++;
360 payloadlen++;
361 }
362
363 if (payloadlen > 0) {
364 if (cur_stream->state > TCP_ST_ESTABLISHED) {
365 TRACE_FIN("Payload after ESTABLISHED: length: %d, snd_nxt: %u\n",
366 payloadlen, cur_stream->snd_nxt);
367 }
368
369 /* update retransmission timer if have payload */
370 cur_stream->sndvar->ts_rto = cur_ts + cur_stream->sndvar->rto;
371 TRACE_RTO("Updating retransmission timer. "
372 "cur_ts: %u, rto: %u, ts_rto: %u\n",
373 cur_ts, cur_stream->sndvar->rto, cur_stream->sndvar->ts_rto);
374 AddtoRTOList(mtcp, cur_stream);
375 }
376
377 struct mon_listener *walk;
378 /* callback for monitor raw socket */
379 TAILQ_FOREACH(walk, &mtcp->monitors, link)
380 if (walk->socket->socktype == MOS_SOCK_MONITOR_RAW)
381 if (ISSET_BPFFILTER(walk->raw_pkt_fcode) &&
382 EVAL_BPFFILTER(walk->raw_pkt_fcode, (uint8_t *)pctx.p.ethh,
383 pctx.p.eth_len))
384 HandleCallback(mtcp, MOS_NULL, walk->socket, MOS_SIDE_BOTH,
385 &pctx, MOS_ON_PKT_IN);
386
387 if (mtcp->num_msp /* this means that stream monitor is on */) {
388 FillPacketContextTCPInfo(&pctx, tcph);
389
390 /* New abstraction for monitor stream */
391 struct tcp_stream *recvside_stream = cur_stream->pair_stream;
392 struct tcp_stream *sendside_stream = cur_stream;
393
394 if (recvside_stream) {
395 if (recvside_stream->rcvvar && recvside_stream->rcvvar->rcvbuf)
396 pctx.p.offset = (uint64_t)seq2loff(recvside_stream->rcvvar->rcvbuf,
397 pctx.p.seq,
398 recvside_stream->rcvvar->irs + 1);
399 UpdateMonitor(mtcp, sendside_stream, recvside_stream, &pctx, false);
400 }
401 }
402
403 #ifdef PKTDUMP
404 DumpPacket(mtcp,
405 (char *)tcph - sizeof(struct iphdr) - sizeof(struct ethhdr),
406 payloadlen + sizeof(struct iphdr) + sizeof(struct ethhdr),
407 "OUT", -1);
408 #endif
409
410
411 return payloadlen;
412 }
413 /*----------------------------------------------------------------------------*/
414 static int
FlushTCPSendingBuffer(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t cur_ts)415 FlushTCPSendingBuffer(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
416 {
417 struct tcp_send_vars *sndvar = cur_stream->sndvar;
418 const uint32_t maxlen = sndvar->mss - CalculateOptionLength(TCP_FLAG_ACK);
419 uint8_t *data;
420 uint32_t buffered_len;
421 uint32_t seq;
422 uint16_t len;
423 int16_t sndlen;
424 uint32_t window;
425 int packets = 0;
426 uint8_t wack_sent = 0;
427
428 if (!sndvar->sndbuf) {
429 TRACE_ERROR("Stream %d: No send buffer available.\n", cur_stream->id);
430 assert(0);
431 return 0;
432 }
433
434 SBUF_LOCK(&sndvar->write_lock);
435
436 if (sndvar->sndbuf->len == 0) {
437 packets = 0;
438 goto out;
439 }
440
441 window = MIN(sndvar->cwnd, sndvar->peer_wnd);
442
443 while (1) {
444 seq = cur_stream->snd_nxt;
445
446 if (TCP_SEQ_LT(seq, sndvar->sndbuf->head_seq)) {
447 TRACE_ERROR("Stream %d: Invalid sequence to send. "
448 "state: %s, seq: %u, head_seq: %u.\n",
449 cur_stream->id, TCPStateToString(cur_stream),
450 seq, sndvar->sndbuf->head_seq);
451 assert(0);
452 break;
453 }
454 buffered_len = sndvar->sndbuf->head_seq + sndvar->sndbuf->len - seq;
455 if (cur_stream->state > TCP_ST_ESTABLISHED) {
456 TRACE_FIN("head_seq: %u, len: %u, seq: %u, "
457 "buffered_len: %u\n", sndvar->sndbuf->head_seq,
458 sndvar->sndbuf->len, seq, buffered_len);
459 }
460 if (buffered_len == 0)
461 break;
462
463 data = sndvar->sndbuf->head +
464 (seq - sndvar->sndbuf->head_seq);
465
466 if (buffered_len > maxlen) {
467 len = maxlen;
468 } else {
469 len = buffered_len;
470 }
471
472 if (len > window)
473 len = window;
474
475 if (len <= 0)
476 break;
477
478 if (cur_stream->state > TCP_ST_ESTABLISHED) {
479 TRACE_FIN("Flushing after ESTABLISHED: seq: %u, len: %u, "
480 "buffered_len: %u\n", seq, len, buffered_len);
481 }
482
483 if (seq - sndvar->snd_una + len > window) {
484 /* Ask for new window advertisement to peer */
485 if (seq - sndvar->snd_una + len > sndvar->peer_wnd) {
486 TRACE_DBG("Full peer window. "
487 "peer_wnd: %u, (snd_nxt-snd_una): %u\n",
488 sndvar->peer_wnd, seq - sndvar->snd_una);
489 if (!wack_sent && TS_TO_MSEC(cur_ts - sndvar->ts_lastack_sent) > 500) {
490 EnqueueACK(mtcp, cur_stream, cur_ts, ACK_OPT_WACK);
491 } else
492 wack_sent = 1;
493 }
494 packets = -3;
495 goto out;
496 }
497
498 sndlen = SendTCPPacket(mtcp, cur_stream, cur_ts,
499 TCP_FLAG_ACK, data, len);
500 if (sndlen < 0) {
501 packets = sndlen;
502 goto out;
503 }
504 packets++;
505
506 window -= len;
507 }
508
509 out:
510 SBUF_UNLOCK(&sndvar->write_lock);
511 return packets;
512 }
513 /*----------------------------------------------------------------------------*/
514 static inline int
SendControlPacket(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t cur_ts)515 SendControlPacket(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
516 {
517 struct tcp_send_vars *sndvar = cur_stream->sndvar;
518 int ret = 0;
519 int flag = 0;
520
521 switch (cur_stream->state) {
522 case TCP_ST_SYN_SENT: /* Send SYN here */
523 flag = TCP_FLAG_SYN;
524 break;
525 case TCP_ST_SYN_RCVD: /* Send SYN/ACK here */
526 cur_stream->snd_nxt = sndvar->iss;
527 flag = TCP_FLAG_SYN | TCP_FLAG_ACK;
528 break;
529 case TCP_ST_ESTABLISHED: /* Send ACK here */
530 case TCP_ST_CLOSE_WAIT: /* Send ACK for the FIN here */
531 case TCP_ST_FIN_WAIT_2: /* Send ACK here */
532 case TCP_ST_TIME_WAIT: /* Send ACK here */
533 flag = TCP_FLAG_ACK;
534 break;
535 case TCP_ST_LAST_ACK:
536 case TCP_ST_FIN_WAIT_1:
537 /* if it is on ack_list, send it after sending ack */
538 if (sndvar->on_send_list || sndvar->on_ack_list)
539 return (-1);
540 flag = TCP_FLAG_FIN | TCP_FLAG_ACK; /* Send FIN/ACK here */
541 break;
542 case TCP_ST_CLOSING:
543 if (sndvar->is_fin_sent) {
544 /* if the sequence is for FIN, send FIN */
545 flag = (cur_stream->snd_nxt == sndvar->fss) ?
546 (TCP_FLAG_FIN | TCP_FLAG_ACK) : TCP_FLAG_ACK;
547 } else {
548 /* if FIN is not sent, send fin with ack */
549 flag = TCP_FLAG_FIN | TCP_FLAG_ACK;
550 }
551 case TCP_ST_CLOSED_RSVD: /* Send RST here */
552 TRACE_DBG("Stream %d: Try sending RST (TCP_ST_CLOSED_RSVD)\n",
553 cur_stream->id);
554 /* first flush the data and ack */
555 if (sndvar->on_send_list || sndvar->on_ack_list)
556 return (-1);
557 ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_RST, NULL, 0);
558 if (ret >= 0)
559 DestroyTCPStream(mtcp, cur_stream);
560 return (ret);
561 default:
562 TRACE_ERROR("Stream %d: shouldn't send a control packet\n",
563 cur_stream->id);
564 assert(0); /* can't reach here! */
565 return (0);
566 }
567
568 return SendTCPPacket(mtcp, cur_stream, cur_ts, flag, NULL, 0);
569 }
570 /*----------------------------------------------------------------------------*/
571 inline int
WriteTCPControlList(mtcp_manager_t mtcp,struct mtcp_sender * sender,uint32_t cur_ts,int thresh)572 WriteTCPControlList(mtcp_manager_t mtcp,
573 struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
574 {
575 tcp_stream *cur_stream;
576 tcp_stream *next, *last;
577 int cnt = 0;
578 int ret;
579
580 thresh = MIN(thresh, sender->control_list_cnt);
581
582 /* Send TCP control messages */
583 cnt = 0;
584 cur_stream = TAILQ_FIRST(&sender->control_list);
585 last = TAILQ_LAST(&sender->control_list, control_head);
586 while (cur_stream) {
587 if (++cnt > thresh)
588 break;
589
590 TRACE_LOOP("Inside control loop. cnt: %u, stream: %d\n",
591 cnt, cur_stream->id);
592 next = TAILQ_NEXT(cur_stream, sndvar->control_link);
593
594 TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
595 sender->control_list_cnt--;
596
597 if (cur_stream->sndvar->on_control_list) {
598 cur_stream->sndvar->on_control_list = FALSE;
599 //TRACE_DBG("Stream %u: Sending control packet\n", cur_stream->id);
600 ret = SendControlPacket(mtcp, cur_stream, cur_ts);
601 if (ret < 0) {
602 TAILQ_INSERT_HEAD(&sender->control_list,
603 cur_stream, sndvar->control_link);
604 cur_stream->sndvar->on_control_list = TRUE;
605 sender->control_list_cnt++;
606 /* since there is no available write buffer, break */
607 break;
608 }
609 } else {
610 TRACE_ERROR("Stream %d: not on control list.\n", cur_stream->id);
611 }
612
613 if (cur_stream == last)
614 break;
615 cur_stream = next;
616 }
617
618 return cnt;
619 }
620 /*----------------------------------------------------------------------------*/
621 inline int
WriteTCPDataList(mtcp_manager_t mtcp,struct mtcp_sender * sender,uint32_t cur_ts,int thresh)622 WriteTCPDataList(mtcp_manager_t mtcp,
623 struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
624 {
625 tcp_stream *cur_stream;
626 tcp_stream *next, *last;
627 int cnt = 0;
628 int ret;
629
630 /* Send data */
631 cnt = 0;
632 cur_stream = TAILQ_FIRST(&sender->send_list);
633 last = TAILQ_LAST(&sender->send_list, send_head);
634 while (cur_stream) {
635 if (++cnt > thresh)
636 break;
637
638 TRACE_LOOP("Inside send loop. cnt: %u, stream: %d\n",
639 cnt, cur_stream->id);
640 next = TAILQ_NEXT(cur_stream, sndvar->send_link);
641
642 TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
643 if (cur_stream->sndvar->on_send_list) {
644 ret = 0;
645
646 /* Send data here */
647 /* Only can send data when ESTABLISHED or CLOSE_WAIT */
648 if (cur_stream->state == TCP_ST_ESTABLISHED) {
649 if (cur_stream->sndvar->on_control_list) {
650 /* delay sending data after until on_control_list becomes off */
651 //TRACE_DBG("Stream %u: delay sending data.\n", cur_stream->id);
652 ret = -1;
653 } else {
654 ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
655 }
656 } else if (cur_stream->state == TCP_ST_CLOSE_WAIT ||
657 cur_stream->state == TCP_ST_FIN_WAIT_1 ||
658 cur_stream->state == TCP_ST_LAST_ACK) {
659 ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
660 } else {
661 TRACE_DBG("Stream %d: on_send_list at state %s\n",
662 cur_stream->id, TCPStateToString(cur_stream));
663 #if DUMP_STREAM
664 DumpStream(mtcp, cur_stream);
665 #endif
666 }
667
668 if (ret < 0) {
669 TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
670 /* since there is no available write buffer, break */
671 break;
672
673 } else {
674 cur_stream->sndvar->on_send_list = FALSE;
675 sender->send_list_cnt--;
676 /* the ret value is the number of packets sent. */
677 /* decrease ack_cnt for the piggybacked acks */
678 #if ACK_PIGGYBACK
679 if (cur_stream->sndvar->ack_cnt > 0) {
680 if (cur_stream->sndvar->ack_cnt > ret) {
681 cur_stream->sndvar->ack_cnt -= ret;
682 } else {
683 cur_stream->sndvar->ack_cnt = 0;
684 }
685 }
686 #endif
687 #if 1
688 if (cur_stream->control_list_waiting) {
689 if (!cur_stream->sndvar->on_ack_list) {
690 cur_stream->control_list_waiting = FALSE;
691 AddtoControlList(mtcp, cur_stream, cur_ts);
692 }
693 }
694 #endif
695 }
696 } else {
697 TRACE_ERROR("Stream %d: not on send list.\n", cur_stream->id);
698 #ifdef DUMP_STREAM
699 DumpStream(mtcp, cur_stream);
700 #endif
701 }
702
703 if (cur_stream == last)
704 break;
705 cur_stream = next;
706 }
707
708 return cnt;
709 }
710 /*----------------------------------------------------------------------------*/
711 inline int
WriteTCPACKList(mtcp_manager_t mtcp,struct mtcp_sender * sender,uint32_t cur_ts,int thresh)712 WriteTCPACKList(mtcp_manager_t mtcp,
713 struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
714 {
715 tcp_stream *cur_stream;
716 tcp_stream *next, *last;
717 int to_ack;
718 int cnt = 0;
719 int ret;
720
721 /* Send aggregated acks */
722 cnt = 0;
723 cur_stream = TAILQ_FIRST(&sender->ack_list);
724 last = TAILQ_LAST(&sender->ack_list, ack_head);
725 while (cur_stream) {
726 if (++cnt > thresh)
727 break;
728
729 TRACE_LOOP("Inside ack loop. cnt: %u\n", cnt);
730 next = TAILQ_NEXT(cur_stream, sndvar->ack_link);
731
732 if (cur_stream->sndvar->on_ack_list) {
733 /* this list is only to ack the data packets */
734 /* if the ack is not data ack, then it will not process here */
735 to_ack = FALSE;
736 if (cur_stream->state == TCP_ST_ESTABLISHED ||
737 cur_stream->state == TCP_ST_CLOSE_WAIT ||
738 cur_stream->state == TCP_ST_FIN_WAIT_1 ||
739 cur_stream->state == TCP_ST_FIN_WAIT_2 ||
740 cur_stream->state == TCP_ST_TIME_WAIT) {
741 /* TIMEWAIT is possible since the ack is queued
742 at FIN_WAIT_2 */
743 tcprb_t *rb;
744 if ((rb = cur_stream->rcvvar->rcvbuf) &&
745 TCP_SEQ_LEQ(cur_stream->rcv_nxt,
746 (cur_stream->rcvvar->irs + 1) + rb->pile
747 + tcprb_cflen(rb))) {
748 to_ack = TRUE;
749 }
750 } else {
751 TRACE_DBG("Stream %u (%s): "
752 "Try sending ack at not proper state. "
753 "seq: %u, ack_seq: %u, on_control_list: %u\n",
754 cur_stream->id, TCPStateToString(cur_stream),
755 cur_stream->snd_nxt, cur_stream->rcv_nxt,
756 cur_stream->sndvar->on_control_list);
757 #ifdef DUMP_STREAM
758 DumpStream(mtcp, cur_stream);
759 #endif
760 }
761
762 if (to_ack) {
763 /* send the queued ack packets */
764 while (cur_stream->sndvar->ack_cnt > 0) {
765 ret = SendTCPPacket(mtcp, cur_stream,
766 cur_ts, TCP_FLAG_ACK, NULL, 0);
767 if (ret < 0) {
768 /* since there is no available write buffer, break */
769 break;
770 }
771 cur_stream->sndvar->ack_cnt--;
772 }
773
774 /* if is_wack is set, send packet to get window advertisement */
775 if (cur_stream->sndvar->is_wack) {
776 cur_stream->sndvar->is_wack = FALSE;
777 ret = SendTCPPacket(mtcp, cur_stream,
778 cur_ts, TCP_FLAG_ACK | TCP_FLAG_WACK, NULL, 0);
779 if (ret < 0) {
780 /* since there is no available write buffer, break */
781 cur_stream->sndvar->is_wack = TRUE;
782 }
783 }
784
785 if (!(cur_stream->sndvar->ack_cnt || cur_stream->sndvar->is_wack)) {
786 cur_stream->sndvar->on_ack_list = FALSE;
787 TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
788 sender->ack_list_cnt--;
789 }
790 } else {
791 cur_stream->sndvar->on_ack_list = FALSE;
792 cur_stream->sndvar->ack_cnt = 0;
793 cur_stream->sndvar->is_wack = 0;
794 TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
795 sender->ack_list_cnt--;
796 }
797
798 if (cur_stream->control_list_waiting) {
799 if (!cur_stream->sndvar->on_send_list) {
800 cur_stream->control_list_waiting = FALSE;
801 AddtoControlList(mtcp, cur_stream, cur_ts);
802 }
803 }
804 } else {
805 TRACE_ERROR("Stream %d: not on ack list.\n", cur_stream->id);
806 TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
807 sender->ack_list_cnt--;
808 #ifdef DUMP_STREAM
809 thread_printf(mtcp, mtcp->log_fp,
810 "Stream %u: not on ack list.\n", cur_stream->id);
811 DumpStream(mtcp, cur_stream);
812 #endif
813 }
814
815 if (cur_stream == last)
816 break;
817 cur_stream = next;
818 }
819
820 return cnt;
821 }
822 /*----------------------------------------------------------------------------*/
823 inline struct mtcp_sender *
GetSender(mtcp_manager_t mtcp,tcp_stream * cur_stream)824 GetSender(mtcp_manager_t mtcp, tcp_stream *cur_stream)
825 {
826 if (cur_stream->sndvar->nif_out < 0) {
827 return mtcp->g_sender;
828
829 } else if (cur_stream->sndvar->nif_out >= g_config.mos->netdev_table->num) {
830 TRACE_ERROR("(NEVER HAPPEN) Failed to find appropriate sender.\n");
831 return NULL;
832
833 } else {
834 return mtcp->n_sender[cur_stream->sndvar->nif_out];
835 }
836 }
837 /*----------------------------------------------------------------------------*/
838 inline void
AddtoControlList(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t cur_ts)839 AddtoControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
840 {
841 #if TRY_SEND_BEFORE_QUEUE
842 int ret;
843 struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
844 assert(sender != NULL);
845
846 ret = SendControlPacket(mtcp, cur_stream, cur_ts);
847 if (ret < 0) {
848 #endif
849 if (!cur_stream->sndvar->on_control_list) {
850 struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
851 assert(sender != NULL);
852
853 cur_stream->sndvar->on_control_list = TRUE;
854 TAILQ_INSERT_TAIL(&sender->control_list, cur_stream, sndvar->control_link);
855 sender->control_list_cnt++;
856 //TRACE_DBG("Stream %u: added to control list (cnt: %d)\n",
857 // cur_stream->id, sender->control_list_cnt);
858 }
859 #if TRY_SEND_BEFORE_QUEUE
860 } else {
861 if (cur_stream->sndvar->on_control_list) {
862 cur_stream->sndvar->on_control_list = FALSE;
863 TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
864 sender->control_list_cnt--;
865 }
866 }
867 #endif
868 }
869 /*----------------------------------------------------------------------------*/
870 inline void
AddtoSendList(mtcp_manager_t mtcp,tcp_stream * cur_stream)871 AddtoSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
872 {
873 struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
874 assert(sender != NULL);
875
876 if(!cur_stream->sndvar->sndbuf) {
877 TRACE_ERROR("[%d] Stream %d: No send buffer available.\n",
878 mtcp->ctx->cpu,
879 cur_stream->id);
880 assert(0);
881 return;
882 }
883
884 if (!cur_stream->sndvar->on_send_list) {
885 cur_stream->sndvar->on_send_list = TRUE;
886 TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
887 sender->send_list_cnt++;
888 }
889 }
890 /*----------------------------------------------------------------------------*/
891 inline void
AddtoACKList(mtcp_manager_t mtcp,tcp_stream * cur_stream)892 AddtoACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
893 {
894 struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
895 assert(sender != NULL);
896
897 if (!cur_stream->sndvar->on_ack_list) {
898 cur_stream->sndvar->on_ack_list = TRUE;
899 TAILQ_INSERT_TAIL(&sender->ack_list, cur_stream, sndvar->ack_link);
900 sender->ack_list_cnt++;
901 }
902 }
903 /*----------------------------------------------------------------------------*/
904 inline void
RemoveFromControlList(mtcp_manager_t mtcp,tcp_stream * cur_stream)905 RemoveFromControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
906 {
907 struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
908 assert(sender != NULL);
909
910 if (cur_stream->sndvar->on_control_list) {
911 cur_stream->sndvar->on_control_list = FALSE;
912 TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
913 sender->control_list_cnt--;
914 //TRACE_DBG("Stream %u: Removed from control list (cnt: %d)\n",
915 // cur_stream->id, sender->control_list_cnt);
916 }
917 }
918 /*----------------------------------------------------------------------------*/
919 inline void
RemoveFromSendList(mtcp_manager_t mtcp,tcp_stream * cur_stream)920 RemoveFromSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
921 {
922 struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
923 assert(sender != NULL);
924
925 if (cur_stream->sndvar->on_send_list) {
926 cur_stream->sndvar->on_send_list = FALSE;
927 TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
928 sender->send_list_cnt--;
929 }
930 }
931 /*----------------------------------------------------------------------------*/
932 inline void
RemoveFromACKList(mtcp_manager_t mtcp,tcp_stream * cur_stream)933 RemoveFromACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
934 {
935 struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
936 assert(sender != NULL);
937
938 if (cur_stream->sndvar->on_ack_list) {
939 cur_stream->sndvar->on_ack_list = FALSE;
940 TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
941 sender->ack_list_cnt--;
942 }
943 }
944 /*----------------------------------------------------------------------------*/
945 inline void
EnqueueACK(mtcp_manager_t mtcp,tcp_stream * cur_stream,uint32_t cur_ts,uint8_t opt)946 EnqueueACK(mtcp_manager_t mtcp,
947 tcp_stream *cur_stream, uint32_t cur_ts, uint8_t opt)
948 {
949 if (!(cur_stream->state == TCP_ST_ESTABLISHED ||
950 cur_stream->state == TCP_ST_CLOSE_WAIT ||
951 cur_stream->state == TCP_ST_FIN_WAIT_1 ||
952 cur_stream->state == TCP_ST_FIN_WAIT_2)) {
953 TRACE_DBG("Stream %u: Enqueueing ack at state %s\n",
954 cur_stream->id, TCPStateToString(cur_stream));
955 }
956
957 if (opt == ACK_OPT_NOW) {
958 if (cur_stream->sndvar->ack_cnt < cur_stream->sndvar->ack_cnt + 1) {
959 cur_stream->sndvar->ack_cnt++;
960 }
961 } else if (opt == ACK_OPT_AGGREGATE) {
962 if (cur_stream->sndvar->ack_cnt == 0) {
963 cur_stream->sndvar->ack_cnt = 1;
964 }
965 } else if (opt == ACK_OPT_WACK) {
966 cur_stream->sndvar->is_wack = TRUE;
967 }
968 AddtoACKList(mtcp, cur_stream);
969 }
970 /*----------------------------------------------------------------------------*/
971 inline void
DumpControlList(mtcp_manager_t mtcp,struct mtcp_sender * sender)972 DumpControlList(mtcp_manager_t mtcp, struct mtcp_sender *sender)
973 {
974 tcp_stream *stream;
975
976 TRACE_DBG("Dumping control list (count: %d):\n", sender->control_list_cnt);
977 TAILQ_FOREACH(stream, &sender->control_list, sndvar->control_link) {
978 TRACE_DBG("Stream id: %u in control list\n", stream->id);
979 }
980 }
981 /*----------------------------------------------------------------------------*/
982 static inline void
UpdatePassiveSendTCPContext_SynSent(struct tcp_stream * cur_stream,struct pkt_ctx * pctx)983 UpdatePassiveSendTCPContext_SynSent(struct tcp_stream *cur_stream,
984 struct pkt_ctx *pctx)
985 {
986 assert(cur_stream);
987 assert(pctx);
988
989 /* add event */
990 if (cur_stream->state < TCP_ST_SYN_SENT) {
991 cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
992 cur_stream->cb_events |= MOS_ON_CONN_START;
993 }
994 /* initialize TCP send variables of send-side stream */
995 cur_stream->sndvar->cwnd = 1;
996 cur_stream->sndvar->ssthresh = cur_stream->sndvar->mss * 10;
997 cur_stream->sndvar->ip_id = htons(pctx->p.iph->id);
998 cur_stream->sndvar->iss = pctx->p.seq;
999 cur_stream->snd_nxt = pctx->p.seq + 1;
1000 cur_stream->state = TCP_ST_SYN_SENT;
1001 cur_stream->last_active_ts = pctx->p.cur_ts;
1002
1003 /* receive-side conn start event can also be tagged here */
1004 /* blocked since tcp_in.c takes care of this.. */
1005 /* cur_stream->pair_stream->cb_events |= MOS_ON_CONN_START; */
1006 }
1007 /*----------------------------------------------------------------------------*/
1008 /**
1009 * Called (when monitoring mode is enabled).. for every incoming packet from the
1010 * NIC.
1011 */
1012 void
UpdatePassiveSendTCPContext(mtcp_manager_t mtcp,struct tcp_stream * cur_stream,struct pkt_ctx * pctx)1013 UpdatePassiveSendTCPContext(mtcp_manager_t mtcp, struct tcp_stream *cur_stream,
1014 struct pkt_ctx *pctx)
1015 {
1016 struct tcphdr *tcph;
1017
1018 assert(cur_stream);
1019 tcph = pctx->p.tcph;
1020
1021 /* if it is a new TCP stream from client */
1022 if (tcph->syn && !tcph->ack && cur_stream->state <= TCP_ST_SYN_SENT) {
1023 TRACE_STATE("Stream %d: %s\n",
1024 cur_stream->id, TCPStateToString(cur_stream));
1025 UpdatePassiveSendTCPContext_SynSent(cur_stream, pctx);
1026 AddtoTimeoutList(mtcp, cur_stream);
1027 return;
1028 }
1029
1030 if (tcph->ack) {
1031 cur_stream->sndvar->ts_lastack_sent = pctx->p.cur_ts;
1032 cur_stream->last_active_ts = pctx->p.cur_ts;
1033 }
1034
1035 cur_stream->snd_nxt = pctx->p.seq + pctx->p.payloadlen;
1036
1037 /* test for reset packet */
1038 if (tcph->rst) {
1039 cur_stream->have_reset = TRUE;
1040 /* test for reset packet */
1041 cur_stream->state = TCP_ST_CLOSED_RSVD;
1042 cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1043 TRACE_STATE("Stream %d: %s\n",
1044 cur_stream->id,
1045 TCPStateToString(cur_stream));
1046 return;
1047 }
1048
1049 /*
1050 * for all others, state transitioning is based on
1051 * current tcp_stream state
1052 */
1053 switch (cur_stream->state) {
1054 case TCP_ST_SYN_SENT:
1055 /* control should not come here */
1056 /* UpdatePassiveReceiveTCPContext() should take care of this */
1057 #ifdef BE_RESILIENT_TO_PACKET_DROP
1058 if (tcph->ack && TCP_SEQ_GT(pctx->p.seq, cur_stream->sndvar->iss)) {
1059 cur_stream->state = TCP_ST_ESTABLISHED;
1060 cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1061 cur_stream->snd_nxt = pctx->p.seq;
1062 cur_stream->rcv_nxt = pctx->p.ack_seq;
1063 goto __Handle_TCP_ST_ESTABLISHED;
1064 }
1065 #endif
1066 break;
1067 case TCP_ST_SYN_RCVD:
1068 if (!tcph->ack)
1069 break;
1070
1071 if (tcph->syn) {
1072 cur_stream->sndvar->iss = pctx->p.seq;
1073 cur_stream->snd_nxt = cur_stream->sndvar->iss + 1;
1074 TRACE_DBG("Stream %d (TCP_ST_SYN_RCVD): "
1075 "setting seq: %u = iss\n",
1076 cur_stream->id, pctx->p.seq);
1077 }
1078 #ifdef BE_RESILIENT_TO_PACKET_DROP
1079 else {
1080 cur_stream->state = TCP_ST_ESTABLISHED;
1081 cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1082 cur_stream->snd_nxt = pctx->p.seq;
1083 cur_stream->rcv_nxt = pctx->p.ack_seq;
1084 goto __Handle_TCP_ST_ESTABLISHED;
1085 }
1086 #endif
1087 TRACE_STATE("Stream %d: %s\n",
1088 cur_stream->id,
1089 TCPStateToString(cur_stream));
1090 break;
1091 case TCP_ST_ESTABLISHED:
1092 #ifdef BE_RESILIENT_TO_PACKET_DROP
1093 __Handle_TCP_ST_ESTABLISHED:
1094 #endif
1095 /* if application decides to close, fin pkt is sent */
1096 #ifdef BE_RESILIENT_TO_PACKET_DROP
1097 if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
1098 {
1099 RAISE_DEBUG_EVENT(mtcp, cur_stream,
1100 "Move rcv_nxt from %u to %u.\n",
1101 cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
1102 cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
1103 }
1104 #endif
1105 if (tcph->fin) {
1106 cur_stream->state = TCP_ST_FIN_WAIT_1;
1107 cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1108 cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
1109 cur_stream->sndvar->is_fin_sent = TRUE;
1110 cur_stream->snd_nxt++;
1111 TRACE_STATE("Stream %d: %s\n",
1112 cur_stream->id,
1113 TCPStateToString(cur_stream));
1114 } else {
1115 /* creating tcp send buffer still pending.. */
1116 /* do we need peek for send buffer? */
1117 }
1118 break;
1119 case TCP_ST_CLOSE_WAIT:
1120 /* if application decides to close, fin pkt is sent */
1121 #ifdef BE_RESILIENT_TO_PACKET_DROP
1122 if (tcph->ack && TCP_SEQ_GT(ntohl(tcph->ack_seq), cur_stream->rcv_nxt))
1123 {
1124 RAISE_DEBUG_EVENT(mtcp, cur_stream,
1125 "Move rcv_nxt from %u to %u.\n",
1126 cur_stream->rcv_nxt, ntohl(tcph->ack_seq));
1127 cur_stream->rcv_nxt = ntohl(tcph->ack_seq);
1128 }
1129 #endif
1130 if (tcph->fin) {
1131 cur_stream->sndvar->fss = pctx->p.seq + pctx->p.payloadlen;
1132 cur_stream->sndvar->is_fin_sent = TRUE;
1133 cur_stream->snd_nxt++;
1134
1135 /* verify whether the FIN from the other end is acked */
1136 if ((tcph->ack) && (ntohl(tcph->ack_seq) == cur_stream->rcv_nxt))
1137 cur_stream->state = TCP_ST_LAST_ACK;
1138 else
1139 cur_stream->state = TCP_ST_CLOSING;
1140
1141 cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
1142 TRACE_STATE("Stream %d: %s\n",
1143 cur_stream->id,
1144 TCPStateToString(cur_stream));
1145 } else if (tcph->ack) {
1146 TRACE_STATE("Stream %d: %s\n",
1147 cur_stream->id,
1148 TCPStateToString(cur_stream));
1149 }
1150 break;
1151 case TCP_ST_LAST_ACK:
1152 /* control should not come here */
1153 /* UpdatePassiveReceiveTCPContext() should take care of this */
1154 break;
1155 case TCP_ST_FIN_WAIT_1:
1156 /* control should not come here */
1157 /* UpdatePassiveReceiveTCPContext() should take care of this */
1158 break;
1159 case TCP_ST_FIN_WAIT_2:
1160 /* control should not come here */
1161 /* UpdatePassiveReceiveTCPContext() should take care of this */
1162 break;
1163 case TCP_ST_CLOSING:
1164 /* control should not come here */
1165 /* UpdatePassiveReceiveTCPContext() should take care of this */
1166 break;
1167 case TCP_ST_TIME_WAIT:
1168 /* control may come here but... */
1169 /* UpdatePassiveReceiveTCPContext() should take care of this */
1170 if (tcph->ack) {
1171 TRACE_STATE("Stream %d: %s\n",
1172 cur_stream->id,
1173 TCPStateToString(cur_stream));
1174 }
1175 break;
1176 case TCP_ST_CLOSED:
1177 case TCP_ST_CLOSED_RSVD:
1178 /* Waiting to be destroyed */
1179 break;
1180 default:
1181 TRACE_DBG("This should not happen.. Error state: %s reached!\n"
1182 "tcph->syn: %d, tcph->ack: %d\n",
1183 TCPStateToString(cur_stream), pctx->p.tcph->syn,
1184 pctx->p.tcph->ack);
1185 assert(0);
1186 /* This will be enabled once passiverecvcontext is completed */
1187 /*exit(EXIT_FAILURE);*/
1188 }
1189
1190 UNUSED(mtcp);
1191 return;
1192 }
1193 /*----------------------------------------------------------------------------*/
1194 void
PostSendTCPAction(mtcp_manager_t mtcp,struct pkt_ctx * pctx,struct tcp_stream * recvside_stream,struct tcp_stream * sendside_stream)1195 PostSendTCPAction(mtcp_manager_t mtcp, struct pkt_ctx *pctx,
1196 struct tcp_stream *recvside_stream,
1197 struct tcp_stream *sendside_stream)
1198 {
1199 /* this is empty for the time being */
1200 }
1201 /*----------------------------------------------------------------------------*/
1202