xref: /mOS-networking-stack/core/src/tcp_rb.c (revision e5df9dc1)
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <stdint.h>
4 #include <stdbool.h>
5 #include <string.h>
6 #include <assert.h>
7 #include <errno.h>
8 #include <unistd.h>
9 #include <sys/uio.h>
10 #include <ctype.h>
11 #include "debug.h"
12 #include "tcp_rb.h"
13 
14 #define FREE(x) do { free(x); x = NULL; } while (0)
15 #ifndef MIN
16 #define MIN(a, b) ((a) < (b) ? (a) : (b))
17 #endif
18 #ifndef MAX
19 #define MAX(a, b) ((a) < (b) ? (b) : (a))
20 #endif
21 
22 #define PRINTF(f, args...) printf("%s:%d: " f, __FILE__, __LINE__, ##args)
23 
24 /* -------------------------------------------------------------------------- */
25 /* Private */
26 /* -------------------------------------------------------------------------- */
27 
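/* Map a logical stream offset (loff) to a physical offset within the ring
 * buffer. Returns -1 if loff falls outside the current window
 * [rb->head, rb->head + rb->len]; rb->corr is the correction term that keeps
 * logical offsets pointing at the same physical bytes across buffer resizes
 * (see buf_try_resize()). */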
28 static inline boff_t
29 loff2boff(tcprb_t *rb, loff_t loff)
30 {
31 	int woff = loff - rb->head; /* window offset */
32 	if (woff < 0 || woff > rb->len)
33 		return -1;
34 
35 	return (boff_t)((loff + rb->corr) % rb->len);
36 }
37 /*--------------------------------------------------------------------------*/
38 static inline tcpfrag_t *
39 frags_new(void)
40 {
41 	return (tcpfrag_t *)calloc(1, sizeof(tcpfrag_t));
42 }
43 /*--------------------------------------------------------------------------*/
44 static inline void
45 frags_del(tcpfrag_t *f)
46 {
47 	free(f);
48 }
49 /*--------------------------------------------------------------------------*/
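/* Insert fragment f into rb->frags, keeping the list sorted by ascending
 * head offset. */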
50 static inline void
51 frags_insert(tcprb_t *rb, tcpfrag_t *f)
52 {
53 	struct _tcpfrag_t *walk;
54 
55 	TAILQ_FOREACH(walk, &rb->frags, link)
56 		if (walk->head > f->head) {
57 			TAILQ_INSERT_BEFORE(walk, f, link);
58 			break;
59 		}
60 
61 	if (!walk)
62 		TAILQ_INSERT_TAIL(&rb->frags, f, link);
63 }
64 /*--------------------------------------------------------------------------*/
65 static inline tcpbufseg_t *
66 bufseg_new(mem_pool_t mp)
67 {
68 	//return (tcpbufseg_t *)malloc(sizeof(tcpbufseg_t));
69 	return (tcpbufseg_t *)MPAllocateChunk(mp);
70 }
71 /*--------------------------------------------------------------------------*/
72 static inline void
73 bufseg_del(mem_pool_t mp, tcpbufseg_t *b)
74 {
75 	//free(b);
76 	MPFreeChunk(mp, b);
77 }
78 /*--------------------------------------------------------------------------*/
79 /* For on-demand buffer segment allocation, the buffer segment queue must
80  * always be traversed in the direction from head to tail. */
81 static inline tcpbufseg_t *
82 buf_first(tcprb_t *rb)
83 {
84 	tcpbufseg_t *tmp;
85 
86 	if ((tmp = TAILQ_FIRST(&rb->bufsegs)))
87 		return tmp;
88 
89 	if ((rb->lbufsegs == 0) || ((tmp = bufseg_new(rb->mp)) == NULL))
90 		return NULL;
91 
92 	tmp->id = 0;
93 	TAILQ_INSERT_TAIL(&rb->bufsegs, tmp, link);
94 
95 	return tmp;
96 }
97 /*--------------------------------------------------------------------------*/
98 static inline tcpbufseg_t *
99 buf_next(tcprb_t *rb, tcpbufseg_t *buf)
100 {
101 	tcpbufseg_t *tmp;
102 
103 	if ((tmp = TAILQ_NEXT(buf, link)))
104 		return tmp;
105 
106 	if ((rb->lbufsegs <= buf->id + 1) || ((tmp = bufseg_new(rb->mp)) == NULL))
107 		return NULL;
108 
109 	tmp->id = buf->id + 1;
110 	TAILQ_INSERT_TAIL(&rb->bufsegs, tmp, link);
111 
112 	return tmp;
113 }
114 /*--------------------------------------------------------------------------*/
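/* Return the buffer segment that holds physical offset off. Segments are
 * allocated on demand while walking forward from the head of the list
 * (see buf_first()/buf_next()). */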
115 static inline tcpbufseg_t *
116 buf_getbuf(tcprb_t *rb, boff_t off)
117 {
118 	tcpbufseg_t *buf;
119 	int id = off / UNITBUFSIZE;
120 	assert(id >= 0);
121 
122 #if 0
123 	int max = rb->len / UNITBUFSIZE;
124 
125 	if (max / 2 > id) {
126 		buf = TAILQ_FIRST(&rb->bufsegs);
127 		while (id--)
128 			buf = TAILQ_NEXT(buf, link);
129 	} else {
130 		buf = TAILQ_LAST(&rb->bufsegs, blist);
131 		while (max - ++id)
132 			buf = TAILQ_PREV(buf, blist, link);
133 	}
134 #else
135 	buf = buf_first(rb);
136 	while (id--)
137 		buf = buf_next(rb, buf);
138 #endif
139 
140 	assert(buf);
141 
142 	return buf;
143 }
144 /*--------------------------------------------------------------------------*/
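/* Like TAILQ_NEXT(), but wraps around to the first element of the list when
 * elm is the last element. */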
145 #define TAILQ_LOOP_NEXT(head, headname, elm, field) \
146 	((*(((struct headname *)((head)->tqh_last))->tqh_last)) == (elm) ? \
147 	 ((head)->tqh_first) : ((elm)->field.tqe_next))
148 
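/* Grow or shrink the physical buffer to `len` bytes (a multiple of
 * UNITBUFSIZE) while keeping the bytes in [data, data + datalen) addressable
 * at the same logical offsets; rb->corr is adjusted accordingly. When
 * shrinking below the amount of buffered data, the ranges that lose their
 * backing store are marked as empty fragments. */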
149 static inline int
150 buf_try_resize(tcprb_t *rb, int len, loff_t data, int datalen)
151 {
152 	/* FIXME: resizing is temporarily disabled because of the on-demand
153 	 * buffer allocation patch */
154 	//return 0;
155 
156 	assert(rb);
157 	assert(rb->len);
158 	assert(len % UNITBUFSIZE == 0 || len < 2);
159 
160 	int segdiff = (len - rb->len) / UNITBUFSIZE;
161 	if (segdiff == 0)
162 		return 0;
163 
164 	boff_t head = loff2boff(rb, data);
165 	boff_t tail = (data + datalen - 1) % rb->len;
166 
167 	tcpbufseg_t *headseg = buf_getbuf(rb, head);
168 	tcpbufseg_t *tailseg = buf_getbuf(rb, tail);
169 
170 	if (segdiff > 0) {
171 		/* Expand the buffer */
172 		rb->len = len;
173 		boff_t new_head = loff2boff(rb, data);
174 		rb->corr = (rb->corr + (segdiff * UNITBUFSIZE) - (new_head - head))
175 				   % rb->len;
176 		if (rb->corr < 0)
177 			rb->corr += rb->len;
178 
179 		if (head > tail && headseg == tailseg) {
180 			tcpbufseg_t *seg = bufseg_new(rb->mp);
181 			assert(seg);
182 			memcpy(&seg->buf[head % UNITBUFSIZE],
183 				   &headseg->buf[head % UNITBUFSIZE],
184 				   UNITBUFSIZE - (head % UNITBUFSIZE));
185 			TAILQ_INSERT_AFTER(&rb->bufsegs, tailseg, seg, link);
186 			headseg = seg;
187 			segdiff--;
188 		}
189 		while (segdiff--) {
190 			tcpbufseg_t *seg = bufseg_new(rb->mp);
191 			assert(seg);
192 			TAILQ_INSERT_AFTER(&rb->bufsegs, tailseg, seg, link);
193 		}
194 	} else /* if (segdiff < 0) */ {
195 		/* Shrink the buffer */
196 		tcpbufseg_t *seg;
197 
198 		segdiff *= -1;
199 		int shrinkable = (rb->len - datalen) / UNITBUFSIZE;
200 		int tobeshrank = segdiff;
201 
202 		rb->len -= (tobeshrank * UNITBUFSIZE);
203 		if (rb->len) {
204 			boff_t new_head = loff2boff(rb, data);
205 			rb->corr = (rb->corr - (tobeshrank * UNITBUFSIZE) -
206 						(new_head - head)) % rb->len;
207 			if (rb->corr < 0)
208 				rb->corr += rb->len;
209 		}
210 
211 		if (shrinkable < segdiff) {
212 			/* Mark some fragments as empty */
213 			loff_t eh = data + rb->len +
214 				(UNITBUFSIZE - (loff2boff(rb, data) % UNITBUFSIZE));
215 			//loff_t et = eh + (segdiff - shrinkable) * UNITBUFSIZE;
216 			loff_t et = data + datalen;
217 
218 			struct _tcpfrag_t *f = TAILQ_FIRST(&rb->frags), *new;
219 
220 			TAILQ_FOREACH(f, &rb->frags, link) {
221 				if (f->tail <= eh)
222 					continue;
223 
224 				if (f->tail > et) {
225 					new = frags_new();
226 					new->head = et;
227 					new->tail = f->tail;
228 					TAILQ_INSERT_AFTER(&rb->frags, f, new, link);
229 
230 					f->tail = et;
231 				}
232 
233 				if (f->head >= eh && f->tail <= et) {
234 					f->empty = true;
235 					continue;
236 				}
237 
238 				if (f->head < eh) {
239 					new = frags_new();
240 					new->head = eh;
241 					new->tail = f->tail;
242 					TAILQ_INSERT_AFTER(&rb->frags, f, new, link);
243 
244 					f->tail = eh;
245 					continue;
246 				}
247 			}
248 
249 			/* shrink the buffer */
250 			int skip = rb->len / UNITBUFSIZE;
251 			for (seg = headseg; seg; ) {
252 				if (skip-- <= 0) {
253 					TAILQ_REMOVE(&rb->bufsegs, seg, link);
254 					bufseg_del(rb->mp, seg);
255 				}
256 				seg = TAILQ_LOOP_NEXT(&rb->bufsegs, blist, tailseg, link);
257 				if (seg == headseg)
258 					break;
259 			}
260 		} else {
261 			while (tobeshrank &&
262 					(seg = TAILQ_LOOP_NEXT(&rb->bufsegs, blist, tailseg, link))
263 					!= headseg) {
264 				TAILQ_REMOVE(&rb->bufsegs, seg, link);
265 				bufseg_del(rb->mp, seg);
266 				tobeshrank--;
267 			}
268 			if (tobeshrank) {
269 				assert(tobeshrank == 1);
270 				assert((tail % UNITBUFSIZE) < (head % UNITBUFSIZE));
271 				memcpy(&tailseg->buf[head % UNITBUFSIZE],
272 						&headseg->buf[head % UNITBUFSIZE],
273 						UNITBUFSIZE - (head % UNITBUFSIZE));
274 				TAILQ_REMOVE(&rb->bufsegs, headseg, link);
275 				bufseg_del(rb->mp, headseg);
276 				headseg = tailseg;
277 				tobeshrank = 0;
278 			}
279 		}
280 	}
281 
282 	return 0;
283 }
284 /*--------------------------------------------------------------------------*/
285 /* buf_read() and buf_write() are highly symmetric, so a macro is used for the
286  * function definitions to ease code maintenance. */
287 
288 /* TODO: We do not have tailing anymore; simplify these functions. */
289 
290 #define MEMCPY_FOR_read(a, b, len) memcpy(a, b, len)
291 #define MEMCPY_FOR_write(a, b, len) memcpy(b, a, len)
292 
293 #define FUNCDEF_BUF_RW(rw) \
294 static inline void \
295 buf_##rw(tcprb_t *rb, uint8_t *buf, int len, loff_t off) \
296 { \
297 	tcpbufseg_t *bufseg = NULL; \
298  \
299 	assert(rb); \
300  \
301 	boff_t from = loff2boff(rb, off); \
302 	boff_t to = loff2boff(rb, off + len); \
303 	tcpbufseg_t *bufseg_from = buf_getbuf(rb, from); \
304 	tcpbufseg_t *bufseg_to = buf_getbuf(rb, to); \
305  \
306 	if (from > to) { \
307 		off = UNITBUFSIZE - (from % UNITBUFSIZE); \
308 		MEMCPY_FOR_##rw(&buf[0], &bufseg_from->buf[from % UNITBUFSIZE], off); \
309 		for (bufseg = buf_next(rb, bufseg_from); \
310 			 bufseg && (bufseg != bufseg_to); \
311 			 bufseg = buf_next(rb, bufseg)) { \
312 			MEMCPY_FOR_##rw(&buf[off], &bufseg->buf[0], UNITBUFSIZE); \
313 			off += UNITBUFSIZE; \
314 		} \
315 		for (bufseg = buf_first(rb); \
316 			 bufseg && (bufseg != bufseg_to); \
317 			 bufseg = buf_next(rb, bufseg)) { \
318 			MEMCPY_FOR_##rw(&buf[off], &bufseg->buf[0], UNITBUFSIZE); \
319 			off += UNITBUFSIZE; \
320 		} \
321 		MEMCPY_FOR_##rw(&buf[off], &bufseg_to->buf[0], to % UNITBUFSIZE); \
322 	} else if (bufseg_from == bufseg_to) { \
323 		MEMCPY_FOR_##rw(&buf[0], &bufseg_from->buf[from % UNITBUFSIZE], len); \
324 	} else { \
325 		off = UNITBUFSIZE - (from % UNITBUFSIZE); \
326 		MEMCPY_FOR_##rw(&buf[0], &bufseg_from->buf[from % UNITBUFSIZE], off); \
327 		for (bufseg = buf_next(rb, bufseg_from); \
328 			 bufseg && (bufseg != bufseg_to); \
329 			 bufseg = buf_next(rb, bufseg)) { \
330 			MEMCPY_FOR_##rw(&buf[off], &bufseg->buf[0], UNITBUFSIZE); \
331 			off += UNITBUFSIZE; \
332 		} \
333 		MEMCPY_FOR_##rw(&buf[off], &bufseg_to->buf[0], to % UNITBUFSIZE); \
334 	} \
335 }
336 
337 FUNCDEF_BUF_RW(read)
338 FUNCDEF_BUF_RW(write)
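/* The two invocations above expand to
 *   static inline void buf_read(tcprb_t *rb, uint8_t *buf, int len, loff_t off)
 *   static inline void buf_write(tcprb_t *rb, uint8_t *buf, int len, loff_t off)
 * with identical segment-traversal logic; only the memcpy() direction differs
 * (buffer-to-caller for read, caller-to-buffer for write). */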
339 /* -------------------------------------------------------------------------- */
340 /* Public */
341 /* -------------------------------------------------------------------------- */
342 
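/* Convert a TCP sequence number into a logical stream offset relative to the
 * initial sequence number `isn`. 2^32 is added as many times as needed so that
 * the result never falls behind rb->head, keeping offsets monotonically
 * increasing across sequence-number wraparound. */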
343 inline loff_t
344 seq2loff(tcprb_t *rb, uint32_t seq, uint32_t isn)
345 {
346 	loff_t off = seq - isn;
347 
348 	while (off < rb->head)
349 		off += 0x100000000;
350 
351 	return off;
352 }
353 /*--------------------------------------------------------------------------*/
354 inline tcprb_t *
355 tcprb_new(mem_pool_t mp, int len, unsigned buf_mgmt)
356 {
357 	tcprb_t *rb;
358 
359 	if (len % UNITBUFSIZE || len < 2)
360 		return NULL;
361 
362 	if (!(rb = calloc(1, sizeof(tcprb_t))))
363 		return NULL;
364 
365 	TAILQ_INIT(&rb->bufsegs);
366 	rb->lbufsegs = ((len - 1) / UNITBUFSIZE) + 1;
367 
368 #if 0
369 	int i;
370 	for (i = 0; i < rb->lbufsegs; i++) {
371 		tcpbufseg_t *bufseg = bufseg_new(mp);
372 		if (!bufseg) {
373 			TAILQ_FOREACH(bufseg, &rb->bufsegs, link)
374 				bufseg_del(mp, bufseg);
375 			FREE(rb);
376 			return NULL;
377 		}
378 		TAILQ_INSERT_TAIL(&rb->bufsegs, bufseg, link);
379 	}
380 #endif
381 
382 	rb->buf_mgmt = buf_mgmt;
383 	rb->mp = mp;
384 	rb->len = rb->metalen = len;
385 	TAILQ_INIT(&rb->frags);
386 
387 	return rb;
388 }
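/* Example usage (illustrative sketch only; `mp`, `pkt_seq`, `irs`, `payload`,
 * `payloadlen`, `out` and `outlen` are hypothetical caller-side names, and the
 * memory pool is assumed to hand out chunks of at least sizeof(tcpbufseg_t)):
 *
 *   tcprb_t *rb = tcprb_new(mp, 4 * UNITBUFSIZE, BUFMGMT_FULL);
 *   loff_t off = seq2loff(rb, pkt_seq, irs + 1);
 *   tcprb_pwrite(rb, payload, payloadlen, off);      // stash incoming payload
 *   int n = tcprb_ppeek(rb, out, outlen, rb->pile);  // copy out contiguous data
 *   if (n > 0) {
 *           tcprb_setpile(rb, rb->pile + n);         // mark it as delivered
 *           tcprb_ffhead(rb, n);                     // release the head space
 *   }
 *   tcprb_del(rb);
 */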
389 /*--------------------------------------------------------------------------*/
390 inline int
391 tcprb_del(tcprb_t *rb)
392 {
393 	struct _tcpbufseg_t *bwalk, *bnext;
394 	struct _tcpfrag_t *fwalk, *fnext;
395 
396 	for (bwalk = TAILQ_FIRST(&rb->bufsegs); bwalk; bwalk = bnext) {
397 		bnext = TAILQ_NEXT(bwalk, link);
398 		bufseg_del(rb->mp, bwalk);
399 	}
400 
401 	for (fwalk = TAILQ_FIRST(&rb->frags); fwalk; fwalk = fnext) {
402 		fnext = TAILQ_NEXT(fwalk, link);
403 		frags_del(fwalk);
404 	}
405 
406 	FREE(rb);
407 
408 	return 0;
409 }
410 /*--------------------------------------------------------------------------*/
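/* Advance rb->pile, which tracks how far the in-order data at the head has
 * been consumed (see tcprb_cflen()). Fails if `new` lies outside the window,
 * if there is no contiguous fragment starting at the head, or if `new` is
 * beyond that fragment's tail. */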
411 inline int
412 tcprb_setpile(tcprb_t *rb, loff_t new)
413 {
414 	if (!rb || new > (rb->head + rb->len) || new < rb->head)
415 		return -1;
416 
417 	tcpfrag_t *cf = TAILQ_FIRST(&rb->frags); /* contiguous buffer seg. */
418 
419 	if (!cf || (cf->head != rb->head)) {
420 		/* No contiguous buffer seg. */
421 		assert(rb->pile == rb->head);
422 		return -1;
423 	}
424 
425 	if (new > cf->tail)
426 		return -1;
427 
428 	rb->pile = new;
429 
430 	return 0;
431 }
432 /*--------------------------------------------------------------------------*/
433 inline int
434 tcprb_cflen(tcprb_t *rb)
435 {
436 	tcpfrag_t *cf = TAILQ_FIRST(&rb->frags); /* contiguous buffer seg. */
437 
438 	if (!cf || (cf->head != rb->head))
439 		/* No contiguous buffer seg. to traverse */
440 		return 0;
441 
442 	int cflen = cf->tail - rb->pile; /* contiguous bytes past the pile */
443 
444 	assert(cflen >= 0);
445 
446 	return cflen;
447 }
448 /*--------------------------------------------------------------------------*/
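/* Fast-forward the buffer head by up to `len` bytes, but never past rb->pile
 * and never beyond the leading contiguous fragment. Returns the number of
 * bytes actually skipped. */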
449 inline int
450 tcprb_ffhead(tcprb_t *rb, int len)
451 {
452 	if (!rb || len < 0)
453 		return 0;
454 	else if (len == 0)
455 		return 0;
456 
457 	tcpfrag_t *cf = TAILQ_FIRST(&rb->frags); /* contiguous buffer seg. */
458 
459 	if (!cf || (cf->head != rb->head))
460 		/* No contiguous buffer seg. to traverse */
461 		return 0;
462 
463 	int cflen = cf->tail - cf->head; /* length of cf */
464 	assert(cflen > 0);
465 
466 	int ff = MIN(len, cflen);
467 	ff = MIN(ff, rb->pile - rb->head); /* head cannot go further than pile */
468 
469 	if (cflen == ff) {
470 		/* Fast forward entire contiguous segment */
471 		TAILQ_REMOVE(&rb->frags, cf, link);
472 		frags_del(cf);
473 	} else {
474 		cf->head += ff;
475 	}
476 
477 	rb->head += ff;
478 
479 	return ff;
480 }
481 /*--------------------------------------------------------------------------*/
482 static inline int
483 tcprb_get_datalen(tcprb_t *rb)
484 {
485 	tcpfrag_t *lastfrag = TAILQ_LAST(&rb->frags, flist);
486 	return lastfrag ? (int)(lastfrag->tail - rb->head) : 0;
487 }
488 /*--------------------------------------------------------------------------*/
489 inline int
490 tcprb_resize_meta(tcprb_t *rb, int len)
491 {
492 #ifdef DISABLE_DYN_RESIZE
493 	assert(len == 0);
494 
495 	struct _tcpfrag_t *fwalk, *fnext;
496 
497 	rb->metalen = 0;
498 
499 	for (fwalk = TAILQ_FIRST(&rb->frags); fwalk; fwalk = fnext) {
500 		fnext = TAILQ_NEXT(fwalk, link);
501 		TAILQ_REMOVE(&rb->frags, fwalk, link);
502 		frags_del(fwalk);
503 	}
504 
505 	return 0;
506 #else
507 	int diff, ff, datalen;
508 
509 	if ((diff = len - rb->metalen) > 0) {
510 		rb->metalen = len;
511 	} else if (diff < 0) {
512 		ff = diff - (rb->len - tcprb_get_datalen(rb));
513 		tcprb_ffhead(rb, ff);
514 		datalen = tcprb_get_datalen(rb);
515 		rb->metalen = MAX(datalen, len);
516 	}
517 
518 	return rb->metalen;
519 #endif
520 }
521 /*--------------------------------------------------------------------------*/
522 inline int
523 tcprb_setpolicy(tcprb_t *rb, uint8_t policy)
524 {
525 	if (policy >= MOS_OVERLAP_CNT)
526 		return -1;
527 
528 	rb->overlap = policy;
529 	return 0;
530 }
531 /*--------------------------------------------------------------------------*/
532 inline int
533 tcprb_resize(tcprb_t *rb, int len)
534 {
535 #ifdef DISABLE_DYN_RESIZE
536 	assert(len == 0);
537 
538 	struct _tcpbufseg_t *bwalk, *bnext;
539 	struct _tcpfrag_t *fwalk, *fnext;
540 
541 	rb->metalen = 0;
542 	rb->len = 0;
543 
544 	for (bwalk = TAILQ_FIRST(&rb->bufsegs); bwalk; bwalk = bnext) {
545 		bnext = TAILQ_NEXT(bwalk, link);
546 		TAILQ_REMOVE(&rb->bufsegs, bwalk, link);
547 		bufseg_del(rb->mp, bwalk);
548 	}
549 
550 	for (fwalk = TAILQ_FIRST(&rb->frags); fwalk; fwalk = fnext) {
551 		fnext = TAILQ_NEXT(fwalk, link);
552 		TAILQ_REMOVE(&rb->frags, fwalk, link);
553 		frags_del(fwalk);
554 	}
555 
556 	return 0;
557 #else
558 	int diff, ff;
559 
560 	if (len % UNITBUFSIZE)
561 		return -1;
562 	else if (len == rb->len)
563 		return 0;
564 
565 	if ((diff = rb->len - len) > 0 && /* shrinking */
566 		(ff = diff - (rb->len - tcprb_get_datalen(rb))))
567 		tcprb_ffhead(rb, ff);
568 
569 	return buf_try_resize(rb, len, rb->head,
570 						  tcprb_get_datalen(rb));
571 #endif
572 }
573 /*--------------------------------------------------------------------------*/
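/* Copy up to `len` bytes starting at logical offset `off` into `buf` without
 * consuming them. Only bytes within a single non-empty fragment are returned;
 * returns -1 if no fragment covers `off` or if the buffer does not keep
 * payload (buf_mgmt != BUFMGMT_FULL). */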
574 inline int
575 tcprb_ppeek(tcprb_t *rb, uint8_t *buf, int len, loff_t off)
576 {
577 	struct _tcpfrag_t *f;
578 
579 	if (!rb || rb->buf_mgmt != BUFMGMT_FULL || !buf || len < 0)
580 		return -1;
581 	else if (len == 0)
582 		return 0;
583 
584 	TAILQ_FOREACH(f, &rb->frags, link)
585 		if (off >= f->head && off < f->tail) {
586 			if (f->empty)
587 				f = NULL;
588 			break;
589 		}
590 
591 	if (!f) /* No proper fragment found */
592 		return -1;
593 
594 	int plen = MIN(len, f->tail - off);
595 
596 	buf_read(rb, buf, plen, off);
597 
598 	return plen;
599 }
600 /*--------------------------------------------------------------------------*/
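/* Buffer `len` bytes of payload at logical offset `off`, splitting and merging
 * fragments as needed. A write that extends past the window fast-forwards the
 * head first; data ending below rb->pile is treated as already delivered and
 * accepted without copying. Whether bytes overlapping an existing fragment are
 * overwritten depends on rb->overlap (see tcprb_setpolicy()). Returns the
 * number of bytes accepted, or -1 on error. */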
601 inline int
602 tcprb_pwrite(tcprb_t *rb, uint8_t *buf, int len, loff_t off)
603 {
604 	int ff, olen;
605 	loff_t efhead = -1;  /* head of empty fragment */
606 	int eflen = 0;           /* length of empty fragment */
607 
608 	/* pwrite() will not overwrite old data with new data, unless the
609 	 * overlap policy is MOS_OVERLAP_POLICY_LAST (see tcprb_setpolicy()). */
610 
611 	if (!rb || !buf || len < 0 ||
612 		/* out of window range */
613 		off < rb->head || off >= rb->pile + rb->metalen)
614 		return -1;
615 	else if (len == 0)
616 		return 0;
617 	else if (off + len < rb->pile) /* already written */
618 		return len;
619 
620 	/* move head */
621 	olen = len;
622 	if ((ff = (off + len) - (rb->head + MIN(rb->len, rb->metalen))) > 0)
623 		len -= ff - tcprb_ffhead(rb, ff);
624 
625 	if (rb->metalen > rb->len)
626 		eflen = MIN(olen - len, rb->metalen - rb->len);
627 	if (eflen)
628 		efhead = off + len;
629 
630 	/* write data */
631 	struct _tcpfrag_t *f = TAILQ_FIRST(&rb->frags), *fnext;
632 	int uoff = 0; /* offset of `buf` */
633 
634 	/* head: position of first byte of the fragment
635 	 * tail: head + length of the fragment */
636 	while (uoff < len) {
637 		bool skip = false;
638 		int wrlen = 0; /* length to be written */
639 
640 		while (f) {
641 			struct _tcpfrag_t *ef, *nef;
642 			fnext = TAILQ_NEXT(f, link);
643 
644 			if (f->empty) {
645 				/* skip empty fragment */
646 				f = fnext;
647 				continue;
648 			}
649 
650 			if (f->head <= off + uoff) {
651 				if (f->tail > off + uoff) {
652 					skip = true;
653 					wrlen = f->tail - (off + uoff);
654 					break;
655 				} else if (f->tail == off + uoff) {
656 					skip = false;
657 
658 					/* shrink empty fragment */
659 					for (ef = fnext;
660 						 ef && ef->empty && ef->head < f->tail + len - uoff;
661 						 ef = nef) {
662 						nef = TAILQ_NEXT(ef, link);
663 
664 						if (ef->tail <= f->tail + len - uoff) {
665 							TAILQ_REMOVE(&rb->frags, ef, link);
666 						} else {
667 							ef->head = f->tail + len - uoff;
668 							/* break is not strictly necessary; it just exits early */
669 							break;
670 						}
671 					}
672 					fnext = TAILQ_NEXT(f, link);
673 
674 					wrlen = fnext ? MIN(fnext->head - (off + uoff), len - uoff)
675 								  : len - uoff;
676 					f->tail += wrlen;
677 					if (fnext && (f->tail == fnext->head)) {
678 						/* merge 'f' and 'fnext' */
679 						f->tail = fnext->tail;
680 						TAILQ_REMOVE(&rb->frags, fnext, link);
681 						frags_del(fnext);
682 					}
683 					break;
684 				}
685 			} else if (f->head <= off + len) {
686 				skip = false;
687 				wrlen = MIN(f->head - (off + uoff), len - uoff);
688 				f->head -= wrlen;
689 
690 				/* shrink empty fragment */
691 				for (ef = TAILQ_PREV(f, flist, link);
692 					 ef && ef->empty && ef->tail < f->head;
693 					 ef = nef) {
694 					nef = TAILQ_PREV(ef, flist, link);
695 
696 					if (ef->head <= f->head) {
697 						TAILQ_REMOVE(&rb->frags, ef, link);
698 					} else {
699 						ef->tail = f->head;
700 						/* break is not strictly necessary; it just exits early */
701 						break;
702 					}
703 				}
704 
705 				break;
706 			} else /* if (f->head > off + len) */ {
707 				/* early escape */
708 				f = NULL;
709 				break;
710 			}
711 
712 			f = fnext;
713 		}
714 
715 		if (!f) {
716 			struct _tcpfrag_t *new;
717 
718 			/* create new fragment and insert it into the list */
719 			new = frags_new();
720 
721 			new->head = off;
722 			new->tail = off + len;
723 			wrlen = len;
724 
725 			frags_insert(rb, new);
726 		}
727 
728 		/* copy data */
729 		if ((rb->overlap == MOS_OVERLAP_POLICY_LAST || !skip)
730 			&& rb->buf_mgmt)
731 			buf_write(rb, &buf[uoff], wrlen, off + uoff);
732 		uoff += wrlen;
733 	}
734 
735 	/* Insert empty fragment if necessary */
736 	if (eflen) {
737 		assert(rb->metalen > rb->len);
738 
739 		struct _tcpfrag_t *new;
740 
741 		/* create new fragment and insert it into the list */
742 		new = frags_new();
743 
744 		new->empty = true;
745 		new->head = efhead;
746 		new->tail = efhead + eflen;
747 
748 		frags_insert(rb, new);
749 	}
750 
751 	return len;
752 }
753 /*--------------------------------------------------------------------------*/
754 #define PRT_CL_RST		"\x1b[0m"
755 #define PRT_FG_BLK		"\x1b[30m"
756 #define PRT_FG_RED		"\x1b[31m"
757 #define PRT_FG_GRN		"\x1b[32m"
758 #define PRT_FG_YEL		"\x1b[33m"
759 #define PRT_FG_BLU		"\x1b[34m"
760 #define PRT_FG_PUR		"\x1b[35m"
761 #define PRT_FG_CYN		"\x1b[36m"
762 #define PRT_FG_GRY		"\x1b[37m"
763 #define PRT_BG_BLK		"\x1b[40m"
764 #define PRT_BG_RED		"\x1b[41m"
765 #define PRT_BG_GRN		"\x1b[42m"
766 #define PRT_BG_YEL		"\x1b[43m"
767 #define PRT_BG_BLU		"\x1b[44m"
768 #define PRT_BG_PUR		"\x1b[45m"
769 #define PRT_BG_CYN		"\x1b[46m"
770 #define PRT_BG_GRY		"\x1b[47m"
771 /*--------------------------------------------------------------------------*/
772 inline void
773 tcprb_printfrags(struct _tcprb_t *rb)
774 {
775 	struct _tcpfrag_t *walk;
776 
777 	printf("frags: ");
778 	TAILQ_FOREACH(walk, &rb->frags, link) {
779 		printf("[%lu - %lu]:'", walk->head, walk->tail - 1);
780 #if 1
781 		if (walk->empty)
782 			printf("EMPTY");
783 		else {
784 			loff_t i;
785 			for (i = walk->head; i < walk->tail; i++) {
786 				tcpbufseg_t *bufseg;
787 				boff_t j = loff2boff(rb, i);
788 				bufseg = buf_getbuf(rb, j);
789 				char c = bufseg->buf[j % UNITBUFSIZE];
790 				if (isgraph(c))
791 					printf("%c", c);
792 				else {
793 					printf(PRT_FG_BLU);
794 					if (c == ' ')
795 						printf(" ");
796 					else if (c == '\0')
797 						printf("0");
798 					else if (c == '\r')
799 						printf("R");
800 					else if (c == '\n')
801 						printf("N");
802 					else if (c == '\t')
803 						printf("T");
804 					else
805 						printf("?");
806 					printf(PRT_CL_RST);
807 				}
808 			}
809 		}
810 #endif
811 		printf("' ");
812 	}
813 	printf("\n");
814 }
815 inline void
816 tcprb_printbufsegs(tcprb_t *rb)
817 {
818 	struct _tcpbufseg_t *walk;
819 
820 	printf("bufsegs: ");
821 	TAILQ_FOREACH(walk, &rb->bufsegs, link) {
822 		printf("[%d]:'", walk->id);
823 #if 1
824 		int j;
825 		for (j = 0; j < UNITBUFSIZE; j++) {
826 			char c = walk->buf[j];
827 			if (isgraph(c))
828 				printf("%c", c);
829 			else {
830 				printf(PRT_FG_BLU);
831 				if (c == ' ')
832 					printf(" ");
833 				else if (c == '\0')
834 					printf("0");
835 				else if (c == '\r')
836 					printf("R");
837 				else if (c == '\n')
838 					printf("N");
839 				else if (c == '\t')
840 					printf("T");
841 				else
842 					printf("?");
843 				printf(PRT_CL_RST);
844 			}
845 		}
846 #endif
847 		printf("' ");
848 	}
849 	printf("\n");
850 }
851 /*--------------------------------------------------------------------------*/
852 inline void
853 tcprb_printrb(struct _tcprb_t *rb)
854 {
855 	printf("  rb: len: %d, meta: %d, "
856 			"(head: %ld <= pile: %ld <= tail: %ld)\n  ",
857 			rb->len, rb->metalen, rb->head, rb->pile, rb->head + rb->metalen);
858 	tcprb_printfrags(rb);
859 	tcprb_printbufsegs(rb);
860 	printf("-------------------------------------------------\n");
861 }
862 /*--------------------------------------------------------------------------*/
863 inline void
864 tcp_rb_overlapchk(mtcp_manager_t mtcp, struct pkt_ctx *pctx,
865 		    struct tcp_stream *recvside_stream)
866 {
867 #define DOESOVERLAP(a1, a2, b1, b2)				\
868 	((a1 != b2) && (a2 != b1) && ((a1 > b2) != (a2 > b1)))
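	/* True iff the byte ranges [a1, a2) and [b1, b2) share at least one byte;
	 * ranges that merely touch at an endpoint do not count as overlapping. */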
869 
870 	/* Check whether this packet is a retransmission. */
871 	tcprb_t *rb;
872 	struct socket_map *walk;
873 	if (pctx->p.payloadlen > 0 && recvside_stream->rcvvar != NULL
874 	    && (rb = recvside_stream->rcvvar->rcvbuf) != NULL) {
875 		struct _tcpfrag_t *f;
876 		loff_t off = seq2loff(rb, pctx->p.seq, recvside_stream->rcvvar->irs + 1);
877 		TAILQ_FOREACH(f, &rb->frags, link)
878 			if (DOESOVERLAP(f->head, f->tail, off, off + pctx->p.payloadlen)) {
879 				/*
880 				 * call it immediately (before pkt payload is attached
881 				 * to the tcp ring buffer)
882 				 */
883 				SOCKQ_FOREACH_START(walk, &recvside_stream->msocks) {
884 					HandleCallback(mtcp, MOS_HK_RCV, walk,
885 						       recvside_stream->side,
886 						       pctx, MOS_ON_REXMIT);
887 				} SOCKQ_FOREACH_END;
888 				/* recvside_stream->cb_events |= MOS_ON_REXMIT; */
889 				TRACE_DBG("RETX!\n");
890 				break;
891 			}
892 	}
893 }
894 /*--------------------------------------------------------------------------*/
895