xref: /mOS-networking-stack/core/src/tcp_rb.c (revision 05e3289c)
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <stdint.h>
4 #include <stdbool.h>
5 #include <string.h>
6 #include <assert.h>
7 #include <errno.h>
8 #include <unistd.h>
9 #include <sys/uio.h>
10 #include <ctype.h>
11 #include "debug.h"
12 #include "tcp_rb.h"
13 
14 #define FREE(x) do { free(x); x = NULL; } while (0)
15 #ifndef MIN
16 #define MIN(a, b) ((a) < (b) ? (a) : (b))
17 #endif
18 #ifndef MAX
19 #define MAX(a, b) ((a) < (b) ? (b) : (a))
20 #endif
21 
22 #define PRINTF(f, args...) printf("%s:%d: " f, __FILE__, __LINE__, ##args)
23 
24 /* -------------------------------------------------------------------------- */
25 /* Private */
26 /* -------------------------------------------------------------------------- */
27 
28 static inline boff_t
29 loff2boff(tcprb_t *rb, loff_t loff)
30 {
31 	int woff = loff - rb->head; /* window offset */
32 	if (woff < 0 || woff > rb->len)
33 		return -1;
34 
35 	return (boff_t)((loff + rb->corr) % rb->len);
36 }
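/*
 * Illustrative sketch (not part of the original source): logical offsets
 * (loff_t) grow monotonically with the stream, while buffer offsets (boff_t)
 * wrap modulo rb->len. Assuming rb->len == 2048, rb->head == 3000 and
 * rb->corr == 0, loff == 3500 maps to (3500 + 0) % 2048 == 1452, whereas
 * loff == 5100 lies outside the window (woff == 2100 > rb->len) and
 * loff2boff() returns -1.
 */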
37 /*--------------------------------------------------------------------------*/
38 static inline tcpfrag_t *
39 frags_new(void)
40 {
41 	return (tcpfrag_t *)calloc(sizeof(tcpfrag_t), 1);
42 }
43 /*--------------------------------------------------------------------------*/
44 static inline void
45 frags_del(tcpfrag_t *f)
46 {
47 	free(f);
48 }
49 /*--------------------------------------------------------------------------*/
50 static inline void
51 frags_insert(tcprb_t *rb, tcpfrag_t *f)
52 {
53 	struct _tcpfrag_t *walk;
54 
55 	TAILQ_FOREACH(walk, &rb->frags, link)
56 		if (walk->head > f->head) {
57 			TAILQ_INSERT_BEFORE(walk, f, link);
58 			break;
59 		}
60 
61 	if (!walk)
62 		TAILQ_INSERT_TAIL(&rb->frags, f, link);
63 }
64 /*--------------------------------------------------------------------------*/
65 static inline tcpbufseg_t *
66 bufseg_new(mem_pool_t mp)
67 {
68 	//return (tcpbufseg_t *)malloc(sizeof(tcpbufseg_t));
69 	return (tcpbufseg_t *)MPAllocateChunk(mp);
70 }
71 /*--------------------------------------------------------------------------*/
72 static inline void
73 bufseg_del(mem_pool_t mp, tcpbufseg_t *b)
74 {
75 	//free(b);
76 	MPFreeChunk(mp, b);
77 }
78 /*--------------------------------------------------------------------------*/
79 /* For on-demand buffer segment allocation, the buffer segment queue must
80  * always be traversed in the direction from head to tail */
81 static inline tcpbufseg_t *
82 buf_first(tcprb_t *rb)
83 {
84 	tcpbufseg_t *tmp;
85 
86 	if ((tmp = TAILQ_FIRST(&rb->bufsegs)))
87 		return tmp;
88 
89 	if ((rb->lbufsegs == 0) || ((tmp = bufseg_new(rb->mp)) == NULL))
90 		return NULL;
91 
92 	tmp->id = 0;
93 	TAILQ_INSERT_TAIL(&rb->bufsegs, tmp, link);
94 
95 	return tmp;
96 }
97 /*--------------------------------------------------------------------------*/
98 static inline tcpbufseg_t *
99 buf_next(tcprb_t *rb, tcpbufseg_t *buf)
100 {
101 	tcpbufseg_t *tmp;
102 
103 	if ((tmp = TAILQ_NEXT(buf, link)))
104 		return tmp;
105 
106 	if ((rb->lbufsegs <= buf->id + 1) || ((tmp = bufseg_new(rb->mp)) == NULL))
107 		return NULL;
108 
109 	tmp->id = buf->id + 1;
110 	TAILQ_INSERT_TAIL(&rb->bufsegs, tmp, link);
111 
112 	return tmp;
113 }
114 /*--------------------------------------------------------------------------*/
115 static inline tcpbufseg_t *
116 buf_getbuf(tcprb_t *rb, boff_t off)
117 {
118 	tcpbufseg_t *buf;
119 	int id = off / UNITBUFSIZE;
120 	assert(id >= 0);
121 
122 #if 0
123 	int max = rb->len / UNITBUFSIZE;
124 
125 	if (max / 2 > id) {
126 		buf = TAILQ_FIRST(&rb->bufsegs);
127 		while (id--)
128 			buf = TAILQ_NEXT(buf, link);
129 	} else {
130 		buf = TAILQ_LAST(&rb->bufsegs, blist);
131 		while (max - ++id)
132 			buf = TAILQ_PREV(buf, blist, link);
133 	}
134 #else
135 	buf = buf_first(rb);
136 	while (id--)
137 		buf = buf_next(rb, buf);
138 #endif
139 
140 	assert(buf);
141 
142 	return buf;
143 }
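/*
 * Illustrative sketch (not part of the original source): segments are
 * allocated lazily while walking from head to tail. Assuming UNITBUFSIZE ==
 * 1024 and an initially empty rb->bufsegs list with rb->lbufsegs == 4,
 * buf_getbuf(rb, 2500) allocates segments with id 0, 1 and 2 from rb->mp on
 * the way and returns the one with id == 2 (2500 / 1024).
 */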
144 /*--------------------------------------------------------------------------*/
145 #define TAILQ_LOOP_NEXT(head, headname, elm, field) \
146 	((*(((struct headname *)((head)->tqh_last))->tqh_last)) == (elm) ? \
147 	 ((head)->tqh_first) : ((elm)->field.tqe_next))
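/*
 * Illustrative note (not part of the original source): TAILQ_LOOP_NEXT() is a
 * wrap-around variant of TAILQ_NEXT(); when `elm` is the last element of the
 * queue it yields TAILQ_FIRST(head) instead of NULL, which the circular
 * traversal in the shrink path below relies on.
 */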
148 
149 static inline int
150 buf_try_resize(tcprb_t *rb, int len, loff_t data, int datalen)
151 {
152 	/* FIXME: resizing is temporarily disabled because of the on-demand
153 	 * buffer allocation patch */
154 	//return 0;
155 
156 	assert(rb);
157 	assert(rb->len);
158 	assert(len % UNITBUFSIZE == 0 || len < 2);
159 
160 	int segdiff = (len - rb->len) / UNITBUFSIZE;
161 	if (segdiff == 0)
162 		return 0;
163 
164 	boff_t head = loff2boff(rb, data);
165 	boff_t tail = (data + datalen - 1) % rb->len;
166 
167 	tcpbufseg_t *headseg = buf_getbuf(rb, head);
168 	tcpbufseg_t *tailseg = buf_getbuf(rb, tail);
169 
170 	if (segdiff > 0) {
171 		/* Expand the buffer */
172 		rb->len = len;
173 		boff_t new_head = loff2boff(rb, data);
174 		rb->corr = (rb->corr + (segdiff * UNITBUFSIZE) - (new_head - head))
175 				   % rb->len;
176 		if (rb->corr < 0)
177 			rb->corr += rb->len;
178 
179 		if (head > tail && headseg == tailseg) {
180 			tcpbufseg_t *seg = bufseg_new(rb->mp);
181 			assert(seg);
182 			memcpy(&seg->buf[head % UNITBUFSIZE],
183 				   &headseg->buf[head % UNITBUFSIZE],
184 				   UNITBUFSIZE - (head % UNITBUFSIZE));
185 			TAILQ_INSERT_AFTER(&rb->bufsegs, tailseg, seg, link);
186 			headseg = seg;
187 			segdiff--;
188 		}
189 		while (segdiff--) {
190 			tcpbufseg_t *seg = bufseg_new(rb->mp);
191 			assert(seg);
192 			TAILQ_INSERT_AFTER(&rb->bufsegs, tailseg, seg, link);
193 		}
194 	} else /* if (segdiff < 0) */ {
195 		/* Shrink the buffer */
196 		tcpbufseg_t *seg;
197 
198 		segdiff *= -1;
199 		int shrinkable = (rb->len - datalen) / UNITBUFSIZE;
200 		int tobeshrank = segdiff;
201 
202 		rb->len -= (tobeshrank * UNITBUFSIZE);
203 		if (rb->len) {
204 			boff_t new_head = loff2boff(rb, data);
205 			rb->corr = (rb->corr - (tobeshrank * UNITBUFSIZE) -
206 						(new_head - head)) % rb->len;
207 			if (rb->corr < 0)
208 				rb->corr += rb->len;
209 		}
210 
211 		if (shrinkable < segdiff) {
212 			/* Mark some fragments as empty */
213 			loff_t eh = data + rb->len +
214 				(UNITBUFSIZE - (loff2boff(rb, data) % UNITBUFSIZE));
215 			//loff_t et = eh + (segdiff - shrinkable) * UNITBUFSIZE;
216 			loff_t et = data + datalen;
217 
218 			struct _tcpfrag_t *f = TAILQ_FIRST(&rb->frags), *new;
219 
220 			TAILQ_FOREACH(f, &rb->frags, link) {
221 				if (f->tail <= eh)
222 					continue;
223 
224 				if (f->tail > et) {
225 					new = frags_new();
226 					new->head = et;
227 					new->tail = f->tail;
228 					TAILQ_INSERT_AFTER(&rb->frags, f, new, link);
229 
230 					f->tail = et;
231 				}
232 
233 				if (f->head >= eh && f->tail <= et) {
234 					f->empty = true;
235 					continue;
236 				}
237 
238 				if (f->head < eh) {
239 					new = frags_new();
240 					new->head = eh;
241 					new->tail = f->tail;
242 					TAILQ_INSERT_AFTER(&rb->frags, f, new, link);
243 
244 					f->tail = eh;
245 					continue;
246 				}
247 			}
248 
249 			/* shrink the buffer */
250 			int skip = rb->len / UNITBUFSIZE;
251 			for (seg = headseg; seg; ) {
252 				if (skip-- <= 0) {
253 					TAILQ_REMOVE(&rb->bufsegs, seg, link);
254 					bufseg_del(rb->mp, seg);
255 				}
256 				seg = TAILQ_LOOP_NEXT(&rb->bufsegs, blist, tailseg, link);
257 				if (seg == headseg)
258 					break;
259 			}
260 		} else {
261 			while (tobeshrank &&
262 					(seg = TAILQ_LOOP_NEXT(&rb->bufsegs, blist, tailseg, link))
263 					!= headseg) {
264 				TAILQ_REMOVE(&rb->bufsegs, seg, link);
265 				bufseg_del(rb->mp, seg);
266 				tobeshrank--;
267 			}
268 			if (tobeshrank) {
269 				assert(tobeshrank == 1);
270 				assert((tail % UNITBUFSIZE) < (head % UNITBUFSIZE));
271 				memcpy(&tailseg->buf[head % UNITBUFSIZE],
272 						&headseg->buf[head % UNITBUFSIZE],
273 						UNITBUFSIZE - (head % UNITBUFSIZE));
274 				TAILQ_REMOVE(&rb->bufsegs, headseg, link);
275 				bufseg_del(rb->mp, headseg);
276 				headseg = tailseg;
277 				tobeshrank = 0;
278 			}
279 		}
280 	}
281 
282 	return 0;
283 }
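/*
 * Illustrative note (not part of the original source): buf_try_resize() is
 * driven by tcprb_resize() below with data == rb->head and datalen ==
 * tcprb_get_datalen(rb); it adds or releases whole UNITBUFSIZE segments and
 * recomputes rb->corr so that already buffered data keeps mapping to its
 * buffer segments.
 */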
284 /*--------------------------------------------------------------------------*/
285 /* buf_read() and buf_write() are highly symmetric, so a macro is used for
286  * the function definitions to ease code maintenance. */
287 
288 /* TODO: We do not have tailing anymore; simplify these functions. */
289 
290 #define MEMCPY_FOR_read(a, b, len) memcpy(a, b, len)
291 #define MEMCPY_FOR_write(a, b, len) memcpy(b, a, len)
292 
293 #define FUNCDEF_BUF_RW(rw) \
294 static inline void \
295 buf_##rw(tcprb_t *rb, uint8_t *buf, int len, loff_t off) \
296 { \
297 	tcpbufseg_t *bufseg = NULL; \
298  \
299 	assert(rb); \
300  \
301 	boff_t from = loff2boff(rb, off); \
302 	boff_t to = loff2boff(rb, off + len); \
303 	tcpbufseg_t *bufseg_from = buf_getbuf(rb, from); \
304 	tcpbufseg_t *bufseg_to = buf_getbuf(rb, to); \
305  \
306 	if (from > to) { \
307 		off = UNITBUFSIZE - (from % UNITBUFSIZE); \
308 		MEMCPY_FOR_##rw(&buf[0], &bufseg_from->buf[from % UNITBUFSIZE], off); \
309 		for (bufseg = buf_next(rb, bufseg_from); \
310 			 bufseg && (bufseg != bufseg_to); \
311 			 bufseg = buf_next(rb, bufseg)) { \
312 			MEMCPY_FOR_##rw(&buf[off], &bufseg->buf[0], UNITBUFSIZE); \
313 			off += UNITBUFSIZE; \
314 		} \
315 		for (bufseg = buf_first(rb); \
316 			 bufseg && (bufseg != bufseg_to); \
317 			 bufseg = buf_next(rb, bufseg)) { \
318 			MEMCPY_FOR_##rw(&buf[off], &bufseg->buf[0], UNITBUFSIZE); \
319 			off += UNITBUFSIZE; \
320 		} \
321 		MEMCPY_FOR_##rw(&buf[off], &bufseg_to->buf[0], to % UNITBUFSIZE); \
322 	} else if (bufseg_from == bufseg_to) { \
323 		MEMCPY_FOR_##rw(&buf[0], &bufseg_from->buf[from % UNITBUFSIZE], len); \
324 	} else { \
325 		off = UNITBUFSIZE - (from % UNITBUFSIZE); \
326 		MEMCPY_FOR_##rw(&buf[0], &bufseg_from->buf[from % UNITBUFSIZE], off); \
327 		for (bufseg = buf_next(rb, bufseg_from); \
328 			 bufseg && (bufseg != bufseg_to); \
329 			 bufseg = buf_next(rb, bufseg)) { \
330 			MEMCPY_FOR_##rw(&buf[off], &bufseg->buf[0], UNITBUFSIZE); \
331 			off += UNITBUFSIZE; \
332 		} \
333 		MEMCPY_FOR_##rw(&buf[off], &bufseg_to->buf[0], to % UNITBUFSIZE); \
334 	} \
335 }
336 
337 FUNCDEF_BUF_RW(read)
338 FUNCDEF_BUF_RW(write)
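/*
 * Illustrative sketch (not part of the original source): the two expansions
 * differ only in the direction of the memcpy(), e.g.
 *
 *   buf_write(rb, payload, paylen, off);   copy linear buffer -> ring buffer
 *   buf_read(rb, scratch, paylen, off);    copy ring buffer -> linear buffer
 *
 * where `payload`, `scratch`, `paylen` and `off` are assumed to be supplied
 * by a caller that has already validated the offset, as tcprb_pwrite() and
 * tcprb_ppeek() do.
 */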
339 /* -------------------------------------------------------------------------- */
340 /* Public */
341 /* -------------------------------------------------------------------------- */
342 
343 inline loff_t
344 seq2loff(tcprb_t *rb, uint32_t seq, uint32_t isn)
345 {
346 	loff_t off = seq - isn;
347 
348 	while (off < rb->head)
349 		off += 0x100000000;
350 
351 	return off;
352 }
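/*
 * Illustrative sketch (not part of the original source): logical offsets are
 * wider than 32 bits, so TCP sequence-number wrap-around is absorbed here.
 * E.g. with isn == 0xfffffff0 and seq == 0x00000010, the unsigned 32-bit
 * difference is 0x20, and the loop then adds 2^32 as many times as needed to
 * push the offset past rb->head.
 */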
353 /*--------------------------------------------------------------------------*/
354 inline tcprb_t *
355 tcprb_new(mem_pool_t mp, int len, unsigned buf_mgmt)
356 {
357 	tcprb_t *rb;
358 
359 	if (len % UNITBUFSIZE || len < 2)
360 		return NULL;
361 
362 	if (!(rb = calloc(sizeof(tcprb_t), 1)))
363 		return NULL;
364 
365 	TAILQ_INIT(&rb->bufsegs);
366 	rb->lbufsegs = ((len - 1) / UNITBUFSIZE) + 1;
367 
368 #if 0
369 	int i;
370 	for (i = 0; i < rb->lbufsegs; i++) {
371 		tcpbufseg_t *bufseg = bufseg_new(mp);
372 		if (!bufseg) {
373 			TAILQ_FOREACH(bufseg, &rb->bufsegs, link)
374 				bufseg_del(mp, bufseg);
375 			FREE(rb);
376 			return NULL;
377 		}
378 		TAILQ_INSERT_TAIL(&rb->bufsegs, bufseg, link);
379 	}
380 #endif
381 
382 	rb->buf_mgmt = buf_mgmt;
383 	rb->mp = mp;
384 	rb->len = rb->metalen = len;
385 	TAILQ_INIT(&rb->frags);
386 
387 	return rb;
388 }
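/*
 * Illustrative usage sketch (not part of the original source); `pool` is a
 * hypothetical mem_pool_t whose chunks are sized for tcpbufseg_t:
 *
 *   tcprb_t *rb = tcprb_new(pool, 8 * UNITBUFSIZE, BUFMGMT_FULL);
 *   if (rb != NULL) {
 *           ... tcprb_pwrite() / tcprb_ppeek() ...
 *           tcprb_del(rb);
 *   }
 *
 * The length must be a positive multiple of UNITBUFSIZE; otherwise NULL is
 * returned. Buffer segments themselves are only allocated on first use.
 */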
389 /*--------------------------------------------------------------------------*/
390 inline int
391 tcprb_del(tcprb_t *rb)
392 {
393 	struct _tcpbufseg_t *bwalk, *bnext;
394 	struct _tcpfrag_t *fwalk, *fnext;
395 
396 	for (bwalk = TAILQ_FIRST(&rb->bufsegs); bwalk; bwalk = bnext) {
397 		bnext = TAILQ_NEXT(bwalk, link);
398 		bufseg_del(rb->mp, bwalk);
399 	}
400 
401 	for (fwalk = TAILQ_FIRST(&rb->frags); fwalk; fwalk = fnext) {
402 		fnext = TAILQ_NEXT(fwalk, link);
403 		frags_del(fwalk);
404 	}
405 
406 	FREE(rb);
407 
408 	return 0;
409 }
410 /*--------------------------------------------------------------------------*/
411 inline int
412 tcprb_setpile(tcprb_t *rb, loff_t new)
413 {
414 	if (!rb || new > (rb->head + rb->len) || new < rb->head)
415 		return -1;
416 
417 	tcpfrag_t *cf = TAILQ_FIRST(&rb->frags); /* contiguous buffer seg. */
418 
419 	if (!cf || (cf->head != rb->head)) {
420 		/* No contiguous buffer seg. */
421 		assert(rb->pile == rb->head);
422 		return -1;
423 	}
424 
425 	if (new > cf->tail)
426 		return -1;
427 
428 	rb->pile = new;
429 
430 	return 0;
431 }
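/*
 * Illustrative note (not part of the original source): `pile` is the reader's
 * position inside the contiguous fragment at the head of the window, so
 * rb->head <= rb->pile <= cf->tail is expected to hold. After a successful
 * tcprb_setpile(rb, rb->pile + n), the next tcprb_cflen() is smaller by n.
 */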
432 /*--------------------------------------------------------------------------*/
433 inline int
434 tcprb_cflen(tcprb_t *rb)
435 {
436 	tcpfrag_t *cf = TAILQ_FIRST(&rb->frags); /* contiguous buffer seg. */
437 
438 	if (!cf || (cf->head != rb->head))
439 		/* No contiguous buffer seg. to traverse */
440 		return 0;
441 
442 	int cflen = cf->tail - rb->pile; /* length of cf */
443 
444 	assert(cflen >= 0);
445 
446 	return cflen;
447 }
448 /*--------------------------------------------------------------------------*/
449 inline int
450 tcprb_ffhead(tcprb_t *rb, int len)
451 {
452 	if (!rb || len < 0)
453 		return 0;
454 	else if (len == 0)
455 		return 0;
456 
457 	tcpfrag_t *cf = TAILQ_FIRST(&rb->frags); /* contiguous buffer seg. */
458 
459 	if (!cf || (cf->head != rb->head))
460 		/* No contiguous buffer seg. to traverse */
461 		return 0;
462 
463 	int cflen = cf->tail - cf->head; /* length of cf */
464 	assert(cflen > 0);
465 
466 	int ff = MIN(len, cflen);
467 	ff = MIN(ff, rb->pile - rb->head); /* head cannot go further than pile */
468 
469 	if (cflen == ff) {
470 		/* Fast forward entire contiguous segment */
471 		TAILQ_REMOVE(&rb->frags, cf, link);
472 		frags_del(cf);
473 	} else {
474 		cf->head += ff;
475 	}
476 
477 	rb->head += ff;
478 
479 	return ff;
480 }
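/*
 * Illustrative sketch (not part of the original source): assuming rb->head ==
 * 0, rb->pile == 1000 and a single contiguous fragment covering [0, 1500),
 * tcprb_ffhead(rb, 4000) may not advance past the pile; it trims the fragment
 * to [1000, 1500), sets rb->head to 1000 and returns 1000.
 */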
481 /*--------------------------------------------------------------------------*/
482 /* return the number of bytes that would be fast forwarded */
483 inline int
484 tcprb_fflen(tcprb_t *rb, uint8_t *buf, int len, loff_t off)
485 {
486 	if (!rb || !buf || len < 0 ||
487 		off < rb->head || off >= rb->pile + rb->metalen)
488 		return -1;
489 	/* nothing to write or already written */
490 	else if (len == 0 || off + len < rb->pile)
491 		return 0;
492 
493 	int ff = (off + len) - (rb->head + MIN(rb->len, rb->metalen));
494 	if (ff <= 0)
495 		return 0;
496 
497 	tcpfrag_t *cf = TAILQ_FIRST(&rb->frags); /* contiguous buffer seg. */
498 
499 	if (!cf || (cf->head != rb->head))
500 		/* No contiguous buffer seg. to traverse */
501 		return 0;
502 
503 	int cflen = cf->tail - cf->head; /* length of cf */
504 	assert(cflen > 0);
505 
506 	int fflen = MIN(len, cflen);
507 	fflen = MIN(ff, rb->pile - rb->head); /* head cannot go further than pile */
508 
509 	return fflen;
510 }
511 /*--------------------------------------------------------------------------*/
512 static inline int
513 tcprb_get_datalen(tcprb_t *rb)
514 {
515 	tcpfrag_t *lastfrag = TAILQ_LAST(&rb->frags, flist);
516 	return lastfrag ? (int)(lastfrag->tail - rb->head) : 0;
517 }
518 /*--------------------------------------------------------------------------*/
519 inline int
520 tcprb_resize_meta(tcprb_t *rb, int len)
521 {
522 #ifdef DISABLE_DYN_RESIZE
523 	assert(len == 0);
524 
525 	struct _tcpfrag_t *fwalk, *fnext;
526 
527 	rb->metalen = 0;
528 
529 	for (fwalk = TAILQ_FIRST(&rb->frags); fwalk; fwalk = fnext) {
530 		fnext = TAILQ_NEXT(fwalk, link);
531 		TAILQ_REMOVE(&rb->frags, fwalk, link);
532 		frags_del(fwalk);
533 	}
534 
535 	return 0;
536 #else
537 	int diff, ff, datalen;
538 
539 	if ((diff = len - rb->metalen) > 0) {
540 		rb->metalen = len;
541 	} else if (diff < 0) {
542 		ff = diff - (rb->len - tcprb_get_datalen(rb));
543 		tcprb_ffhead(rb, ff);
544 		datalen = tcprb_get_datalen(rb);
545 		rb->metalen = MAX(datalen, len);
546 	}
547 
548 	return rb->metalen;
549 #endif
550 }
551 /*--------------------------------------------------------------------------*/
552 inline int
553 tcprb_setpolicy(tcprb_t *rb, uint8_t policy)
554 {
555 	if (policy >= MOS_OVERLAP_CNT)
556 		return -1;
557 
558 	rb->overlap = policy;
559 	return 0;
560 }
561 /*--------------------------------------------------------------------------*/
562 inline int
563 tcprb_resize(tcprb_t *rb, int len)
564 {
565 #ifdef DISABLE_DYN_RESIZE
566 	assert(len == 0);
567 
568 	struct _tcpbufseg_t *bwalk, *bnext;
569 	struct _tcpfrag_t *fwalk, *fnext;
570 
571 	rb->metalen = 0;
572 	rb->len = 0;
573 
574 	for (bwalk = TAILQ_FIRST(&rb->bufsegs); bwalk; bwalk = bnext) {
575 		bnext = TAILQ_NEXT(bwalk, link);
576 		TAILQ_REMOVE(&rb->bufsegs, bwalk, link);
577 		bufseg_del(rb->mp, bwalk);
578 	}
579 
580 	for (fwalk = TAILQ_FIRST(&rb->frags); fwalk; fwalk = fnext) {
581 		fnext = TAILQ_NEXT(fwalk, link);
582 		TAILQ_REMOVE(&rb->frags, fwalk, link);
583 		frags_del(fwalk);
584 	}
585 
586 	return 0;
587 #else
588 	int diff, ff;
589 
590 	if (len % UNITBUFSIZE)
591 		return -1;
592 	else if (len == rb->len)
593 		return 0;
594 
595 	if ((diff = rb->len - len) > 0 && /* shrinking */
596 		(ff = diff - (rb->len - tcprb_get_datalen(rb))))
597 		tcprb_ffhead(rb, ff);
598 
599 	return buf_try_resize(rb, len, rb->head,
600 						  tcprb_get_datalen(rb));
601 #endif
602 }
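/*
 * Illustrative usage sketch (not part of the original source): the new length
 * must stay UNITBUFSIZE-aligned, e.g.
 *
 *   if (tcprb_resize(rb, 4 * UNITBUFSIZE) < 0)
 *           TRACE_DBG("resize rejected (unaligned length)\n");
 *
 * When shrinking below the amount of currently buffered data, the head is
 * fast-forwarded first so that only the most recent data is kept.
 */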
603 /*--------------------------------------------------------------------------*/
604 inline int
605 tcprb_ppeek(tcprb_t *rb, uint8_t *buf, int len, loff_t off)
606 {
607 	struct _tcpfrag_t *f;
608 
609 	if (!rb || rb->buf_mgmt != BUFMGMT_FULL || !buf || len < 0)
610 		return -1;
611 	else if (len == 0)
612 		return 0;
613 
614 	TAILQ_FOREACH(f, &rb->frags, link)
615 		if (off >= f->head && off < f->tail) {
616 			if (f->empty)
617 				f = NULL;
618 			break;
619 		}
620 
621 	if (!f) /* No proper fragment found */
622 		return -1;
623 
624 	int plen = MIN(len, f->tail - off);
625 
626 	buf_read(rb, buf, plen, off);
627 
628 	return plen;
629 }
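/*
 * Illustrative usage sketch (not part of the original source): peeking does
 * not consume data, so the caller advances the pile explicitly, e.g.
 *
 *   uint8_t tmp[512];
 *   int n = tcprb_ppeek(rb, tmp, sizeof(tmp), rb->pile);
 *   if (n > 0)
 *           tcprb_setpile(rb, rb->pile + n);
 *
 * This only works when the ring buffer keeps payload (rb->buf_mgmt ==
 * BUFMGMT_FULL); otherwise tcprb_ppeek() returns -1.
 */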
630 /*--------------------------------------------------------------------------*/
631 inline int
632 tcprb_pwrite(tcprb_t *rb, uint8_t *buf, int len, loff_t off)
633 {
634 	int ff, olen;
635 	loff_t efhead = -1;      /* head of empty fragment */
636 	int eflen = 0;           /* length of empty fragment */
637 
638 	/*
639 	 * We no longer raise MOS_ON_ERROR here;
640 	 * the out-of-receive-buffer case is handled in ProcessTCPPayload().
641 	 */
642 	if (!rb || !buf || len < 0 ||
643 		off < rb->head || off >= rb->pile + rb->metalen)
644 		return -1;
645 	else if (len == 0)
646 		return 0;
647 	else if (off + len < rb->pile) /* already written */
648 		return len;
649 
650 	/* check whether we should move its head offset (fast forward) */
651 	olen = len;
652 	if ((ff = (off + len) - (rb->head + MIN(rb->len, rb->metalen))) > 0)
653 		len -= ff - tcprb_ffhead(rb, ff);
654 	if (rb->metalen > rb->len)
655 		eflen = MIN(olen - len, rb->metalen - rb->len);
656 	if (eflen)
657 		efhead = off + len;
658 
659 	/* write data */
660 	struct _tcpfrag_t *f = TAILQ_FIRST(&rb->frags), *fnext;
661 	int uoff = 0; /* offset of `buf` */
662 
663 	/* head: position of first byte of the fragment
664 	 * tail: head + length of the fragment */
665 	while (uoff < len) {
666 		bool skip = false;
667 		int wrlen = 0; /* length to be written */
668 
669 		while (f) {
670 			struct _tcpfrag_t *ef, *nef;
671 			fnext = TAILQ_NEXT(f, link);
672 
673 			if (f->empty) {
674 				/* skip empty fragment */
675 				f = fnext;
676 				continue;
677 			}
678 
679 			if (f->head <= off + uoff) {
680 				if (f->tail > off + uoff) {
681 					skip = true;
682 					wrlen = f->tail - (off + uoff);
683 					break;
684 				} else if (f->tail == off + uoff) {
685 					skip = false;
686 
687 					/* shrink empty fragment */
688 					for (ef = fnext;
689 						 ef && ef->empty && ef->head < f->tail + len - uoff;
690 						 ef = nef) {
691 						nef = TAILQ_NEXT(ef, link);
692 
693 						if (ef->tail <= f->tail + len - uoff) {
694 							TAILQ_REMOVE(&rb->frags, ef, link);
695 						} else {
696 							ef->head = f->tail + len - uoff;
697 							/* break is not necessary, but for early escape */
698 							/* the break is not strictly necessary; it only escapes early */
699 						}
700 					}
701 					fnext = TAILQ_NEXT(f, link);
702 
703 					wrlen = fnext ? MIN(fnext->head - (off + uoff), len - uoff)
704 								  : len - uoff;
705 					f->tail += wrlen;
706 					if (fnext && (f->tail == fnext->head)) {
707 						/* merge 'f' and 'fnext' */
708 						f->tail = fnext->tail;
709 						TAILQ_REMOVE(&rb->frags, fnext, link);
710 						frags_del(fnext);
711 					}
712 					break;
713 				}
714 			} else if (f->head <= off + len) {
715 				skip = false;
716 				wrlen = MIN(f->head - (off + uoff), len - uoff);
717 				f->head -= wrlen;
718 
719 				/* shrink empty fragment */
720 				for (ef = TAILQ_PREV(f, flist, link);
721 					 ef && ef->empty && ef->tail < f->head;
722 					 ef = nef) {
723 					nef = TAILQ_PREV(ef, flist, link);
724 
725 					if (ef->head <= f->head) {
726 						TAILQ_REMOVE(&rb->frags, ef, link);
727 					} else {
728 						ef->tail = f->head;
729 						/* the break is not strictly necessary; it only escapes early */
730 						break;
731 					}
732 				}
733 
734 				break;
735 			} else /* if (f->head > off + len) */ {
736 				/* early escape */
737 				f = NULL;
738 				break;
739 			}
740 
741 			f = fnext;
742 		}
743 
744 		if (!f) {
745 			struct _tcpfrag_t *new;
746 
747 			/* create new fragment and insert it into the list */
748 			new = frags_new();
749 
750 			new->head = off;
751 			new->tail = off + len;
752 			wrlen = len;
753 
754 			frags_insert(rb, new);
755 		}
756 
757 		/* copy data */
758 		if ((rb->overlap == MOS_OVERLAP_POLICY_LAST || !skip)
759 			&& rb->buf_mgmt)
760 			buf_write(rb, &buf[uoff], wrlen, off + uoff);
761 		uoff += wrlen;
762 	}
763 
764 	/* Insert empty fragment if necessary */
765 	if (eflen) {
766 		assert(rb->metalen > rb->len);
767 
768 		struct _tcpfrag_t *new;
769 
770 		/* create new fragment and insert it into the list */
771 		new = frags_new();
772 
773 		new->empty = true;
774 		new->head = efhead;
775 		new->tail = efhead + eflen;
776 
777 		frags_insert(rb, new);
778 	}
779 
780 	return len;
781 }
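/*
 * Illustrative usage sketch (not part of the original source), mirroring how
 * the payload path is expected to call this function; `payload`, `paylen`,
 * `seq` and `irs` stand in for fields taken from the parsed packet:
 *
 *   loff_t off = seq2loff(rb, seq, irs + 1);
 *   if (tcprb_pwrite(rb, payload, paylen, off) < 0)
 *           TRACE_DBG("offset outside the receive window\n");
 *
 * Out-of-order segments simply create additional fragments in rb->frags; they
 * are merged once the gap in between is filled.
 */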
782 /*--------------------------------------------------------------------------*/
783 #define PRT_CL_RST		"\x1b[0m"
784 #define PRT_FG_BLK		"\x1b[30m"
785 #define PRT_FG_RED		"\x1b[31m"
786 #define PRT_FG_GRN		"\x1b[32m"
787 #define PRT_FG_YEL		"\x1b[33m"
788 #define PRT_FG_BLU		"\x1b[34m"
789 #define PRT_FG_PUR		"\x1b[35m"
790 #define PRT_FG_CYN		"\x1b[36m"
791 #define PRT_FG_GRY		"\x1b[37m"
792 #define PRT_BG_BLK		"\x1b[40m"
793 #define PRT_BG_RED		"\x1b[41m"
794 #define PRT_BG_GRN		"\x1b[42m"
795 #define PRT_BG_YEL		"\x1b[43m"
796 #define PRT_BG_BLU		"\x1b[44m"
797 #define PRT_BG_PUR		"\x1b[45m"
798 #define PRT_BG_CYN		"\x1b[46m"
799 #define PRT_BG_GRY		"\x1b[47m"
800 /*--------------------------------------------------------------------------*/
801 inline void
802 tcprb_printfrags(struct _tcprb_t *rb)
803 {
804 	struct _tcpfrag_t *walk;
805 
806 	printf("frags: ");
807 	TAILQ_FOREACH(walk, &rb->frags, link) {
808 		printf("[%lu - %lu]:'", walk->head, walk->tail - 1);
809 #if 1
810 		if (walk->empty)
811 			printf("EMPTY");
812 		else {
813 			loff_t i;
814 			for (i = walk->head; i < walk->tail; i++) {
815 				tcpbufseg_t *bufseg;
816 				boff_t j = loff2boff(rb, i);
817 				bufseg = buf_getbuf(rb, j);
818 				char c = bufseg->buf[j % UNITBUFSIZE];
819 				if (isgraph(c))
820 					printf("%c", c);
821 				else {
822 					printf(PRT_FG_BLU);
823 					if (c == ' ')
824 						printf(" ");
825 					else if (c == '\0')
826 						printf("0");
827 					else if (c == '\r')
828 						printf("R");
829 					else if (c == '\n')
830 						printf("N");
831 					else if (c == '\t')
832 						printf("T");
833 					else
834 						printf("?");
835 					printf(PRT_CL_RST);
836 				}
837 			}
838 		}
839 #endif
840 		printf("' ");
841 	}
842 	printf("\n");
843 }
844 inline void
845 tcprb_printbufsegs(tcprb_t *rb)
846 {
847 	struct _tcpbufseg_t *walk;
848 
849 	printf("bufsegs: ");
850 	TAILQ_FOREACH(walk, &rb->bufsegs, link) {
851 		printf("[%d]:'", walk->id);
852 #if 1
853 		int j;
854 		for (j = 0; j < UNITBUFSIZE; j++) {
855 			char c = walk->buf[j];
856 			if (isgraph(c))
857 				printf("%c", c);
858 			else {
859 				printf(PRT_FG_BLU);
860 				if (c == ' ')
861 					printf(" ");
862 				else if (c == '\0')
863 					printf("0");
864 				else if (c == '\r')
865 					printf("R");
866 				else if (c == '\n')
867 					printf("N");
868 				else if (c == '\t')
869 					printf("T");
870 				else
871 					printf("?");
872 				printf(PRT_CL_RST);
873 			}
874 		}
875 #endif
876 		printf("' ");
877 	}
878 	printf("\n");
879 }
880 /*--------------------------------------------------------------------------*/
881 inline void
882 tcprb_printrb(struct _tcprb_t *rb)
883 {
884 	printf("  rb: len: %d, meta: %d, "
885 			"(head: %ld <= pile: %ld <= tail: %ld)\n  ",
886 			rb->len, rb->metalen, rb->head, rb->pile, rb->head + rb->metalen);
887 	tcprb_printfrags(rb);
888 	tcprb_printbufsegs(rb);
889 	printf("-------------------------------------------------\n");
890 }
891 /*--------------------------------------------------------------------------*/
892 inline void
893 tcp_rb_overlapchk(mtcp_manager_t mtcp, struct pkt_ctx *pctx,
894 		    struct tcp_stream *recvside_stream)
895 {
896 #define DOESOVERLAP(a1, a2, b1, b2)				\
897 	((a1 != b2) && (a2 != b1) && ((a1 > b2) != (a2 > b1)))
898 
899 	/* Check whether this packet is retransmitted or not. */
900 	tcprb_t *rb;
901 	struct socket_map *walk;
902 	if (pctx->p.payloadlen > 0 && recvside_stream->rcvvar != NULL
903 	    && (rb = recvside_stream->rcvvar->rcvbuf) != NULL) {
904 		struct _tcpfrag_t *f;
905 		loff_t off = seq2loff(rb, pctx->p.seq, recvside_stream->rcvvar->irs + 1);
906 		TAILQ_FOREACH(f, &rb->frags, link)
907 			if (DOESOVERLAP(f->head, f->tail, off, off + pctx->p.payloadlen)) {
908 				/*
909 				 * call it immediately (before pkt payload is attached
910 				 * to the tcp ring buffer)
911 				 */
912 				if (recvside_stream->side == MOS_SIDE_CLI) {
913 					SOCKQ_FOREACH_REVERSE(walk, &recvside_stream->msocks) {
914 						HandleCallback(mtcp, MOS_HK_RCV, walk,
915 							       recvside_stream->side,
916 							       pctx, MOS_ON_REXMIT);
917 					} SOCKQ_FOREACH_END;
918 				} else { /* recvside_stream->side == MOS_SIDE_SVR */
919 					SOCKQ_FOREACH_START(walk, &recvside_stream->msocks) {
920 						HandleCallback(mtcp, MOS_HK_RCV, walk,
921 							       recvside_stream->side,
922 							       pctx, MOS_ON_REXMIT);
923 					} SOCKQ_FOREACH_END;
924 				}
925 				/* recvside_stream->cb_events |= MOS_ON_REXMIT; */
926 				TRACE_DBG("RETX!\n");
927 				break;
928 			}
929 	}
930 }
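/*
 * Illustrative sketch (not part of the original source): DOESOVERLAP() treats
 * [a1, a2) and [b1, b2) as half-open ranges. For an existing fragment
 * [100, 200) and an incoming segment [150, 250): 100 != 250, 200 != 150 and
 * (100 > 250) != (200 > 150), so the segment is flagged and MOS_ON_REXMIT is
 * raised; adjacent ranges such as [100, 200) and [200, 300) do not overlap.
 */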
931 /*--------------------------------------------------------------------------*/
932