1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <stdint.h>
4 #include <stdbool.h>
5 #include <string.h>
6 #include <assert.h>
7 #include <errno.h>
8 #include <unistd.h>
9 #include <sys/uio.h>
10 #include <ctype.h>
11 #include "debug.h"
12 #include "tcp_rb.h"
13
/* Free a pointer and NULL it out, guarding against double-free and
 * use-after-free through the same variable. */
#define FREE(x) do { free(x); x = NULL; } while (0)
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b)) /* NOTE: evaluates both args twice */
#endif
#ifndef MAX
#define MAX(a, b) ((a) < (b) ? (b) : (a)) /* NOTE: evaluates both args twice */
#endif

/* Debug printf prefixed with "file:line:" (uses the GNU `args...`
 * named-variadic-macro extension). */
#define PRINTF(f, args...) printf("%s:%d: " f, __FILE__, __LINE__, ##args)
23
24 /* -------------------------------------------------------------------------- */
25 /* Private */
26 /* -------------------------------------------------------------------------- */
27
28 static inline boff_t
loff2boff(tcprb_t * rb,loff_t loff)29 loff2boff(tcprb_t *rb, loff_t loff)
30 {
31 int woff = loff - rb->head; /* window offset */
32 if (woff < 0 || woff > rb->len)
33 return -1;
34
35 return (boff_t)((loff + rb->corr) % rb->len);
36 }
37 /*--------------------------------------------------------------------------*/
38 static inline tcpfrag_t *
frags_new(void)39 frags_new(void)
40 {
41 return (tcpfrag_t *)calloc(sizeof(tcpfrag_t), 1);
42 }
43 /*--------------------------------------------------------------------------*/
/* Release a fragment descriptor obtained from frags_new().
 * free(NULL) is a no-op, so a NULL argument is safe. */
static inline void
frags_del(tcpfrag_t *f)
{
	free(f);
}
49 /*--------------------------------------------------------------------------*/
/* Insert fragment `f` into rb->frags, keeping the list sorted by
 * ascending `head` offset.  When no existing fragment starts after `f`,
 * TAILQ_FOREACH leaves `walk` NULL and `f` is appended at the tail. */
static inline void
frags_insert(tcprb_t *rb, tcpfrag_t *f)
{
	struct _tcpfrag_t *walk;

	TAILQ_FOREACH(walk, &rb->frags, link)
		if (walk->head > f->head) {
			TAILQ_INSERT_BEFORE(walk, f, link);
			break;
		}

	if (!walk)
		TAILQ_INSERT_TAIL(&rb->frags, f, link);
}
64 /*--------------------------------------------------------------------------*/
/* Allocate one UNITBUFSIZE buffer segment from memory pool `mp`.
 * Returns NULL when the pool is exhausted. */
static inline tcpbufseg_t *
bufseg_new(mem_pool_t mp)
{
	return (tcpbufseg_t *)MPAllocateChunk(mp);
}
71 /*--------------------------------------------------------------------------*/
/* Return a buffer segment to the memory pool it was allocated from. */
static inline void
bufseg_del(mem_pool_t mp, tcpbufseg_t *b)
{
	MPFreeChunk(mp, b);
}
78 /*--------------------------------------------------------------------------*/
/* For on-demand buffer segment allocation, the buffer segment queue must
 * always be traversed in the direction from head to tail. */

/* Return the first buffer segment, allocating it lazily on first touch.
 * Returns NULL when no segments are budgeted (rb->lbufsegs == 0) or the
 * memory pool is exhausted. */
static inline tcpbufseg_t *
buf_first(tcprb_t *rb)
{
	tcpbufseg_t *tmp;

	if ((tmp = TAILQ_FIRST(&rb->bufsegs)))
		return tmp;

	/* queue is empty: create segment 0 on demand */
	if ((rb->lbufsegs == 0) || ((tmp = bufseg_new(rb->mp)) == NULL))
		return NULL;

	tmp->id = 0;
	TAILQ_INSERT_TAIL(&rb->bufsegs, tmp, link);

	return tmp;
}
97 /*--------------------------------------------------------------------------*/
/* Return the segment following `buf`, allocating it lazily on demand.
 * Returns NULL when `buf` is the last budgeted segment (id + 1 reaches
 * rb->lbufsegs) or the memory pool is exhausted. */
static inline tcpbufseg_t *
buf_next(tcprb_t *rb, tcpbufseg_t *buf)
{
	tcpbufseg_t *tmp;

	if ((tmp = TAILQ_NEXT(buf, link)))
		return tmp;

	if ((rb->lbufsegs <= buf->id + 1) || ((tmp = bufseg_new(rb->mp)) == NULL))
		return NULL;

	tmp->id = buf->id + 1;
	TAILQ_INSERT_TAIL(&rb->bufsegs, tmp, link);

	return tmp;
}
114 /*--------------------------------------------------------------------------*/
/* Return the buffer segment covering physical offset `off`, walking (and
 * lazily allocating) segments from the head of the queue.
 * NOTE(review): if buf_next() returns NULL mid-walk (pool exhausted or
 * id limit reached), the next iteration passes NULL into buf_next() —
 * the assert only fires after the walk.  Confirm callers keep `off`
 * within the budgeted range. */
static inline tcpbufseg_t *
buf_getbuf(tcprb_t *rb, boff_t off)
{
	tcpbufseg_t *buf;
	int id = off / UNITBUFSIZE;	/* index of the segment holding `off` */
	assert(id >= 0);

#if 0
	/* disabled: bidirectional walk (pick the shorter direction) —
	 * incompatible with on-demand allocation */
	int max = rb->len / UNITBUFSIZE;

	if (max / 2 > id) {
		buf = TAILQ_FIRST(&rb->bufsegs);
		while (id--)
			buf = TAILQ_NEXT(buf, link);
	} else {
		buf = TAILQ_LAST(&rb->bufsegs, blist);
		while (max - ++id)
			buf = TAILQ_PREV(buf, blist, link);
	}
#else
	/* head-to-tail walk so missing segments are allocated in order */
	buf = buf_first(rb);
	while (id--)
		buf = buf_next(rb, buf);
#endif

	assert(buf);

	return buf;
}
144 /*--------------------------------------------------------------------------*/
/* Circular successor of `elm` in a TAILQ: wraps from the list tail back
 * to the head.  Lets the segment ring be walked starting anywhere. */
#define TAILQ_LOOP_NEXT(head, headname, elm, field) \
	((*(((struct headname *)((head)->tqh_last))->tqh_last)) == (elm) ? \
	 ((head)->tqh_first) : ((elm)->field.tqe_next))

/* Grow or shrink the ring to `len` bytes while preserving the `datalen`
 * buffered bytes that start at logical offset `data`.  Adjusts rb->len
 * and the logical-to-physical correction rb->corr, and inserts/removes
 * whole UNITBUFSIZE segments.  Data that no longer fits after a shrink
 * is kept in the fragment list but marked `empty`.  Always returns 0. */
static inline int
buf_try_resize(tcprb_t *rb, int len, loff_t data, int datalen)
{
	/* FIXME: resizing is temporally disabled because of on-demand buffer
	 * allocation patch */
	//return 0;

	assert(rb);
	assert(rb->len);
	assert(len % UNITBUFSIZE == 0 || len < 2);

	int segdiff = (len - rb->len) / UNITBUFSIZE;	/* segments to add (+) or drop (-) */
	if (segdiff == 0)
		return 0;

	boff_t head = loff2boff(rb, data);		/* physical offset of first data byte */
	boff_t tail = (data + datalen - 1) % rb->len;	/* physical offset of last data byte */

	tcpbufseg_t *headseg = buf_getbuf(rb, head);
	tcpbufseg_t *tailseg = buf_getbuf(rb, tail);

	if (segdiff > 0) {
		/* Expand the buffer */
		rb->len = len;
		/* recompute corr so `data` still maps to the same physical byte */
		boff_t new_head = loff2boff(rb, data);
		rb->corr = (rb->corr + (segdiff * UNITBUFSIZE) - (new_head - head))
			% rb->len;
		if (rb->corr < 0)
			rb->corr += rb->len;

		if (head > tail && headseg == tailseg) {
			/* data wraps inside a single segment: move the wrapped
			 * leading portion into a freshly inserted segment */
			tcpbufseg_t *seg = bufseg_new(rb->mp);
			assert(seg);
			memcpy(&seg->buf[head % UNITBUFSIZE],
			       &headseg->buf[head % UNITBUFSIZE],
			       UNITBUFSIZE - (head % UNITBUFSIZE));
			TAILQ_INSERT_AFTER(&rb->bufsegs, tailseg, seg, link);
			headseg = seg;
			segdiff--;
		}
		while (segdiff--) {
			tcpbufseg_t *seg = bufseg_new(rb->mp);
			assert(seg);	/* NOTE(review): OOM aborts instead of failing */
			TAILQ_INSERT_AFTER(&rb->bufsegs, tailseg, seg, link);
		}
	} else /* if (segdiff < 0) */ {
		/* Shrink the buffer */
		tcpbufseg_t *seg;

		segdiff *= -1;
		int shrinkable = (rb->len - datalen) / UNITBUFSIZE; /* segs without data */
		int tobeshrank = segdiff;

		rb->len -= (tobeshrank * UNITBUFSIZE);
		if (rb->len) {
			boff_t new_head = loff2boff(rb, data);
			rb->corr = (rb->corr - (tobeshrank * UNITBUFSIZE) -
				    (new_head - head)) % rb->len;
			if (rb->corr < 0)
				rb->corr += rb->len;
		}

		if (shrinkable < segdiff) {
			/* Not enough slack: data in [eh, et) must be dropped.
			 * Mark some fragments as empty */
			loff_t eh = data + rb->len +
				(UNITBUFSIZE - (loff2boff(rb, data) % UNITBUFSIZE));
			//loff_t et = eh + (segdiff - shrinkable) * UNITBUFSIZE;
			loff_t et = data + datalen;

			/* NOTE: the initializer of `f` is immediately overwritten
			 * by TAILQ_FOREACH below */
			struct _tcpfrag_t *f = TAILQ_FIRST(&rb->frags), *new;

			TAILQ_FOREACH(f, &rb->frags, link) {
				if (f->tail <= eh)
					continue;

				if (f->tail > et) {
					/* split: keep the part past `et` as a live fragment */
					new = frags_new();
					new->head = et;
					new->tail = f->tail;
					TAILQ_INSERT_AFTER(&rb->frags, f, new, link);

					f->tail = et;
				}

				if (f->head >= eh && f->tail <= et) {
					/* fully inside the dropped range */
					f->empty = true;
					continue;
				}

				if (f->head < eh) {
					/* split: keep the part before `eh` live */
					new = frags_new();
					new->head = eh;
					new->tail = f->tail;
					TAILQ_INSERT_AFTER(&rb->frags, f, new, link);

					f->tail = eh;
					continue;
				}
			}

			/* shrink the buffer */
			int skip = rb->len / UNITBUFSIZE;	/* segments to keep */
			for (seg = headseg; seg; ) {
				if (skip-- <= 0) {
					TAILQ_REMOVE(&rb->bufsegs, seg, link);
					bufseg_del(rb->mp, seg);
				}
				seg = TAILQ_LOOP_NEXT(&rb->bufsegs, blist, tailseg, link);
				if (seg == headseg)
					break;
			}
		} else {
			/* enough slack: free unused segments following tailseg */
			while (tobeshrank &&
			       (seg = TAILQ_LOOP_NEXT(&rb->bufsegs, blist, tailseg, link))
			       != headseg) {
				TAILQ_REMOVE(&rb->bufsegs, seg, link);
				bufseg_del(rb->mp, seg);
				tobeshrank--;
			}
			if (tobeshrank) {
				/* only the head segment remains to drop: fold its
				 * live bytes into the tail segment first */
				assert(tobeshrank == 1);
				assert((tail % UNITBUFSIZE) < (head % UNITBUFSIZE));
				memcpy(&tailseg->buf[head % UNITBUFSIZE],
				       &headseg->buf[head % UNITBUFSIZE],
				       UNITBUFSIZE - (head % UNITBUFSIZE));
				TAILQ_REMOVE(&rb->bufsegs, headseg, link);
				bufseg_del(rb->mp, headseg);
				headseg = tailseg;
				tobeshrank = 0;
			}
		}
	}

	return 0;
}
284 /*--------------------------------------------------------------------------*/
/* buf_read() and buf_write() are highly symmetic, so use macro for function
 * definition to ease code maintenance. */

/* TODO: We do not have tailing anymore. simplify these functions */

/* Direction selectors: read copies ring -> user buffer, write the
 * reverse; only the memcpy argument order differs. */
#define MEMCPY_FOR_read(a, b, len) memcpy(a, b, len)
#define MEMCPY_FOR_write(a, b, len) memcpy(b, a, len)

/* Copy `len` bytes between `buf` and the ring at logical offset `off`,
 * walking segment-by-segment.  When the copy wraps physically
 * (from > to), the walk continues from the first segment again.
 * NOTE: `off` is reused as the running offset into `buf` after the
 * initial boundary computation. */
#define FUNCDEF_BUF_RW(rw) \
static inline void \
buf_##rw(tcprb_t *rb, uint8_t *buf, int len, loff_t off) \
{ \
	tcpbufseg_t *bufseg = NULL; \
 \
	assert(rb); \
 \
	boff_t from = loff2boff(rb, off); \
	boff_t to = loff2boff(rb, off + len); \
	tcpbufseg_t *bufseg_from = buf_getbuf(rb, from); \
	tcpbufseg_t *bufseg_to = buf_getbuf(rb, to); \
 \
	if (from > to) { \
		off = UNITBUFSIZE - (from % UNITBUFSIZE); \
		MEMCPY_FOR_##rw(&buf[0], &bufseg_from->buf[from % UNITBUFSIZE], off); \
		for (bufseg = buf_next(rb, bufseg_from); \
		     bufseg && (bufseg != bufseg_to); \
		     bufseg = buf_next(rb, bufseg)) { \
			MEMCPY_FOR_##rw(&buf[off], &bufseg->buf[0], UNITBUFSIZE); \
			off += UNITBUFSIZE; \
		} \
		for (bufseg = buf_first(rb); \
		     bufseg && (bufseg != bufseg_to); \
		     bufseg = buf_next(rb, bufseg)) { \
			MEMCPY_FOR_##rw(&buf[off], &bufseg->buf[0], UNITBUFSIZE); \
			off += UNITBUFSIZE; \
		} \
		MEMCPY_FOR_##rw(&buf[off], &bufseg_to->buf[0], to % UNITBUFSIZE); \
	} else if (bufseg_from == bufseg_to) { \
		MEMCPY_FOR_##rw(&buf[0], &bufseg_from->buf[from % UNITBUFSIZE], len); \
	} else { \
		off = UNITBUFSIZE - (from % UNITBUFSIZE); \
		MEMCPY_FOR_##rw(&buf[0], &bufseg_from->buf[from % UNITBUFSIZE], off); \
		for (bufseg = buf_next(rb, bufseg_from); \
		     bufseg && (bufseg != bufseg_to); \
		     bufseg = buf_next(rb, bufseg)) { \
			MEMCPY_FOR_##rw(&buf[off], &bufseg->buf[0], UNITBUFSIZE); \
			off += UNITBUFSIZE; \
		} \
		MEMCPY_FOR_##rw(&buf[off], &bufseg_to->buf[0], to % UNITBUFSIZE); \
	} \
}

FUNCDEF_BUF_RW(read)
FUNCDEF_BUF_RW(write)
339 /* -------------------------------------------------------------------------- */
340 /* Public */
341 /* -------------------------------------------------------------------------- */
342
/* Map a 32-bit TCP sequence number onto the 64-bit logical stream
 * offset, given the initial sequence number `isn`.  `seq - isn` wraps
 * modulo 2^32 (unsigned arithmetic), so 2^32 is added repeatedly until
 * the offset reaches the current window head, undoing sequence-number
 * wraparound. */
inline loff_t
seq2loff(tcprb_t *rb, uint32_t seq, uint32_t isn)
{
	loff_t off = seq - isn;

	while (off < rb->head)
		off += 0x100000000;

	return off;
}
353 /*--------------------------------------------------------------------------*/
354 inline tcprb_t *
tcprb_new(mem_pool_t mp,int len,unsigned buf_mgmt)355 tcprb_new(mem_pool_t mp, int len, unsigned buf_mgmt)
356 {
357 tcprb_t *rb;
358
359 if (len % UNITBUFSIZE || len < 2)
360 return NULL;
361
362 if (!(rb = calloc(sizeof(tcprb_t), 1)))
363 return NULL;
364
365 TAILQ_INIT(&rb->bufsegs);
366 rb->lbufsegs = ((len - 1) / UNITBUFSIZE) + 1;
367
368 #if 0
369 int i;
370 for (i = 0; i < rb->lbufsegs; i++) {
371 tcpbufseg_t *bufseg = bufseg_new(mp);
372 if (!bufseg) {
373 TAILQ_FOREACH(bufseg, &rb->bufsegs, link)
374 bufseg_del(mp, bufseg);
375 FREE(rb);
376 return NULL;
377 }
378 TAILQ_INSERT_TAIL(&rb->bufsegs, bufseg, link);
379 }
380 #endif
381
382 rb->buf_mgmt = buf_mgmt;
383 rb->mp = mp;
384 rb->len = rb->metalen = len;
385 TAILQ_INIT(&rb->frags);
386
387 return rb;
388 }
389 /*--------------------------------------------------------------------------*/
390 inline int
tcprb_del(tcprb_t * rb)391 tcprb_del(tcprb_t *rb)
392 {
393 struct _tcpbufseg_t *bwalk, *bnext;
394 struct _tcpfrag_t *fwalk, *fnext;
395
396 for (bwalk = TAILQ_FIRST(&rb->bufsegs); bwalk; bwalk = bnext) {
397 bnext = TAILQ_NEXT(bwalk, link);
398 bufseg_del(rb->mp, bwalk);
399 }
400
401 for (fwalk = TAILQ_FIRST(&rb->frags); fwalk; fwalk = fnext) {
402 fnext = TAILQ_NEXT(fwalk, link);
403 frags_del(fwalk);
404 }
405
406 FREE(rb);
407
408 return 0;
409 }
410 /*--------------------------------------------------------------------------*/
411 inline int
tcprb_setpile(tcprb_t * rb,loff_t new)412 tcprb_setpile(tcprb_t *rb, loff_t new)
413 {
414 if (!rb || new > (rb->head + rb->len) || new < rb->head)
415 return -1;
416
417 tcpfrag_t *cf = TAILQ_FIRST(&rb->frags); /* contiguous buffer seg. */
418
419 if (!cf || (cf->head != rb->head)) {
420 /* No contiguous buffer seg. */
421 assert(rb->pile == rb->head);
422 return -1;
423 }
424
425 if (new > cf->tail)
426 return -1;
427
428 rb->pile = new;
429
430 return 0;
431 }
432 /*--------------------------------------------------------------------------*/
433 inline int
tcprb_cflen(tcprb_t * rb)434 tcprb_cflen(tcprb_t *rb)
435 {
436 tcpfrag_t *cf = TAILQ_FIRST(&rb->frags); /* contiguous buffer seg. */
437
438 if (!cf || (cf->head != rb->head))
439 /* No contiguous buffer seg. to taverse */
440 return 0;
441
442 int cflen = cf->tail - rb->pile; /* length of cf */
443
444 assert(cflen >= 0);
445
446 return cflen;
447 }
448 /*--------------------------------------------------------------------------*/
449 inline int
tcprb_ffhead(tcprb_t * rb,int len)450 tcprb_ffhead(tcprb_t *rb, int len)
451 {
452 if (!rb || len < 0)
453 return 0;
454 else if (len == 0)
455 return 0;
456
457 tcpfrag_t *cf = TAILQ_FIRST(&rb->frags); /* contiguous buffer seg. */
458
459 if (!cf || (cf->head != rb->head))
460 /* No contiguous buffer seg. to taverse */
461 return 0;
462
463 int cflen = cf->tail - cf->head; /* length of cf */
464 assert(cflen > 0);
465
466 int ff = MIN(len, cflen);
467 ff = MIN(ff, rb->pile - rb->head); /* head cannot go further than pile */
468
469 if (cflen == ff) {
470 /* Fast forward entire contiguous segment */
471 TAILQ_REMOVE(&rb->frags, cf, link);
472 frags_del(cf);
473 } else {
474 cf->head += ff;
475 }
476
477 rb->head += ff;
478
479 return ff;
480 }
481 /*--------------------------------------------------------------------------*/
482 /* return the number of bytes that would be fast forwarded */
483 inline int
tcprb_fflen(tcprb_t * rb,uint8_t * buf,int len,loff_t off)484 tcprb_fflen(tcprb_t *rb, uint8_t *buf, int len, loff_t off)
485 {
486 if (!rb || !buf || len < 0 ||
487 off < rb->head || off >= rb->pile + rb->metalen)
488 return -1;
489 /* nothing to write or already written */
490 else if (len == 0 || off + len < rb->pile)
491 return 0;
492
493 int ff = (off + len) - (rb->head + MIN(rb->len, rb->metalen));
494 if (ff <= 0)
495 return 0;
496
497 tcpfrag_t *cf = TAILQ_FIRST(&rb->frags); /* contiguous buffer seg. */
498
499 if (!cf || (cf->head != rb->head))
500 /* No contiguous buffer seg. to traverse */
501 return 0;
502
503 int cflen = cf->tail - cf->head; /* length of cf */
504 assert(cflen > 0);
505
506 int fflen = MIN(len, cflen);
507 fflen = MIN(ff, rb->pile - rb->head); /* head cannot go further than pile */
508
509 return fflen;
510 }
511 /*--------------------------------------------------------------------------*/
512 static inline int
tcprb_get_datalen(tcprb_t * rb)513 tcprb_get_datalen(tcprb_t *rb)
514 {
515 tcpfrag_t *lastfrag = TAILQ_LAST(&rb->frags, flist);
516 return lastfrag ? (int)(lastfrag->tail - rb->head) : 0;
517 }
518 /*--------------------------------------------------------------------------*/
/* Resize the metadata window length (rb->metalen), which may exceed the
 * physical buffer length rb->len.  Growing simply records the new
 * length; shrinking fast-forwards the head and then clamps metalen to
 * at least the remaining data length.  Returns the resulting metalen
 * (or 0 when dynamic resizing is compiled out). */
inline int
tcprb_resize_meta(tcprb_t *rb, int len)
{
#ifdef DISABLE_DYN_RESIZE
	/* resize disabled: only full teardown (len == 0) is supported */
	assert(len == 0);

	struct _tcpfrag_t *fwalk, *fnext;

	rb->metalen = 0;

	for (fwalk = TAILQ_FIRST(&rb->frags); fwalk; fwalk = fnext) {
		fnext = TAILQ_NEXT(fwalk, link);
		TAILQ_REMOVE(&rb->frags, fwalk, link);
		frags_del(fwalk);
	}

	return 0;
#else
	int diff, ff, datalen;

	if ((diff = len - rb->metalen) > 0) {
		/* growing: nothing to evict */
		rb->metalen = len;
	} else if (diff < 0) {
		/* NOTE(review): `diff` is negative here, so `ff` is always
		 * negative and tcprb_ffhead() is a no-op — compare with
		 * tcprb_resize(), which uses (rb->len - len).  Looks like a
		 * sign bug; confirm intent before changing. */
		ff = diff - (rb->len - tcprb_get_datalen(rb));
		tcprb_ffhead(rb, ff);
		datalen = tcprb_get_datalen(rb);
		/* never shrink metalen below the data still buffered */
		rb->metalen = MAX(datalen, len);
	}

	return rb->metalen;
#endif
}
551 /*--------------------------------------------------------------------------*/
552 inline int
tcprb_setpolicy(tcprb_t * rb,uint8_t policy)553 tcprb_setpolicy(tcprb_t *rb, uint8_t policy)
554 {
555 if (policy >= MOS_OVERLAP_CNT)
556 return -1;
557
558 rb->overlap = policy;
559 return 0;
560 }
561 /*--------------------------------------------------------------------------*/
/* Resize the physical ring buffer to `len` bytes (a multiple of
 * UNITBUFSIZE).  When shrinking would evict buffered data, the head is
 * fast-forwarded first; the heavy lifting is in buf_try_resize().
 * Returns 0 on success (or buf_try_resize()'s result), -1 when `len`
 * is not segment-aligned. */
inline int
tcprb_resize(tcprb_t *rb, int len)
{
#ifdef DISABLE_DYN_RESIZE
	/* resize disabled: only full teardown (len == 0) is supported */
	assert(len == 0);

	struct _tcpbufseg_t *bwalk, *bnext;
	struct _tcpfrag_t *fwalk, *fnext;

	rb->metalen = 0;
	rb->len = 0;

	for (bwalk = TAILQ_FIRST(&rb->bufsegs); bwalk; bwalk = bnext) {
		bnext = TAILQ_NEXT(bwalk, link);
		TAILQ_REMOVE(&rb->bufsegs, bwalk, link);
		bufseg_del(rb->mp, bwalk);
	}

	for (fwalk = TAILQ_FIRST(&rb->frags); fwalk; fwalk = fnext) {
		fnext = TAILQ_NEXT(fwalk, link);
		TAILQ_REMOVE(&rb->frags, fwalk, link);
		frags_del(fwalk);
	}

	return 0;
#else
	int diff, ff;

	if (len % UNITBUFSIZE)
		return -1;
	else if (len == rb->len)
		return 0;

	/* when shrinking by more than the free space, evict the overflow
	 * by fast-forwarding the head first */
	if ((diff = rb->len - len) > 0 && /* shrinking */
	    (ff = diff - (rb->len - tcprb_get_datalen(rb))))
		tcprb_ffhead(rb, ff);

	return buf_try_resize(rb, len, rb->head,
			      tcprb_get_datalen(rb));
#endif
}
603 /*--------------------------------------------------------------------------*/
604 inline int
tcprb_ppeek(tcprb_t * rb,uint8_t * buf,int len,loff_t off)605 tcprb_ppeek(tcprb_t *rb, uint8_t *buf, int len, loff_t off)
606 {
607 struct _tcpfrag_t *f;
608
609 if (!rb || rb->buf_mgmt != BUFMGMT_FULL || !buf || len < 0)
610 return -1;
611 else if (len == 0)
612 return 0;
613
614 TAILQ_FOREACH(f, &rb->frags, link)
615 if (off >= f->head && off < f->tail) {
616 if (f->empty)
617 f = NULL;
618 break;
619 }
620
621 if (!f) /* No proper fragment found */
622 return -1;
623
624 int plen = MIN(len, f->tail - off);
625
626 buf_read(rb, buf, plen, off);
627
628 return plen;
629 }
630 /*--------------------------------------------------------------------------*/
/* Write `len` bytes from `buf` into the ring at logical offset `off`,
 * maintaining the sorted fragment list (extending, splitting and
 * merging fragments) and fast-forwarding the head when the write
 * overshoots the usable window.  Payload bytes are only copied when
 * rb->buf_mgmt is set, and overlapping regions are only overwritten
 * under MOS_OVERLAP_POLICY_LAST.  Returns the number of bytes accepted,
 * 0 for len == 0, len when the range was already written, or -1 on
 * invalid arguments. */
inline int
tcprb_pwrite(tcprb_t *rb, uint8_t *buf, int len, loff_t off)
{
	int ff, olen;
	loff_t efhead = -1; /* head of empty fragment */
	int eflen = 0; /* length of empty fragment */

	/*
	 * we don't raise MOS_ON_ERROR anymore here,
	 * (out of receive buffer case is handled at ProcessTCPPayload())
	 */
	if (!rb || !buf || len < 0 ||
	    off < rb->head || off >= rb->pile + rb->metalen)
		return -1;
	else if (len == 0)
		return 0;
	else if (off + len < rb->pile) /* already written */
		return len;

	/* check whether we should move its head offset (fast forward) */
	olen = len;
	if ((ff = (off + len) - (rb->head + MIN(rb->len, rb->metalen))) > 0)
		len -= ff - tcprb_ffhead(rb, ff);	/* drop what could not be skipped */
	if (rb->metalen > rb->len)
		/* metadata window exceeds physical buffer: track the tail
		 * portion that has no backing storage as an empty fragment */
		eflen = MIN(olen - len, rb->metalen - rb->len);
	if (eflen)
		efhead = off + len;

	/* write data */
	struct _tcpfrag_t *f = TAILQ_FIRST(&rb->frags), *fnext;
	int uoff = 0; /* offset of `buf` */

	/* head: position of first byte of the fragment
	 * tail: head + length of the fragment */
	while (uoff < len) {
		bool skip = false;	/* true when the range already holds data */
		int wrlen = 0; /* length to be written */

		while (f) {
			struct _tcpfrag_t *ef, *nef;
			fnext = TAILQ_NEXT(f, link);

			if (f->empty) {
				/* skip empty fragment */
				f = fnext;
				continue;
			}

			if (f->head <= off + uoff) {
				if (f->tail > off + uoff) {
					/* write position lies inside `f`: existing data */
					skip = true;
					wrlen = f->tail - (off + uoff);
					break;
				} else if (f->tail == off + uoff) {
					/* write position abuts `f`: extend it */
					skip = false;

					/* shrink empty fragment */
					for (ef = fnext;
					     ef && ef->empty && ef->head < f->tail + len - uoff;
					     ef = nef) {
						nef = TAILQ_NEXT(ef, link);

						if (ef->tail <= f->tail + len - uoff) {
							TAILQ_REMOVE(&rb->frags, ef, link);
						} else {
							ef->head = f->tail + len - uoff;
							/* break is not necessary, but for early escape */
							break;
						}
					}
					fnext = TAILQ_NEXT(f, link);

					wrlen = fnext ? MIN(fnext->head - (off + uoff), len - uoff)
						      : len - uoff;
					f->tail += wrlen;
					if (fnext && (f->tail == fnext->head)) {
						/* merge 'f' and 'fnext' */
						f->tail = fnext->tail;
						TAILQ_REMOVE(&rb->frags, fnext, link);
						frags_del(fnext);
					}
					break;
				}
			} else if (f->head <= off + len) {
				/* write region ends inside or at `f`: grow `f` backwards */
				skip = false;
				wrlen = MIN(f->head - (off + uoff), len - uoff);
				f->head -= wrlen;

				/* shrink empty fragment */
				for (ef = TAILQ_PREV(f, flist, link);
				     ef && ef->empty && ef->tail < f->head;
				     ef = nef) {
					nef = TAILQ_PREV(ef, flist, link);

					if (ef->head <= f->head) {
						TAILQ_REMOVE(&rb->frags, ef, link);
					} else {
						ef->tail = f->head;
						/* break is not necessary, but for early escape */
						break;
					}
				}

				break;
			} else /* if (f->head > off + len) */ {
				/* early escape */
				f = NULL;
				break;
			}

			f = fnext;
		}

		if (!f) {
			struct _tcpfrag_t *new;

			/* create new fragment and insert it into the list */
			new = frags_new();

			new->head = off;
			new->tail = off + len;
			wrlen = len;

			frags_insert(rb, new);
		}

		/* copy data */
		if ((rb->overlap == MOS_OVERLAP_POLICY_LAST || !skip)
		    && rb->buf_mgmt)
			buf_write(rb, &buf[uoff], wrlen, off + uoff);
		uoff += wrlen;
	}

	/* Insert empty fragment if necessary */
	if (eflen) {
		assert(rb->metalen > rb->len);

		struct _tcpfrag_t *new;

		/* create new fragment and insert it into the list */
		new = frags_new();

		new->empty = true;
		new->head = efhead;
		new->tail = efhead + eflen;

		frags_insert(rb, new);
	}

	return len;
}
782 /*--------------------------------------------------------------------------*/
/* ANSI terminal escape sequences used by the debug dump helpers below:
 * reset, foreground (FG) and background (BG) colors. */
#define PRT_CL_RST "\x1b[0m"
#define PRT_FG_BLK "\x1b[30m"
#define PRT_FG_RED "\x1b[31m"
#define PRT_FG_GRN "\x1b[32m"
#define PRT_FG_YEL "\x1b[33m"
#define PRT_FG_BLU "\x1b[34m"
#define PRT_FG_PUR "\x1b[35m"
#define PRT_FG_CYN "\x1b[36m"
#define PRT_FG_GRY "\x1b[37m"
#define PRT_BG_BLK "\x1b[40m"
#define PRT_BG_RED "\x1b[41m"
#define PRT_BG_GRN "\x1b[42m"
#define PRT_BG_YEL "\x1b[43m"
#define PRT_BG_BLU "\x1b[44m"
#define PRT_BG_PUR "\x1b[45m"
#define PRT_BG_CYN "\x1b[46m"
#define PRT_BG_GRY "\x1b[47m"
800 /*--------------------------------------------------------------------------*/
/* Debug helper: dump every fragment as "[head - tail]:'contents'",
 * rendering non-printable bytes as colored single-letter escapes
 * (0 = NUL, R = CR, N = LF, T = TAB, ? = other). */
inline void
tcprb_printfrags(struct _tcprb_t *rb)
{
	struct _tcpfrag_t *walk;

	printf("frags: ");
	TAILQ_FOREACH(walk, &rb->frags, link) {
		printf("[%lu - %lu]:'", walk->head, walk->tail - 1);
#if 1
		if (walk->empty)
			printf("EMPTY");
		else {
			loff_t i;
			for (i = walk->head; i < walk->tail; i++) {
				/* map each logical offset to its backing byte */
				tcpbufseg_t *bufseg;
				boff_t j = loff2boff(rb, i);
				bufseg = buf_getbuf(rb, j);
				char c = bufseg->buf[j % UNITBUFSIZE];
				if (isgraph(c))
					printf("%c", c);
				else {
					printf(PRT_FG_BLU);
					if (c == ' ')
						printf(" ");
					else if (c == '\0')
						printf("0");
					else if (c == '\r')
						printf("R");
					else if (c == '\n')
						printf("N");
					else if (c == '\t')
						printf("T");
					else
						printf("?");
					printf(PRT_CL_RST);
				}
			}
		}
#endif
		printf("' ");
	}
	printf("\n");
}
/* Debug helper: dump every buffer segment as "[id]:'contents'", using
 * the same non-printable-byte rendering as tcprb_printfrags(). */
inline void
tcprb_printbufsegs(tcprb_t *rb)
{
	struct _tcpbufseg_t *walk;

	printf("bufsegs: ");
	TAILQ_FOREACH(walk, &rb->bufsegs, link) {
		printf("[%d]:'", walk->id);
#if 1
		int j;
		for (j = 0; j < UNITBUFSIZE; j++) {
			char c = walk->buf[j];
			if (isgraph(c))
				printf("%c", c);
			else {
				printf(PRT_FG_BLU);
				if (c == ' ')
					printf(" ");
				else if (c == '\0')
					printf("0");
				else if (c == '\r')
					printf("R");
				else if (c == '\n')
					printf("N");
				else if (c == '\t')
					printf("T");
				else
					printf("?");
				printf(PRT_CL_RST);
			}
		}
#endif
		printf("' ");
	}
	printf("\n");
}
880 /*--------------------------------------------------------------------------*/
/* Debug helper: print the ring's summary (lengths and the
 * head/pile/tail offsets), then its fragments and buffer segments. */
inline void
tcprb_printrb(struct _tcprb_t *rb)
{
	printf(" rb: len: %d, meta: %d, "
	       "(head: %ld <= pile: %ld <= tail: %ld)\n ",
	       rb->len, rb->metalen, rb->head, rb->pile, rb->head + rb->metalen);
	tcprb_printfrags(rb);
	tcprb_printbufsegs(rb);
	printf("-------------------------------------------------\n");
}
891 /*--------------------------------------------------------------------------*/
/* Raise MOS_ON_REXMIT callbacks when the incoming packet's payload
 * range overlaps any fragment already buffered for the receive side
 * (i.e. the segment looks retransmitted).  Must run before the payload
 * is attached to the ring buffer.  Client-side monitors are notified in
 * reverse socket order, server-side in forward order. */
inline void
tcp_rb_overlapchk(mtcp_manager_t mtcp, struct pkt_ctx *pctx,
		  struct tcp_stream *recvside_stream)
{
/* true iff ranges (a1, a2) and (b1, b2) share bytes, excluding the case
 * where they are merely adjacent */
#define DOESOVERLAP(a1, a2, b1, b2) \
	((a1 != b2) && (a2 != b1) && ((a1 > b2) != (a2 > b1)))

	/* Check whether this packet is retransmitted or not. */
	tcprb_t *rb;
	struct socket_map *walk;
	if (pctx->p.payloadlen > 0 && recvside_stream->rcvvar != NULL
	    && (rb = recvside_stream->rcvvar->rcvbuf) != NULL) {
		struct _tcpfrag_t *f;
		loff_t off = seq2loff(rb, pctx->p.seq, recvside_stream->rcvvar->irs + 1);
		TAILQ_FOREACH(f, &rb->frags, link)
			if (DOESOVERLAP(f->head, f->tail, off, off + pctx->p.payloadlen)) {
				/*
				 * call it immediately (before pkt payload is attached
				 * to the tcp ring buffer)
				 */
				if (recvside_stream->side == MOS_SIDE_CLI) {
					SOCKQ_FOREACH_REVERSE(walk, &recvside_stream->msocks) {
						HandleCallback(mtcp, MOS_HK_RCV, walk,
							       recvside_stream->side,
							       pctx, MOS_ON_REXMIT);
					} SOCKQ_FOREACH_END;
				} else { /* recvside_stream->side == MOS_SIDE_SVR */
					SOCKQ_FOREACH_START(walk, &recvside_stream->msocks) {
						HandleCallback(mtcp, MOS_HK_RCV, walk,
							       recvside_stream->side,
							       pctx, MOS_ON_REXMIT);
					} SOCKQ_FOREACH_END;
				}
				/* recvside_stream->cb_events |= MOS_ON_REXMIT; */
				TRACE_DBG("RETX!\n");
				break;	/* one notification per packet is enough */
			}
	}
}
931 /*--------------------------------------------------------------------------*/
932