1 #ifdef NEWEV
2
3 /* ----------------------------------------------------------------------------
4 * Please see 4 Jul. 2015 meeting slides for conceptual explanation.
5 * 'Subtree' has special meaning and its 'ID' is very important concept for
6 * understanding this code.
7 *
8 * Please contact Donghwi Kim for further questions,
9 * via e-mail: [email protected]
10 */
11 #include "debug.h"
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <tree.h>
16 #include <key_value_store.h>
17 #include "scalable_event.h"
18
19 #include "mtcp.h"
20 #include "mos_api.h"
21 #include "mtcp_api.h"
22
23 #define NEWHASH
24 #ifdef NEWHASH
25 #include "mtcp_util.h"
26 #define XXHSEED 18446744073709551557ULL
27 #endif
28 /*----------------------------------------------------------------------------*/
29 #define POWER_OF_TWO(x) (!((x - 1) & (x)))
30
31 #define MAX_EVENTS 30240
32 #define KVS_BUCKETS 1024
33 #define KVS_ENTRIES 102400
34 #define MAX_DEPTH MAX_EVENTS
35 #define NEWID(id, ev, cb) (id ^ hash64(ev, cb))
36 #define IS_UDE(e) ((e)->ev >= (1 << NUM_BEV))
37 #define IS_FLOATING_EVENT(e) (!(e)->ft && IS_UDE(e))
38
39 struct _tree_node_t *g_evroot;
40 event_t g_evid;
41
42 static __thread stree_t *g_cur_stree = NULL;
43 static __thread tree_node_t *g_cur_ev = NULL;
44
45 static __thread struct {
46 struct _tree_node_t *stack[MAX_EVENTS];
47 struct _tree_node_t **sp;
48 } g_evstack;
49
50 #define free(x) do { \
51 assert(x); \
52 free(x); \
53 } while (0)
54
55 /*----------------------------------------------------------------------------*/
56 struct devent {
57 event_t ev; /* event id = key */
58 tree_node_t *ptr; /* ptr to the event node in the d-forest = value */
59 struct devent *next; /* next pointer */
60 };
61 #define MAX_BUCKET 2048
62 #define HASHVAL(x) ((x) & (MAX_BUCKET-1))
63 struct devent* g_dfTable[MAX_BUCKET] = {0}; /* d-forest hash table */
64 /*----------------------------------------------------------------------------*/
65 /* simple hash table implementation */
66 static bool
dforest_store(event_t ev,tree_node_t * ptr)67 dforest_store(event_t ev, tree_node_t* ptr)
68 {
69 struct devent *de, *p;
70 int idx;
71
72 if ((de = malloc(sizeof(*de))) == NULL) {
73 assert(0); /* should not happen! */
74 return false;
75 }
76 de->ev = ev;
77 de->ptr = ptr;
78 de->next = NULL;
79
80 idx = HASHVAL(ev);
81 if (g_dfTable[idx] == NULL) {
82 g_dfTable[idx] = de;
83 }
84 else {
85 p = g_dfTable[idx];
86 while (p->next)
87 p = p->next;
88 p->next = de;
89 }
90 return true;
91 }
92 /*----------------------------------------------------------------------------*/
93 static tree_node_t *
dforest_search(event_t ev)94 dforest_search(event_t ev)
95 {
96 int idx;
97 struct devent *p;
98
99 idx = HASHVAL(ev);
100 if (g_dfTable[idx] == NULL)
101 return NULL;
102
103 for (p = g_dfTable[idx];
104 p != NULL;
105 p = p->next) {
106 if (p->ev == ev)
107 return p->ptr;
108 }
109 return NULL;
110 }
111 /*----------------------------------------------------------------------------*/
112 /** Create a new tree node
113 * @return the new tree node
114 * */
115 inline tree_node_t *
tree_node_new()116 tree_node_new()
117 {
118 return calloc(1, sizeof(tree_node_t));
119 }
120 /*----------------------------------------------------------------------------*/
121 /** Delete a tree node */
122 inline void
tree_node_del(tree_node_t * node)123 tree_node_del(tree_node_t *node)
124 {
125 free(node);
126 }
127 /*----------------------------------------------------------------------------*/
128 /** Delete a tree node and all of its descendants
129 * @param [in] node: target node
130 * */
131 inline void
tree_del_recursive(tree_node_t * node)132 tree_del_recursive(tree_node_t *node)
133 {
134 struct _tree_node_t *walk = NULL, *del = NULL;
135 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
136
137 TREE_DFS_FOREACH(walk, node, &stack, link) {
138 if (del)
139 tree_node_del(del);
140 del = walk;
141 }
142 if (del)
143 tree_node_del(del);
144 }
145 /*----------------------------------------------------------------------------*/
146 /** Search a tree node by event ID
147 * @param [in] root: root node
148 * @param [in] ev: event ID
149 * @return target node
150 *
151 * The search is in DFS mannar
152 * */
153 inline tree_node_t *
tree_search(tree_node_t * root,event_t ev)154 tree_search(tree_node_t *root, event_t ev)
155 {
156 struct _tree_node_t *walk = NULL;
157 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
158
159 TREE_DFS_FOREACH(walk, root, &stack, link)
160 if (walk->ev == ev) {
161 return walk;
162 }
163 return NULL;
164 }
165 /*----------------------------------------------------------------------------*/
166 /** Create a new subtree
167 * @param [in] id: subtree ID
168 * @param [in] root: root of the subtree
169 * @return new subtree
170 *
171 * Allocate and initialize subtree structure, stree_t:
172 * 1. Allocate the structure.
173 * 2. Initialize reference counter and fill ID field.
174 * 3. Set 'root' pointer which points the root node of a tree.
175 * The tree should contain all required tree nodes.
176 * 4. Set built-in event tree node pointers.
177 * */
178 inline stree_t *
stree_create(uint64_t id,tree_node_t * root)179 stree_create(uint64_t id, tree_node_t *root)
180 {
181 tree_node_t *bev;
182 stree_t *nstree = malloc(sizeof(stree_t));
183 if (!nstree)
184 return NULL;
185
186 nstree->ref_cnt = 0;
187 nstree->id = id;
188 nstree->root = root;
189 memset(nstree->bevs, 0, sizeof(nstree->bevs));
190
191 for (bev = TREE_FIRSTBORN(root, link); bev; bev = TREE_YOUNGER(bev, link)) {
192 int idx = 0;
193 while (bev->ev >> (idx + 1))
194 idx++;
195 nstree->bevs[idx] = bev;
196 }
197
198 return nstree;
199 }
200 /*----------------------------------------------------------------------------*/
201 /** Destroy the subtree
202 * @param [in] stree: the target subtree */
203 inline void
stree_destroy(kvs_t * store,stree_t * stree)204 stree_destroy(kvs_t *store, stree_t *stree)
205 {
206 tree_del_recursive(stree->root);
207 free(stree);
208 }
209 /*----------------------------------------------------------------------------*/
210 /** Increase reference counter of the subtree
211 * @param [in] stree: the target subtree */
212 inline void
stree_inc_ref(stree_t * stree)213 stree_inc_ref(stree_t *stree)
214 {
215 stree->ref_cnt++;
216 }
217 /*----------------------------------------------------------------------------*/
218 /** Decrease reference counter of the subtree, and remove it from the key-
219 * value-store if needed
220 * @param [in] store: the key-value-store
221 * @param [in] stree: the target subtree
222 *
223 * In addition to decreasing the reference counter, remove the subtree from
224 * key-value-store if decreased reference counter is zero.
225 * */
226 inline void
stree_dec_ref(kvs_t * store,stree_t * stree)227 stree_dec_ref(kvs_t *store, stree_t *stree)
228 {
229 if (stree && !--stree->ref_cnt) {
230 kvs_remove(store, stree->id);
231 stree_destroy(store, stree);
232 }
233 }
234 /*----------------------------------------------------------------------------*/
235 /** Hash the event ID and callback pair
236 * @param [in] ev: the event ID
237 * @param [in] cb: the callback function pointer
238 * @return hash value
239 *
240 * TODO: Replace 32-bit Jenkins hash with better one.
241 * */
242 inline static uint64_t
hash64(event_t ev,callback_t cb)243 hash64(event_t ev, callback_t cb)
244 {
245 #ifdef NEWHASH
246 struct {
247 event_t ev;
248 callback_t cb;
249 } instance;
250
251 instance.ev = ev;
252 instance.cb = cb;
253
254 return XXH64(&instance, sizeof(event_t) + sizeof(callback_t), XXHSEED);
255 #else
256 uint64_t hash = 0;
257 int i;
258
259 for (i = 0; i < sizeof(ev); ++i) {
260 hash += ((uint8_t *)&ev)[i];
261 hash += (hash << 10);
262 hash ^= (hash >> 6);
263 }
264 for (i = 0; i < sizeof(cb); ++i) {
265 hash += ((uint8_t *)&cb)[i];
266 hash += (hash << 10);
267 hash ^= (hash >> 6);
268 }
269 hash += (hash << 3);
270 hash ^= (hash >> 11);
271 hash += (hash << 15);
272
273 return hash;
274 #endif
275 }
276
277 /*-------------------------------------------------------------------------*/
278 /* Create a new tree that consists of a tree node (w) with a new
279 * callback and take a snapshot of the subtree from w all the way up to
280 * the root node by traversing the ancestors of w.
281 */
282 /*-------------------------------------------------------------------------*/
283 static tree_node_t*
create_spine(tree_node_t * w,callback_t cb)284 create_spine(tree_node_t* w, callback_t cb)
285 {
286 tree_node_t *ntn, *ntree = NULL;
287
288 if ((ntn = tree_node_new()) == NULL)
289 return NULL;
290
291 /* the leaf event node with callback function */
292 ntn->ft = w->ft;
293 ntn->cb = cb;
294 ntn->ev = w->ev;
295 ntn->arg = w->arg;
296 ntree = ntn; /* we start from the leaf */
297
298 do {
299 if (!(ntn = tree_node_new())) {
300 if (ntree)
301 tree_del_recursive(ntree);
302 return NULL;
303 }
304 ntn->ft = w->ft;
305 ntn->ev = w->ev;
306 ntn->arg = w->arg;
307
308 TREE_INSERT_CHILD(ntn, ntree, link);
309 if (!IS_FLOATING_EVENT(ntree))
310 TREE_INSERT_CHILD(ntn, ntree, invk);
311
312 /* ntn becomes the new tree */
313 ntree = ntn;
314 } while ((w = TREE_PARENT(w, link)));
315
316 return ntree;
317 }
318 #if 0
319 /*----------------------------------------------------------------------------*/
320 static stree_t *
321 clone_stree(stree_t * stree)
322 {
323 stree_t *newstree, *walk;
324 tree_node_t *newtree;
325 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
326 struct _tree_node_t *walk = NULL, *del = NULL;
327
328 /* clone the tree */
329 TREE_DFS_FOREACH(walk, stree->root, &stack, link) {
330 new_tree_node_new()
331 if (del)
332 tree_node_del(del);
333 del = walk;
334 }
335 if (del)
336 tree_node_del(del);
337
338
339 TREE_DFS_FOREACH(w, stree->root, &stack, link) {
340 tree_node_t *ntn, *ptn = NULL;
341 }
342 }
343 #endif
344 /*----------------------------------------------------------------------------*/
345 /** Register a callback
346 * @param [in] store: the key-value-store for subtrees
347 * @param [in, out] pstree: the target subtree pointer
348 * @param [in] ev: the target event
349 * @param [in] cb: the callback to be registered
350 * @return 0 for success, -1 for failure
351 *
352 * For callback registeration, there can be some cases.
353 * 1. A new tree what we want is already there, so we can use it by simply
354 * updating reference counters.
355 * 2. A new tree what we want is not there, so we are going to create a
356 * new subtree. The new subtree will not only have new callback, but also
357 * inherit previous subtree.
358 * 2.1. The reference counter of previous subtree is 1, so we can avoid
359 * new tree allocation by reusing tree nodes of previous subtree.
360 * 2.2. The reference counter of previous subtree is not 1, so we should
361 * copy previous subtree's tree nodes.
362 * 3. Previously, there was no subtree (in other words, this is the first
363 * callback registeration) so we are going to make a new subtree which
364 * has only one callback
365 * */
366 inline int
RegCb(kvs_t * store,stree_t ** pstree,event_t ev,callback_t cb)367 RegCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb)
368 {
369 #ifdef DBGMSG
370 __PREPARE_DBGLOGGING();
371 #endif
372 TRACE_DBG("/>>>>>> start <<<<<<\\\n");
373 stree_t *stree, *nstree;
374 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
375 tree_node_t *w, *w2, *ntree;
376 uint64_t id = 0, nid;
377
378 if ((stree = *pstree)) {
379 /* XXX: tree_search() is heavy operation, but it is used for only error
380 * checking. Is there any way to avoid this?
381 * --> We can have a key-value-store for this, but is it too much? */
382 #if 0
383 if (tree_search(stree->root, ev))
384 /* Illegal trial of overwriting a previously registerd callback */
385 return -1;
386 #endif
387
388 id = stree->id;
389 }
390
391 /* calculate new subtree ID */
392 nid = NEWID(id, ev, cb);
393
394 TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n", nid, id, ev, cb);
395
396 if ((nstree = kvs_search(store, nid))) { /* case 1. */
397 TRACE_DBG("Case 1\n");
398 /* update reference counters */
399 if (stree)
400 stree_dec_ref(store, stree);
401
402 nstree->ref_cnt++;
403 *pstree = nstree;
404 #if 0
405 fprintf(stderr, "sizeof(tcp_stream)=%ld sizeof(tcp_send_vars)=%ld"
406 "sizeof(tcp_recv_vars)=%ld\n",
407 sizeof(tcp_stream),
408 sizeof(struct tcp_send_vars), sizeof(struct tcp_recv_vars));
409 #endif
410 return 0;
411 }
412
413 /* find the event node with ev and take a snapshot of the ancestors */
414 w = dforest_search(ev);
415 // w = tree_search(g_evroot, ev);
416 assert(w);
417 if ((ntree = create_spine(w, cb)) == NULL)
418 return (-1);
419
420 if (stree) { /* case 2. */
421 TRACE_DBG("Case 2\n");
422 tree_node_t *sptr;
423
424 /* attach flesh: Inherit previous subtree */
425 sptr = ntree;
426
427 #if 0
428 /* new code */
429 if (stree->ref_cnt > 1) {
430 /* take a snapshot of the entire forest */
431 curf = clone_tree(stree);
432 curf->root->ref_cnt = 1;
433 stree->ref_cnt--;
434 } else {
435 assert(stree->ref_cnt == 1);
436 curf = stree;
437 /* remove the current id from hash table */
438 kvs_remove(store, stree->id);
439 }
440
441 /* merge sptr into curf->root (existing tree) */
442 #endif
443
444 if (stree->ref_cnt == 1) {
445 /* case 2.1. */
446 TRACE_DBG("Case 2.1\n");
447 w = stree->root;
448 while (w) {
449 tree_node_t *next_walk = NULL, *next_walk2 = NULL,
450 *next_sptr = TREE_FIRSTBORN(sptr, link);
451
452 for (w2 = TREE_FIRSTBORN(w, link);
453 w2 != NULL;
454 w2 = next_walk2) {
455 next_walk2 = TREE_YOUNGER(w2, link);
456 if (sptr && w2->ev == next_sptr->ev) {
457 assert(next_walk == NULL);
458 next_walk = w2;
459 if (next_sptr->cb != next_walk->cb)
460 next_sptr->cb = next_walk->cb;
461 }
462 else {
463 TREE_DETACH(w2, link);
464 if (!IS_FLOATING_EVENT(w2))
465 TREE_DETACH(w2, invk);
466 if (next_walk) {
467 TREE_INSERT_CHILD(sptr, w2, link);
468 if (!IS_FLOATING_EVENT(w2))
469 TREE_INSERT_CHILD(sptr, w2, invk);
470 } else {
471 tree_node_t *f = TREE_FIRSTBORN(sptr, link);
472 TREE_INSERT_OLDER(f, w2, link);
473 if (!IS_FLOATING_EVENT(w2)) {
474 if ((f = TREE_FIRSTBORN(sptr, invk)))
475 TREE_INSERT_OLDER(f, w2, invk);
476 else
477 TREE_INSERT_CHILD(sptr, w2, invk);
478 }
479 }
480 }
481 }
482 w = next_walk;
483 sptr = next_sptr;
484 next_walk = NULL;
485 }
486 } else { // stree->ref_count != 1
487 /* case 2.2. */
488 TRACE_DBG("Case 2.2\n");
489 TREE_DFS_FOREACH(w, stree->root, &stack, link) {
490 tree_node_t *ntn, *ptn = NULL;
491
492 if (sptr && w->ev == sptr->ev) {
493 if (w->cb != sptr->cb)
494 sptr->cb = w->cb;
495 sptr = TREE_FIRSTBORN(sptr, link);
496 continue;
497 }
498
499 if (!(ntn = tree_node_new())) {
500 if (ntree)
501 tree_del_recursive(ntree);
502 return -1;
503 }
504 ntn->ft = w->ft;
505 ntn->cb = w->cb;
506 ntn->ev = w->ev;
507 ntn->arg = w->arg;
508
509 if (TREE_PARENT(w, link)) {
510 ptn = tree_search(ntree, TREE_PARENT(w, link)->ev);
511 assert(ptn);
512 TREE_INSERT_CHILD(ptn, ntn, link);
513 if (!IS_FLOATING_EVENT(ntn))
514 TREE_INSERT_CHILD(ptn, ntn, invk);
515 } else {
516 if (ntree)
517 tree_del_recursive(ntree);
518 free(ntn);
519 TRACE_ERROR("Can't find parent\n");
520 assert(0);
521 exit(EXIT_FAILURE);
522 }
523 }
524 }
525 }
526
527 /* case 3 if (stree == NULL) */
528 nstree = stree_create(nid, ntree);
529 if (nstree == NULL) {
530 TRACE_ERROR("Failed to create stree!\n");
531 assert(nstree);
532 exit(EXIT_FAILURE);
533 }
534 nstree->ref_cnt = 1;
535 kvs_insert(store, nid, nstree);
536 /* update reference counters */
537 if (stree)
538 stree_dec_ref(store, stree);
539 *pstree = nstree;
540
541 TRACE_DBG("\\>>>>>> end <<<<<</\n");
542 return 0;
543 }
544 /*----------------------------------------------------------------------------*/
545 /** Unregister a callback
546 * @param [in] store: the key-value-store for subtrees
547 * @param [in, out] pstree: the target subtree pointer
548 * @param [in] ev: the target event
549 * @return 0 for success, -1 for failure
550 *
551 * For callback unregisteration, there can be some cases.
552 * 1. No registered callback will be left after this unregistration.
553 * 2. A new tree what we want is already there, so we can use it by simply
554 * updating reference counters.
555 * 3. A new tree what we want is not there, so we create a new subtree by
556 * copying a part of previous subtree
557 * */
558 inline int
UnregCb(kvs_t * store,stree_t ** pstree,event_t ev)559 UnregCb(kvs_t *store, stree_t **pstree, event_t ev)
560 {
561 #ifdef DBGMSG
562 __PREPARE_DBGLOGGING();
563 #endif
564 TRACE_DBG("/>>>>>> start <<<<<<\\\n");
565 stree_t *stree, *nstree;
566 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
567 tree_node_t *w, *target;
568 uint64_t id = 0, nid;
569 callback_t cb;
570
571 if (!(stree = *pstree) || !(target = tree_search(stree->root, ev)) ||
572 !(cb = target->cb))
573 /* Illegal trial of unregistering a callback which is never registered
574 * before. */
575 return -1;
576
577 id = stree->id;
578
579 /* Calculate new subtree ID */
580 nid = NEWID(id, ev, cb);
581
582 TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n",
583 nid, id, ev, cb);
584
585 if (nid == 0) {
586 /* case 1. */
587 TRACE_DBG("Case 1\n");
588 /* update reference counters */
589 if (stree)
590 stree_dec_ref(store, stree);
591
592 *pstree = NULL;
593 } else if ((nstree = kvs_search(store, nid))) {
594 /* case 2. */
595 TRACE_DBG("Case 2\n");
596 /* update reference counters */
597 if (stree)
598 stree_dec_ref(store, stree);
599
600 nstree->ref_cnt++;
601 *pstree = nstree;
602 } else if (stree) {
603 /* case 3. */
604 TRACE_DBG("Case 3\n");
605 bool proceed;
606 tree_node_t *ntree = NULL, *sptr;
607
608 sptr = TREE_PARENT(target, link);
609 #define TREE_IS_ONLY_CHILD(root, x, field) \
610 (TREE_PARENT((x), field) ? \
611 TREE_PARENT((x), field)->field.tn_first == (x) && \
612 TREE_PARENT((x), field)->field.tn_last == (x) : \
613 (x) == (root) && (x)->field.tn_younger == NULL)
614 while (sptr && !sptr->cb && TREE_IS_ONLY_CHILD(stree->root, sptr, link))
615 sptr = TREE_PARENT(sptr, link);
616
617 assert(sptr);
618 /* 'sptr == NULL' means the tree will lose the only callback.
619 * This case should be handled by 'Case 1' */
620
621 TREE_DFS_FOREACH_SELECTIVE(w, stree->root, &stack, link, proceed) {
622 tree_node_t *ntn, *ptn;
623
624 if (w == sptr)
625 proceed = false;
626
627 if (!(ntn = tree_node_new())) {
628 /* free incomplete ntree */
629 if (ntree)
630 tree_del_recursive(ntree);
631 return -1;
632 }
633 ntn->ft = w->ft;
634 ntn->cb = w->cb;
635 ntn->ev = w->ev;
636 ntn->arg = w->arg;
637
638 if (TREE_PARENT(w, link)) {
639 assert(ntree);
640 ptn = tree_search(ntree, TREE_PARENT(w, link)->ev);
641
642 assert(ptn);
643
644 TREE_INSERT_CHILD(ptn, ntn, link);
645 if (!IS_FLOATING_EVENT(ntn))
646 TREE_INSERT_CHILD(ptn, ntn, invk);
647 } else
648 /* this is the root node */
649 ntree = ntn;
650 }
651
652 nstree = stree_create(nid, ntree);
653 if (nstree == NULL) {
654 TRACE_ERROR("Failed to create stree!\n");
655 assert(nstree);
656 exit(EXIT_FAILURE);
657 }
658
659 nstree->ref_cnt = 1;
660
661 kvs_insert(store, nid, nstree);
662
663 /* update reference counters */
664 stree_dec_ref(store, stree);
665
666 *pstree = nstree;
667 } else /* if (!stree) */
668 return -1;
669
670 TRACE_DBG("\\>>>>>> end <<<<<</\n");
671 return 0;
672 }
673 /*----------------------------------------------------------------------------*/
674 inline int
ModCb(kvs_t * store,stree_t ** pstree,event_t ev,callback_t cb)675 ModCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb)
676 {
677 assert(*pstree || cb);
678
679 /* Event ID starts from 1 */
680 if (ev == 0)
681 return -1;
682
683 if (cb)
684 return RegCb(store, pstree, ev, cb);
685 else
686 return UnregCb(store, pstree, ev);
687 }
688 /*----------------------------------------------------------------------------*/
689 void
GlobInitEvent(void)690 GlobInitEvent(void)
691 {
692 int i;
693
694 if (!(g_evroot = tree_node_new())) {
695 perror("FATAL: Failed to allocate essentials for the system\n");
696 exit(0);
697 }
698 g_evroot->ev = 0;
699 g_evroot->ft = NULL;
700 g_evroot->cb = NULL;
701 TREE_INIT_NODE(g_evroot, link);
702 TREE_INIT_NODE(g_evroot, invk);
703 /* add it to the dforest hash table */
704 dforest_store(g_evroot->ev, g_evroot);
705 for (i = 0; i < NUM_BEV; i++) {
706 tree_node_t *ntn;
707 if (!(ntn = tree_node_new())) {
708 perror("FATAL: Failed to allocate essentials for the system\n");
709 exit(0);
710 }
711 ntn->ev = 1 << i;
712 ntn->ft = NULL;
713 ntn->cb = NULL;
714 TREE_INIT_NODE(ntn, link);
715 TREE_INIT_NODE(ntn, invk);
716 TREE_INSERT_CHILD(g_evroot, ntn, link);
717 TREE_INSERT_CHILD(g_evroot, ntn, invk);
718 /* add it to the dforest hash table */
719 dforest_store(ntn->ev, ntn);
720 }
721 g_evid = 1 << NUM_BEV;
722 }
723 /*----------------------------------------------------------------------------*/
724 void
InitEvent(mtcp_manager_t mtcp)725 InitEvent(mtcp_manager_t mtcp)
726 {
727 mtcp->ev_store = kvs_create(KVS_BUCKETS, KVS_ENTRIES);
728 }
729 /*----------------------------------------------------------------------------*/
730 event_t
mtcp_alloc_event(event_t event)731 mtcp_alloc_event(event_t event)
732 {
733 tree_node_t *parent, *new;
734
735 if (!(parent = dforest_search(event)))
736 // if (!(parent = tree_search(g_evroot, event)))
737 return MOS_NULL_EVENT;
738
739 if ((new = tree_node_new()) == NULL)
740 return MOS_NULL_EVENT;
741
742 new->ev = g_evid++;
743 new->ft = NULL;
744 new->cb = NULL;
745 new->arg.arg = NULL;
746 new->arg.len = 0;
747
748 TREE_INIT_NODE(new, link);
749 TREE_INIT_NODE(new, invk);
750
751 TREE_INSERT_CHILD(parent, new, link);
752
753 return new->ev;
754 }
755 /*----------------------------------------------------------------------------*/
756 event_t
mtcp_define_event(event_t event,filter_t filter,struct filter_arg * arg)757 mtcp_define_event(event_t event, filter_t filter, struct filter_arg *arg)
758 {
759 #ifdef DBGMSG
760 __PREPARE_DBGLOGGING();
761 #endif
762 tree_node_t *parent, *new, *walk;
763
764 /* FIX - TODO. This code needs to have proper locking since this
765 deals with global variables that are shared across multiple
766 threads
767 */
768
769 if (!filter)
770 return MOS_NULL_EVENT;
771 if (arg && arg->len > 0 && !arg->arg)
772 return MOS_NULL_EVENT;
773 if (!(parent = dforest_search(event)))
774 // if (!(parent = tree_search(g_evroot, event)))
775 return MOS_NULL_EVENT;
776 for (walk = TREE_FIRSTBORN(parent, link);
777 walk;
778 walk = TREE_YOUNGER(walk, link))
779 if ((walk->ft == filter) &&
780 ((walk->arg.arg == NULL && (!arg || !arg->arg || arg->len <= 0)) ||
781 (walk->arg.len == arg->len &&
782 memcmp(walk->arg.arg, arg->arg, arg->len) == 0)))
783 return walk->ev;
784
785 if ((new = tree_node_new()) == NULL) {
786 errno = ENOMEM;
787 return MOS_NULL_EVENT;
788 }
789 new->ev = g_evid++;
790 new->ft = filter;
791
792 /* add it to the dforest hash table */
793 dforest_store(new->ev, new);
794
795 if (arg && arg->arg && arg->len > 0) {
796 new->arg.arg = malloc(arg->len);
797 if (new->arg.arg == NULL) {
798 errno = ENOMEM;
799 g_evid--;
800 free(new);
801 return MOS_NULL_EVENT;
802 }
803 memcpy(new->arg.arg, arg->arg, arg->len);
804 new->arg.len = arg->len;
805 }
806
807 TRACE_DBG("parent: %ld\n", parent->ev);
808 TREE_INSERT_CHILD(parent, new, link);
809 TREE_INSERT_CHILD(parent, new, invk);
810
811 return new->ev;
812 }
813 /*----------------------------------------------------------------------------*/
814 #define EVENT_STACK_PUSH(s, var) \
815 (assert(&(s)->stack[sizeof((s)->stack) / sizeof(void *)] > (s)->sp), \
816 *(++((s)->sp)) = (var))
817 #define EVENT_STACK_POP(s) \
818 (*(((s)->sp)--))
819 #define EVENT_STACK_CLR(s) \
820 ((s)->stack[0] = NULL, (s)->sp = (s)->stack)
821
822 inline int
RaiseEv(kvs_t * store,event_t event)823 RaiseEv(kvs_t *store, event_t event)
824 {
825 tree_node_t *ev = NULL;
826 #if 1
827 struct {
828 event_t ev;
829 uint64_t sid;
830 } instance;
831
832 instance.ev = event;
833 instance.sid = g_cur_stree->id;
834
835 _key_t key = XXH64(&instance, sizeof(instance), XXHSEED);
836 ev = (tree_node_t *)kvs_search(store, key);
837 #else
838 tree_node_t *walk;
839 for (walk = TREE_FIRSTBORN(g_cur_ev, link);
840 walk;
841 walk = TREE_YOUNGER(walk, link)) {
842 if (walk->ev == event && IS_FLOATING_EVENT(walk)) {
843 ev = walk;
844 break;
845 }
846 }
847 #endif
848
849 if (!ev)
850 return -1;
851
852 if (!ev->is_in_raiseq) {
853 ev->is_in_raiseq = true;
854 EVENT_STACK_PUSH(&g_evstack, ev);
855 }
856
857 return 0;
858 }
859 /*----------------------------------------------------------------------------*/
860 int
mtcp_raise_event(mctx_t mctx,event_t event)861 mtcp_raise_event(mctx_t mctx, event_t event)
862 {
863 mtcp_manager_t mtcp = GetMTCPManager(mctx);
864 if (!mtcp)
865 return -1;
866
867 return RaiseEv(mtcp->ev_store, event);
868 }
869 /*----------------------------------------------------------------------------*/
870 #define PRINT_EV(event, field) \
871 printf("[%s](node: %p) ev: %ld, older: %ld, younger: %ld, child: %ld, " \
872 "last: %ld, parent: %ld, ft: %p, cb: %p\n", \
873 #field, (event), (event)->ev, \
874 (event)->field.tn_older ? (event)->field.tn_older->ev : -1, \
875 (event)->field.tn_younger ? (event)->field.tn_younger->ev : -1, \
876 (event)->field.tn_first ? (event)->field.tn_first->ev : -1, \
877 (event)->field.tn_last ? (event)->field.tn_last->ev : -1, \
878 (event)->field.tn_parent ? (event)->field.tn_parent->ev : -1, \
879 (event)->ft, (event)->cb)
880 inline void
HandleCb(mctx_t const mctx,int sock,int side,stree_t * const stree,event_t events)881 HandleCb(mctx_t const mctx, int sock, int side,
882 stree_t * const stree, event_t events)
883 {
884 int i;
885 bool proceed;
886 tree_node_t *walk, *bev, *ev;
887 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) queue;
888 assert(stree);
889
890 g_cur_stree = stree;
891
892 for (i = 0; i < NUM_BEV; i++) {
893 if (!((1L << i) & events))
894 continue;
895
896 if (!(bev = stree->bevs[i]))
897 continue;
898
899 EVENT_STACK_CLR(&g_evstack);
900 tree_node_t **ptr = g_evstack.sp;
901 TREE_DFS_FOREACH_SELECTIVE(walk, bev, &queue, invk, proceed) {
902 //PRINT_EV(walk, link);
903 //PRINT_EV(walk, invk);
904
905 g_cur_ev = walk;
906
907 if (walk != bev && walk->ft &&
908 walk->ft(mctx, sock, side, walk->ev, &walk->arg) == false) {
909 ptr = g_evstack.sp;
910 proceed = false;
911 continue;
912 }
913
914 if (walk->cb)
915 walk->cb(mctx, sock, side, walk->ev, &walk->arg);
916
917 while (ptr < g_evstack.sp) {
918 ptr++;
919 TREE_INSERT_CHILD(walk, *ptr, invk);
920 }
921 }
922 g_cur_ev = NULL;
923
924 while ((ev = EVENT_STACK_POP(&g_evstack))) {
925 ev->is_in_raiseq = false;
926 TREE_DETACH(ev, invk);
927 }
928
929 if (!(events &= ~(1L << i))) {
930 g_cur_stree = NULL;
931 return;
932 }
933 }
934
935 g_cur_stree = NULL;
936 }
937 /*----------------------------------------------------------------------------*/
938 int
mtcp_register_callback(mctx_t mctx,int sockid,event_t event,int hook_point,callback_t callback)939 mtcp_register_callback(mctx_t mctx, int sockid, event_t event,
940 int hook_point, callback_t callback)
941 {
942 socket_map_t socket;
943 stree_t **pstree;
944 event_t root_event = event;
945 tree_node_t *ev_node;
946
947 /* (1) check if event variable is NULL
948 * (2) any event within the built-in event range should be 2^N
949 */
950 if (event == MOS_NULL_EVENT ||
951 (event < (1L << NUM_BEV) && !POWER_OF_TWO(event)))
952 return -1;
953
954 /* for UDEs, retrieve the root built-in event */
955 if (root_event >= (1L << NUM_BEV)) {
956 if ((ev_node = dforest_search(event)) == NULL)
957 return -1;
958 root_event = (ev_node)->link.tn_parent->ev;
959 /* the root built-in event should not be NULL */
960 if (root_event == MOS_NULL_EVENT)
961 return -1;
962 }
963
964 /* check if there is any invalid event-hook mapping */
965 if (hook_point == MOS_NULL) {
966 if (root_event & (MOS_ON_CONN_START | MOS_ON_REXMIT |
967 MOS_ON_TCP_STATE_CHANGE | MOS_ON_CONN_END)) {
968 errno = EINVAL;
969 return -1;
970 }
971 } else if (hook_point == MOS_HK_RCV || hook_point == MOS_HK_SND) {
972 if (root_event & (MOS_ON_CONN_NEW_DATA | MOS_ON_ORPHAN | MOS_ON_ERROR)) {
973 errno = EINVAL;
974 return -1;
975 }
976 } else {
977 /* invalid hook point */
978 errno = EINVAL;
979 return -1;
980 }
981
982 mtcp_manager_t mtcp = GetMTCPManager(mctx);
983 if (!mtcp)
984 return -1;
985
986 socket = &mtcp->msmap[sockid];
987
988 if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) {
989 if (hook_point == MOS_NULL)
990 pstree = &socket->monitor_stream->stree_dontcare;
991 else if (hook_point == MOS_HK_RCV)
992 pstree = &socket->monitor_stream->stree_pre_rcv;
993 else if (hook_point == MOS_HK_SND)
994 pstree = &socket->monitor_stream->stree_post_snd;
995 else
996 return -1;
997
998 } else if (socket->socktype == MOS_SOCK_MONITOR_STREAM
999 || socket->socktype == MOS_SOCK_MONITOR_RAW) {
1000 if (hook_point == MOS_NULL)
1001 pstree = &socket->monitor_listener->stree_dontcare;
1002 else if (hook_point == MOS_HK_RCV)
1003 pstree = &socket->monitor_listener->stree_pre_rcv;
1004 else if (hook_point == MOS_HK_SND)
1005 pstree = &socket->monitor_listener->stree_post_snd;
1006 else
1007 return -1;
1008 } else {
1009 errno = EINVAL;
1010 return -1;
1011 }
1012
1013 return ModCb(mtcp->ev_store, pstree, event, callback);
1014 }
1015 /*----------------------------------------------------------------------------*/
1016 int
mtcp_unregister_callback(mctx_t mctx,int sockid,event_t event,int hook_point)1017 mtcp_unregister_callback(mctx_t mctx, int sockid, event_t event,
1018 int hook_point)
1019 {
1020 return mtcp_register_callback(mctx, sockid, event, hook_point, NULL);
1021 }
1022 /*----------------------------------------------------------------------------*/
1023 inline void
HandleCallback(mtcp_manager_t mtcp,uint32_t hook,socket_map_t socket,int side,struct pkt_ctx * pctx,event_t events)1024 HandleCallback(mtcp_manager_t mtcp, uint32_t hook,
1025 socket_map_t socket, int side, struct pkt_ctx *pctx, event_t events)
1026 {
1027 struct sfbpf_program fcode;
1028
1029 if (!socket)
1030 return;
1031
1032 if (!events)
1033 return;
1034 assert(events);
1035
1036 /* if client side monitoring is disabled, then skip */
1037 if (side == MOS_SIDE_CLI && socket->monitor_stream->client_mon == 0)
1038 return;
1039 /* if server side monitoring is disabled, then skip */
1040 else if (side == MOS_SIDE_SVR && socket->monitor_stream->server_mon == 0)
1041 return;
1042
1043 #define MSTRM(sock) (sock)->monitor_stream
1044 #define MLSNR(sock) (sock)->monitor_listener
1045 /* We use `?:` notation instead of `if/else` to make `evp` as const */
1046 stree_t * const stree =
1047 (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) ?
1048 ((hook == MOS_HK_RCV) ? MSTRM(socket)->stree_pre_rcv :
1049 (hook == MOS_HK_SND) ? MSTRM(socket)->stree_post_snd :
1050 MSTRM(socket)->stree_dontcare)
1051 : (socket->socktype == MOS_SOCK_MONITOR_STREAM)
1052 || (socket->socktype == MOS_SOCK_MONITOR_RAW) ?
1053 ((hook == MOS_HK_RCV) ? MLSNR(socket)->stree_pre_rcv :
1054 (hook == MOS_HK_SND) ? MLSNR(socket)->stree_post_snd :
1055 MLSNR(socket)->stree_dontcare) :
1056 NULL;
1057
1058 if (!stree)
1059 return;
1060
1061 /* mtcp_bind_monitor_filter()
1062 - BPF filter is evaluated only for RAW socket and PASSIVE
1063 socket (= orphan filter)
1064 - stream syn filter is moved to and evaluated on socket creation
1065 */
1066 if (socket->socktype == MOS_SOCK_MONITOR_STREAM) {
1067 fcode = MLSNR(socket)->stream_orphan_fcode;
1068 /* if not match with filter, return */
1069 if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode,
1070 (uint8_t *)pctx->p.iph - sizeof(struct ethhdr),
1071 pctx->p.ip_len + sizeof(struct ethhdr)) == 0)
1072 return;
1073 }
1074 if (socket->socktype == MOS_SOCK_MONITOR_RAW) {
1075 fcode = MLSNR(socket)->raw_pkt_fcode;
1076 /* if not match with filter, return */
1077 if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode,
1078 (uint8_t *)pctx->p.iph - sizeof(struct ethhdr),
1079 pctx->p.ip_len + sizeof(struct ethhdr)) == 0)
1080 return;
1081 }
1082
1083 mctx_t const mctx = g_ctx[mtcp->ctx->cpu];
1084 mtcp->pctx = pctx; /* for mtcp_getcurpkt() */
1085
1086 HandleCb(mctx, socket->id, side, stree, events);
1087 }
1088 /*----------------------------------------------------------------------------*/
1089 #endif
1090