1 #ifdef NEWEV
2 
3 /* ----------------------------------------------------------------------------
4  * Please see 4 Jul. 2015 meeting slides for conceptual explanation.
5  * 'Subtree' has special meaning and its 'ID' is very important concept for
6  * understanding this code.
7  *
8  * Please contact Donghwi Kim for further questions,
9  * via e-mail: [email protected]
10  */
11 #include "debug.h"
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <tree.h>
16 #include <key_value_store.h>
17 #include "scalable_event.h"
18 
19 #include "mtcp.h"
20 #include "mos_api.h"
21 #include "mtcp_api.h"
22 
23 #define NEWHASH
24 #ifdef NEWHASH
25 #include "mtcp_util.h"
26 #define XXHSEED 18446744073709551557ULL
27 #endif
28 /*----------------------------------------------------------------------------*/
29 #define POWER_OF_TWO(x) (!((x - 1) & (x)))
30 
31 #define MAX_EVENTS  30240
32 #define KVS_BUCKETS 1024
33 #define KVS_ENTRIES 102400
34 #define MAX_DEPTH   MAX_EVENTS
35 #define NEWID(id, ev, cb) (id ^ hash64(ev, cb))
36 #define IS_UDE(e) ((e)->ev >= (1 << NUM_BEV))
37 #define IS_FLOATING_EVENT(e) (!(e)->ft && IS_UDE(e))
38 
39 struct _tree_node_t *g_evroot;
40 event_t g_evid;
41 
42 static __thread stree_t *g_cur_stree = NULL;
43 static __thread tree_node_t *g_cur_ev = NULL;
44 
45 static __thread struct {
46 	struct _tree_node_t *stack[MAX_EVENTS];
47 	struct _tree_node_t **sp;
48 } g_evstack;
49 
50 #define free(x) do {				\
51 		assert(x);			\
52 		free(x);			\
53 	} while (0)
54 
55 /*----------------------------------------------------------------------------*/
56 struct devent {
57 	event_t ev;            /* event id = key */
58 	tree_node_t *ptr;      /* ptr to the event node in the d-forest = value */
59 	struct devent *next;   /* next pointer */
60 };
61 #define MAX_BUCKET 2048
62 #define HASHVAL(x) ((x) & (MAX_BUCKET-1))
63 struct devent* g_dfTable[MAX_BUCKET] = {0};  /* d-forest hash table */
64 /*----------------------------------------------------------------------------*/
65 /* simple hash table implementation */
66 static bool
67 dforest_store(event_t ev, tree_node_t* ptr)
68 {
69 	struct devent *de, *p;
70 	int idx;
71 
72 	if ((de = malloc(sizeof(*de))) == NULL) {
73 		assert(0); /* should not happen! */
74 		return false;
75 	}
76 	de->ev = ev;
77 	de->ptr = ptr;
78 	de->next = NULL;
79 
80 	idx = HASHVAL(ev);
81 	if (g_dfTable[idx] == NULL) {
82 		g_dfTable[idx] = de;
83 	}
84 	else  {
85 		p = g_dfTable[idx];
86 		while (p->next)
87 			p = p->next;
88 		p->next = de;
89 	}
90 	return true;
91 }
92 /*----------------------------------------------------------------------------*/
93 static tree_node_t *
94 dforest_search(event_t ev)
95 {
96 	int idx;
97 	struct devent *p;
98 
99 	idx = HASHVAL(ev);
100 	if (g_dfTable[idx] == NULL)
101 		return NULL;
102 
103 	for (p = g_dfTable[idx];
104 	     p != NULL;
105 	     p = p->next) {
106 		if (p->ev == ev)
107 			return p->ptr;
108 	}
109 	return NULL;
110 }
111 /*----------------------------------------------------------------------------*/
112 /** Create a new tree node
113  * @return the new tree node
114  * */
115 inline tree_node_t *
116 tree_node_new()
117 {
118 	return calloc(1, sizeof(tree_node_t));
119 }
120 /*----------------------------------------------------------------------------*/
121 /** Delete a tree node */
122 inline void
123 tree_node_del(tree_node_t *node)
124 {
125 	free(node);
126 }
127 /*----------------------------------------------------------------------------*/
128 /** Delete a tree node and all of its descendants
129  * @param [in] node: target node
130  * */
131 inline void
132 tree_del_recursive(tree_node_t *node)
133 {
134 	struct _tree_node_t *walk = NULL, *del = NULL;
135 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
136 
137 	TREE_DFS_FOREACH(walk, node, &stack, link) {
138 		if (del)
139 			tree_node_del(del);
140 		del = walk;
141 	}
142 	if (del)
143 		tree_node_del(del);
144 }
145 /*----------------------------------------------------------------------------*/
146 /** Search a tree node by event ID
147  * @param [in] root: root node
148  * @param [in] ev: event ID
149  * @return target node
150  *
151  * The search is in DFS mannar
152  * */
153 inline tree_node_t *
154 tree_search(tree_node_t *root, event_t ev)
155 {
156 	struct _tree_node_t *walk = NULL;
157 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
158 
159 	TREE_DFS_FOREACH(walk, root, &stack, link)
160 		if (walk->ev == ev) {
161 			return walk;
162 		}
163 	return NULL;
164 }
165 /*----------------------------------------------------------------------------*/
166 /** Create a new subtree
167  * @param [in] id: subtree ID
168  * @param [in] root: root of the subtree
169  * @return new subtree
170  *
171  * Allocate and initialize subtree structure, stree_t:
172  *   1. Allocate the structure.
173  *   2. Initialize reference counter and fill ID field.
174  *   3. Set 'root' pointer which points the root node of a tree.
175  *      The tree should contain all required tree nodes.
176  *   4. Set built-in event tree node pointers.
177  * */
178 inline stree_t *
179 stree_create(uint64_t id, tree_node_t *root)
180 {
181 	tree_node_t *bev;
182 	stree_t *nstree = malloc(sizeof(stree_t));
183 	if (!nstree)
184 		return NULL;
185 
186 	nstree->ref_cnt = 0;
187 	nstree->id = id;
188 	nstree->root = root;
189 	memset(nstree->bevs, 0, sizeof(nstree->bevs));
190 
191 	for (bev = TREE_FIRSTBORN(root, link); bev; bev = TREE_YOUNGER(bev, link)) {
192 		int idx = 0;
193 		while (bev->ev >> (idx + 1))
194 			idx++;
195 		nstree->bevs[idx] = bev;
196 	}
197 
198 	return nstree;
199 }
200 /*----------------------------------------------------------------------------*/
201 /** Destroy the subtree
202  * @param [in] stree: the target subtree */
203 inline void
204 stree_destroy(kvs_t *store, stree_t *stree)
205 {
206 	tree_del_recursive(stree->root);
207 	free(stree);
208 }
209 /*----------------------------------------------------------------------------*/
210 /** Increase reference counter of the subtree
211  * @param [in] stree: the target subtree */
212 inline void
213 stree_inc_ref(stree_t *stree)
214 {
215 	stree->ref_cnt++;
216 }
217 /*----------------------------------------------------------------------------*/
218 /** Decrease reference counter of the subtree, and remove it from the key-
219  * value-store if needed
220  * @param [in] store: the key-value-store
221  * @param [in] stree: the target subtree
222  *
223  * In addition to decreasing the reference counter, remove the subtree from
224  * key-value-store if decreased reference counter is zero.
225  * */
226 inline void
227 stree_dec_ref(kvs_t *store, stree_t *stree)
228 {
229 	if (stree && !--stree->ref_cnt) {
230 	  	kvs_remove(store, stree->id);
231 		stree_destroy(store, stree);
232 	}
233 }
234 /*----------------------------------------------------------------------------*/
235 /** Hash the event ID and callback pair
236  * @param [in] ev: the event ID
237  * @param [in] cb: the callback function pointer
238  * @return hash value
239  *
240  * TODO: Replace 32-bit Jenkins hash with better one.
241  * */
242 inline static uint64_t
243 hash64(event_t ev, callback_t cb)
244 {
245 #ifdef NEWHASH
246 	struct {
247 		event_t ev;
248 		callback_t cb;
249 	} instance;
250 
251 	instance.ev = ev;
252 	instance.cb = cb;
253 
254 	return XXH64(&instance, sizeof(event_t) + sizeof(callback_t), XXHSEED);
255 #else
256 	uint64_t hash = 0;
257 	int i;
258 
259 	for (i = 0; i < sizeof(ev); ++i) {
260 		hash += ((uint8_t *)&ev)[i];
261 		hash += (hash << 10);
262 		hash ^= (hash >> 6);
263 	}
264 	for (i = 0; i < sizeof(cb); ++i) {
265 		hash += ((uint8_t *)&cb)[i];
266 		hash += (hash << 10);
267 		hash ^= (hash >> 6);
268 	}
269 	hash += (hash << 3);
270 	hash ^= (hash >> 11);
271 	hash += (hash << 15);
272 
273 	return hash;
274 #endif
275 }
276 
277 /*-------------------------------------------------------------------------*/
278 /* Create a new tree that consists of a tree node (w) with a new
279  * callback and take a snapshot of the subtree from w all the way up to
280  * the root node by traversing the ancestors of w.
281 */
282 /*-------------------------------------------------------------------------*/
283 static tree_node_t*
284 create_spine(tree_node_t* w, callback_t cb)
285 {
286 	tree_node_t *ntn, *ntree = NULL;
287 
288 	if ((ntn = tree_node_new()) == NULL)
289 		return NULL;
290 
291 	/* the leaf event node with callback function */
292 	ntn->ft  = w->ft;
293 	ntn->cb  = cb;
294 	ntn->ev  = w->ev;
295 	ntn->arg = w->arg;
296 	ntree = ntn;          /* we start from the leaf */
297 
298 	do {
299 		if (!(ntn = tree_node_new())) {
300 			if (ntree)
301 				tree_del_recursive(ntree);
302 			return NULL;
303 		}
304 		ntn->ft = w->ft;
305 		ntn->ev = w->ev;
306 		ntn->arg = w->arg;
307 
308 		TREE_INSERT_CHILD(ntn, ntree, link);
309 		if (!IS_FLOATING_EVENT(ntree))
310 			TREE_INSERT_CHILD(ntn, ntree, invk);
311 
312 		/* ntn becomes the new tree */
313 		ntree = ntn;
314 	} while ((w = TREE_PARENT(w, link)));
315 
316 	return ntree;
317 }
318 #if 0
319 /*----------------------------------------------------------------------------*/
320 static stree_t *
321 clone_stree(stree_t * stree)
322 {
323 	stree_t *newstree, *walk;
324 	tree_node_t *newtree;
325 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
326 	struct _tree_node_t *walk = NULL, *del = NULL;
327 
328 	/* clone the tree */
329 	TREE_DFS_FOREACH(walk, stree->root, &stack, link) {
330 		new_tree_node_new()
331 			if (del)
332 				tree_node_del(del);
333 		del = walk;
334 	}
335 	if (del)
336 		tree_node_del(del);
337 
338 
339 	TREE_DFS_FOREACH(w, stree->root, &stack, link) {
340 		tree_node_t *ntn, *ptn = NULL;
341 	}
342 }
343 #endif
344 /*----------------------------------------------------------------------------*/
345 /** Register a callback
346  * @param [in] store: the key-value-store for subtrees
347  * @param [in, out] pstree: the target subtree pointer
348  * @param [in] ev: the target event
349  * @param [in] cb: the callback to be registered
350  * @return 0 for success, -1 for failure
351  *
352  * For callback registeration, there can be some cases.
353  *   1. A new tree what we want is already there, so we can use it by simply
354  *      updating reference counters.
355  *   2. A new tree what we want is not there, so we are going to create a
356  *      new subtree. The new subtree will not only have new callback, but also
357  *      inherit previous subtree.
358  *      2.1. The reference counter of previous subtree is 1, so we can avoid
359  *           new tree allocation by reusing tree nodes of previous subtree.
360  *      2.2. The reference counter of previous subtree is not 1, so we should
361  *           copy previous subtree's tree nodes.
362  *   3. Previously, there was no subtree (in other words, this is the first
363  *      callback registeration) so we are going to make a new subtree which
364  *      has only one callback
365  * */
366 inline int
367 RegCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb)
368 {
369 #ifdef DBGMSG
370 	__PREPARE_DBGLOGGING();
371 #endif
372 	TRACE_DBG("/>>>>>> start <<<<<<\\\n");
373 	stree_t *stree, *nstree;
374 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
375 	tree_node_t *w, *w2, *ntree;
376 	uint64_t id = 0, nid;
377 
378 	if ((stree = *pstree)) {
379 		/* XXX: tree_search() is heavy operation, but it is used for only error
380 		 * checking. Is there any way to avoid this?
381 		 * --> We can have a key-value-store for this, but is it too much? */
382 #if 0
383 		if (tree_search(stree->root, ev))
384 			/* Illegal trial of overwriting a previously registerd callback */
385 			return -1;
386 #endif
387 
388 		id = stree->id;
389 	}
390 
391 	/* calculate new subtree ID */
392 	nid = NEWID(id, ev, cb);
393 
394 	TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n", nid, id, ev, cb);
395 
396 	if ((nstree = kvs_search(store, nid))) {  /* case 1. */
397 		TRACE_DBG("Case 1\n");
398 		/* update reference counters */
399 		if (stree)
400 			stree_dec_ref(store, stree);
401 
402 		nstree->ref_cnt++;
403 		*pstree = nstree;
404 #if 0
405 		fprintf(stderr, "sizeof(tcp_stream)=%ld sizeof(tcp_send_vars)=%ld"
406 			"sizeof(tcp_recv_vars)=%ld\n",
407 			sizeof(tcp_stream),
408 			sizeof(struct tcp_send_vars), sizeof(struct tcp_recv_vars));
409 #endif
410 		return 0;
411 	}
412 
413 	/* find the event node with ev and take a snapshot of the ancestors */
414 	w = dforest_search(ev);
415 	//	w = tree_search(g_evroot, ev);
416 	assert(w);
417 	if ((ntree = create_spine(w, cb)) == NULL)
418 		return (-1);
419 
420 	if (stree) {  /* case 2. */
421 		TRACE_DBG("Case 2\n");
422 		tree_node_t *sptr;
423 
424 		/* attach flesh: Inherit previous subtree */
425 		sptr = ntree;
426 
427 #if 0
428 		/* new code */
429 		if (stree->ref_cnt > 1) {
430 			/* take a snapshot of the entire forest */
431 			curf = clone_tree(stree);
432 			curf->root->ref_cnt = 1;
433 			stree->ref_cnt--;
434 		} else {
435 			assert(stree->ref_cnt == 1);
436 			curf = stree;
437 			/* remove the current id from hash table */
438 			kvs_remove(store, stree->id);
439 		}
440 
441 		/* merge sptr into curf->root (existing tree) */
442 #endif
443 
444 		if (stree->ref_cnt == 1) {
445 			/* case 2.1. */
446 			TRACE_DBG("Case 2.1\n");
447 			w = stree->root;
448 			while (w) {
449 				tree_node_t *next_walk = NULL, *next_walk2 = NULL,
450 					*next_sptr = TREE_FIRSTBORN(sptr, link);
451 
452 				for (w2 = TREE_FIRSTBORN(w, link);
453 				     w2 != NULL;
454 				     w2 = next_walk2) {
455 					next_walk2 = TREE_YOUNGER(w2, link);
456 					if (sptr && w2->ev == next_sptr->ev) {
457 						assert(next_walk == NULL);
458 						next_walk = w2;
459 						if (next_sptr->cb != next_walk->cb)
460 							next_sptr->cb = next_walk->cb;
461 					}
462 					else {
463 						TREE_DETACH(w2, link);
464 						if (!IS_FLOATING_EVENT(w2))
465 							TREE_DETACH(w2, invk);
466 						if (next_walk) {
467 							TREE_INSERT_CHILD(sptr, w2, link);
468 							if (!IS_FLOATING_EVENT(w2))
469 								TREE_INSERT_CHILD(sptr, w2, invk);
470 						} else {
471 							tree_node_t *f = TREE_FIRSTBORN(sptr, link);
472 							TREE_INSERT_OLDER(f, w2, link);
473 							if (!IS_FLOATING_EVENT(w2)) {
474 								if ((f = TREE_FIRSTBORN(sptr, invk)))
475 									TREE_INSERT_OLDER(f, w2, invk);
476 								else
477 									TREE_INSERT_CHILD(sptr, w2, invk);
478 							}
479 						}
480 					}
481 				}
482 				w = next_walk;
483 				sptr = next_sptr;
484 				next_walk = NULL;
485 			}
486 		} else { // stree->ref_count != 1
487 			/* case 2.2. */
488 			TRACE_DBG("Case 2.2\n");
489 			TREE_DFS_FOREACH(w, stree->root, &stack, link) {
490 				tree_node_t *ntn, *ptn = NULL;
491 
492 				if (sptr && w->ev == sptr->ev) {
493 					if (w->cb != sptr->cb)
494 						sptr->cb = w->cb;
495 					sptr = TREE_FIRSTBORN(sptr, link);
496 					continue;
497 				}
498 
499 				if (!(ntn = tree_node_new())) {
500 					if (ntree)
501 						tree_del_recursive(ntree);
502 					return -1;
503 				}
504 				ntn->ft  = w->ft;
505 				ntn->cb  = w->cb;
506 				ntn->ev  = w->ev;
507 				ntn->arg = w->arg;
508 
509 				if (TREE_PARENT(w, link)) {
510 					ptn = tree_search(ntree, TREE_PARENT(w, link)->ev);
511 					assert(ptn);
512 					TREE_INSERT_CHILD(ptn, ntn, link);
513 					if (!IS_FLOATING_EVENT(ntn))
514 						TREE_INSERT_CHILD(ptn, ntn, invk);
515 				} else {
516 					if (ntree)
517 						tree_del_recursive(ntree);
518 					free(ntn);
519 					TRACE_ERROR("Can't find parent\n");
520 					assert(0);
521 					exit(EXIT_FAILURE);
522 				}
523 			}
524 		}
525 	}
526 
527 	/* case 3 if (stree == NULL) */
528 	nstree = stree_create(nid, ntree);
529 	if (nstree == NULL) {
530 		TRACE_ERROR("Failed to create stree!\n");
531 		assert(nstree);
532 	}
533 	nstree->ref_cnt = 1;
534 	kvs_insert(store, nid, nstree);
535 	/* update reference counters */
536 	if (stree)
537 		stree_dec_ref(store, stree);
538 	*pstree = nstree;
539 
540 	TRACE_DBG("\\>>>>>>  end  <<<<<</\n");
541 	return 0;
542 }
543 /*----------------------------------------------------------------------------*/
544 /** Unregister a callback
545  * @param [in] store: the key-value-store for subtrees
546  * @param [in, out] pstree: the target subtree pointer
547  * @param [in] ev: the target event
548  * @return 0 for success, -1 for failure
549  *
550  * For callback unregisteration, there can be some cases.
551  *   1. No registered callback will be left after this unregistration.
552  *   2. A new tree what we want is already there, so we can use it by simply
553  *      updating reference counters.
554  *   3. A new tree what we want is not there, so we create a new subtree by
555  *      copying a part of previous subtree
556  * */
557 inline int
558 UnregCb(kvs_t *store, stree_t **pstree, event_t ev)
559 {
560 #ifdef DBGMSG
561 	__PREPARE_DBGLOGGING();
562 #endif
563 	TRACE_DBG("/>>>>>> start <<<<<<\\\n");
564 	stree_t *stree, *nstree;
565 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
566 	tree_node_t *w, *target;
567 	uint64_t id = 0, nid;
568 	callback_t cb;
569 
570 	if (!(stree = *pstree) || !(target = tree_search(stree->root, ev)) ||
571 	    !(cb = target->cb))
572 		/* Illegal trial of unregistering a callback which is never registered
573 		 * before. */
574 		return -1;
575 
576 	id = stree->id;
577 
578 	/* Calculate new subtree ID */
579 	nid = NEWID(id, ev, cb);
580 
581 	TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n",
582 		  nid, id, ev, cb);
583 
584 	if (nid == 0) {
585 		/* case 1. */
586 		TRACE_DBG("Case 1\n");
587 		/* update reference counters */
588 		if (stree)
589 			stree_dec_ref(store, stree);
590 
591 		*pstree = NULL;
592 	} else if ((nstree = kvs_search(store, nid))) {
593 		/* case 2. */
594 		TRACE_DBG("Case 2\n");
595 		/* update reference counters */
596 		if (stree)
597 			stree_dec_ref(store, stree);
598 
599 		nstree->ref_cnt++;
600 		*pstree = nstree;
601 	} else if (stree) {
602 		/* case 3. */
603 		TRACE_DBG("Case 3\n");
604 		bool proceed;
605 		tree_node_t *ntree = NULL, *sptr;
606 
607 		sptr = TREE_PARENT(target, link);
608 #define TREE_IS_ONLY_CHILD(root, x, field) \
609 		(TREE_PARENT((x), field) ?			   \
610 		 TREE_PARENT((x), field)->field.tn_first == (x) && \
611 		 TREE_PARENT((x), field)->field.tn_last == (x) :   \
612 		 (x) == (root) && (x)->field.tn_younger == NULL)
613 		while (sptr && !sptr->cb && TREE_IS_ONLY_CHILD(stree->root, sptr, link))
614 			sptr = TREE_PARENT(sptr, link);
615 
616 		assert(sptr);
617 		/* 'sptr == NULL' means the tree will lose the only callback.
618 		 * This case should be handled by 'Case 1' */
619 
620 		TREE_DFS_FOREACH_SELECTIVE(w, stree->root, &stack, link, proceed) {
621 			tree_node_t *ntn, *ptn;
622 
623 			if (w == sptr)
624 				proceed = false;
625 
626 			if (!(ntn = tree_node_new())) {
627 				/* free incomplete ntree */
628 				if (ntree)
629 					tree_del_recursive(ntree);
630 				return -1;
631 			}
632 			ntn->ft = w->ft;
633 			ntn->cb = w->cb;
634 			ntn->ev = w->ev;
635 			ntn->arg = w->arg;
636 
637 			if (TREE_PARENT(w, link)) {
638 				assert(ntree);
639 				ptn = tree_search(ntree, TREE_PARENT(w, link)->ev);
640 
641 				assert(ptn);
642 
643 				TREE_INSERT_CHILD(ptn, ntn, link);
644 				if (!IS_FLOATING_EVENT(ntn))
645 					TREE_INSERT_CHILD(ptn, ntn, invk);
646 			} else
647 				/* this is the root node */
648 				ntree = ntn;
649 		}
650 
651 		nstree = stree_create(nid, ntree);
652 		if (nstree == NULL) {
653 			TRACE_ERROR("Failed to create stree!\n");
654 			assert(nstree);
655 		}
656 
657 		nstree->ref_cnt = 1;
658 
659 		kvs_insert(store, nid, nstree);
660 
661 		/* update reference counters */
662 		stree_dec_ref(store, stree);
663 
664 		*pstree = nstree;
665 	} else /* if (!stree) */
666 		return -1;
667 
668 	TRACE_DBG("\\>>>>>>  end  <<<<<</\n");
669 	return 0;
670 }
671 /*----------------------------------------------------------------------------*/
672 inline int
673 ModCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb)
674 {
675 	assert(*pstree || cb);
676 
677 	/* Event ID starts from 1 */
678 	if (ev == 0)
679 		return -1;
680 
681 	if (cb)
682 		return RegCb(store, pstree, ev, cb);
683 	else
684 		return UnregCb(store, pstree, ev);
685 }
686 /*----------------------------------------------------------------------------*/
687 void
688 GlobInitEvent(void)
689 {
690 	int i;
691 
692 	if (!(g_evroot = tree_node_new())) {
693 		perror("FATAL: Failed to allocate essentials for the system\n");
694 		exit(0);
695 	}
696 	g_evroot->ev = 0;
697 	g_evroot->ft = NULL;
698 	g_evroot->cb = NULL;
699 	TREE_INIT_NODE(g_evroot, link);
700 	TREE_INIT_NODE(g_evroot, invk);
701 	/* add it to the dforest hash table */
702 	dforest_store(g_evroot->ev, g_evroot);
703 	for (i = 0; i < NUM_BEV; i++) {
704 		tree_node_t *ntn;
705 		if (!(ntn = tree_node_new())) {
706 			perror("FATAL: Failed to allocate essentials for the system\n");
707 			exit(0);
708 		}
709 		ntn->ev = 1 << i;
710 		ntn->ft = NULL;
711 		ntn->cb = NULL;
712 		TREE_INIT_NODE(ntn, link);
713 		TREE_INIT_NODE(ntn, invk);
714 		TREE_INSERT_CHILD(g_evroot, ntn, link);
715 		TREE_INSERT_CHILD(g_evroot, ntn, invk);
716 		/* add it to the dforest hash table */
717 		dforest_store(ntn->ev, ntn);
718 	}
719 	g_evid = 1 <<  NUM_BEV;
720 }
721 /*----------------------------------------------------------------------------*/
722 void
723 InitEvent(mtcp_manager_t mtcp)
724 {
725 	mtcp->ev_store = kvs_create(KVS_BUCKETS, KVS_ENTRIES);
726 }
727 /*----------------------------------------------------------------------------*/
728 event_t
729 mtcp_alloc_event(event_t event)
730 {
731 	tree_node_t *parent, *new;
732 
733 	if (!(parent = dforest_search(event)))
734 	//	if (!(parent = tree_search(g_evroot, event)))
735 		return MOS_NULL_EVENT;
736 
737 	if ((new = tree_node_new()) == NULL)
738 		return MOS_NULL_EVENT;
739 
740 	new->ev = g_evid++;
741 	new->ft = NULL;
742 	new->cb = NULL;
743 	new->arg.arg = NULL;
744 	new->arg.len = 0;
745 
746 	TREE_INIT_NODE(new, link);
747 	TREE_INIT_NODE(new, invk);
748 
749 	TREE_INSERT_CHILD(parent, new, link);
750 
751 	return new->ev;
752 }
753 /*----------------------------------------------------------------------------*/
754 event_t
755 mtcp_define_event(event_t event, filter_t filter, struct filter_arg *arg)
756 {
757 #ifdef DBGMSG
758 	__PREPARE_DBGLOGGING();
759 #endif
760 	tree_node_t *parent, *new, *walk;
761 
762 	/* FIX - TODO. This code needs to have proper locking since this
763 	   deals with global variables that are shared across multiple
764 	   threads
765 	*/
766 
767 	if (!filter)
768 		return MOS_NULL_EVENT;
769 	if (arg && arg->len > 0 && !arg->arg)
770 		return MOS_NULL_EVENT;
771 	if (!(parent = dforest_search(event)))
772 	//	if (!(parent = tree_search(g_evroot, event)))
773 		return MOS_NULL_EVENT;
774 	for (walk = TREE_FIRSTBORN(parent, link);
775 	     walk;
776 	     walk = TREE_YOUNGER(walk, link))
777 		if ((walk->ft == filter) &&
778 		    ((walk->arg.arg == NULL && (!arg || !arg->arg || arg->len <= 0)) ||
779 		     (walk->arg.len == arg->len &&
780 		      memcmp(walk->arg.arg, arg->arg, arg->len) == 0)))
781 			return walk->ev;
782 
783 	if ((new = tree_node_new()) == NULL)  {
784 		errno = ENOMEM;
785 		return MOS_NULL_EVENT;
786 	}
787 	new->ev = g_evid++;
788 	new->ft = filter;
789 
790 	/* add it to the dforest hash table */
791 	dforest_store(new->ev, new);
792 
793 	if (arg && arg->arg && arg->len > 0) {
794 		new->arg.arg = malloc(arg->len);
795 		if (new->arg.arg == NULL) {
796 			errno = ENOMEM;
797 			g_evid--;
798 			free(new);
799 			return MOS_NULL_EVENT;
800 		}
801 		memcpy(new->arg.arg, arg->arg, arg->len);
802 		new->arg.len = arg->len;
803 	}
804 
805 	TRACE_DBG("parent: %ld\n", parent->ev);
806 	TREE_INSERT_CHILD(parent, new, link);
807 	TREE_INSERT_CHILD(parent, new, invk);
808 
809 	return new->ev;
810 }
811 /*----------------------------------------------------------------------------*/
812 #define EVENT_STACK_PUSH(s, var)					\
813 	(assert(&(s)->stack[sizeof((s)->stack) / sizeof(void *)] > (s)->sp), \
814 	 *(++((s)->sp)) = (var))
815 #define EVENT_STACK_POP(s)			\
816 	(*(((s)->sp)--))
817 #define EVENT_STACK_CLR(s)				\
818 	((s)->stack[0] = NULL, (s)->sp = (s)->stack)
819 
820 inline int
821 RaiseEv(kvs_t *store, event_t event)
822 {
823 	tree_node_t *ev = NULL;
824 #if 1
825 	struct {
826 		event_t ev;
827 		uint64_t sid;
828 	} instance;
829 
830 	instance.ev = event;
831 	instance.sid = g_cur_stree->id;
832 
833 	_key_t key = XXH64(&instance, sizeof(instance), XXHSEED);
834 	ev = (tree_node_t *)kvs_search(store, key);
835 #else
836 	tree_node_t *walk;
837 	for (walk = TREE_FIRSTBORN(g_cur_ev, link);
838 	     walk;
839 		 walk = TREE_YOUNGER(walk, link)) {
840 		if (walk->ev == event && IS_FLOATING_EVENT(walk)) {
841 			ev = walk;
842 			break;
843 		}
844 	}
845 #endif
846 
847 	if (!ev)
848 		return -1;
849 
850 	if (!ev->is_in_raiseq) {
851 		ev->is_in_raiseq = true;
852 		EVENT_STACK_PUSH(&g_evstack, ev);
853 	}
854 
855 	return 0;
856 }
857 /*----------------------------------------------------------------------------*/
858 int
859 mtcp_raise_event(mctx_t mctx, event_t event)
860 {
861 	mtcp_manager_t mtcp = GetMTCPManager(mctx);
862 	if (!mtcp)
863 		return -1;
864 
865 	return RaiseEv(mtcp->ev_store, event);
866 }
867 /*----------------------------------------------------------------------------*/
868 #define PRINT_EV(event, field)						\
869 	printf("[%s](node: %p) ev: %ld, older: %ld, younger: %ld, child: %ld, " \
870 	       "last: %ld, parent: %ld, ft: %p, cb: %p\n",		\
871 	       #field, (event), (event)->ev,				\
872 	       (event)->field.tn_older   ? (event)->field.tn_older->ev   : -1, \
873 	       (event)->field.tn_younger ? (event)->field.tn_younger->ev : -1, \
874 	       (event)->field.tn_first   ? (event)->field.tn_first->ev   : -1, \
875 	       (event)->field.tn_last    ? (event)->field.tn_last->ev    : -1, \
876 	       (event)->field.tn_parent  ? (event)->field.tn_parent->ev  : -1, \
877 	       (event)->ft, (event)->cb)
878 inline void
879 HandleCb(mctx_t const mctx, int sock, int side,
880 	 stree_t * const stree, event_t events)
881 {
882 	int i;
883 	bool proceed;
884 	tree_node_t *walk, *bev, *ev;
885 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) queue;
886 	assert(stree);
887 
888 	g_cur_stree = stree;
889 
890 	for (i = 0; i < NUM_BEV; i++) {
891 		if (!((1L << i) & events))
892 			continue;
893 
894 		if (!(bev = stree->bevs[i]))
895 			continue;
896 
897 		EVENT_STACK_CLR(&g_evstack);
898 		tree_node_t **ptr = g_evstack.sp;
899 		TREE_DFS_FOREACH_SELECTIVE(walk, bev, &queue, invk, proceed) {
900 			//PRINT_EV(walk, link);
901 			//PRINT_EV(walk, invk);
902 
903 			g_cur_ev = walk;
904 
905 			if (walk != bev && walk->ft &&
906 			    walk->ft(mctx, sock, side, walk->ev, &walk->arg) == false) {
907 				ptr = g_evstack.sp;
908 				proceed = false;
909 				continue;
910 			}
911 
912 			if (walk->cb)
913 				walk->cb(mctx, sock, side, walk->ev, &walk->arg);
914 
915 			while (ptr < g_evstack.sp) {
916 				ptr++;
917 				TREE_INSERT_CHILD(walk, *ptr, invk);
918 			}
919 		}
920 		g_cur_ev = NULL;
921 
922 		while ((ev = EVENT_STACK_POP(&g_evstack))) {
923 			ev->is_in_raiseq = false;
924 			TREE_DETACH(ev, invk);
925 		}
926 
927 		if (!(events &= ~(1L << i))) {
928 			g_cur_stree = NULL;
929 			return;
930 		}
931 	}
932 
933 	g_cur_stree = NULL;
934 }
935 /*----------------------------------------------------------------------------*/
936 int
937 mtcp_register_callback(mctx_t mctx, int sockid, event_t event,
938 					   int hook_point, callback_t callback)
939 {
940 	socket_map_t socket;
941 	stree_t **pstree;
942 
943 	if (event == MOS_NULL_EVENT ||
944 	    (event < (1L << NUM_BEV) && !POWER_OF_TWO(event)))
945 		return -1;
946 
947 	mtcp_manager_t mtcp = GetMTCPManager(mctx);
948 	if (!mtcp)
949 		return -1;
950 
951 	socket = &mtcp->msmap[sockid];
952 
953 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) {
954 		if (hook_point == MOS_NULL)
955 			pstree = &socket->monitor_stream->stree_dontcare;
956 		else if (hook_point == MOS_HK_RCV)
957 			pstree = &socket->monitor_stream->stree_pre_rcv;
958 		else if (hook_point == MOS_HK_SND)
959 			pstree = &socket->monitor_stream->stree_post_snd;
960 		else
961 			return -1;
962 
963 	} else if (socket->socktype == MOS_SOCK_MONITOR_STREAM
964 		   || socket->socktype == MOS_SOCK_MONITOR_RAW) {
965 		if (hook_point == MOS_NULL)
966 			pstree = &socket->monitor_listener->stree_dontcare;
967 		else if (hook_point == MOS_HK_RCV)
968 			pstree = &socket->monitor_listener->stree_pre_rcv;
969 		else if (hook_point == MOS_HK_SND)
970 			pstree = &socket->monitor_listener->stree_post_snd;
971 		else
972 			return -1;
973 	} else {
974 		errno = EINVAL;
975 		return -1;
976 	}
977 
978 	return ModCb(mtcp->ev_store, pstree, event, callback);
979 }
980 /*----------------------------------------------------------------------------*/
981 int
982 mtcp_unregister_callback(mctx_t mctx, int sockid, event_t event,
983 			 int hook_point)
984 {
985 	return mtcp_register_callback(mctx, sockid, event, hook_point, NULL);
986 }
987 /*----------------------------------------------------------------------------*/
988 inline void
989 HandleCallback(mtcp_manager_t mtcp, uint32_t hook,
990 	       socket_map_t socket, int side, struct pkt_ctx *pctx, event_t events)
991 {
992 	struct sfbpf_program fcode;
993 
994 	if (!socket)
995 		return;
996 
997 	if (!events)
998 		return;
999 	assert(events);
1000 
1001 	/* if client side monitoring is disabled, then skip */
1002 	if (side == MOS_SIDE_CLI && socket->monitor_stream->client_mon == 0)
1003 		return;
1004 	/* if server side monitoring is disabled, then skip */
1005 	else if (side == MOS_SIDE_SVR && socket->monitor_stream->server_mon == 0)
1006 		return;
1007 
1008 #define MSTRM(sock) (sock)->monitor_stream
1009 #define MLSNR(sock) (sock)->monitor_listener
1010 	/* We use `?:` notation instead of `if/else` to make `evp` as const */
1011 	stree_t * const stree =
1012 		(socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) ?
1013 		((hook == MOS_HK_RCV) ? MSTRM(socket)->stree_pre_rcv :
1014 		 (hook == MOS_HK_SND) ? MSTRM(socket)->stree_post_snd :
1015 		 MSTRM(socket)->stree_dontcare)
1016 		: (socket->socktype == MOS_SOCK_MONITOR_STREAM)
1017 		|| (socket->socktype == MOS_SOCK_MONITOR_RAW) ?
1018 		((hook == MOS_HK_RCV) ? MLSNR(socket)->stree_pre_rcv :
1019 		 (hook == MOS_HK_SND) ? MLSNR(socket)->stree_post_snd :
1020 		 MLSNR(socket)->stree_dontcare) :
1021 		NULL;
1022 
1023 	if (!stree)
1024 		return;
1025 
1026 	/* mtcp_bind_monitor_filter()
1027 	   - BPF filter is evaluated only for RAW socket and PASSIVE
1028 	   socket (= orphan filter)
1029 	   - stream syn filter is moved to and evaluated on socket creation
1030 	*/
1031 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM) {
1032 		fcode = MLSNR(socket)->stream_orphan_fcode;
1033 		/* if not match with filter, return */
1034 		if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode,
1035 		    (uint8_t *)pctx->p.iph - sizeof(struct ethhdr),
1036 	        pctx->p.ip_len + sizeof(struct ethhdr)) == 0)
1037 			return;
1038 	}
1039 	if (socket->socktype == MOS_SOCK_MONITOR_RAW) {
1040 		fcode = MLSNR(socket)->raw_pkt_fcode;
1041 		/* if not match with filter, return */
1042 		if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode,
1043 		    (uint8_t *)pctx->p.iph - sizeof(struct ethhdr),
1044 	        pctx->p.ip_len + sizeof(struct ethhdr)) == 0)
1045 			return;
1046 	}
1047 
1048 	mctx_t const mctx = g_ctx[mtcp->ctx->cpu];
1049 	mtcp->pctx = pctx; /* for mtcp_getcurpkt() */
1050 
1051 	HandleCb(mctx, socket->id, side, stree, events);
1052 }
1053 /*----------------------------------------------------------------------------*/
1054 #endif
1055