1 #ifdef NEWEV
2 
3 /* ----------------------------------------------------------------------------
4  * Please see 4 Jul. 2015 meeting slides for conceptual explanation.
5  * 'Subtree' has special meaning and its 'ID' is very important concept for
6  * understanding this code.
7  *
8  * Please contact Donghwi Kim for further questions,
9  * via e-mail: [email protected]
10  */
11 #include "debug.h"
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <tree.h>
16 #include <key_value_store.h>
17 #include "scalable_event.h"
18 
19 #include "mtcp.h"
20 #include "mos_api.h"
21 #include "mtcp_api.h"
22 
23 #define NEWHASH
24 #ifdef NEWHASH
25 #include "mtcp_util.h"
26 #define XXHSEED 18446744073709551557ULL
27 #endif
28 /*----------------------------------------------------------------------------*/
29 #define POWER_OF_TWO(x) (!((x - 1) & (x)))
30 
31 #define MAX_EVENTS  30240
32 #define KVS_BUCKETS 1024
33 #define KVS_ENTRIES 102400
34 #define MAX_DEPTH   MAX_EVENTS
35 #define NEWID(id, ev, cb) (id ^ hash64(ev, cb))
36 #define IS_UDE(e) ((e)->ev >= (1 << NUM_BEV))
37 #define IS_FLOATING_EVENT(e) (!(e)->ft && IS_UDE(e))
38 
39 struct _tree_node_t *g_evroot;
40 event_t g_evid;
41 
42 static __thread stree_t *g_cur_stree = NULL;
43 static __thread tree_node_t *g_cur_ev = NULL;
44 
45 static __thread struct {
46 	struct _tree_node_t *stack[MAX_EVENTS];
47 	struct _tree_node_t **sp;
48 } g_evstack;
49 
50 #define free(x) do {				\
51 		assert(x);			\
52 		free(x);			\
53 	} while (0)
54 
55 /*----------------------------------------------------------------------------*/
56 struct devent {
57 	event_t ev;            /* event id = key */
58 	tree_node_t *ptr;      /* ptr to the event node in the d-forest = value */
59 	struct devent *next;   /* next pointer */
60 };
61 #define MAX_BUCKET 2048
62 #define HASHVAL(x) ((x) & (MAX_BUCKET-1))
63 struct devent* g_dfTable[MAX_BUCKET] = {0};  /* d-forest hash table */
64 /*----------------------------------------------------------------------------*/
65 /* simple hash table implementation */
66 static bool
67 dforest_store(event_t ev, tree_node_t* ptr)
68 {
69 	struct devent *de, *p;
70 	int idx;
71 
72 	if ((de = malloc(sizeof(*de))) == NULL) {
73 		assert(0); /* should not happen! */
74 		return false;
75 	}
76 	de->ev = ev;
77 	de->ptr = ptr;
78 	de->next = NULL;
79 
80 	idx = HASHVAL(ev);
81 	if (g_dfTable[idx] == NULL) {
82 		g_dfTable[idx] = de;
83 	}
84 	else  {
85 		p = g_dfTable[idx];
86 		while (p->next)
87 			p = p->next;
88 		p->next = de;
89 	}
90 	return true;
91 }
92 /*----------------------------------------------------------------------------*/
93 static tree_node_t *
94 dforest_search(event_t ev)
95 {
96 	int idx;
97 	struct devent *p;
98 
99 	idx = HASHVAL(ev);
100 	if (g_dfTable[idx] == NULL)
101 		return NULL;
102 
103 	for (p = g_dfTable[idx];
104 	     p != NULL;
105 	     p = p->next) {
106 		if (p->ev == ev)
107 			return p->ptr;
108 	}
109 	return NULL;
110 }
111 /*----------------------------------------------------------------------------*/
112 /** Create a new tree node
113  * @return the new tree node
114  * */
115 inline tree_node_t *
116 tree_node_new()
117 {
118 	return calloc(1, sizeof(tree_node_t));
119 }
120 /*----------------------------------------------------------------------------*/
121 /** Delete a tree node */
122 inline void
123 tree_node_del(tree_node_t *node)
124 {
125 	free(node);
126 }
127 /*----------------------------------------------------------------------------*/
128 /** Delete a tree node and all of its descendants
129  * @param [in] node: target node
130  * */
131 inline void
132 tree_del_recursive(tree_node_t *node)
133 {
134 	struct _tree_node_t *walk = NULL, *del = NULL;
135 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
136 
137 	TREE_DFS_FOREACH(walk, node, &stack, link) {
138 		if (del)
139 			tree_node_del(del);
140 		del = walk;
141 	}
142 	if (del)
143 		tree_node_del(del);
144 }
145 /*----------------------------------------------------------------------------*/
146 /** Search a tree node by event ID
147  * @param [in] root: root node
148  * @param [in] ev: event ID
149  * @return target node
150  *
151  * The search is in DFS mannar
152  * */
153 inline tree_node_t *
154 tree_search(tree_node_t *root, event_t ev)
155 {
156 	struct _tree_node_t *walk = NULL;
157 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
158 
159 	TREE_DFS_FOREACH(walk, root, &stack, link)
160 		if (walk->ev == ev) {
161 			return walk;
162 		}
163 	return NULL;
164 }
165 /*----------------------------------------------------------------------------*/
166 /** Create a new subtree
167  * @param [in] id: subtree ID
168  * @param [in] root: root of the subtree
169  * @return new subtree
170  *
171  * Allocate and initialize subtree structure, stree_t:
172  *   1. Allocate the structure.
173  *   2. Initialize reference counter and fill ID field.
174  *   3. Set 'root' pointer which points the root node of a tree.
175  *      The tree should contain all required tree nodes.
176  *   4. Set built-in event tree node pointers.
177  * */
178 inline stree_t *
179 stree_create(uint64_t id, tree_node_t *root)
180 {
181 	tree_node_t *bev;
182 	stree_t *nstree = malloc(sizeof(stree_t));
183 	if (!nstree)
184 		return NULL;
185 
186 	nstree->ref_cnt = 0;
187 	nstree->id = id;
188 	nstree->root = root;
189 	memset(nstree->bevs, 0, sizeof(nstree->bevs));
190 
191 	for (bev = TREE_FIRSTBORN(root, link); bev; bev = TREE_YOUNGER(bev, link)) {
192 		int idx = 0;
193 		while (bev->ev >> (idx + 1))
194 			idx++;
195 		nstree->bevs[idx] = bev;
196 	}
197 
198 	return nstree;
199 }
200 /*----------------------------------------------------------------------------*/
201 /** Destroy the subtree
202  * @param [in] stree: the target subtree */
203 inline void
204 stree_destroy(kvs_t *store, stree_t *stree)
205 {
206 	tree_del_recursive(stree->root);
207 	free(stree);
208 }
209 /*----------------------------------------------------------------------------*/
210 /** Increase reference counter of the subtree
211  * @param [in] stree: the target subtree */
212 inline void
213 stree_inc_ref(stree_t *stree)
214 {
215 	stree->ref_cnt++;
216 }
217 /*----------------------------------------------------------------------------*/
218 /** Decrease reference counter of the subtree, and remove it from the key-
219  * value-store if needed
220  * @param [in] store: the key-value-store
221  * @param [in] stree: the target subtree
222  *
223  * In addition to decreasing the reference counter, remove the subtree from
224  * key-value-store if decreased reference counter is zero.
225  * */
226 inline void
227 stree_dec_ref(kvs_t *store, stree_t *stree)
228 {
229 	if (stree && !--stree->ref_cnt) {
230 	  	kvs_remove(store, stree->id);
231 		stree_destroy(store, stree);
232 	}
233 }
234 /*----------------------------------------------------------------------------*/
235 /** Hash the event ID and callback pair
236  * @param [in] ev: the event ID
237  * @param [in] cb: the callback function pointer
238  * @return hash value
239  *
240  * TODO: Replace 32-bit Jenkins hash with better one.
241  * */
242 inline static uint64_t
243 hash64(event_t ev, callback_t cb)
244 {
245 #ifdef NEWHASH
246 	struct {
247 		event_t ev;
248 		callback_t cb;
249 	} instance;
250 
251 	instance.ev = ev;
252 	instance.cb = cb;
253 
254 	return XXH64(&instance, sizeof(event_t) + sizeof(callback_t), XXHSEED);
255 #else
256 	uint64_t hash = 0;
257 	int i;
258 
259 	for (i = 0; i < sizeof(ev); ++i) {
260 		hash += ((uint8_t *)&ev)[i];
261 		hash += (hash << 10);
262 		hash ^= (hash >> 6);
263 	}
264 	for (i = 0; i < sizeof(cb); ++i) {
265 		hash += ((uint8_t *)&cb)[i];
266 		hash += (hash << 10);
267 		hash ^= (hash >> 6);
268 	}
269 	hash += (hash << 3);
270 	hash ^= (hash >> 11);
271 	hash += (hash << 15);
272 
273 	return hash;
274 #endif
275 }
276 
277 /*-------------------------------------------------------------------------*/
278 /* Create a new tree that consists of a tree node (w) with a new
279  * callback and take a snapshot of the subtree from w all the way up to
280  * the root node by traversing the ancestors of w.
281 */
282 /*-------------------------------------------------------------------------*/
283 static tree_node_t*
284 create_spine(tree_node_t* w, callback_t cb)
285 {
286 	tree_node_t *ntn, *ntree = NULL;
287 
288 	if ((ntn = tree_node_new()) == NULL)
289 		return NULL;
290 
291 	/* the leaf event node with callback function */
292 	ntn->ft  = w->ft;
293 	ntn->cb  = cb;
294 	ntn->ev  = w->ev;
295 	ntn->arg = w->arg;
296 	ntree = ntn;          /* we start from the leaf */
297 
298 	do {
299 		if (!(ntn = tree_node_new())) {
300 			if (ntree)
301 				tree_del_recursive(ntree);
302 			return NULL;
303 		}
304 		ntn->ft = w->ft;
305 		ntn->ev = w->ev;
306 		ntn->arg = w->arg;
307 
308 		TREE_INSERT_CHILD(ntn, ntree, link);
309 		if (!IS_FLOATING_EVENT(ntree))
310 			TREE_INSERT_CHILD(ntn, ntree, invk);
311 
312 		/* ntn becomes the new tree */
313 		ntree = ntn;
314 	} while ((w = TREE_PARENT(w, link)));
315 
316 	return ntree;
317 }
318 #if 0
319 /*----------------------------------------------------------------------------*/
320 static stree_t *
321 clone_stree(stree_t * stree)
322 {
323 	stree_t *newstree, *walk;
324 	tree_node_t *newtree;
325 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
326 	struct _tree_node_t *walk = NULL, *del = NULL;
327 
328 	/* clone the tree */
329 	TREE_DFS_FOREACH(walk, stree->root, &stack, link) {
330 		new_tree_node_new()
331 			if (del)
332 				tree_node_del(del);
333 		del = walk;
334 	}
335 	if (del)
336 		tree_node_del(del);
337 
338 
339 	TREE_DFS_FOREACH(w, stree->root, &stack, link) {
340 		tree_node_t *ntn, *ptn = NULL;
341 	}
342 }
343 #endif
344 /*----------------------------------------------------------------------------*/
345 /** Register a callback
346  * @param [in] store: the key-value-store for subtrees
347  * @param [in, out] pstree: the target subtree pointer
348  * @param [in] ev: the target event
349  * @param [in] cb: the callback to be registered
350  * @return 0 for success, -1 for failure
351  *
352  * For callback registeration, there can be some cases.
353  *   1. A new tree what we want is already there, so we can use it by simply
354  *      updating reference counters.
355  *   2. A new tree what we want is not there, so we are going to create a
356  *      new subtree. The new subtree will not only have new callback, but also
357  *      inherit previous subtree.
358  *      2.1. The reference counter of previous subtree is 1, so we can avoid
359  *           new tree allocation by reusing tree nodes of previous subtree.
360  *      2.2. The reference counter of previous subtree is not 1, so we should
361  *           copy previous subtree's tree nodes.
362  *   3. Previously, there was no subtree (in other words, this is the first
363  *      callback registeration) so we are going to make a new subtree which
364  *      has only one callback
365  * */
366 inline int
367 RegCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb)
368 {
369 #ifdef DBGMSG
370 	__PREPARE_DBGLOGGING();
371 #endif
372 	TRACE_DBG("/>>>>>> start <<<<<<\\\n");
373 	stree_t *stree, *nstree;
374 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
375 	tree_node_t *w, *w2, *ntree;
376 	uint64_t id = 0, nid;
377 
378 	if ((stree = *pstree)) {
379 		/* XXX: tree_search() is heavy operation, but it is used for only error
380 		 * checking. Is there any way to avoid this?
381 		 * --> We can have a key-value-store for this, but is it too much? */
382 #if 0
383 		if (tree_search(stree->root, ev))
384 			/* Illegal trial of overwriting a previously registerd callback */
385 			return -1;
386 #endif
387 
388 		id = stree->id;
389 	}
390 
391 	/* calculate new subtree ID */
392 	nid = NEWID(id, ev, cb);
393 
394 	TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n", nid, id, ev, cb);
395 
396 	if ((nstree = kvs_search(store, nid))) {  /* case 1. */
397 		TRACE_DBG("Case 1\n");
398 		/* update reference counters */
399 		if (stree)
400 			stree_dec_ref(store, stree);
401 
402 		nstree->ref_cnt++;
403 		*pstree = nstree;
404 #if 0
405 		fprintf(stderr, "sizeof(tcp_stream)=%ld sizeof(tcp_send_vars)=%ld"
406 			"sizeof(tcp_recv_vars)=%ld\n",
407 			sizeof(tcp_stream),
408 			sizeof(struct tcp_send_vars), sizeof(struct tcp_recv_vars));
409 #endif
410 		return 0;
411 	}
412 
413 	/* find the event node with ev and take a snapshot of the ancestors */
414 	w = dforest_search(ev);
415 	//	w = tree_search(g_evroot, ev);
416 	assert(w);
417 	if ((ntree = create_spine(w, cb)) == NULL)
418 		return (-1);
419 
420 	if (stree) {  /* case 2. */
421 		TRACE_DBG("Case 2\n");
422 		tree_node_t *sptr;
423 
424 		/* attach flesh: Inherit previous subtree */
425 		sptr = ntree;
426 
427 #if 0
428 		/* new code */
429 		if (stree->ref_cnt > 1) {
430 			/* take a snapshot of the entire forest */
431 			curf = clone_tree(stree);
432 			curf->root->ref_cnt = 1;
433 			stree->ref_cnt--;
434 		} else {
435 			assert(stree->ref_cnt == 1);
436 			curf = stree;
437 			/* remove the current id from hash table */
438 			kvs_remove(store, stree->id);
439 		}
440 
441 		/* merge sptr into curf->root (existing tree) */
442 #endif
443 
444 		if (stree->ref_cnt == 1) {
445 			/* case 2.1. */
446 			TRACE_DBG("Case 2.1\n");
447 			w = stree->root;
448 			while (w) {
449 				tree_node_t *next_walk = NULL, *next_walk2 = NULL,
450 					*next_sptr = TREE_FIRSTBORN(sptr, link);
451 
452 				for (w2 = TREE_FIRSTBORN(w, link);
453 				     w2 != NULL;
454 				     w2 = next_walk2) {
455 					next_walk2 = TREE_YOUNGER(w2, link);
456 					if (sptr && w2->ev == next_sptr->ev) {
457 						assert(next_walk == NULL);
458 						next_walk = w2;
459 						if (next_sptr->cb != next_walk->cb)
460 							next_sptr->cb = next_walk->cb;
461 					}
462 					else {
463 						TREE_DETACH(w2, link);
464 						if (!IS_FLOATING_EVENT(w2))
465 							TREE_DETACH(w2, invk);
466 						if (next_walk) {
467 							TREE_INSERT_CHILD(sptr, w2, link);
468 							if (!IS_FLOATING_EVENT(w2))
469 								TREE_INSERT_CHILD(sptr, w2, invk);
470 						} else {
471 							tree_node_t *f = TREE_FIRSTBORN(sptr, link);
472 							TREE_INSERT_OLDER(f, w2, link);
473 							if (!IS_FLOATING_EVENT(w2)) {
474 								if ((f = TREE_FIRSTBORN(sptr, invk)))
475 									TREE_INSERT_OLDER(f, w2, invk);
476 								else
477 									TREE_INSERT_CHILD(sptr, w2, invk);
478 							}
479 						}
480 					}
481 				}
482 				w = next_walk;
483 				sptr = next_sptr;
484 				next_walk = NULL;
485 			}
486 		} else { // stree->ref_count != 1
487 			/* case 2.2. */
488 			TRACE_DBG("Case 2.2\n");
489 			TREE_DFS_FOREACH(w, stree->root, &stack, link) {
490 				tree_node_t *ntn, *ptn = NULL;
491 
492 				if (sptr && w->ev == sptr->ev) {
493 					if (w->cb != sptr->cb)
494 						sptr->cb = w->cb;
495 					sptr = TREE_FIRSTBORN(sptr, link);
496 					continue;
497 				}
498 
499 				if (!(ntn = tree_node_new())) {
500 					if (ntree)
501 						tree_del_recursive(ntree);
502 					return -1;
503 				}
504 				ntn->ft  = w->ft;
505 				ntn->cb  = w->cb;
506 				ntn->ev  = w->ev;
507 				ntn->arg = w->arg;
508 
509 				if (TREE_PARENT(w, link)) {
510 					ptn = tree_search(ntree, TREE_PARENT(w, link)->ev);
511 					assert(ptn);
512 					TREE_INSERT_CHILD(ptn, ntn, link);
513 					if (!IS_FLOATING_EVENT(ntn))
514 						TREE_INSERT_CHILD(ptn, ntn, invk);
515 				} else {
516 					if (ntree)
517 						tree_del_recursive(ntree);
518 					free(ntn);
519 					TRACE_ERROR("Can't find parent\n");
520 					assert(0);
521 					exit(EXIT_FAILURE);
522 				}
523 			}
524 		}
525 	}
526 
527 	/* case 3 if (stree == NULL) */
528 	nstree = stree_create(nid, ntree);
529 	if (nstree == NULL) {
530 		TRACE_ERROR("Failed to create stree!\n");
531 		assert(nstree);
532 		exit(EXIT_FAILURE);
533 	}
534 	nstree->ref_cnt = 1;
535 	kvs_insert(store, nid, nstree);
536 	/* update reference counters */
537 	if (stree)
538 		stree_dec_ref(store, stree);
539 	*pstree = nstree;
540 
541 	TRACE_DBG("\\>>>>>>  end  <<<<<</\n");
542 	return 0;
543 }
544 /*----------------------------------------------------------------------------*/
545 /** Unregister a callback
546  * @param [in] store: the key-value-store for subtrees
547  * @param [in, out] pstree: the target subtree pointer
548  * @param [in] ev: the target event
549  * @return 0 for success, -1 for failure
550  *
551  * For callback unregisteration, there can be some cases.
552  *   1. No registered callback will be left after this unregistration.
553  *   2. A new tree what we want is already there, so we can use it by simply
554  *      updating reference counters.
555  *   3. A new tree what we want is not there, so we create a new subtree by
556  *      copying a part of previous subtree
557  * */
558 inline int
559 UnregCb(kvs_t *store, stree_t **pstree, event_t ev)
560 {
561 #ifdef DBGMSG
562 	__PREPARE_DBGLOGGING();
563 #endif
564 	TRACE_DBG("/>>>>>> start <<<<<<\\\n");
565 	stree_t *stree, *nstree;
566 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
567 	tree_node_t *w, *target;
568 	uint64_t id = 0, nid;
569 	callback_t cb;
570 
571 	if (!(stree = *pstree) || !(target = tree_search(stree->root, ev)) ||
572 	    !(cb = target->cb))
573 		/* Illegal trial of unregistering a callback which is never registered
574 		 * before. */
575 		return -1;
576 
577 	id = stree->id;
578 
579 	/* Calculate new subtree ID */
580 	nid = NEWID(id, ev, cb);
581 
582 	TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n",
583 		  nid, id, ev, cb);
584 
585 	if (nid == 0) {
586 		/* case 1. */
587 		TRACE_DBG("Case 1\n");
588 		/* update reference counters */
589 		if (stree)
590 			stree_dec_ref(store, stree);
591 
592 		*pstree = NULL;
593 	} else if ((nstree = kvs_search(store, nid))) {
594 		/* case 2. */
595 		TRACE_DBG("Case 2\n");
596 		/* update reference counters */
597 		if (stree)
598 			stree_dec_ref(store, stree);
599 
600 		nstree->ref_cnt++;
601 		*pstree = nstree;
602 	} else if (stree) {
603 		/* case 3. */
604 		TRACE_DBG("Case 3\n");
605 		bool proceed;
606 		tree_node_t *ntree = NULL, *sptr;
607 
608 		sptr = TREE_PARENT(target, link);
609 #define TREE_IS_ONLY_CHILD(root, x, field) \
610 		(TREE_PARENT((x), field) ?			   \
611 		 TREE_PARENT((x), field)->field.tn_first == (x) && \
612 		 TREE_PARENT((x), field)->field.tn_last == (x) :   \
613 		 (x) == (root) && (x)->field.tn_younger == NULL)
614 		while (sptr && !sptr->cb && TREE_IS_ONLY_CHILD(stree->root, sptr, link))
615 			sptr = TREE_PARENT(sptr, link);
616 
617 		assert(sptr);
618 		/* 'sptr == NULL' means the tree will lose the only callback.
619 		 * This case should be handled by 'Case 1' */
620 
621 		TREE_DFS_FOREACH_SELECTIVE(w, stree->root, &stack, link, proceed) {
622 			tree_node_t *ntn, *ptn;
623 
624 			if (w == sptr)
625 				proceed = false;
626 
627 			if (!(ntn = tree_node_new())) {
628 				/* free incomplete ntree */
629 				if (ntree)
630 					tree_del_recursive(ntree);
631 				return -1;
632 			}
633 			ntn->ft = w->ft;
634 			ntn->cb = w->cb;
635 			ntn->ev = w->ev;
636 			ntn->arg = w->arg;
637 
638 			if (TREE_PARENT(w, link)) {
639 				assert(ntree);
640 				ptn = tree_search(ntree, TREE_PARENT(w, link)->ev);
641 
642 				assert(ptn);
643 
644 				TREE_INSERT_CHILD(ptn, ntn, link);
645 				if (!IS_FLOATING_EVENT(ntn))
646 					TREE_INSERT_CHILD(ptn, ntn, invk);
647 			} else
648 				/* this is the root node */
649 				ntree = ntn;
650 		}
651 
652 		nstree = stree_create(nid, ntree);
653 		if (nstree == NULL) {
654 			TRACE_ERROR("Failed to create stree!\n");
655 			assert(nstree);
656 			exit(EXIT_FAILURE);
657 		}
658 
659 		nstree->ref_cnt = 1;
660 
661 		kvs_insert(store, nid, nstree);
662 
663 		/* update reference counters */
664 		stree_dec_ref(store, stree);
665 
666 		*pstree = nstree;
667 	} else /* if (!stree) */
668 		return -1;
669 
670 	TRACE_DBG("\\>>>>>>  end  <<<<<</\n");
671 	return 0;
672 }
673 /*----------------------------------------------------------------------------*/
674 inline int
675 ModCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb)
676 {
677 	assert(*pstree || cb);
678 
679 	/* Event ID starts from 1 */
680 	if (ev == 0)
681 		return -1;
682 
683 	if (cb)
684 		return RegCb(store, pstree, ev, cb);
685 	else
686 		return UnregCb(store, pstree, ev);
687 }
688 /*----------------------------------------------------------------------------*/
689 void
690 GlobInitEvent(void)
691 {
692 	int i;
693 
694 	if (!(g_evroot = tree_node_new())) {
695 		perror("FATAL: Failed to allocate essentials for the system\n");
696 		exit(0);
697 	}
698 	g_evroot->ev = 0;
699 	g_evroot->ft = NULL;
700 	g_evroot->cb = NULL;
701 	TREE_INIT_NODE(g_evroot, link);
702 	TREE_INIT_NODE(g_evroot, invk);
703 	/* add it to the dforest hash table */
704 	dforest_store(g_evroot->ev, g_evroot);
705 	for (i = 0; i < NUM_BEV; i++) {
706 		tree_node_t *ntn;
707 		if (!(ntn = tree_node_new())) {
708 			perror("FATAL: Failed to allocate essentials for the system\n");
709 			exit(0);
710 		}
711 		ntn->ev = 1 << i;
712 		ntn->ft = NULL;
713 		ntn->cb = NULL;
714 		TREE_INIT_NODE(ntn, link);
715 		TREE_INIT_NODE(ntn, invk);
716 		TREE_INSERT_CHILD(g_evroot, ntn, link);
717 		TREE_INSERT_CHILD(g_evroot, ntn, invk);
718 		/* add it to the dforest hash table */
719 		dforest_store(ntn->ev, ntn);
720 	}
721 	g_evid = 1 <<  NUM_BEV;
722 }
723 /*----------------------------------------------------------------------------*/
724 void
725 InitEvent(mtcp_manager_t mtcp)
726 {
727 	mtcp->ev_store = kvs_create(KVS_BUCKETS, KVS_ENTRIES);
728 }
729 /*----------------------------------------------------------------------------*/
730 event_t
731 mtcp_alloc_event(event_t event)
732 {
733 	tree_node_t *parent, *new;
734 
735 	if (!(parent = dforest_search(event)))
736 	//	if (!(parent = tree_search(g_evroot, event)))
737 		return MOS_NULL_EVENT;
738 
739 	if ((new = tree_node_new()) == NULL)
740 		return MOS_NULL_EVENT;
741 
742 	new->ev = g_evid++;
743 	new->ft = NULL;
744 	new->cb = NULL;
745 	new->arg.arg = NULL;
746 	new->arg.len = 0;
747 
748 	TREE_INIT_NODE(new, link);
749 	TREE_INIT_NODE(new, invk);
750 
751 	TREE_INSERT_CHILD(parent, new, link);
752 
753 	return new->ev;
754 }
755 /*----------------------------------------------------------------------------*/
756 event_t
757 mtcp_define_event(event_t event, filter_t filter, struct filter_arg *arg)
758 {
759 #ifdef DBGMSG
760 	__PREPARE_DBGLOGGING();
761 #endif
762 	tree_node_t *parent, *new, *walk;
763 
764 	/* FIX - TODO. This code needs to have proper locking since this
765 	   deals with global variables that are shared across multiple
766 	   threads
767 	*/
768 
769 	if (!filter)
770 		return MOS_NULL_EVENT;
771 	if (arg && arg->len > 0 && !arg->arg)
772 		return MOS_NULL_EVENT;
773 	if (!(parent = dforest_search(event)))
774 	//	if (!(parent = tree_search(g_evroot, event)))
775 		return MOS_NULL_EVENT;
776 	for (walk = TREE_FIRSTBORN(parent, link);
777 	     walk;
778 	     walk = TREE_YOUNGER(walk, link))
779 		if ((walk->ft == filter) &&
780 		    ((walk->arg.arg == NULL && (!arg || !arg->arg || arg->len <= 0)) ||
781 		     (walk->arg.len == arg->len &&
782 		      memcmp(walk->arg.arg, arg->arg, arg->len) == 0)))
783 			return walk->ev;
784 
785 	if ((new = tree_node_new()) == NULL)  {
786 		errno = ENOMEM;
787 		return MOS_NULL_EVENT;
788 	}
789 	new->ev = g_evid++;
790 	new->ft = filter;
791 
792 	/* add it to the dforest hash table */
793 	dforest_store(new->ev, new);
794 
795 	if (arg && arg->arg && arg->len > 0) {
796 		new->arg.arg = malloc(arg->len);
797 		if (new->arg.arg == NULL) {
798 			errno = ENOMEM;
799 			g_evid--;
800 			free(new);
801 			return MOS_NULL_EVENT;
802 		}
803 		memcpy(new->arg.arg, arg->arg, arg->len);
804 		new->arg.len = arg->len;
805 	}
806 
807 	TRACE_DBG("parent: %ld\n", parent->ev);
808 	TREE_INSERT_CHILD(parent, new, link);
809 	TREE_INSERT_CHILD(parent, new, invk);
810 
811 	return new->ev;
812 }
813 /*----------------------------------------------------------------------------*/
814 #define EVENT_STACK_PUSH(s, var)					\
815 	(assert(&(s)->stack[sizeof((s)->stack) / sizeof(void *)] > (s)->sp), \
816 	 *(++((s)->sp)) = (var))
817 #define EVENT_STACK_POP(s)			\
818 	(*(((s)->sp)--))
819 #define EVENT_STACK_CLR(s)				\
820 	((s)->stack[0] = NULL, (s)->sp = (s)->stack)
821 
822 inline int
823 RaiseEv(kvs_t *store, event_t event)
824 {
825 	tree_node_t *ev = NULL;
826 #if 1
827 	struct {
828 		event_t ev;
829 		uint64_t sid;
830 	} instance;
831 
832 	instance.ev = event;
833 	instance.sid = g_cur_stree->id;
834 
835 	_key_t key = XXH64(&instance, sizeof(instance), XXHSEED);
836 	ev = (tree_node_t *)kvs_search(store, key);
837 #else
838 	tree_node_t *walk;
839 	for (walk = TREE_FIRSTBORN(g_cur_ev, link);
840 	     walk;
841 		 walk = TREE_YOUNGER(walk, link)) {
842 		if (walk->ev == event && IS_FLOATING_EVENT(walk)) {
843 			ev = walk;
844 			break;
845 		}
846 	}
847 #endif
848 
849 	if (!ev)
850 		return -1;
851 
852 	if (!ev->is_in_raiseq) {
853 		ev->is_in_raiseq = true;
854 		EVENT_STACK_PUSH(&g_evstack, ev);
855 	}
856 
857 	return 0;
858 }
859 /*----------------------------------------------------------------------------*/
860 int
861 mtcp_raise_event(mctx_t mctx, event_t event)
862 {
863 	mtcp_manager_t mtcp = GetMTCPManager(mctx);
864 	if (!mtcp)
865 		return -1;
866 
867 	return RaiseEv(mtcp->ev_store, event);
868 }
869 /*----------------------------------------------------------------------------*/
870 #define PRINT_EV(event, field)						\
871 	printf("[%s](node: %p) ev: %ld, older: %ld, younger: %ld, child: %ld, " \
872 	       "last: %ld, parent: %ld, ft: %p, cb: %p\n",		\
873 	       #field, (event), (event)->ev,				\
874 	       (event)->field.tn_older   ? (event)->field.tn_older->ev   : -1, \
875 	       (event)->field.tn_younger ? (event)->field.tn_younger->ev : -1, \
876 	       (event)->field.tn_first   ? (event)->field.tn_first->ev   : -1, \
877 	       (event)->field.tn_last    ? (event)->field.tn_last->ev    : -1, \
878 	       (event)->field.tn_parent  ? (event)->field.tn_parent->ev  : -1, \
879 	       (event)->ft, (event)->cb)
880 inline void
881 HandleCb(mctx_t const mctx, int sock, int side,
882 	 stree_t * const stree, event_t events)
883 {
884 	int i;
885 	bool proceed;
886 	tree_node_t *walk, *bev, *ev;
887 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) queue;
888 	assert(stree);
889 
890 	g_cur_stree = stree;
891 
892 	for (i = 0; i < NUM_BEV; i++) {
893 		if (!((1L << i) & events))
894 			continue;
895 
896 		if (!(bev = stree->bevs[i]))
897 			continue;
898 
899 		EVENT_STACK_CLR(&g_evstack);
900 		tree_node_t **ptr = g_evstack.sp;
901 		TREE_DFS_FOREACH_SELECTIVE(walk, bev, &queue, invk, proceed) {
902 			//PRINT_EV(walk, link);
903 			//PRINT_EV(walk, invk);
904 
905 			g_cur_ev = walk;
906 
907 			if (walk != bev && walk->ft &&
908 			    walk->ft(mctx, sock, side, walk->ev, &walk->arg) == false) {
909 				ptr = g_evstack.sp;
910 				proceed = false;
911 				continue;
912 			}
913 
914 			if (walk->cb)
915 				walk->cb(mctx, sock, side, walk->ev, &walk->arg);
916 
917 			while (ptr < g_evstack.sp) {
918 				ptr++;
919 				TREE_INSERT_CHILD(walk, *ptr, invk);
920 			}
921 		}
922 		g_cur_ev = NULL;
923 
924 		while ((ev = EVENT_STACK_POP(&g_evstack))) {
925 			ev->is_in_raiseq = false;
926 			TREE_DETACH(ev, invk);
927 		}
928 
929 		if (!(events &= ~(1L << i))) {
930 			g_cur_stree = NULL;
931 			return;
932 		}
933 	}
934 
935 	g_cur_stree = NULL;
936 }
937 /*----------------------------------------------------------------------------*/
938 int
939 mtcp_register_callback(mctx_t mctx, int sockid, event_t event,
940 					   int hook_point, callback_t callback)
941 {
942 	socket_map_t socket;
943 	stree_t **pstree;
944 
945 	if (event == MOS_NULL_EVENT ||
946 	    (event < (1L << NUM_BEV) && !POWER_OF_TWO(event)))
947 		return -1;
948 
949 	mtcp_manager_t mtcp = GetMTCPManager(mctx);
950 	if (!mtcp)
951 		return -1;
952 
953 	socket = &mtcp->msmap[sockid];
954 
955 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) {
956 		if (hook_point == MOS_NULL)
957 			pstree = &socket->monitor_stream->stree_dontcare;
958 		else if (hook_point == MOS_HK_RCV)
959 			pstree = &socket->monitor_stream->stree_pre_rcv;
960 		else if (hook_point == MOS_HK_SND)
961 			pstree = &socket->monitor_stream->stree_post_snd;
962 		else
963 			return -1;
964 
965 	} else if (socket->socktype == MOS_SOCK_MONITOR_STREAM
966 		   || socket->socktype == MOS_SOCK_MONITOR_RAW) {
967 		if (hook_point == MOS_NULL)
968 			pstree = &socket->monitor_listener->stree_dontcare;
969 		else if (hook_point == MOS_HK_RCV)
970 			pstree = &socket->monitor_listener->stree_pre_rcv;
971 		else if (hook_point == MOS_HK_SND)
972 			pstree = &socket->monitor_listener->stree_post_snd;
973 		else
974 			return -1;
975 	} else {
976 		errno = EINVAL;
977 		return -1;
978 	}
979 
980 	return ModCb(mtcp->ev_store, pstree, event, callback);
981 }
982 /*----------------------------------------------------------------------------*/
983 int
984 mtcp_unregister_callback(mctx_t mctx, int sockid, event_t event,
985 			 int hook_point)
986 {
987 	return mtcp_register_callback(mctx, sockid, event, hook_point, NULL);
988 }
989 /*----------------------------------------------------------------------------*/
990 inline void
991 HandleCallback(mtcp_manager_t mtcp, uint32_t hook,
992 	       socket_map_t socket, int side, struct pkt_ctx *pctx, event_t events)
993 {
994 	struct sfbpf_program fcode;
995 
996 	if (!socket)
997 		return;
998 
999 	if (!events)
1000 		return;
1001 	assert(events);
1002 
1003 	/* if client side monitoring is disabled, then skip */
1004 	if (side == MOS_SIDE_CLI && socket->monitor_stream->client_mon == 0)
1005 		return;
1006 	/* if server side monitoring is disabled, then skip */
1007 	else if (side == MOS_SIDE_SVR && socket->monitor_stream->server_mon == 0)
1008 		return;
1009 
1010 #define MSTRM(sock) (sock)->monitor_stream
1011 #define MLSNR(sock) (sock)->monitor_listener
1012 	/* We use `?:` notation instead of `if/else` to make `evp` as const */
1013 	stree_t * const stree =
1014 		(socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) ?
1015 		((hook == MOS_HK_RCV) ? MSTRM(socket)->stree_pre_rcv :
1016 		 (hook == MOS_HK_SND) ? MSTRM(socket)->stree_post_snd :
1017 		 MSTRM(socket)->stree_dontcare)
1018 		: (socket->socktype == MOS_SOCK_MONITOR_STREAM)
1019 		|| (socket->socktype == MOS_SOCK_MONITOR_RAW) ?
1020 		((hook == MOS_HK_RCV) ? MLSNR(socket)->stree_pre_rcv :
1021 		 (hook == MOS_HK_SND) ? MLSNR(socket)->stree_post_snd :
1022 		 MLSNR(socket)->stree_dontcare) :
1023 		NULL;
1024 
1025 	if (!stree)
1026 		return;
1027 
1028 	/* mtcp_bind_monitor_filter()
1029 	   - BPF filter is evaluated only for RAW socket and PASSIVE
1030 	   socket (= orphan filter)
1031 	   - stream syn filter is moved to and evaluated on socket creation
1032 	*/
1033 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM) {
1034 		fcode = MLSNR(socket)->stream_orphan_fcode;
1035 		/* if not match with filter, return */
1036 		if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode,
1037 		    (uint8_t *)pctx->p.iph - sizeof(struct ethhdr),
1038 	        pctx->p.ip_len + sizeof(struct ethhdr)) == 0)
1039 			return;
1040 	}
1041 	if (socket->socktype == MOS_SOCK_MONITOR_RAW) {
1042 		fcode = MLSNR(socket)->raw_pkt_fcode;
1043 		/* if not match with filter, return */
1044 		if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode,
1045 		    (uint8_t *)pctx->p.iph - sizeof(struct ethhdr),
1046 	        pctx->p.ip_len + sizeof(struct ethhdr)) == 0)
1047 			return;
1048 	}
1049 
1050 	mctx_t const mctx = g_ctx[mtcp->ctx->cpu];
1051 	mtcp->pctx = pctx; /* for mtcp_getcurpkt() */
1052 
1053 	HandleCb(mctx, socket->id, side, stree, events);
1054 }
1055 /*----------------------------------------------------------------------------*/
1056 #endif
1057