1 #ifdef NEWEV
2 
3 /* ----------------------------------------------------------------------------
4  * Please see 4 Jul. 2015 meeting slides for conceptual explanation.
5  * 'Subtree' has special meaning and its 'ID' is very important concept for
6  * understanding this code.
7  *
8  * Please contact Donghwi Kim for further questions,
9  * via e-mail: [email protected] */
10 
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <tree.h>
15 #include <key_value_store.h>
16 #include "scalable_event.h"
17 
18 #include "mtcp.h"
19 #include "debug.h"
20 #include "mos_api.h"
21 #include "mtcp_api.h"
22 
23 #define NEWHASH
24 #ifdef NEWHASH
25 #include "mtcp_util.h"
26 #define XXHSEED 18446744073709551557ULL
27 #endif
28 /*----------------------------------------------------------------------------*/
29 #define POWER_OF_TWO(x) (!((x - 1) & (x)))
30 
31 #define MAX_EVENTS  30240
32 #define KVS_BUCKETS 1024
33 #define KVS_ENTRIES 102400
34 #define MAX_DEPTH   MAX_EVENTS
35 #define NEWID(id, ev, cb) (id ^ hash64(ev, cb))
36 #define IS_UDE(e) ((e)->ev >= (1 << NUM_BEV))
37 #define IS_FLOATING_EVENT(e) (!(e)->ft && IS_UDE(e))
38 
39 struct _tree_node_t *g_evroot;
40 event_t g_evid;
41 
42 static __thread stree_t *g_cur_stree = NULL;
43 static __thread tree_node_t *g_cur_ev = NULL;
44 
45 static __thread struct {
46 	struct _tree_node_t *stack[MAX_EVENTS];
47 	struct _tree_node_t **sp;
48 } g_evstack;
49 
50 #define free(x) do { \
51 	assert(x); \
52 	free(x); \
53 } while (0)
54 /*----------------------------------------------------------------------------*/
55 /** Create a new tree node
56  * @return the new tree node
57  * */
58 inline tree_node_t *
59 tree_node_new()
60 {
61 	return calloc(1, sizeof(tree_node_t));
62 }
63 /*----------------------------------------------------------------------------*/
64 /** Delete a tree node */
65 inline void
66 tree_node_del(tree_node_t *node)
67 {
68 	free(node);
69 }
70 /*----------------------------------------------------------------------------*/
71 /** Delete a tree node and all of its descendants
72  * @param [in] node: target node
73  * */
74 inline void
75 tree_del_recursive(tree_node_t *node)
76 {
77 	struct _tree_node_t *walk = NULL, *del = NULL;
78 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
79 
80 	TREE_DFS_FOREACH(walk, node, &stack, link) {
81 		if (del)
82 			tree_node_del(del);
83 		del = walk;
84 	}
85 	if (del)
86 		tree_node_del(del);
87 }
88 /*----------------------------------------------------------------------------*/
89 /** Search a tree node by event ID
90  * @param [in] root: root node
91  * @param [in] ev: event ID
92  * @return target node
93  *
94  * The search is in DFS mannar
95  * */
96 inline tree_node_t *
97 tree_search(tree_node_t *root, event_t ev)
98 {
99 	struct _tree_node_t *walk = NULL;
100 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
101 
102 	TREE_DFS_FOREACH(walk, root, &stack, link)
103 		if (walk->ev == ev) {
104 			return walk;
105 		}
106 	return NULL;
107 }
108 /*----------------------------------------------------------------------------*/
109 /** Create a new subtree
110  * @param [in] id: subtree ID
111  * @param [in] root: root of the subtree
112  * @return new subtree
113  *
114  * Allocate and initialize subtree structure, stree_t:
115  *   1. Allocate the structure.
116  *   2. Initialize reference counter and fill ID field.
117  *   3. Set 'root' pointer which points the root node of a tree.
118  *      The tree should contain all required tree nodes.
119  *   4. Set built-in event tree node pointers.
120  * */
121 inline stree_t *
122 stree_create(uint64_t id, fevq_t *fevq, tree_node_t *root)
123 {
124 	int i;
125 	tree_node_t *bev;
126 	stree_t *nstree = malloc(sizeof(stree_t));
127 	if (!nstree)
128 		return NULL;
129 
130 	nstree->ref_cnt = 0;
131 	nstree->id = id;
132 	nstree->root = root;
133 	for (i = 0; i < NUM_BEV; i++)
134 		nstree->bevs[i] = NULL;
135 
136 	for (bev = TREE_FIRSTBORN(root, link); bev; bev = TREE_YOUNGER(bev, link)) {
137 		int idx = 0;
138 		while (bev->ev >> (idx + 1))
139 			idx++;
140 		nstree->bevs[idx] = bev;
141 	}
142 
143 	nstree->floating_evq = *fevq;
144 
145 	return nstree;
146 }
147 /*----------------------------------------------------------------------------*/
148 /** Destroy the subtree
149  * @param [in] stree: the target subtree */
150 inline void
151 stree_destroy(kvs_t *store, stree_t *stree)
152 {
153 	tree_node_t *w;
154 	TAILQ_FOREACH(w, &stree->floating_evq, flink)
155 		kvs_remove(store, w->key);
156 	tree_del_recursive(stree->root);
157 	free(stree);
158 }
159 /*----------------------------------------------------------------------------*/
160 /** Increase reference counter of the subtree
161  * @param [in] stree: the target subtree */
162 inline void
163 stree_inc_ref(stree_t *stree)
164 {
165 	stree->ref_cnt++;
166 }
167 /*----------------------------------------------------------------------------*/
168 /** Decrease reference counter of the subtree, and remove it from the key-
169  * value-store if needed
170  * @param [in] store: the key-value-store
171  * @param [in] stree: the target subtree
172  *
173  * In addition to decreasing the reference counter, remove the subtree from
174  * key-value-store if decreased reference counter is zero.
175  * */
176 inline void
177 stree_dec_ref(kvs_t *store, stree_t *stree)
178 {
179 	if (stree && !--stree->ref_cnt) {
180 		kvs_remove(store, stree->id);
181 		stree_destroy(store, stree);
182 	}
183 }
184 /*----------------------------------------------------------------------------*/
185 inline void
186 stree_reg_floating_event(kvs_t *store, uint64_t id,
187 						 fevq_t *fevq, tree_node_t *ev)
188 {
189 	struct {
190 		event_t ev;
191 		uint64_t sid;
192 	} instance;
193 
194 	instance.ev = ev->ev;
195 	instance.sid = id;
196 
197 	ev->key = XXH64(&instance, sizeof(instance), XXHSEED);
198 
199 	kvs_insert(store, ev->key, ev);
200 
201 	TAILQ_INSERT_TAIL(fevq, ev, flink);
202 }
203 /*----------------------------------------------------------------------------*/
204 /** Hash the event ID and callback pair
205  * @param [in] ev: the event ID
206  * @param [in] cb: the callback function pointer
207  * @return hash value
208  *
209  * TODO: Replace 32-bit Jenkins hash with better one.
210  * */
211 inline static uint64_t
212 hash64(event_t ev, callback_t cb)
213 {
214 #ifdef NEWHASH
215 	struct {
216 		event_t ev;
217 		callback_t cb;
218 	} instance;
219 
220 	instance.ev = ev;
221 	instance.cb = cb;
222 
223 	return XXH64(&instance, sizeof(event_t) + sizeof(callback_t), XXHSEED);
224 #else
225 	uint64_t hash = 0;
226 	int i;
227 
228 	for (i = 0; i < sizeof(ev); ++i) {
229 		hash += ((uint8_t *)&ev)[i];
230 		hash += (hash << 10);
231 		hash ^= (hash >> 6);
232 	}
233 	for (i = 0; i < sizeof(cb); ++i) {
234 		hash += ((uint8_t *)&cb)[i];
235 		hash += (hash << 10);
236 		hash ^= (hash >> 6);
237 	}
238 	hash += (hash << 3);
239 	hash ^= (hash >> 11);
240 	hash += (hash << 15);
241 
242 	return hash;
243 #endif
244 }
245 /*----------------------------------------------------------------------------*/
246 /** Register a callback
247  * @param [in] store: the key-value-store for subtrees
248  * @param [in, out] pstree: the target subtree pointer
249  * @param [in] ev: the target event
250  * @param [in] cb: the callback to be registered
251  * @return 0 for success, -1 for failure
252  *
253  * For callback registeration, there can be some cases.
254  *   1. A new tree what we want is already there, so we can use it by simply
255  *      updating reference counters.
256  *   2. A new tree what we want is not there, so we are going to create a
257  *      new subtree. The new subtree will not only have new callback, but also
258  *      inherit previous subtree.
259  *      2.1. The reference counter of previous subtree is 1, so we can avoid
260  *           new tree allocation by reusing tree nodes of previous subtree.
261  *      2.2. The reference counter of previous subtree is not 1, so we should
262  *           copy previous subtree's tree nodes.
263  *   3. Previously, there was no subtree (in other words, this is the first
264  *      callback registeration) so we are going to make a new subtree which
265  *      has only one callback
266  * */
267 inline int
268 RegCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb)
269 {
270 	TRACE_DBG("/>>>>>> start <<<<<<\\\n");
271 	stree_t *stree, *nstree;
272 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
273 	tree_node_t *w, *w2, *w3;
274 	uint64_t id = 0, nid;
275 	fevq_t fevq;
276 
277 	TAILQ_INIT(&fevq);
278 
279 	if ((stree = *pstree)) {
280 		/* XXX: tree_search() is heavy operation, but it is used for only error
281 		 * checking. Is there any way to avoid this?
282 		 * --> We can have a key-value-store for this, but is it too much? */
283 #if 1
284 		if (tree_search(stree->root, ev))
285 			/* Illegal trial of overwriting a previously registerd callback */
286 			return -1;
287 #endif
288 
289 		id = stree->id;
290 	}
291 
292 	/* calculate new subtree ID */
293 	nid = NEWID(id, ev, cb);
294 
295 	TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n",
296 			nid, id, ev, cb);
297 
298 	if ((nstree = kvs_search(store, nid))) {
299 		/* case 1. */
300 		TRACE_DBG("Case 1\n");
301 		/* update reference counters */
302 		if (stree)
303 			stree_dec_ref(store, stree);
304 
305 		nstree->ref_cnt++;
306 		*pstree = nstree;
307 	} else if (stree) {
308 		/* case 2. */
309 		TRACE_DBG("Case 2\n");
310 		tree_node_t *ntree = NULL, *sptr;
311 
312 		w = tree_search(g_evroot, ev);
313 		assert(w);
314 
315 		/* create spine: Create a new tree which consists of a tree node with
316 		 * new callback and the tree node's ancestors. */
317 		do {
318 			tree_node_t *ntn;
319 			if (!(ntn = tree_node_new())) {
320 				if (ntree)
321 					tree_del_recursive(ntree);
322 				return -1;
323 			}
324 			ntn->ft = w->ft;
325 			ntn->cb = cb;
326 			ntn->ev = w->ev;
327 			ntn->arg = w->arg;
328 			if (cb)
329 				cb = NULL;
330 
331 			if (IS_FLOATING_EVENT(ntn))
332 				stree_reg_floating_event(store, nid, &fevq, ntn);
333 
334 			TREE_INIT_NODE(ntn, link);
335 			TREE_INIT_NODE(ntn, invk);
336 			if (ntree) {
337 				TREE_INSERT_CHILD(ntn, ntree, link);
338 				if (!IS_FLOATING_EVENT(ntree))
339 					TREE_INSERT_CHILD(ntn, ntree, invk);
340 			}
341 			ntree = ntn;
342 		} while ((w = TREE_PARENT(w, link)));
343 
344 		/* attach flesh: Inherit previous subtree */
345 		sptr = ntree;
346 		if (stree->ref_cnt == 1) {
347 			/* case 2.1. */
348 			TRACE_DBG("Case 2.1\n");
349 			w = stree->root;
350 			while (w) {
351 				tree_node_t *next_walk = NULL,
352 							*next_walk2 = NULL,
353 							*next_sptr = TREE_FIRSTBORN(sptr, link);
354 
355 				for (w2 = TREE_FIRSTBORN(w, link);
356 					 w2 != NULL;
357 					 w2 = next_walk2) {
358 					next_walk2 = TREE_YOUNGER(w2, link);
359 					if (sptr && w2->ev == next_sptr->ev) {
360 
361 						assert(next_walk == NULL);
362 
363 						next_walk = w2;
364 
365 						assert(next_sptr->ev == next_walk->ev);
366 						if (next_sptr->cb != next_walk->cb)
367 							next_sptr->cb = next_walk->cb;
368 					} else {
369 						TREE_DETACH(w2, link);
370 						TREE_DFS_FOREACH(w3, w2, &stack, link)
371 							if (IS_FLOATING_EVENT(w3)) {
372 								kvs_remove(store, w3->key);
373 								stree_reg_floating_event(store, nid, &fevq, w3);
374 							}
375 						if (!IS_FLOATING_EVENT(w2))
376 							TREE_DETACH(w2, invk);
377 						if (next_walk) {
378 							TREE_INSERT_CHILD(sptr, w2, link);
379 							if (!IS_FLOATING_EVENT(w2))
380 								TREE_INSERT_CHILD(sptr, w2, invk);
381 						} else {
382 							tree_node_t *f = TREE_FIRSTBORN(sptr, link);
383 							TREE_INSERT_OLDER(f, w2, link);
384 							if (!IS_FLOATING_EVENT(w2)) {
385 								if ((f = TREE_FIRSTBORN(sptr, invk)))
386 									TREE_INSERT_OLDER(f, w2, invk);
387 								else
388 									TREE_INSERT_CHILD(sptr, w2, invk);
389 							}
390 						}
391 					}
392 				}
393 				w = next_walk;
394 				sptr = next_sptr;
395 				next_walk = NULL;
396 			}
397 		} else {
398 			/* case 2.2. */
399 			TRACE_DBG("Case 2.2\n");
400 			TREE_DFS_FOREACH(w, stree->root, &stack, link) {
401 				tree_node_t *ntn, *ptn;
402 
403 				if (sptr && w->ev == sptr->ev) {
404 					if (w->cb != sptr->cb)
405 						sptr->cb = w->cb;
406 					sptr = TREE_FIRSTBORN(sptr, link);
407 					continue;
408 				}
409 
410 				if (!(ntn = tree_node_new())) {
411 					if (ntree)
412 						tree_del_recursive(ntree);
413 					return -1;
414 				}
415 				ntn->ft = w->ft;
416 				ntn->cb = w->cb;
417 				ntn->ev = w->ev;
418 				ntn->arg = w->arg;
419 
420 				if (IS_FLOATING_EVENT(ntn))
421 					stree_reg_floating_event(store, nid, &fevq, ntn);
422 
423 				TREE_INIT_NODE(ntn, link);
424 				TREE_INIT_NODE(ntn, invk);
425 
426 				if (TREE_PARENT(w, link)) {
427 					ptn = tree_search(ntree, TREE_PARENT(w, link)->ev);
428 
429 					assert(ptn);
430 
431 					TREE_INSERT_CHILD(ptn, ntn, link);
432 					if (!IS_FLOATING_EVENT(ntn))
433 						TREE_INSERT_CHILD(ptn, ntn, invk);
434 				} else
435 					assert(0);
436 			}
437 		}
438 
439 		nstree = stree_create(nid, &fevq, ntree);
440 
441 		assert(nstree);
442 
443 		nstree->ref_cnt = 1;
444 
445 		kvs_insert(store, nid, nstree);
446 
447 		/* update reference counters */
448 		stree_dec_ref(store, stree);
449 
450 		*pstree = nstree;
451 	} else /* if (!stree) */ {
452 		/* case 3. */
453 		TRACE_DBG("Case 3\n");
454 		tree_node_t *ntree = NULL;
455 
456 		w = tree_search(g_evroot, ev);
457 		assert(w);
458 
459 		/* create spine: Create a new tree which consists of a tree node with
460 		 * new callback and the tree node's ancestors. */
461 		do {
462 			tree_node_t *ntn;
463 			if (!(ntn = tree_node_new())) {
464 				if (ntree)
465 					tree_del_recursive(ntree);
466 				return -1;
467 			}
468 			ntn->ft = w->ft;
469 			ntn->cb = cb;
470 			ntn->ev = w->ev;
471 			ntn->arg = w->arg;
472 			if (cb)
473 				cb = NULL;
474 
475 			if (IS_FLOATING_EVENT(ntn))
476 				stree_reg_floating_event(store, nid, &fevq, ntn);
477 
478 			TREE_INIT_NODE(ntn, link);
479 			TREE_INIT_NODE(ntn, invk);
480 			if (ntree) {
481 				TREE_INSERT_CHILD(ntn, ntree, link);
482 				if (!IS_FLOATING_EVENT(ntree))
483 					TREE_INSERT_CHILD(ntn, ntree, invk);
484 			}
485 			ntree = ntn;
486 		} while ((w = TREE_PARENT(w, link)));
487 
488 		nstree = stree_create(nid, &fevq, ntree);
489 
490 		assert(nstree);
491 
492 		nstree->ref_cnt = 1;
493 
494 		kvs_insert(store, nid, nstree);
495 
496 		*pstree = nstree;
497 	}
498 
499 	TRACE_DBG("\\>>>>>>  end  <<<<<</\n");
500 	return 0;
501 }
502 /*----------------------------------------------------------------------------*/
503 /** Unregister a callback
504  * @param [in] store: the key-value-store for subtrees
505  * @param [in, out] pstree: the target subtree pointer
506  * @param [in] ev: the target event
507  * @return 0 for success, -1 for failure
508  *
509  * For callback unregisteration, there can be some cases.
510  *   1. No registered callback will be left after this unregistration.
511  *   2. A new tree what we want is already there, so we can use it by simply
512  *      updating reference counters.
513  *   3. A new tree what we want is not there, so we create a new subtree by
514  *      copying a part of previous subtree
515  * */
516 inline int
517 UnregCb(kvs_t *store, stree_t **pstree, event_t ev)
518 {
519 	TRACE_DBG("/>>>>>> start <<<<<<\\\n");
520 	stree_t *stree, *nstree;
521 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
522 	tree_node_t *w, *target;
523 	fevq_t fevq;
524 	uint64_t id = 0, nid;
525 	callback_t cb;
526 
527 	if (!(stree = *pstree) || !(target = tree_search(stree->root, ev)) ||
528 		!(cb = target->cb))
529 		/* Illegal trial of unregistering a callback which is never registered
530 		 * before. */
531 		return -1;
532 
533 	id = stree->id;
534 
535 	/* Calculate new subtree ID */
536 	nid = NEWID(id, ev, cb);
537 
538 	TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n",
539 			nid, id, ev, cb);
540 
541 	if (nid == 0) {
542 		/* case 1. */
543 		TRACE_DBG("Case 1\n");
544 		/* update reference counters */
545 		if (stree)
546 			stree_dec_ref(store, stree);
547 
548 		*pstree = NULL;
549 	} else if ((nstree = kvs_search(store, nid))) {
550 		/* case 2. */
551 		TRACE_DBG("Case 2\n");
552 		/* update reference counters */
553 		if (stree)
554 			stree_dec_ref(store, stree);
555 
556 		nstree->ref_cnt++;
557 		*pstree = nstree;
558 	} else if (stree) {
559 		/* case 3. */
560 		TRACE_DBG("Case 3\n");
561 		bool proceed;
562 		tree_node_t *ntree = NULL, *sptr;
563 
564 		sptr = TREE_PARENT(target, link);
565 #define TREE_IS_ONLY_CHILD(root, x, field) \
566 		(TREE_PARENT((x), field) ? \
567 		 TREE_PARENT((x), field)->field.tn_first == (x) && \
568 		 TREE_PARENT((x), field)->field.tn_last == (x) : \
569 		 (x) == (root) && (x)->field.tn_younger == NULL)
570 		while (sptr && !sptr->cb && TREE_IS_ONLY_CHILD(stree->root, sptr, link))
571 			sptr = TREE_PARENT(sptr, link);
572 
573 		assert(sptr);
574 		/* 'sptr == NULL' means the tree will lose the only callback.
575 		 * This case should be handled by 'Case 1' */
576 
577 		TREE_DFS_FOREACH_SELECTIVE(w, stree->root, &stack, link, proceed) {
578 			tree_node_t *ntn, *ptn;
579 
580 			if (w == sptr)
581 				proceed = false;
582 
583 			if (!(ntn = tree_node_new())) {
584 				/* free incomplete ntree */
585 				if (ntree)
586 					tree_del_recursive(ntree);
587 				return -1;
588 			}
589 			ntn->ft = w->ft;
590 			ntn->cb = w->cb;
591 			ntn->ev = w->ev;
592 			ntn->arg = w->arg;
593 
594 			if (IS_FLOATING_EVENT(ntn))
595 				stree_reg_floating_event(store, nid, &fevq, ntn);
596 
597 			TREE_INIT_NODE(ntn, link);
598 			TREE_INIT_NODE(ntn, invk);
599 
600 			if (TREE_PARENT(w, link)) {
601 				assert(ntree);
602 				ptn = tree_search(ntree, TREE_PARENT(w, link)->ev);
603 
604 				assert(ptn);
605 
606 				TREE_INSERT_CHILD(ptn, ntn, link);
607 				if (!IS_FLOATING_EVENT(ntn))
608 					TREE_INSERT_CHILD(ptn, ntn, invk);
609 			} else
610 				/* this is the root node */
611 				ntree = ntn;
612 		}
613 
614 		nstree = stree_create(nid, &fevq, ntree);
615 
616 		assert(nstree);
617 
618 		nstree->ref_cnt = 1;
619 
620 		kvs_insert(store, nid, nstree);
621 
622 		/* update reference counters */
623 		stree_dec_ref(store, stree);
624 
625 		*pstree = nstree;
626 	} else /* if (!stree) */
627 		return -1;
628 
629 	TRACE_DBG("\\>>>>>>  end  <<<<<</\n");
630 	return 0;
631 }
632 /*----------------------------------------------------------------------------*/
633 inline int
634 ModCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb)
635 {
636 	assert(*pstree || cb);
637 
638 	/* Event ID starts from 1 */
639 	if (ev == 0)
640 		return -1;
641 
642 	if (cb)
643 		return RegCb(store, pstree, ev, cb);
644 	else
645 		return UnregCb(store, pstree, ev);
646 }
647 /*----------------------------------------------------------------------------*/
648 void
649 GlobInitEvent(void)
650 {
651 	int i;
652 
653 	if (!(g_evroot = tree_node_new())) {
654 		perror("FATAL: Failed to allocate essentials for the system\n");
655 		exit(0);
656 	}
657 	g_evroot->ev = 0;
658 	g_evroot->ft = NULL;
659 	g_evroot->cb = NULL;
660 	TREE_INIT_NODE(g_evroot, link);
661 	TREE_INIT_NODE(g_evroot, invk);
662 	for (i = 0; i < NUM_BEV; i++) {
663 		tree_node_t *ntn;
664 		if (!(ntn = tree_node_new())) {
665 			perror("FATAL: Failed to allocate essentials for the system\n");
666 			exit(0);
667 		}
668 		ntn->ev = 1 << i;
669 		ntn->ft = NULL;
670 		ntn->cb = NULL;
671 		TREE_INIT_NODE(ntn, link);
672 		TREE_INIT_NODE(ntn, invk);
673 		TREE_INSERT_CHILD(g_evroot, ntn, link);
674 		TREE_INSERT_CHILD(g_evroot, ntn, invk);
675 	}
676 	g_evid = 1 <<  NUM_BEV;
677 }
678 /*----------------------------------------------------------------------------*/
679 void
680 InitEvent(mtcp_manager_t mtcp)
681 {
682 	mtcp->ev_store = kvs_create(KVS_BUCKETS, KVS_ENTRIES);
683 }
684 /*----------------------------------------------------------------------------*/
685 event_t
686 mtcp_alloc_event(event_t event)
687 {
688 	tree_node_t *parent, *new;
689 	if (!(parent = tree_search(g_evroot, event)))
690 		return MOS_NULL_EVENT;
691 
692 	if ((new = tree_node_new()) == NULL)
693 		return MOS_NULL_EVENT;
694 
695 	new->ev = g_evid++;
696 	new->ft = NULL;
697 	new->cb = NULL;
698 	new->arg.arg = NULL;
699 	new->arg.len = 0;
700 
701 	TREE_INIT_NODE(new, link);
702 	TREE_INIT_NODE(new, invk);
703 
704 	TREE_INSERT_CHILD(parent, new, link);
705 
706 	return new->ev;
707 }
708 /*----------------------------------------------------------------------------*/
709 event_t
710 mtcp_define_event(event_t event, filter_t filter, struct filter_arg *arg)
711 {
712 	tree_node_t *parent, *new, *walk;
713 
714 	if (!filter)
715 		return MOS_NULL_EVENT;
716 	if (arg && arg->len > 0 && !arg->arg)
717 		return MOS_NULL_EVENT;
718 	if (!(parent = tree_search(g_evroot, event)))
719 		return MOS_NULL_EVENT;
720 	for (walk = TREE_FIRSTBORN(parent, link);
721 		 walk;
722 		 walk = TREE_YOUNGER(walk, link))
723 		if ((walk->ft == filter) &&
724 			((walk->arg.arg == NULL && (!arg || !arg->arg || arg->len <= 0)) ||
725 			 (walk->arg.len == arg->len &&
726 			  memcmp(walk->arg.arg, arg->arg, arg->len) == 0)))
727 			return walk->ev;
728 
729 	if ((new = tree_node_new()) == NULL)
730 		return MOS_NULL_EVENT;
731 
732 	new->ev = g_evid++;
733 	new->ft = filter;
734 	new->cb = NULL;
735 	if (!arg || !arg->arg || arg->len <= 0) {
736 		new->arg.arg = NULL;
737 		new->arg.len = 0;
738 	} else {
739 		new->arg.arg = malloc(arg->len);
740 		memcpy(new->arg.arg, arg->arg, arg->len);
741 		new->arg.len = arg->len;
742 	}
743 
744 	TREE_INIT_NODE(new, link);
745 	TREE_INIT_NODE(new, invk);
746 
747 	TRACE_DBG("parent: %ld\n", parent->ev);
748 	TREE_INSERT_CHILD(parent, new, link);
749 	TREE_INSERT_CHILD(parent, new, invk);
750 
751 	return new->ev;
752 }
753 /*----------------------------------------------------------------------------*/
754 #define EVENT_STACK_PUSH(s, var) \
755 	(assert(&(s)->stack[sizeof((s)->stack) / sizeof(void *)] > (s)->sp), \
756 	 *(++((s)->sp)) = (var))
757 #define EVENT_STACK_POP(s) \
758 	(*(((s)->sp)--))
759 #define EVENT_STACK_CLR(s) \
760 	((s)->stack[0] = NULL, (s)->sp = (s)->stack)
761 
762 inline int
763 RaiseEv(kvs_t *store, event_t event)
764 {
765 	tree_node_t *ev = NULL;
766 #if 1
767 	struct {
768 		event_t ev;
769 		uint64_t sid;
770 	} instance;
771 
772 	instance.ev = event;
773 	instance.sid = g_cur_stree->id;
774 
775 	_key_t key = XXH64(&instance, sizeof(instance), XXHSEED);
776 	ev = (tree_node_t *)kvs_search(store, key);
777 #else
778 	tree_node_t *walk;
779 	for (walk = TREE_FIRSTBORN(g_cur_ev, link);
780 		 walk;
781 		 walk = TREE_YOUNGER(walk, link)) {
782 		if (walk->ev == event && IS_FLOATING_EVENT(walk)) {
783 			ev = walk;
784 			break;
785 		}
786 	}
787 #endif
788 
789 	if (!ev)
790 		return -1;
791 
792 	if (!ev->is_in_raiseq) {
793 		ev->is_in_raiseq = true;
794 		EVENT_STACK_PUSH(&g_evstack, ev);
795 	}
796 
797 	return 0;
798 }
799 int
800 mtcp_raise_event(mctx_t mctx, event_t event)
801 {
802 	mtcp_manager_t mtcp = GetMTCPManager(mctx);
803 	if (!mtcp)
804 		return -1;
805 
806 	return RaiseEv(mtcp->ev_store, event);
807 }
808 /*----------------------------------------------------------------------------*/
809 #define PRINT_EV(event, field) \
810 	printf("[%s](node: %p) ev: %ld, older: %ld, younger: %ld, child: %ld, " \
811 			"last: %ld, parent: %ld, ft: %p, cb: %p\n", \
812 		  #field, (event), (event)->ev, \
813 		  (event)->field.tn_older ? (event)->field.tn_older->ev : -1, \
814 		  (event)->field.tn_younger ? (event)->field.tn_younger->ev : -1, \
815 		  (event)->field.tn_first ? (event)->field.tn_first->ev : -1, \
816 		  (event)->field.tn_last ? (event)->field.tn_last->ev : -1, \
817 		  (event)->field.tn_parent ? (event)->field.tn_parent->ev : -1, \
818 		  (event)->ft, (event)->cb)
819 inline void
820 HandleCb(mctx_t const mctx, int sock, int side,
821 		 stree_t * const stree, event_t events)
822 {
823 	int i;
824 	bool proceed;
825 	tree_node_t *walk, *bev, *ev;
826 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) queue;
827 	assert(stree);
828 
829 	g_cur_stree = stree;
830 
831 	for (i = 0; i < NUM_BEV; i++) {
832 		if (!((1L << i) & events))
833 			continue;
834 
835 		if (!(bev = stree->bevs[i]))
836 			continue;
837 
838 		EVENT_STACK_CLR(&g_evstack);
839 		tree_node_t **ptr = g_evstack.sp;
840 		TREE_DFS_FOREACH_SELECTIVE(walk, bev, &queue, invk, proceed) {
841 			//PRINT_EV(walk, link);
842 			//PRINT_EV(walk, invk);
843 
844 			g_cur_ev = walk;
845 
846 			if (walk != bev && walk->ft &&
847 				walk->ft(mctx, sock, side, walk->ev, &walk->arg) == false) {
848 				ptr = g_evstack.sp;
849 				proceed = false;
850 				continue;
851 			}
852 
853 			if (walk->cb)
854 				walk->cb(mctx, sock, side, walk->ev, &walk->arg);
855 
856 			while (ptr < g_evstack.sp) {
857 				ptr++;
858 				TREE_INSERT_CHILD(walk, *ptr, invk);
859 			}
860 		}
861 		g_cur_ev = NULL;
862 
863 		while ((ev = EVENT_STACK_POP(&g_evstack))) {
864 			ev->is_in_raiseq = false;
865 			TREE_DETACH(ev, invk);
866 		}
867 
868 		if (!(events &= ~(1L << i))) {
869 			g_cur_stree = NULL;
870 			return;
871 		}
872 	}
873 
874 	g_cur_stree = NULL;
875 }
876 /*----------------------------------------------------------------------------*/
877 int
878 mtcp_register_callback(mctx_t mctx, int sockid, event_t event,
879 		                       int hook_point, callback_t callback)
880 {
881 	socket_map_t socket;
882 	stree_t **pstree;
883 
884 	if (event == MOS_NULL_EVENT ||
885 		event < (1L << NUM_BEV) ? !POWER_OF_TWO(event) : 0)
886 		return -1;
887 
888 	mtcp_manager_t mtcp = GetMTCPManager(mctx);
889 	if (!mtcp)
890 		return -1;
891 
892 	socket = &mtcp->msmap[sockid];
893 
894 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) {
895 		if (hook_point == MOS_NULL)
896 			pstree = &socket->monitor_stream->stree_dontcare;
897 		else if (hook_point == MOS_HK_RCV)
898 			pstree = &socket->monitor_stream->stree_pre_rcv;
899 		else if (hook_point == MOS_HK_SND)
900 			pstree = &socket->monitor_stream->stree_post_snd;
901 		else
902 			return -1;
903 
904 	} else if (socket->socktype == MOS_SOCK_MONITOR_STREAM
905 			|| socket->socktype == MOS_SOCK_MONITOR_RAW) {
906 		if (hook_point == MOS_NULL)
907 			pstree = &socket->monitor_listener->stree_dontcare;
908 		else if (hook_point == MOS_HK_RCV)
909 			pstree = &socket->monitor_listener->stree_pre_rcv;
910 		else if (hook_point == MOS_HK_SND)
911 			pstree = &socket->monitor_listener->stree_post_snd;
912 		else
913 			return -1;
914 	} else {
915 		errno = EINVAL;
916 		return -1;
917 	}
918 
919 	return ModCb(mtcp->ev_store, pstree, event, callback);
920 }
921 /*----------------------------------------------------------------------------*/
922 int
923 mtcp_unregister_callback(mctx_t mctx, int sockid, event_t event,
924 		                       int hook_point)
925 {
926 	return mtcp_register_callback(mctx, sockid, event, hook_point, NULL);
927 }
928 /*----------------------------------------------------------------------------*/
929 inline void
930 HandleCallback(mtcp_manager_t mtcp, uint32_t hook,
931 		socket_map_t socket, int side, struct pkt_ctx *pctx, event_t events)
932 {
933 	struct sfbpf_program fcode;
934 
935 	if (!socket)
936 		return;
937 
938 	if (!events)
939 		return;
940 	assert(events);
941 
942 	/* if client side monitoring is disabled, then skip */
943 	if (side == MOS_SIDE_CLI && socket->monitor_stream->client_mon == 0)
944 		return;
945 	/* if server side monitoring is disabled, then skip */
946 	else if (side == MOS_SIDE_SVR && socket->monitor_stream->server_mon == 0)
947 		return;
948 
949 #define MSTRM(sock) (sock)->monitor_stream
950 #define MLSNR(sock) (sock)->monitor_listener
951 	/* We use `?:` notation instead of `if/else` to make `evp` as const */
952 	stree_t * const stree =
953 		(socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) ?
954 		    ((hook == MOS_HK_RCV) ? MSTRM(socket)->stree_pre_rcv :
955 			 (hook == MOS_HK_SND) ? MSTRM(socket)->stree_post_snd :
956 									  MSTRM(socket)->stree_dontcare)
957 		: (socket->socktype == MOS_SOCK_MONITOR_STREAM)
958 		  || (socket->socktype == MOS_SOCK_MONITOR_RAW) ?
959 		    ((hook == MOS_HK_RCV) ? MLSNR(socket)->stree_pre_rcv :
960 			 (hook == MOS_HK_SND) ? MLSNR(socket)->stree_post_snd :
961 									  MLSNR(socket)->stree_dontcare) :
962 		 NULL;
963 
964 	if (!stree)
965 		return;
966 
967 	/* mtcp_bind_monitor_filter()
968      * - BPF filter is evaluated only for RAW socket and PASSIVE socket (= orphan filter)
969 	 * - stream syn filter is moved to and evaluated on socket creation */
970 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM) {
971 		fcode = MLSNR(socket)->stream_orphan_fcode;
972 		/* if not match with filter, return */
973 		if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode,
974 		    (uint8_t *)pctx->p.iph - sizeof(struct ethhdr),
975 	        pctx->p.ip_len + sizeof(struct ethhdr)) == 0)
976 			return;
977 	}
978     if (socket->socktype == MOS_SOCK_MONITOR_RAW) {
979 		fcode = MLSNR(socket)->raw_pkt_fcode;
980 		/* if not match with filter, return */
981 		if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode,
982 		    (uint8_t *)pctx->p.iph - sizeof(struct ethhdr),
983 	        pctx->p.ip_len + sizeof(struct ethhdr)) == 0)
984 			return;
985 	}
986 
987 	mctx_t const mctx = g_ctx[mtcp->ctx->cpu];
988 	mtcp->pctx = pctx; /* for mtcp_getcurpkt() */
989 
990 	HandleCb(mctx, socket->id, side, stree, events);
991 }
992 /*----------------------------------------------------------------------------*/
993 #endif
994