1 #ifdef NEWEV
2 
3 /* ----------------------------------------------------------------------------
4  * Please see 4 Jul. 2015 meeting slides for conceptual explanation.
5  * 'Subtree' has special meaning and its 'ID' is very important concept for
6  * understanding this code.
7  *
8  * Please contact Donghwi Kim for further questions,
9  * via e-mail: [email protected] */
10 #include "debug.h"
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <tree.h>
15 #include <key_value_store.h>
16 #include "scalable_event.h"
17 
18 #include "mtcp.h"
19 #include "mos_api.h"
20 #include "mtcp_api.h"
21 
22 #define NEWHASH
23 #ifdef NEWHASH
24 #include "mtcp_util.h"
25 #define XXHSEED 18446744073709551557ULL
26 #endif
27 /*----------------------------------------------------------------------------*/
28 #define POWER_OF_TWO(x) (!((x - 1) & (x)))
29 
30 #define MAX_EVENTS  30240
31 #define KVS_BUCKETS 1024
32 #define KVS_ENTRIES 102400
33 #define MAX_DEPTH   MAX_EVENTS
34 #define NEWID(id, ev, cb) (id ^ hash64(ev, cb))
35 #define IS_UDE(e) ((e)->ev >= (1 << NUM_BEV))
36 #define IS_FLOATING_EVENT(e) (!(e)->ft && IS_UDE(e))
37 
38 struct _tree_node_t *g_evroot;
39 event_t g_evid;
40 
41 static __thread stree_t *g_cur_stree = NULL;
42 static __thread tree_node_t *g_cur_ev = NULL;
43 
44 static __thread struct {
45 	struct _tree_node_t *stack[MAX_EVENTS];
46 	struct _tree_node_t **sp;
47 } g_evstack;
48 
49 #define free(x) do { \
50 	assert(x); \
51 	free(x); \
52 } while (0)
53 /*----------------------------------------------------------------------------*/
54 /** Create a new tree node
55  * @return the new tree node
56  * */
57 inline tree_node_t *
58 tree_node_new()
59 {
60 	return calloc(1, sizeof(tree_node_t));
61 }
62 /*----------------------------------------------------------------------------*/
63 /** Delete a tree node */
64 inline void
65 tree_node_del(tree_node_t *node)
66 {
67 	free(node);
68 }
69 /*----------------------------------------------------------------------------*/
70 /** Delete a tree node and all of its descendants
71  * @param [in] node: target node
72  * */
73 inline void
74 tree_del_recursive(tree_node_t *node)
75 {
76 	struct _tree_node_t *walk = NULL, *del = NULL;
77 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
78 
79 	TREE_DFS_FOREACH(walk, node, &stack, link) {
80 		if (del)
81 			tree_node_del(del);
82 		del = walk;
83 	}
84 	if (del)
85 		tree_node_del(del);
86 }
87 /*----------------------------------------------------------------------------*/
88 /** Search a tree node by event ID
89  * @param [in] root: root node
90  * @param [in] ev: event ID
91  * @return target node
92  *
93  * The search is in DFS mannar
94  * */
95 inline tree_node_t *
96 tree_search(tree_node_t *root, event_t ev)
97 {
98 	struct _tree_node_t *walk = NULL;
99 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
100 
101 	TREE_DFS_FOREACH(walk, root, &stack, link)
102 		if (walk->ev == ev) {
103 			return walk;
104 		}
105 	return NULL;
106 }
107 /*----------------------------------------------------------------------------*/
108 /** Create a new subtree
109  * @param [in] id: subtree ID
110  * @param [in] root: root of the subtree
111  * @return new subtree
112  *
113  * Allocate and initialize subtree structure, stree_t:
114  *   1. Allocate the structure.
115  *   2. Initialize reference counter and fill ID field.
116  *   3. Set 'root' pointer which points the root node of a tree.
117  *      The tree should contain all required tree nodes.
118  *   4. Set built-in event tree node pointers.
119  * */
120 inline stree_t *
121 stree_create(uint64_t id, fevq_t *fevq, tree_node_t *root)
122 {
123 	int i;
124 	tree_node_t *bev;
125 	stree_t *nstree = malloc(sizeof(stree_t));
126 	if (!nstree)
127 		return NULL;
128 
129 	nstree->ref_cnt = 0;
130 	nstree->id = id;
131 	nstree->root = root;
132 	for (i = 0; i < NUM_BEV; i++)
133 		nstree->bevs[i] = NULL;
134 
135 	for (bev = TREE_FIRSTBORN(root, link); bev; bev = TREE_YOUNGER(bev, link)) {
136 		int idx = 0;
137 		while (bev->ev >> (idx + 1))
138 			idx++;
139 		nstree->bevs[idx] = bev;
140 	}
141 
142 	nstree->floating_evq = *fevq;
143 
144 	return nstree;
145 }
146 /*----------------------------------------------------------------------------*/
147 /** Destroy the subtree
148  * @param [in] stree: the target subtree */
149 inline void
150 stree_destroy(kvs_t *store, stree_t *stree)
151 {
152 	tree_node_t *w;
153 	TAILQ_FOREACH(w, &stree->floating_evq, flink)
154 		kvs_remove(store, w->key);
155 	tree_del_recursive(stree->root);
156 	free(stree);
157 }
158 /*----------------------------------------------------------------------------*/
159 /** Increase reference counter of the subtree
160  * @param [in] stree: the target subtree */
161 inline void
162 stree_inc_ref(stree_t *stree)
163 {
164 	stree->ref_cnt++;
165 }
166 /*----------------------------------------------------------------------------*/
167 /** Decrease reference counter of the subtree, and remove it from the key-
168  * value-store if needed
169  * @param [in] store: the key-value-store
170  * @param [in] stree: the target subtree
171  *
172  * In addition to decreasing the reference counter, remove the subtree from
173  * key-value-store if decreased reference counter is zero.
174  * */
175 inline void
176 stree_dec_ref(kvs_t *store, stree_t *stree)
177 {
178 	if (stree && !--stree->ref_cnt) {
179 		kvs_remove(store, stree->id);
180 		stree_destroy(store, stree);
181 	}
182 }
183 /*----------------------------------------------------------------------------*/
184 inline void
185 stree_reg_floating_event(kvs_t *store, uint64_t id,
186 						 fevq_t *fevq, tree_node_t *ev)
187 {
188 	struct {
189 		event_t ev;
190 		uint64_t sid;
191 	} instance;
192 
193 	instance.ev = ev->ev;
194 	instance.sid = id;
195 
196 	ev->key = XXH64(&instance, sizeof(instance), XXHSEED);
197 
198 	kvs_insert(store, ev->key, ev);
199 
200 	TAILQ_INSERT_TAIL(fevq, ev, flink);
201 }
202 /*----------------------------------------------------------------------------*/
203 /** Hash the event ID and callback pair
204  * @param [in] ev: the event ID
205  * @param [in] cb: the callback function pointer
206  * @return hash value
207  *
208  * TODO: Replace 32-bit Jenkins hash with better one.
209  * */
210 inline static uint64_t
211 hash64(event_t ev, callback_t cb)
212 {
213 #ifdef NEWHASH
214 	struct {
215 		event_t ev;
216 		callback_t cb;
217 	} instance;
218 
219 	instance.ev = ev;
220 	instance.cb = cb;
221 
222 	return XXH64(&instance, sizeof(event_t) + sizeof(callback_t), XXHSEED);
223 #else
224 	uint64_t hash = 0;
225 	int i;
226 
227 	for (i = 0; i < sizeof(ev); ++i) {
228 		hash += ((uint8_t *)&ev)[i];
229 		hash += (hash << 10);
230 		hash ^= (hash >> 6);
231 	}
232 	for (i = 0; i < sizeof(cb); ++i) {
233 		hash += ((uint8_t *)&cb)[i];
234 		hash += (hash << 10);
235 		hash ^= (hash >> 6);
236 	}
237 	hash += (hash << 3);
238 	hash ^= (hash >> 11);
239 	hash += (hash << 15);
240 
241 	return hash;
242 #endif
243 }
244 /*----------------------------------------------------------------------------*/
245 /** Register a callback
246  * @param [in] store: the key-value-store for subtrees
247  * @param [in, out] pstree: the target subtree pointer
248  * @param [in] ev: the target event
249  * @param [in] cb: the callback to be registered
250  * @return 0 for success, -1 for failure
251  *
252  * For callback registeration, there can be some cases.
253  *   1. A new tree what we want is already there, so we can use it by simply
254  *      updating reference counters.
255  *   2. A new tree what we want is not there, so we are going to create a
256  *      new subtree. The new subtree will not only have new callback, but also
257  *      inherit previous subtree.
258  *      2.1. The reference counter of previous subtree is 1, so we can avoid
259  *           new tree allocation by reusing tree nodes of previous subtree.
260  *      2.2. The reference counter of previous subtree is not 1, so we should
261  *           copy previous subtree's tree nodes.
262  *   3. Previously, there was no subtree (in other words, this is the first
263  *      callback registeration) so we are going to make a new subtree which
264  *      has only one callback
265  * */
266 inline int
267 RegCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb)
268 {
269 #ifdef DBGMSG
270 	__PREPARE_DBGLOGGING();
271 #endif
272 	TRACE_DBG("/>>>>>> start <<<<<<\\\n");
273 	stree_t *stree, *nstree;
274 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
275 	tree_node_t *w, *w2, *w3;
276 	uint64_t id = 0, nid;
277 	fevq_t fevq;
278 
279 	TAILQ_INIT(&fevq);
280 
281 	if ((stree = *pstree)) {
282 		/* XXX: tree_search() is heavy operation, but it is used for only error
283 		 * checking. Is there any way to avoid this?
284 		 * --> We can have a key-value-store for this, but is it too much? */
285 #if 1
286 		if (tree_search(stree->root, ev))
287 			/* Illegal trial of overwriting a previously registerd callback */
288 			return -1;
289 #endif
290 
291 		id = stree->id;
292 	}
293 
294 	/* calculate new subtree ID */
295 	nid = NEWID(id, ev, cb);
296 
297 	TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n",
298 			nid, id, ev, cb);
299 
300 	if ((nstree = kvs_search(store, nid))) {
301 		/* case 1. */
302 		TRACE_DBG("Case 1\n");
303 		/* update reference counters */
304 		if (stree)
305 			stree_dec_ref(store, stree);
306 
307 		nstree->ref_cnt++;
308 		*pstree = nstree;
309 	} else if (stree) {
310 		/* case 2. */
311 		TRACE_DBG("Case 2\n");
312 		tree_node_t *ntree = NULL, *sptr;
313 
314 		w = tree_search(g_evroot, ev);
315 		assert(w);
316 
317 		/* create spine: Create a new tree which consists of a tree node with
318 		 * new callback and the tree node's ancestors. */
319 		do {
320 			tree_node_t *ntn;
321 			if (!(ntn = tree_node_new())) {
322 				if (ntree)
323 					tree_del_recursive(ntree);
324 				return -1;
325 			}
326 			ntn->ft = w->ft;
327 			ntn->cb = cb;
328 			ntn->ev = w->ev;
329 			ntn->arg = w->arg;
330 			if (cb)
331 				cb = NULL;
332 
333 			if (IS_FLOATING_EVENT(ntn))
334 				stree_reg_floating_event(store, nid, &fevq, ntn);
335 
336 			TREE_INIT_NODE(ntn, link);
337 			TREE_INIT_NODE(ntn, invk);
338 			if (ntree) {
339 				TREE_INSERT_CHILD(ntn, ntree, link);
340 				if (!IS_FLOATING_EVENT(ntree))
341 					TREE_INSERT_CHILD(ntn, ntree, invk);
342 			}
343 			ntree = ntn;
344 		} while ((w = TREE_PARENT(w, link)));
345 
346 		/* attach flesh: Inherit previous subtree */
347 		sptr = ntree;
348 		if (stree->ref_cnt == 1) {
349 			/* case 2.1. */
350 			TRACE_DBG("Case 2.1\n");
351 			w = stree->root;
352 			while (w) {
353 				tree_node_t *next_walk = NULL,
354 							*next_walk2 = NULL,
355 							*next_sptr = TREE_FIRSTBORN(sptr, link);
356 
357 				for (w2 = TREE_FIRSTBORN(w, link);
358 					 w2 != NULL;
359 					 w2 = next_walk2) {
360 					next_walk2 = TREE_YOUNGER(w2, link);
361 					if (sptr && w2->ev == next_sptr->ev) {
362 
363 						assert(next_walk == NULL);
364 
365 						next_walk = w2;
366 
367 						assert(next_sptr->ev == next_walk->ev);
368 						if (next_sptr->cb != next_walk->cb)
369 							next_sptr->cb = next_walk->cb;
370 					} else {
371 						TREE_DETACH(w2, link);
372 						TREE_DFS_FOREACH(w3, w2, &stack, link)
373 							if (IS_FLOATING_EVENT(w3)) {
374 								kvs_remove(store, w3->key);
375 								stree_reg_floating_event(store, nid, &fevq, w3);
376 							}
377 						if (!IS_FLOATING_EVENT(w2))
378 							TREE_DETACH(w2, invk);
379 						if (next_walk) {
380 							TREE_INSERT_CHILD(sptr, w2, link);
381 							if (!IS_FLOATING_EVENT(w2))
382 								TREE_INSERT_CHILD(sptr, w2, invk);
383 						} else {
384 							tree_node_t *f = TREE_FIRSTBORN(sptr, link);
385 							TREE_INSERT_OLDER(f, w2, link);
386 							if (!IS_FLOATING_EVENT(w2)) {
387 								if ((f = TREE_FIRSTBORN(sptr, invk)))
388 									TREE_INSERT_OLDER(f, w2, invk);
389 								else
390 									TREE_INSERT_CHILD(sptr, w2, invk);
391 							}
392 						}
393 					}
394 				}
395 				w = next_walk;
396 				sptr = next_sptr;
397 				next_walk = NULL;
398 			}
399 		} else {
400 			/* case 2.2. */
401 			TRACE_DBG("Case 2.2\n");
402 			TREE_DFS_FOREACH(w, stree->root, &stack, link) {
403 				tree_node_t *ntn, *ptn;
404 
405 				if (sptr && w->ev == sptr->ev) {
406 					if (w->cb != sptr->cb)
407 						sptr->cb = w->cb;
408 					sptr = TREE_FIRSTBORN(sptr, link);
409 					continue;
410 				}
411 
412 				if (!(ntn = tree_node_new())) {
413 					if (ntree)
414 						tree_del_recursive(ntree);
415 					return -1;
416 				}
417 				ntn->ft = w->ft;
418 				ntn->cb = w->cb;
419 				ntn->ev = w->ev;
420 				ntn->arg = w->arg;
421 
422 				if (IS_FLOATING_EVENT(ntn))
423 					stree_reg_floating_event(store, nid, &fevq, ntn);
424 
425 				TREE_INIT_NODE(ntn, link);
426 				TREE_INIT_NODE(ntn, invk);
427 
428 				if (TREE_PARENT(w, link)) {
429 					ptn = tree_search(ntree, TREE_PARENT(w, link)->ev);
430 
431 					assert(ptn);
432 
433 					TREE_INSERT_CHILD(ptn, ntn, link);
434 					if (!IS_FLOATING_EVENT(ntn))
435 						TREE_INSERT_CHILD(ptn, ntn, invk);
436 				} else
437 					assert(0);
438 			}
439 		}
440 
441 		nstree = stree_create(nid, &fevq, ntree);
442 
443 		assert(nstree);
444 
445 		nstree->ref_cnt = 1;
446 
447 		kvs_insert(store, nid, nstree);
448 
449 		/* update reference counters */
450 		stree_dec_ref(store, stree);
451 
452 		*pstree = nstree;
453 	} else /* if (!stree) */ {
454 		/* case 3. */
455 		TRACE_DBG("Case 3\n");
456 		tree_node_t *ntree = NULL;
457 
458 		w = tree_search(g_evroot, ev);
459 		assert(w);
460 
461 		/* create spine: Create a new tree which consists of a tree node with
462 		 * new callback and the tree node's ancestors. */
463 		do {
464 			tree_node_t *ntn;
465 			if (!(ntn = tree_node_new())) {
466 				if (ntree)
467 					tree_del_recursive(ntree);
468 				return -1;
469 			}
470 			ntn->ft = w->ft;
471 			ntn->cb = cb;
472 			ntn->ev = w->ev;
473 			ntn->arg = w->arg;
474 			if (cb)
475 				cb = NULL;
476 
477 			if (IS_FLOATING_EVENT(ntn))
478 				stree_reg_floating_event(store, nid, &fevq, ntn);
479 
480 			TREE_INIT_NODE(ntn, link);
481 			TREE_INIT_NODE(ntn, invk);
482 			if (ntree) {
483 				TREE_INSERT_CHILD(ntn, ntree, link);
484 				if (!IS_FLOATING_EVENT(ntree))
485 					TREE_INSERT_CHILD(ntn, ntree, invk);
486 			}
487 			ntree = ntn;
488 		} while ((w = TREE_PARENT(w, link)));
489 
490 		nstree = stree_create(nid, &fevq, ntree);
491 
492 		assert(nstree);
493 
494 		nstree->ref_cnt = 1;
495 
496 		kvs_insert(store, nid, nstree);
497 
498 		*pstree = nstree;
499 	}
500 
501 	TRACE_DBG("\\>>>>>>  end  <<<<<</\n");
502 	return 0;
503 }
504 /*----------------------------------------------------------------------------*/
505 /** Unregister a callback
506  * @param [in] store: the key-value-store for subtrees
507  * @param [in, out] pstree: the target subtree pointer
508  * @param [in] ev: the target event
509  * @return 0 for success, -1 for failure
510  *
511  * For callback unregisteration, there can be some cases.
512  *   1. No registered callback will be left after this unregistration.
513  *   2. A new tree what we want is already there, so we can use it by simply
514  *      updating reference counters.
515  *   3. A new tree what we want is not there, so we create a new subtree by
516  *      copying a part of previous subtree
517  * */
518 inline int
519 UnregCb(kvs_t *store, stree_t **pstree, event_t ev)
520 {
521 #ifdef DBGMSG
522 	__PREPARE_DBGLOGGING();
523 #endif
524 	TRACE_DBG("/>>>>>> start <<<<<<\\\n");
525 	stree_t *stree, *nstree;
526 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack;
527 	tree_node_t *w, *target;
528 	fevq_t fevq;
529 	uint64_t id = 0, nid;
530 	callback_t cb;
531 
532 	if (!(stree = *pstree) || !(target = tree_search(stree->root, ev)) ||
533 		!(cb = target->cb))
534 		/* Illegal trial of unregistering a callback which is never registered
535 		 * before. */
536 		return -1;
537 
538 	id = stree->id;
539 
540 	/* Calculate new subtree ID */
541 	nid = NEWID(id, ev, cb);
542 
543 	TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n",
544 			nid, id, ev, cb);
545 
546 	if (nid == 0) {
547 		/* case 1. */
548 		TRACE_DBG("Case 1\n");
549 		/* update reference counters */
550 		if (stree)
551 			stree_dec_ref(store, stree);
552 
553 		*pstree = NULL;
554 	} else if ((nstree = kvs_search(store, nid))) {
555 		/* case 2. */
556 		TRACE_DBG("Case 2\n");
557 		/* update reference counters */
558 		if (stree)
559 			stree_dec_ref(store, stree);
560 
561 		nstree->ref_cnt++;
562 		*pstree = nstree;
563 	} else if (stree) {
564 		/* case 3. */
565 		TRACE_DBG("Case 3\n");
566 		bool proceed;
567 		tree_node_t *ntree = NULL, *sptr;
568 
569 		sptr = TREE_PARENT(target, link);
570 #define TREE_IS_ONLY_CHILD(root, x, field) \
571 		(TREE_PARENT((x), field) ? \
572 		 TREE_PARENT((x), field)->field.tn_first == (x) && \
573 		 TREE_PARENT((x), field)->field.tn_last == (x) : \
574 		 (x) == (root) && (x)->field.tn_younger == NULL)
575 		while (sptr && !sptr->cb && TREE_IS_ONLY_CHILD(stree->root, sptr, link))
576 			sptr = TREE_PARENT(sptr, link);
577 
578 		assert(sptr);
579 		/* 'sptr == NULL' means the tree will lose the only callback.
580 		 * This case should be handled by 'Case 1' */
581 
582 		TREE_DFS_FOREACH_SELECTIVE(w, stree->root, &stack, link, proceed) {
583 			tree_node_t *ntn, *ptn;
584 
585 			if (w == sptr)
586 				proceed = false;
587 
588 			if (!(ntn = tree_node_new())) {
589 				/* free incomplete ntree */
590 				if (ntree)
591 					tree_del_recursive(ntree);
592 				return -1;
593 			}
594 			ntn->ft = w->ft;
595 			ntn->cb = w->cb;
596 			ntn->ev = w->ev;
597 			ntn->arg = w->arg;
598 
599 			if (IS_FLOATING_EVENT(ntn))
600 				stree_reg_floating_event(store, nid, &fevq, ntn);
601 
602 			TREE_INIT_NODE(ntn, link);
603 			TREE_INIT_NODE(ntn, invk);
604 
605 			if (TREE_PARENT(w, link)) {
606 				assert(ntree);
607 				ptn = tree_search(ntree, TREE_PARENT(w, link)->ev);
608 
609 				assert(ptn);
610 
611 				TREE_INSERT_CHILD(ptn, ntn, link);
612 				if (!IS_FLOATING_EVENT(ntn))
613 					TREE_INSERT_CHILD(ptn, ntn, invk);
614 			} else
615 				/* this is the root node */
616 				ntree = ntn;
617 		}
618 
619 		nstree = stree_create(nid, &fevq, ntree);
620 
621 		assert(nstree);
622 
623 		nstree->ref_cnt = 1;
624 
625 		kvs_insert(store, nid, nstree);
626 
627 		/* update reference counters */
628 		stree_dec_ref(store, stree);
629 
630 		*pstree = nstree;
631 	} else /* if (!stree) */
632 		return -1;
633 
634 	TRACE_DBG("\\>>>>>>  end  <<<<<</\n");
635 	return 0;
636 }
637 /*----------------------------------------------------------------------------*/
638 inline int
639 ModCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb)
640 {
641 	assert(*pstree || cb);
642 
643 	/* Event ID starts from 1 */
644 	if (ev == 0)
645 		return -1;
646 
647 	if (cb)
648 		return RegCb(store, pstree, ev, cb);
649 	else
650 		return UnregCb(store, pstree, ev);
651 }
652 /*----------------------------------------------------------------------------*/
653 void
654 GlobInitEvent(void)
655 {
656 	int i;
657 
658 	if (!(g_evroot = tree_node_new())) {
659 		perror("FATAL: Failed to allocate essentials for the system\n");
660 		exit(0);
661 	}
662 	g_evroot->ev = 0;
663 	g_evroot->ft = NULL;
664 	g_evroot->cb = NULL;
665 	TREE_INIT_NODE(g_evroot, link);
666 	TREE_INIT_NODE(g_evroot, invk);
667 	for (i = 0; i < NUM_BEV; i++) {
668 		tree_node_t *ntn;
669 		if (!(ntn = tree_node_new())) {
670 			perror("FATAL: Failed to allocate essentials for the system\n");
671 			exit(0);
672 		}
673 		ntn->ev = 1 << i;
674 		ntn->ft = NULL;
675 		ntn->cb = NULL;
676 		TREE_INIT_NODE(ntn, link);
677 		TREE_INIT_NODE(ntn, invk);
678 		TREE_INSERT_CHILD(g_evroot, ntn, link);
679 		TREE_INSERT_CHILD(g_evroot, ntn, invk);
680 	}
681 	g_evid = 1 <<  NUM_BEV;
682 }
683 /*----------------------------------------------------------------------------*/
684 void
685 InitEvent(mtcp_manager_t mtcp)
686 {
687 	mtcp->ev_store = kvs_create(KVS_BUCKETS, KVS_ENTRIES);
688 }
689 /*----------------------------------------------------------------------------*/
690 event_t
691 mtcp_alloc_event(event_t event)
692 {
693 	tree_node_t *parent, *new;
694 	if (!(parent = tree_search(g_evroot, event)))
695 		return MOS_NULL_EVENT;
696 
697 	if ((new = tree_node_new()) == NULL)
698 		return MOS_NULL_EVENT;
699 
700 	new->ev = g_evid++;
701 	new->ft = NULL;
702 	new->cb = NULL;
703 	new->arg.arg = NULL;
704 	new->arg.len = 0;
705 
706 	TREE_INIT_NODE(new, link);
707 	TREE_INIT_NODE(new, invk);
708 
709 	TREE_INSERT_CHILD(parent, new, link);
710 
711 	return new->ev;
712 }
713 /*----------------------------------------------------------------------------*/
714 event_t
715 mtcp_define_event(event_t event, filter_t filter, struct filter_arg *arg)
716 {
717 #ifdef DBGMSG
718 	__PREPARE_DBGLOGGING();
719 #endif
720 	tree_node_t *parent, *new, *walk;
721 
722 	if (!filter)
723 		return MOS_NULL_EVENT;
724 	if (arg && arg->len > 0 && !arg->arg)
725 		return MOS_NULL_EVENT;
726 	if (!(parent = tree_search(g_evroot, event)))
727 		return MOS_NULL_EVENT;
728 	for (walk = TREE_FIRSTBORN(parent, link);
729 		 walk;
730 		 walk = TREE_YOUNGER(walk, link))
731 		if ((walk->ft == filter) &&
732 			((walk->arg.arg == NULL && (!arg || !arg->arg || arg->len <= 0)) ||
733 			 (walk->arg.len == arg->len &&
734 			  memcmp(walk->arg.arg, arg->arg, arg->len) == 0)))
735 			return walk->ev;
736 
737 	if ((new = tree_node_new()) == NULL)
738 		return MOS_NULL_EVENT;
739 
740 	new->ev = g_evid++;
741 	new->ft = filter;
742 	new->cb = NULL;
743 	if (!arg || !arg->arg || arg->len <= 0) {
744 		new->arg.arg = NULL;
745 		new->arg.len = 0;
746 	} else {
747 		new->arg.arg = malloc(arg->len);
748 		memcpy(new->arg.arg, arg->arg, arg->len);
749 		new->arg.len = arg->len;
750 	}
751 
752 	TREE_INIT_NODE(new, link);
753 	TREE_INIT_NODE(new, invk);
754 
755 	TRACE_DBG("parent: %ld\n", parent->ev);
756 	TREE_INSERT_CHILD(parent, new, link);
757 	TREE_INSERT_CHILD(parent, new, invk);
758 
759 	return new->ev;
760 }
761 /*----------------------------------------------------------------------------*/
762 #define EVENT_STACK_PUSH(s, var) \
763 	(assert(&(s)->stack[sizeof((s)->stack) / sizeof(void *)] > (s)->sp), \
764 	 *(++((s)->sp)) = (var))
765 #define EVENT_STACK_POP(s) \
766 	(*(((s)->sp)--))
767 #define EVENT_STACK_CLR(s) \
768 	((s)->stack[0] = NULL, (s)->sp = (s)->stack)
769 
770 inline int
771 RaiseEv(kvs_t *store, event_t event)
772 {
773 	tree_node_t *ev = NULL;
774 #if 1
775 	struct {
776 		event_t ev;
777 		uint64_t sid;
778 	} instance;
779 
780 	instance.ev = event;
781 	instance.sid = g_cur_stree->id;
782 
783 	_key_t key = XXH64(&instance, sizeof(instance), XXHSEED);
784 	ev = (tree_node_t *)kvs_search(store, key);
785 #else
786 	tree_node_t *walk;
787 	for (walk = TREE_FIRSTBORN(g_cur_ev, link);
788 		 walk;
789 		 walk = TREE_YOUNGER(walk, link)) {
790 		if (walk->ev == event && IS_FLOATING_EVENT(walk)) {
791 			ev = walk;
792 			break;
793 		}
794 	}
795 #endif
796 
797 	if (!ev)
798 		return -1;
799 
800 	if (!ev->is_in_raiseq) {
801 		ev->is_in_raiseq = true;
802 		EVENT_STACK_PUSH(&g_evstack, ev);
803 	}
804 
805 	return 0;
806 }
807 int
808 mtcp_raise_event(mctx_t mctx, event_t event)
809 {
810 	mtcp_manager_t mtcp = GetMTCPManager(mctx);
811 	if (!mtcp)
812 		return -1;
813 
814 	return RaiseEv(mtcp->ev_store, event);
815 }
816 /*----------------------------------------------------------------------------*/
817 #define PRINT_EV(event, field) \
818 	printf("[%s](node: %p) ev: %ld, older: %ld, younger: %ld, child: %ld, " \
819 			"last: %ld, parent: %ld, ft: %p, cb: %p\n", \
820 		  #field, (event), (event)->ev, \
821 		  (event)->field.tn_older ? (event)->field.tn_older->ev : -1, \
822 		  (event)->field.tn_younger ? (event)->field.tn_younger->ev : -1, \
823 		  (event)->field.tn_first ? (event)->field.tn_first->ev : -1, \
824 		  (event)->field.tn_last ? (event)->field.tn_last->ev : -1, \
825 		  (event)->field.tn_parent ? (event)->field.tn_parent->ev : -1, \
826 		  (event)->ft, (event)->cb)
827 inline void
828 HandleCb(mctx_t const mctx, int sock, int side,
829 		 stree_t * const stree, event_t events)
830 {
831 	int i;
832 	bool proceed;
833 	tree_node_t *walk, *bev, *ev;
834 	TREE_SCRATCH(_tree_node_t, MAX_DEPTH) queue;
835 	assert(stree);
836 
837 	g_cur_stree = stree;
838 
839 	for (i = 0; i < NUM_BEV; i++) {
840 		if (!((1L << i) & events))
841 			continue;
842 
843 		if (!(bev = stree->bevs[i]))
844 			continue;
845 
846 		EVENT_STACK_CLR(&g_evstack);
847 		tree_node_t **ptr = g_evstack.sp;
848 		TREE_DFS_FOREACH_SELECTIVE(walk, bev, &queue, invk, proceed) {
849 			//PRINT_EV(walk, link);
850 			//PRINT_EV(walk, invk);
851 
852 			g_cur_ev = walk;
853 
854 			if (walk != bev && walk->ft &&
855 				walk->ft(mctx, sock, side, walk->ev, &walk->arg) == false) {
856 				ptr = g_evstack.sp;
857 				proceed = false;
858 				continue;
859 			}
860 
861 			if (walk->cb)
862 				walk->cb(mctx, sock, side, walk->ev, &walk->arg);
863 
864 			while (ptr < g_evstack.sp) {
865 				ptr++;
866 				TREE_INSERT_CHILD(walk, *ptr, invk);
867 			}
868 		}
869 		g_cur_ev = NULL;
870 
871 		while ((ev = EVENT_STACK_POP(&g_evstack))) {
872 			ev->is_in_raiseq = false;
873 			TREE_DETACH(ev, invk);
874 		}
875 
876 		if (!(events &= ~(1L << i))) {
877 			g_cur_stree = NULL;
878 			return;
879 		}
880 	}
881 
882 	g_cur_stree = NULL;
883 }
884 /*----------------------------------------------------------------------------*/
885 int
886 mtcp_register_callback(mctx_t mctx, int sockid, event_t event,
887 		                       int hook_point, callback_t callback)
888 {
889 	socket_map_t socket;
890 	stree_t **pstree;
891 
892 	if (event == MOS_NULL_EVENT ||
893 		event < (1L << NUM_BEV) ? !POWER_OF_TWO(event) : 0)
894 		return -1;
895 
896 	mtcp_manager_t mtcp = GetMTCPManager(mctx);
897 	if (!mtcp)
898 		return -1;
899 
900 	socket = &mtcp->msmap[sockid];
901 
902 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) {
903 		if (hook_point == MOS_NULL)
904 			pstree = &socket->monitor_stream->stree_dontcare;
905 		else if (hook_point == MOS_HK_RCV)
906 			pstree = &socket->monitor_stream->stree_pre_rcv;
907 		else if (hook_point == MOS_HK_SND)
908 			pstree = &socket->monitor_stream->stree_post_snd;
909 		else
910 			return -1;
911 
912 	} else if (socket->socktype == MOS_SOCK_MONITOR_STREAM
913 			|| socket->socktype == MOS_SOCK_MONITOR_RAW) {
914 		if (hook_point == MOS_NULL)
915 			pstree = &socket->monitor_listener->stree_dontcare;
916 		else if (hook_point == MOS_HK_RCV)
917 			pstree = &socket->monitor_listener->stree_pre_rcv;
918 		else if (hook_point == MOS_HK_SND)
919 			pstree = &socket->monitor_listener->stree_post_snd;
920 		else
921 			return -1;
922 	} else {
923 		errno = EINVAL;
924 		return -1;
925 	}
926 
927 	return ModCb(mtcp->ev_store, pstree, event, callback);
928 }
929 /*----------------------------------------------------------------------------*/
930 int
931 mtcp_unregister_callback(mctx_t mctx, int sockid, event_t event,
932 		                       int hook_point)
933 {
934 	return mtcp_register_callback(mctx, sockid, event, hook_point, NULL);
935 }
936 /*----------------------------------------------------------------------------*/
937 inline void
938 HandleCallback(mtcp_manager_t mtcp, uint32_t hook,
939 		socket_map_t socket, int side, struct pkt_ctx *pctx, event_t events)
940 {
941 	struct sfbpf_program fcode;
942 
943 	if (!socket)
944 		return;
945 
946 	if (!events)
947 		return;
948 	assert(events);
949 
950 	/* if client side monitoring is disabled, then skip */
951 	if (side == MOS_SIDE_CLI && socket->monitor_stream->client_mon == 0)
952 		return;
953 	/* if server side monitoring is disabled, then skip */
954 	else if (side == MOS_SIDE_SVR && socket->monitor_stream->server_mon == 0)
955 		return;
956 
957 #define MSTRM(sock) (sock)->monitor_stream
958 #define MLSNR(sock) (sock)->monitor_listener
959 	/* We use `?:` notation instead of `if/else` to make `evp` as const */
960 	stree_t * const stree =
961 		(socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) ?
962 		    ((hook == MOS_HK_RCV) ? MSTRM(socket)->stree_pre_rcv :
963 			 (hook == MOS_HK_SND) ? MSTRM(socket)->stree_post_snd :
964 									  MSTRM(socket)->stree_dontcare)
965 		: (socket->socktype == MOS_SOCK_MONITOR_STREAM)
966 		  || (socket->socktype == MOS_SOCK_MONITOR_RAW) ?
967 		    ((hook == MOS_HK_RCV) ? MLSNR(socket)->stree_pre_rcv :
968 			 (hook == MOS_HK_SND) ? MLSNR(socket)->stree_post_snd :
969 									  MLSNR(socket)->stree_dontcare) :
970 		 NULL;
971 
972 	if (!stree)
973 		return;
974 
975 	/* mtcp_bind_monitor_filter()
976      * - BPF filter is evaluated only for RAW socket and PASSIVE socket (= orphan filter)
977 	 * - stream syn filter is moved to and evaluated on socket creation */
978 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM) {
979 		fcode = MLSNR(socket)->stream_orphan_fcode;
980 		/* if not match with filter, return */
981 		if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode,
982 		    (uint8_t *)pctx->p.iph - sizeof(struct ethhdr),
983 	        pctx->p.ip_len + sizeof(struct ethhdr)) == 0)
984 			return;
985 	}
986     if (socket->socktype == MOS_SOCK_MONITOR_RAW) {
987 		fcode = MLSNR(socket)->raw_pkt_fcode;
988 		/* if not match with filter, return */
989 		if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode,
990 		    (uint8_t *)pctx->p.iph - sizeof(struct ethhdr),
991 	        pctx->p.ip_len + sizeof(struct ethhdr)) == 0)
992 			return;
993 	}
994 
995 	mctx_t const mctx = g_ctx[mtcp->ctx->cpu];
996 	mtcp->pctx = pctx; /* for mtcp_getcurpkt() */
997 
998 	HandleCb(mctx, socket->id, side, stree, events);
999 }
1000 /*----------------------------------------------------------------------------*/
1001 #endif
1002