1 #ifdef NEWEV 2 3 /* ---------------------------------------------------------------------------- 4 * Please see 4 Jul. 2015 meeting slides for conceptual explanation. 5 * 'Subtree' has special meaning and its 'ID' is very important concept for 6 * understanding this code. 7 * 8 * Please contact Donghwi Kim for further questions, 9 * via e-mail: [email protected] 10 */ 11 #include "debug.h" 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <string.h> 15 #include <tree.h> 16 #include <key_value_store.h> 17 #include "scalable_event.h" 18 19 #include "mtcp.h" 20 #include "mos_api.h" 21 #include "mtcp_api.h" 22 23 #define NEWHASH 24 #ifdef NEWHASH 25 #include "mtcp_util.h" 26 #define XXHSEED 18446744073709551557ULL 27 #endif 28 /*----------------------------------------------------------------------------*/ 29 #define POWER_OF_TWO(x) (!((x - 1) & (x))) 30 31 #define MAX_EVENTS 30240 32 #define KVS_BUCKETS 1024 33 #define KVS_ENTRIES 102400 34 #define MAX_DEPTH MAX_EVENTS 35 #define NEWID(id, ev, cb) (id ^ hash64(ev, cb)) 36 #define IS_UDE(e) ((e)->ev >= (1 << NUM_BEV)) 37 #define IS_FLOATING_EVENT(e) (!(e)->ft && IS_UDE(e)) 38 39 struct _tree_node_t *g_evroot; 40 event_t g_evid; 41 42 static __thread stree_t *g_cur_stree = NULL; 43 static __thread tree_node_t *g_cur_ev = NULL; 44 45 static __thread struct { 46 struct _tree_node_t *stack[MAX_EVENTS]; 47 struct _tree_node_t **sp; 48 } g_evstack; 49 50 #define free(x) do { \ 51 assert(x); \ 52 free(x); \ 53 } while (0) 54 55 /*----------------------------------------------------------------------------*/ 56 struct devent { 57 event_t ev; /* event id = key */ 58 tree_node_t *ptr; /* ptr to the event node in the d-forest = value */ 59 struct devent *next; /* next pointer */ 60 }; 61 #define MAX_BUCKET 2048 62 #define HASHVAL(x) ((x) & (MAX_BUCKET-1)) 63 struct devent* g_dfTable[MAX_BUCKET] = {0}; /* d-forest hash table */ 64 /*----------------------------------------------------------------------------*/ 65 /* simple hash table implementation */ 66 static bool 67 dforest_store(event_t ev, tree_node_t* ptr) 68 { 69 struct devent *de, *p; 70 int idx; 71 72 if ((de = malloc(sizeof(*de))) == NULL) { 73 assert(0); /* should not happen! */ 74 return false; 75 } 76 de->ev = ev; 77 de->ptr = ptr; 78 de->next = NULL; 79 80 idx = HASHVAL(ev); 81 if (g_dfTable[idx] == NULL) { 82 g_dfTable[idx] = de; 83 } 84 else { 85 p = g_dfTable[idx]; 86 while (p->next) 87 p = p->next; 88 p->next = de; 89 } 90 return true; 91 } 92 /*----------------------------------------------------------------------------*/ 93 static tree_node_t * 94 dforest_search(event_t ev) 95 { 96 int idx; 97 struct devent *p; 98 99 idx = HASHVAL(ev); 100 if (g_dfTable[idx] == NULL) 101 return NULL; 102 103 for (p = g_dfTable[idx]; 104 p != NULL; 105 p = p->next) { 106 if (p->ev == ev) 107 return p->ptr; 108 } 109 return NULL; 110 } 111 /*----------------------------------------------------------------------------*/ 112 /** Create a new tree node 113 * @return the new tree node 114 * */ 115 inline tree_node_t * 116 tree_node_new() 117 { 118 return calloc(1, sizeof(tree_node_t)); 119 } 120 /*----------------------------------------------------------------------------*/ 121 /** Delete a tree node */ 122 inline void 123 tree_node_del(tree_node_t *node) 124 { 125 free(node); 126 } 127 /*----------------------------------------------------------------------------*/ 128 /** Delete a tree node and all of its descendants 129 * @param [in] node: target node 130 * */ 131 inline void 132 tree_del_recursive(tree_node_t *node) 133 { 134 struct _tree_node_t *walk = NULL, *del = NULL; 135 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack; 136 137 TREE_DFS_FOREACH(walk, node, &stack, link) { 138 if (del) 139 tree_node_del(del); 140 del = walk; 141 } 142 if (del) 143 tree_node_del(del); 144 } 145 /*----------------------------------------------------------------------------*/ 146 /** Search a tree node by event ID 147 * @param [in] root: root node 148 * @param [in] ev: event ID 149 * @return target node 150 * 151 * The search is in DFS mannar 152 * */ 153 inline tree_node_t * 154 tree_search(tree_node_t *root, event_t ev) 155 { 156 struct _tree_node_t *walk = NULL; 157 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack; 158 159 TREE_DFS_FOREACH(walk, root, &stack, link) 160 if (walk->ev == ev) { 161 return walk; 162 } 163 return NULL; 164 } 165 /*----------------------------------------------------------------------------*/ 166 /** Create a new subtree 167 * @param [in] id: subtree ID 168 * @param [in] root: root of the subtree 169 * @return new subtree 170 * 171 * Allocate and initialize subtree structure, stree_t: 172 * 1. Allocate the structure. 173 * 2. Initialize reference counter and fill ID field. 174 * 3. Set 'root' pointer which points the root node of a tree. 175 * The tree should contain all required tree nodes. 176 * 4. Set built-in event tree node pointers. 177 * */ 178 inline stree_t * 179 stree_create(uint64_t id, tree_node_t *root) 180 { 181 tree_node_t *bev; 182 stree_t *nstree = malloc(sizeof(stree_t)); 183 if (!nstree) 184 return NULL; 185 186 nstree->ref_cnt = 0; 187 nstree->id = id; 188 nstree->root = root; 189 memset(nstree->bevs, 0, sizeof(nstree->bevs)); 190 191 for (bev = TREE_FIRSTBORN(root, link); bev; bev = TREE_YOUNGER(bev, link)) { 192 int idx = 0; 193 while (bev->ev >> (idx + 1)) 194 idx++; 195 nstree->bevs[idx] = bev; 196 } 197 198 return nstree; 199 } 200 /*----------------------------------------------------------------------------*/ 201 /** Destroy the subtree 202 * @param [in] stree: the target subtree */ 203 inline void 204 stree_destroy(kvs_t *store, stree_t *stree) 205 { 206 tree_del_recursive(stree->root); 207 free(stree); 208 } 209 /*----------------------------------------------------------------------------*/ 210 /** Increase reference counter of the subtree 211 * @param [in] stree: the target subtree */ 212 inline void 213 stree_inc_ref(stree_t *stree) 214 { 215 stree->ref_cnt++; 216 } 217 /*----------------------------------------------------------------------------*/ 218 /** Decrease reference counter of the subtree, and remove it from the key- 219 * value-store if needed 220 * @param [in] store: the key-value-store 221 * @param [in] stree: the target subtree 222 * 223 * In addition to decreasing the reference counter, remove the subtree from 224 * key-value-store if decreased reference counter is zero. 225 * */ 226 inline void 227 stree_dec_ref(kvs_t *store, stree_t *stree) 228 { 229 if (stree && !--stree->ref_cnt) { 230 kvs_remove(store, stree->id); 231 stree_destroy(store, stree); 232 } 233 } 234 /*----------------------------------------------------------------------------*/ 235 /** Hash the event ID and callback pair 236 * @param [in] ev: the event ID 237 * @param [in] cb: the callback function pointer 238 * @return hash value 239 * 240 * TODO: Replace 32-bit Jenkins hash with better one. 241 * */ 242 inline static uint64_t 243 hash64(event_t ev, callback_t cb) 244 { 245 #ifdef NEWHASH 246 struct { 247 event_t ev; 248 callback_t cb; 249 } instance; 250 251 instance.ev = ev; 252 instance.cb = cb; 253 254 return XXH64(&instance, sizeof(event_t) + sizeof(callback_t), XXHSEED); 255 #else 256 uint64_t hash = 0; 257 int i; 258 259 for (i = 0; i < sizeof(ev); ++i) { 260 hash += ((uint8_t *)&ev)[i]; 261 hash += (hash << 10); 262 hash ^= (hash >> 6); 263 } 264 for (i = 0; i < sizeof(cb); ++i) { 265 hash += ((uint8_t *)&cb)[i]; 266 hash += (hash << 10); 267 hash ^= (hash >> 6); 268 } 269 hash += (hash << 3); 270 hash ^= (hash >> 11); 271 hash += (hash << 15); 272 273 return hash; 274 #endif 275 } 276 277 /*-------------------------------------------------------------------------*/ 278 /* Create a new tree that consists of a tree node (w) with a new 279 * callback and take a snapshot of the subtree from w all the way up to 280 * the root node by traversing the ancestors of w. 281 */ 282 /*-------------------------------------------------------------------------*/ 283 static tree_node_t* 284 create_spine(tree_node_t* w, callback_t cb) 285 { 286 tree_node_t *ntn, *ntree = NULL; 287 288 if ((ntn = tree_node_new()) == NULL) 289 return NULL; 290 291 /* the leaf event node with callback function */ 292 ntn->ft = w->ft; 293 ntn->cb = cb; 294 ntn->ev = w->ev; 295 ntn->arg = w->arg; 296 ntree = ntn; /* we start from the leaf */ 297 298 do { 299 if (!(ntn = tree_node_new())) { 300 if (ntree) 301 tree_del_recursive(ntree); 302 return NULL; 303 } 304 ntn->ft = w->ft; 305 ntn->ev = w->ev; 306 ntn->arg = w->arg; 307 308 TREE_INSERT_CHILD(ntn, ntree, link); 309 if (!IS_FLOATING_EVENT(ntree)) 310 TREE_INSERT_CHILD(ntn, ntree, invk); 311 312 /* ntn becomes the new tree */ 313 ntree = ntn; 314 } while ((w = TREE_PARENT(w, link))); 315 316 return ntree; 317 } 318 #if 0 319 /*----------------------------------------------------------------------------*/ 320 static stree_t * 321 clone_stree(stree_t * stree) 322 { 323 stree_t *newstree, *walk; 324 tree_node_t *newtree; 325 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack; 326 struct _tree_node_t *walk = NULL, *del = NULL; 327 328 /* clone the tree */ 329 TREE_DFS_FOREACH(walk, stree->root, &stack, link) { 330 new_tree_node_new() 331 if (del) 332 tree_node_del(del); 333 del = walk; 334 } 335 if (del) 336 tree_node_del(del); 337 338 339 TREE_DFS_FOREACH(w, stree->root, &stack, link) { 340 tree_node_t *ntn, *ptn = NULL; 341 } 342 } 343 #endif 344 /*----------------------------------------------------------------------------*/ 345 /** Register a callback 346 * @param [in] store: the key-value-store for subtrees 347 * @param [in, out] pstree: the target subtree pointer 348 * @param [in] ev: the target event 349 * @param [in] cb: the callback to be registered 350 * @return 0 for success, -1 for failure 351 * 352 * For callback registeration, there can be some cases. 353 * 1. A new tree what we want is already there, so we can use it by simply 354 * updating reference counters. 355 * 2. A new tree what we want is not there, so we are going to create a 356 * new subtree. The new subtree will not only have new callback, but also 357 * inherit previous subtree. 358 * 2.1. The reference counter of previous subtree is 1, so we can avoid 359 * new tree allocation by reusing tree nodes of previous subtree. 360 * 2.2. The reference counter of previous subtree is not 1, so we should 361 * copy previous subtree's tree nodes. 362 * 3. Previously, there was no subtree (in other words, this is the first 363 * callback registeration) so we are going to make a new subtree which 364 * has only one callback 365 * */ 366 inline int 367 RegCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb) 368 { 369 #ifdef DBGMSG 370 __PREPARE_DBGLOGGING(); 371 #endif 372 TRACE_DBG("/>>>>>> start <<<<<<\\\n"); 373 stree_t *stree, *nstree; 374 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack; 375 tree_node_t *w, *w2, *ntree; 376 uint64_t id = 0, nid; 377 378 if ((stree = *pstree)) { 379 /* XXX: tree_search() is heavy operation, but it is used for only error 380 * checking. Is there any way to avoid this? 381 * --> We can have a key-value-store for this, but is it too much? */ 382 #if 0 383 if (tree_search(stree->root, ev)) 384 /* Illegal trial of overwriting a previously registerd callback */ 385 return -1; 386 #endif 387 388 id = stree->id; 389 } 390 391 /* calculate new subtree ID */ 392 nid = NEWID(id, ev, cb); 393 394 TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n", nid, id, ev, cb); 395 396 if ((nstree = kvs_search(store, nid))) { /* case 1. */ 397 TRACE_DBG("Case 1\n"); 398 /* update reference counters */ 399 if (stree) 400 stree_dec_ref(store, stree); 401 402 nstree->ref_cnt++; 403 *pstree = nstree; 404 #if 0 405 fprintf(stderr, "sizeof(tcp_stream)=%ld sizeof(tcp_send_vars)=%ld" 406 "sizeof(tcp_recv_vars)=%ld\n", 407 sizeof(tcp_stream), 408 sizeof(struct tcp_send_vars), sizeof(struct tcp_recv_vars)); 409 #endif 410 return 0; 411 } 412 413 /* find the event node with ev and take a snapshot of the ancestors */ 414 w = dforest_search(ev); 415 // w = tree_search(g_evroot, ev); 416 assert(w); 417 if ((ntree = create_spine(w, cb)) == NULL) 418 return (-1); 419 420 if (stree) { /* case 2. */ 421 TRACE_DBG("Case 2\n"); 422 tree_node_t *sptr; 423 424 /* attach flesh: Inherit previous subtree */ 425 sptr = ntree; 426 427 #if 0 428 /* new code */ 429 if (stree->ref_cnt > 1) { 430 /* take a snapshot of the entire forest */ 431 curf = clone_tree(stree); 432 curf->root->ref_cnt = 1; 433 stree->ref_cnt--; 434 } else { 435 assert(stree->ref_cnt == 1); 436 curf = stree; 437 /* remove the current id from hash table */ 438 kvs_remove(store, stree->id); 439 } 440 441 /* merge sptr into curf->root (existing tree) */ 442 #endif 443 444 if (stree->ref_cnt == 1) { 445 /* case 2.1. */ 446 TRACE_DBG("Case 2.1\n"); 447 w = stree->root; 448 while (w) { 449 tree_node_t *next_walk = NULL, *next_walk2 = NULL, 450 *next_sptr = TREE_FIRSTBORN(sptr, link); 451 452 for (w2 = TREE_FIRSTBORN(w, link); 453 w2 != NULL; 454 w2 = next_walk2) { 455 next_walk2 = TREE_YOUNGER(w2, link); 456 if (sptr && w2->ev == next_sptr->ev) { 457 assert(next_walk == NULL); 458 next_walk = w2; 459 if (next_sptr->cb != next_walk->cb) 460 next_sptr->cb = next_walk->cb; 461 } 462 else { 463 TREE_DETACH(w2, link); 464 if (!IS_FLOATING_EVENT(w2)) 465 TREE_DETACH(w2, invk); 466 if (next_walk) { 467 TREE_INSERT_CHILD(sptr, w2, link); 468 if (!IS_FLOATING_EVENT(w2)) 469 TREE_INSERT_CHILD(sptr, w2, invk); 470 } else { 471 tree_node_t *f = TREE_FIRSTBORN(sptr, link); 472 TREE_INSERT_OLDER(f, w2, link); 473 if (!IS_FLOATING_EVENT(w2)) { 474 if ((f = TREE_FIRSTBORN(sptr, invk))) 475 TREE_INSERT_OLDER(f, w2, invk); 476 else 477 TREE_INSERT_CHILD(sptr, w2, invk); 478 } 479 } 480 } 481 } 482 w = next_walk; 483 sptr = next_sptr; 484 next_walk = NULL; 485 } 486 } else { // stree->ref_count != 1 487 /* case 2.2. */ 488 TRACE_DBG("Case 2.2\n"); 489 TREE_DFS_FOREACH(w, stree->root, &stack, link) { 490 tree_node_t *ntn, *ptn = NULL; 491 492 if (sptr && w->ev == sptr->ev) { 493 if (w->cb != sptr->cb) 494 sptr->cb = w->cb; 495 sptr = TREE_FIRSTBORN(sptr, link); 496 continue; 497 } 498 499 if (!(ntn = tree_node_new())) { 500 if (ntree) 501 tree_del_recursive(ntree); 502 return -1; 503 } 504 ntn->ft = w->ft; 505 ntn->cb = w->cb; 506 ntn->ev = w->ev; 507 ntn->arg = w->arg; 508 509 if (TREE_PARENT(w, link)) { 510 ptn = tree_search(ntree, TREE_PARENT(w, link)->ev); 511 assert(ptn); 512 TREE_INSERT_CHILD(ptn, ntn, link); 513 if (!IS_FLOATING_EVENT(ntn)) 514 TREE_INSERT_CHILD(ptn, ntn, invk); 515 } else { 516 if (ntree) 517 tree_del_recursive(ntree); 518 free(ntn); 519 TRACE_ERROR("Can't find parent\n"); 520 assert(0); 521 exit(EXIT_FAILURE); 522 } 523 } 524 } 525 } 526 527 /* case 3 if (stree == NULL) */ 528 nstree = stree_create(nid, ntree); 529 if (nstree == NULL) { 530 TRACE_ERROR("Failed to create stree!\n"); 531 assert(nstree); 532 } 533 nstree->ref_cnt = 1; 534 kvs_insert(store, nid, nstree); 535 /* update reference counters */ 536 if (stree) 537 stree_dec_ref(store, stree); 538 *pstree = nstree; 539 540 TRACE_DBG("\\>>>>>> end <<<<<</\n"); 541 return 0; 542 } 543 /*----------------------------------------------------------------------------*/ 544 /** Unregister a callback 545 * @param [in] store: the key-value-store for subtrees 546 * @param [in, out] pstree: the target subtree pointer 547 * @param [in] ev: the target event 548 * @return 0 for success, -1 for failure 549 * 550 * For callback unregisteration, there can be some cases. 551 * 1. No registered callback will be left after this unregistration. 552 * 2. A new tree what we want is already there, so we can use it by simply 553 * updating reference counters. 554 * 3. A new tree what we want is not there, so we create a new subtree by 555 * copying a part of previous subtree 556 * */ 557 inline int 558 UnregCb(kvs_t *store, stree_t **pstree, event_t ev) 559 { 560 #ifdef DBGMSG 561 __PREPARE_DBGLOGGING(); 562 #endif 563 TRACE_DBG("/>>>>>> start <<<<<<\\\n"); 564 stree_t *stree, *nstree; 565 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack; 566 tree_node_t *w, *target; 567 uint64_t id = 0, nid; 568 callback_t cb; 569 570 if (!(stree = *pstree) || !(target = tree_search(stree->root, ev)) || 571 !(cb = target->cb)) 572 /* Illegal trial of unregistering a callback which is never registered 573 * before. */ 574 return -1; 575 576 id = stree->id; 577 578 /* Calculate new subtree ID */ 579 nid = NEWID(id, ev, cb); 580 581 TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n", 582 nid, id, ev, cb); 583 584 if (nid == 0) { 585 /* case 1. */ 586 TRACE_DBG("Case 1\n"); 587 /* update reference counters */ 588 if (stree) 589 stree_dec_ref(store, stree); 590 591 *pstree = NULL; 592 } else if ((nstree = kvs_search(store, nid))) { 593 /* case 2. */ 594 TRACE_DBG("Case 2\n"); 595 /* update reference counters */ 596 if (stree) 597 stree_dec_ref(store, stree); 598 599 nstree->ref_cnt++; 600 *pstree = nstree; 601 } else if (stree) { 602 /* case 3. */ 603 TRACE_DBG("Case 3\n"); 604 bool proceed; 605 tree_node_t *ntree = NULL, *sptr; 606 607 sptr = TREE_PARENT(target, link); 608 #define TREE_IS_ONLY_CHILD(root, x, field) \ 609 (TREE_PARENT((x), field) ? \ 610 TREE_PARENT((x), field)->field.tn_first == (x) && \ 611 TREE_PARENT((x), field)->field.tn_last == (x) : \ 612 (x) == (root) && (x)->field.tn_younger == NULL) 613 while (sptr && !sptr->cb && TREE_IS_ONLY_CHILD(stree->root, sptr, link)) 614 sptr = TREE_PARENT(sptr, link); 615 616 assert(sptr); 617 /* 'sptr == NULL' means the tree will lose the only callback. 618 * This case should be handled by 'Case 1' */ 619 620 TREE_DFS_FOREACH_SELECTIVE(w, stree->root, &stack, link, proceed) { 621 tree_node_t *ntn, *ptn; 622 623 if (w == sptr) 624 proceed = false; 625 626 if (!(ntn = tree_node_new())) { 627 /* free incomplete ntree */ 628 if (ntree) 629 tree_del_recursive(ntree); 630 return -1; 631 } 632 ntn->ft = w->ft; 633 ntn->cb = w->cb; 634 ntn->ev = w->ev; 635 ntn->arg = w->arg; 636 637 if (TREE_PARENT(w, link)) { 638 assert(ntree); 639 ptn = tree_search(ntree, TREE_PARENT(w, link)->ev); 640 641 assert(ptn); 642 643 TREE_INSERT_CHILD(ptn, ntn, link); 644 if (!IS_FLOATING_EVENT(ntn)) 645 TREE_INSERT_CHILD(ptn, ntn, invk); 646 } else 647 /* this is the root node */ 648 ntree = ntn; 649 } 650 651 nstree = stree_create(nid, ntree); 652 if (nstree == NULL) { 653 TRACE_ERROR("Failed to create stree!\n"); 654 assert(nstree); 655 } 656 657 nstree->ref_cnt = 1; 658 659 kvs_insert(store, nid, nstree); 660 661 /* update reference counters */ 662 stree_dec_ref(store, stree); 663 664 *pstree = nstree; 665 } else /* if (!stree) */ 666 return -1; 667 668 TRACE_DBG("\\>>>>>> end <<<<<</\n"); 669 return 0; 670 } 671 /*----------------------------------------------------------------------------*/ 672 inline int 673 ModCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb) 674 { 675 assert(*pstree || cb); 676 677 /* Event ID starts from 1 */ 678 if (ev == 0) 679 return -1; 680 681 if (cb) 682 return RegCb(store, pstree, ev, cb); 683 else 684 return UnregCb(store, pstree, ev); 685 } 686 /*----------------------------------------------------------------------------*/ 687 void 688 GlobInitEvent(void) 689 { 690 int i; 691 692 if (!(g_evroot = tree_node_new())) { 693 perror("FATAL: Failed to allocate essentials for the system\n"); 694 exit(0); 695 } 696 g_evroot->ev = 0; 697 g_evroot->ft = NULL; 698 g_evroot->cb = NULL; 699 TREE_INIT_NODE(g_evroot, link); 700 TREE_INIT_NODE(g_evroot, invk); 701 /* add it to the dforest hash table */ 702 dforest_store(g_evroot->ev, g_evroot); 703 for (i = 0; i < NUM_BEV; i++) { 704 tree_node_t *ntn; 705 if (!(ntn = tree_node_new())) { 706 perror("FATAL: Failed to allocate essentials for the system\n"); 707 exit(0); 708 } 709 ntn->ev = 1 << i; 710 ntn->ft = NULL; 711 ntn->cb = NULL; 712 TREE_INIT_NODE(ntn, link); 713 TREE_INIT_NODE(ntn, invk); 714 TREE_INSERT_CHILD(g_evroot, ntn, link); 715 TREE_INSERT_CHILD(g_evroot, ntn, invk); 716 /* add it to the dforest hash table */ 717 dforest_store(ntn->ev, ntn); 718 } 719 g_evid = 1 << NUM_BEV; 720 } 721 /*----------------------------------------------------------------------------*/ 722 void 723 InitEvent(mtcp_manager_t mtcp) 724 { 725 mtcp->ev_store = kvs_create(KVS_BUCKETS, KVS_ENTRIES); 726 } 727 /*----------------------------------------------------------------------------*/ 728 event_t 729 mtcp_alloc_event(event_t event) 730 { 731 tree_node_t *parent, *new; 732 733 if (!(parent = dforest_search(event))) 734 // if (!(parent = tree_search(g_evroot, event))) 735 return MOS_NULL_EVENT; 736 737 if ((new = tree_node_new()) == NULL) 738 return MOS_NULL_EVENT; 739 740 new->ev = g_evid++; 741 new->ft = NULL; 742 new->cb = NULL; 743 new->arg.arg = NULL; 744 new->arg.len = 0; 745 746 TREE_INIT_NODE(new, link); 747 TREE_INIT_NODE(new, invk); 748 749 TREE_INSERT_CHILD(parent, new, link); 750 751 return new->ev; 752 } 753 /*----------------------------------------------------------------------------*/ 754 event_t 755 mtcp_define_event(event_t event, filter_t filter, struct filter_arg *arg) 756 { 757 #ifdef DBGMSG 758 __PREPARE_DBGLOGGING(); 759 #endif 760 tree_node_t *parent, *new, *walk; 761 762 /* FIX - TODO. This code needs to have proper locking since this 763 deals with global variables that are shared across multiple 764 threads 765 */ 766 767 if (!filter) 768 return MOS_NULL_EVENT; 769 if (arg && arg->len > 0 && !arg->arg) 770 return MOS_NULL_EVENT; 771 if (!(parent = dforest_search(event))) 772 // if (!(parent = tree_search(g_evroot, event))) 773 return MOS_NULL_EVENT; 774 for (walk = TREE_FIRSTBORN(parent, link); 775 walk; 776 walk = TREE_YOUNGER(walk, link)) 777 if ((walk->ft == filter) && 778 ((walk->arg.arg == NULL && (!arg || !arg->arg || arg->len <= 0)) || 779 (walk->arg.len == arg->len && 780 memcmp(walk->arg.arg, arg->arg, arg->len) == 0))) 781 return walk->ev; 782 783 if ((new = tree_node_new()) == NULL) { 784 errno = ENOMEM; 785 return MOS_NULL_EVENT; 786 } 787 new->ev = g_evid++; 788 new->ft = filter; 789 790 /* add it to the dforest hash table */ 791 dforest_store(new->ev, new); 792 793 if (arg && arg->arg && arg->len > 0) { 794 new->arg.arg = malloc(arg->len); 795 if (new->arg.arg == NULL) { 796 errno = ENOMEM; 797 g_evid--; 798 free(new); 799 return MOS_NULL_EVENT; 800 } 801 memcpy(new->arg.arg, arg->arg, arg->len); 802 new->arg.len = arg->len; 803 } 804 805 TRACE_DBG("parent: %ld\n", parent->ev); 806 TREE_INSERT_CHILD(parent, new, link); 807 TREE_INSERT_CHILD(parent, new, invk); 808 809 return new->ev; 810 } 811 /*----------------------------------------------------------------------------*/ 812 #define EVENT_STACK_PUSH(s, var) \ 813 (assert(&(s)->stack[sizeof((s)->stack) / sizeof(void *)] > (s)->sp), \ 814 *(++((s)->sp)) = (var)) 815 #define EVENT_STACK_POP(s) \ 816 (*(((s)->sp)--)) 817 #define EVENT_STACK_CLR(s) \ 818 ((s)->stack[0] = NULL, (s)->sp = (s)->stack) 819 820 inline int 821 RaiseEv(kvs_t *store, event_t event) 822 { 823 tree_node_t *ev = NULL; 824 #if 1 825 struct { 826 event_t ev; 827 uint64_t sid; 828 } instance; 829 830 instance.ev = event; 831 instance.sid = g_cur_stree->id; 832 833 _key_t key = XXH64(&instance, sizeof(instance), XXHSEED); 834 ev = (tree_node_t *)kvs_search(store, key); 835 #else 836 tree_node_t *walk; 837 for (walk = TREE_FIRSTBORN(g_cur_ev, link); 838 walk; 839 walk = TREE_YOUNGER(walk, link)) { 840 if (walk->ev == event && IS_FLOATING_EVENT(walk)) { 841 ev = walk; 842 break; 843 } 844 } 845 #endif 846 847 if (!ev) 848 return -1; 849 850 if (!ev->is_in_raiseq) { 851 ev->is_in_raiseq = true; 852 EVENT_STACK_PUSH(&g_evstack, ev); 853 } 854 855 return 0; 856 } 857 /*----------------------------------------------------------------------------*/ 858 int 859 mtcp_raise_event(mctx_t mctx, event_t event) 860 { 861 mtcp_manager_t mtcp = GetMTCPManager(mctx); 862 if (!mtcp) 863 return -1; 864 865 return RaiseEv(mtcp->ev_store, event); 866 } 867 /*----------------------------------------------------------------------------*/ 868 #define PRINT_EV(event, field) \ 869 printf("[%s](node: %p) ev: %ld, older: %ld, younger: %ld, child: %ld, " \ 870 "last: %ld, parent: %ld, ft: %p, cb: %p\n", \ 871 #field, (event), (event)->ev, \ 872 (event)->field.tn_older ? (event)->field.tn_older->ev : -1, \ 873 (event)->field.tn_younger ? (event)->field.tn_younger->ev : -1, \ 874 (event)->field.tn_first ? (event)->field.tn_first->ev : -1, \ 875 (event)->field.tn_last ? (event)->field.tn_last->ev : -1, \ 876 (event)->field.tn_parent ? (event)->field.tn_parent->ev : -1, \ 877 (event)->ft, (event)->cb) 878 inline void 879 HandleCb(mctx_t const mctx, int sock, int side, 880 stree_t * const stree, event_t events) 881 { 882 int i; 883 bool proceed; 884 tree_node_t *walk, *bev, *ev; 885 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) queue; 886 assert(stree); 887 888 g_cur_stree = stree; 889 890 for (i = 0; i < NUM_BEV; i++) { 891 if (!((1L << i) & events)) 892 continue; 893 894 if (!(bev = stree->bevs[i])) 895 continue; 896 897 EVENT_STACK_CLR(&g_evstack); 898 tree_node_t **ptr = g_evstack.sp; 899 TREE_DFS_FOREACH_SELECTIVE(walk, bev, &queue, invk, proceed) { 900 //PRINT_EV(walk, link); 901 //PRINT_EV(walk, invk); 902 903 g_cur_ev = walk; 904 905 if (walk != bev && walk->ft && 906 walk->ft(mctx, sock, side, walk->ev, &walk->arg) == false) { 907 ptr = g_evstack.sp; 908 proceed = false; 909 continue; 910 } 911 912 if (walk->cb) 913 walk->cb(mctx, sock, side, walk->ev, &walk->arg); 914 915 while (ptr < g_evstack.sp) { 916 ptr++; 917 TREE_INSERT_CHILD(walk, *ptr, invk); 918 } 919 } 920 g_cur_ev = NULL; 921 922 while ((ev = EVENT_STACK_POP(&g_evstack))) { 923 ev->is_in_raiseq = false; 924 TREE_DETACH(ev, invk); 925 } 926 927 if (!(events &= ~(1L << i))) { 928 g_cur_stree = NULL; 929 return; 930 } 931 } 932 933 g_cur_stree = NULL; 934 } 935 /*----------------------------------------------------------------------------*/ 936 int 937 mtcp_register_callback(mctx_t mctx, int sockid, event_t event, 938 int hook_point, callback_t callback) 939 { 940 socket_map_t socket; 941 stree_t **pstree; 942 943 if (event == MOS_NULL_EVENT || 944 (event < (1L << NUM_BEV) && !POWER_OF_TWO(event))) 945 return -1; 946 947 mtcp_manager_t mtcp = GetMTCPManager(mctx); 948 if (!mtcp) 949 return -1; 950 951 socket = &mtcp->msmap[sockid]; 952 953 if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) { 954 if (hook_point == MOS_NULL) 955 pstree = &socket->monitor_stream->stree_dontcare; 956 else if (hook_point == MOS_HK_RCV) 957 pstree = &socket->monitor_stream->stree_pre_rcv; 958 else if (hook_point == MOS_HK_SND) 959 pstree = &socket->monitor_stream->stree_post_snd; 960 else 961 return -1; 962 963 } else if (socket->socktype == MOS_SOCK_MONITOR_STREAM 964 || socket->socktype == MOS_SOCK_MONITOR_RAW) { 965 if (hook_point == MOS_NULL) 966 pstree = &socket->monitor_listener->stree_dontcare; 967 else if (hook_point == MOS_HK_RCV) 968 pstree = &socket->monitor_listener->stree_pre_rcv; 969 else if (hook_point == MOS_HK_SND) 970 pstree = &socket->monitor_listener->stree_post_snd; 971 else 972 return -1; 973 } else { 974 errno = EINVAL; 975 return -1; 976 } 977 978 return ModCb(mtcp->ev_store, pstree, event, callback); 979 } 980 /*----------------------------------------------------------------------------*/ 981 int 982 mtcp_unregister_callback(mctx_t mctx, int sockid, event_t event, 983 int hook_point) 984 { 985 return mtcp_register_callback(mctx, sockid, event, hook_point, NULL); 986 } 987 /*----------------------------------------------------------------------------*/ 988 inline void 989 HandleCallback(mtcp_manager_t mtcp, uint32_t hook, 990 socket_map_t socket, int side, struct pkt_ctx *pctx, event_t events) 991 { 992 struct sfbpf_program fcode; 993 994 if (!socket) 995 return; 996 997 if (!events) 998 return; 999 assert(events); 1000 1001 /* if client side monitoring is disabled, then skip */ 1002 if (side == MOS_SIDE_CLI && socket->monitor_stream->client_mon == 0) 1003 return; 1004 /* if server side monitoring is disabled, then skip */ 1005 else if (side == MOS_SIDE_SVR && socket->monitor_stream->server_mon == 0) 1006 return; 1007 1008 #define MSTRM(sock) (sock)->monitor_stream 1009 #define MLSNR(sock) (sock)->monitor_listener 1010 /* We use `?:` notation instead of `if/else` to make `evp` as const */ 1011 stree_t * const stree = 1012 (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) ? 1013 ((hook == MOS_HK_RCV) ? MSTRM(socket)->stree_pre_rcv : 1014 (hook == MOS_HK_SND) ? MSTRM(socket)->stree_post_snd : 1015 MSTRM(socket)->stree_dontcare) 1016 : (socket->socktype == MOS_SOCK_MONITOR_STREAM) 1017 || (socket->socktype == MOS_SOCK_MONITOR_RAW) ? 1018 ((hook == MOS_HK_RCV) ? MLSNR(socket)->stree_pre_rcv : 1019 (hook == MOS_HK_SND) ? MLSNR(socket)->stree_post_snd : 1020 MLSNR(socket)->stree_dontcare) : 1021 NULL; 1022 1023 if (!stree) 1024 return; 1025 1026 /* mtcp_bind_monitor_filter() 1027 - BPF filter is evaluated only for RAW socket and PASSIVE 1028 socket (= orphan filter) 1029 - stream syn filter is moved to and evaluated on socket creation 1030 */ 1031 if (socket->socktype == MOS_SOCK_MONITOR_STREAM) { 1032 fcode = MLSNR(socket)->stream_orphan_fcode; 1033 /* if not match with filter, return */ 1034 if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode, 1035 (uint8_t *)pctx->p.iph - sizeof(struct ethhdr), 1036 pctx->p.ip_len + sizeof(struct ethhdr)) == 0) 1037 return; 1038 } 1039 if (socket->socktype == MOS_SOCK_MONITOR_RAW) { 1040 fcode = MLSNR(socket)->raw_pkt_fcode; 1041 /* if not match with filter, return */ 1042 if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode, 1043 (uint8_t *)pctx->p.iph - sizeof(struct ethhdr), 1044 pctx->p.ip_len + sizeof(struct ethhdr)) == 0) 1045 return; 1046 } 1047 1048 mctx_t const mctx = g_ctx[mtcp->ctx->cpu]; 1049 mtcp->pctx = pctx; /* for mtcp_getcurpkt() */ 1050 1051 HandleCb(mctx, socket->id, side, stree, events); 1052 } 1053 /*----------------------------------------------------------------------------*/ 1054 #endif 1055