1 #ifdef NEWEV 2 3 /* ---------------------------------------------------------------------------- 4 * Please see 4 Jul. 2015 meeting slides for conceptual explanation. 5 * 'Subtree' has special meaning and its 'ID' is very important concept for 6 * understanding this code. 7 * 8 * Please contact Donghwi Kim for further questions, 9 * via e-mail: [email protected] 10 */ 11 #include "debug.h" 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <string.h> 15 #include <tree.h> 16 #include <key_value_store.h> 17 #include "scalable_event.h" 18 19 #include "mtcp.h" 20 #include "mos_api.h" 21 #include "mtcp_api.h" 22 23 #define NEWHASH 24 #ifdef NEWHASH 25 #include "mtcp_util.h" 26 #define XXHSEED 18446744073709551557ULL 27 #endif 28 /*----------------------------------------------------------------------------*/ 29 #define POWER_OF_TWO(x) (!((x - 1) & (x))) 30 31 #define MAX_EVENTS 30240 32 #define KVS_BUCKETS 1024 33 #define KVS_ENTRIES 102400 34 #define MAX_DEPTH MAX_EVENTS 35 #define NEWID(id, ev, cb) (id ^ hash64(ev, cb)) 36 #define IS_UDE(e) ((e)->ev >= (1 << NUM_BEV)) 37 #define IS_FLOATING_EVENT(e) (!(e)->ft && IS_UDE(e)) 38 39 struct _tree_node_t *g_evroot; 40 event_t g_evid; 41 42 static __thread stree_t *g_cur_stree = NULL; 43 static __thread tree_node_t *g_cur_ev = NULL; 44 45 static __thread struct { 46 struct _tree_node_t *stack[MAX_EVENTS]; 47 struct _tree_node_t **sp; 48 } g_evstack; 49 50 #define free(x) do { \ 51 assert(x); \ 52 free(x); \ 53 } while (0) 54 55 /*----------------------------------------------------------------------------*/ 56 struct devent { 57 event_t ev; /* event id = key */ 58 tree_node_t *ptr; /* ptr to the event node in the d-forest = value */ 59 struct devent *next; /* next pointer */ 60 }; 61 #define MAX_BUCKET 2048 62 #define HASHVAL(x) ((x) & (MAX_BUCKET-1)) 63 struct devent* g_dfTable[MAX_BUCKET] = {0}; /* d-forest hash table */ 64 /*----------------------------------------------------------------------------*/ 65 /* simple hash table implementation */ 66 static bool 67 dforest_store(event_t ev, tree_node_t* ptr) 68 { 69 struct devent *de, *p; 70 int idx; 71 72 if ((de = malloc(sizeof(*de))) == NULL) { 73 assert(0); /* should not happen! */ 74 return false; 75 } 76 de->ev = ev; 77 de->ptr = ptr; 78 de->next = NULL; 79 80 idx = HASHVAL(ev); 81 if (g_dfTable[idx] == NULL) { 82 g_dfTable[idx] = de; 83 } 84 else { 85 p = g_dfTable[idx]; 86 while (p->next) 87 p = p->next; 88 p->next = de; 89 } 90 return true; 91 } 92 /*----------------------------------------------------------------------------*/ 93 static tree_node_t * 94 dforest_search(event_t ev) 95 { 96 int idx; 97 struct devent *p; 98 99 idx = HASHVAL(ev); 100 if (g_dfTable[idx] == NULL) 101 return NULL; 102 103 for (p = g_dfTable[idx]; 104 p != NULL; 105 p = p->next) { 106 if (p->ev == ev) 107 return p->ptr; 108 } 109 return NULL; 110 } 111 /*----------------------------------------------------------------------------*/ 112 /** Create a new tree node 113 * @return the new tree node 114 * */ 115 inline tree_node_t * 116 tree_node_new() 117 { 118 return calloc(1, sizeof(tree_node_t)); 119 } 120 /*----------------------------------------------------------------------------*/ 121 /** Delete a tree node */ 122 inline void 123 tree_node_del(tree_node_t *node) 124 { 125 free(node); 126 } 127 /*----------------------------------------------------------------------------*/ 128 /** Delete a tree node and all of its descendants 129 * @param [in] node: target node 130 * */ 131 inline void 132 tree_del_recursive(tree_node_t *node) 133 { 134 struct _tree_node_t *walk = NULL, *del = NULL; 135 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack; 136 137 TREE_DFS_FOREACH(walk, node, &stack, link) { 138 if (del) 139 tree_node_del(del); 140 del = walk; 141 } 142 if (del) 143 tree_node_del(del); 144 } 145 /*----------------------------------------------------------------------------*/ 146 /** Search a tree node by event ID 147 * @param [in] root: root node 148 * @param [in] ev: event ID 149 * @return target node 150 * 151 * The search is in DFS mannar 152 * */ 153 inline tree_node_t * 154 tree_search(tree_node_t *root, event_t ev) 155 { 156 struct _tree_node_t *walk = NULL; 157 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack; 158 159 TREE_DFS_FOREACH(walk, root, &stack, link) 160 if (walk->ev == ev) { 161 return walk; 162 } 163 return NULL; 164 } 165 /*----------------------------------------------------------------------------*/ 166 /** Create a new subtree 167 * @param [in] id: subtree ID 168 * @param [in] root: root of the subtree 169 * @return new subtree 170 * 171 * Allocate and initialize subtree structure, stree_t: 172 * 1. Allocate the structure. 173 * 2. Initialize reference counter and fill ID field. 174 * 3. Set 'root' pointer which points the root node of a tree. 175 * The tree should contain all required tree nodes. 176 * 4. Set built-in event tree node pointers. 177 * */ 178 inline stree_t * 179 stree_create(uint64_t id, tree_node_t *root) 180 { 181 tree_node_t *bev; 182 stree_t *nstree = malloc(sizeof(stree_t)); 183 if (!nstree) 184 return NULL; 185 186 nstree->ref_cnt = 0; 187 nstree->id = id; 188 nstree->root = root; 189 memset(nstree->bevs, 0, sizeof(nstree->bevs)); 190 191 for (bev = TREE_FIRSTBORN(root, link); bev; bev = TREE_YOUNGER(bev, link)) { 192 int idx = 0; 193 while (bev->ev >> (idx + 1)) 194 idx++; 195 nstree->bevs[idx] = bev; 196 } 197 198 return nstree; 199 } 200 /*----------------------------------------------------------------------------*/ 201 /** Destroy the subtree 202 * @param [in] stree: the target subtree */ 203 inline void 204 stree_destroy(kvs_t *store, stree_t *stree) 205 { 206 tree_del_recursive(stree->root); 207 free(stree); 208 } 209 /*----------------------------------------------------------------------------*/ 210 /** Increase reference counter of the subtree 211 * @param [in] stree: the target subtree */ 212 inline void 213 stree_inc_ref(stree_t *stree) 214 { 215 stree->ref_cnt++; 216 } 217 /*----------------------------------------------------------------------------*/ 218 /** Decrease reference counter of the subtree, and remove it from the key- 219 * value-store if needed 220 * @param [in] store: the key-value-store 221 * @param [in] stree: the target subtree 222 * 223 * In addition to decreasing the reference counter, remove the subtree from 224 * key-value-store if decreased reference counter is zero. 225 * */ 226 inline void 227 stree_dec_ref(kvs_t *store, stree_t *stree) 228 { 229 if (stree && !--stree->ref_cnt) { 230 kvs_remove(store, stree->id); 231 stree_destroy(store, stree); 232 } 233 } 234 /*----------------------------------------------------------------------------*/ 235 /** Hash the event ID and callback pair 236 * @param [in] ev: the event ID 237 * @param [in] cb: the callback function pointer 238 * @return hash value 239 * 240 * TODO: Replace 32-bit Jenkins hash with better one. 241 * */ 242 inline static uint64_t 243 hash64(event_t ev, callback_t cb) 244 { 245 #ifdef NEWHASH 246 struct { 247 event_t ev; 248 callback_t cb; 249 } instance; 250 251 instance.ev = ev; 252 instance.cb = cb; 253 254 return XXH64(&instance, sizeof(event_t) + sizeof(callback_t), XXHSEED); 255 #else 256 uint64_t hash = 0; 257 int i; 258 259 for (i = 0; i < sizeof(ev); ++i) { 260 hash += ((uint8_t *)&ev)[i]; 261 hash += (hash << 10); 262 hash ^= (hash >> 6); 263 } 264 for (i = 0; i < sizeof(cb); ++i) { 265 hash += ((uint8_t *)&cb)[i]; 266 hash += (hash << 10); 267 hash ^= (hash >> 6); 268 } 269 hash += (hash << 3); 270 hash ^= (hash >> 11); 271 hash += (hash << 15); 272 273 return hash; 274 #endif 275 } 276 277 /*-------------------------------------------------------------------------*/ 278 /* Create a new tree that consists of a tree node (w) with a new 279 * callback and take a snapshot of the subtree from w all the way up to 280 * the root node by traversing the ancestors of w. 281 */ 282 /*-------------------------------------------------------------------------*/ 283 static tree_node_t* 284 create_spine(tree_node_t* w, callback_t cb) 285 { 286 tree_node_t *ntn, *ntree = NULL; 287 288 if ((ntn = tree_node_new()) == NULL) 289 return NULL; 290 291 /* the leaf event node with callback function */ 292 ntn->ft = w->ft; 293 ntn->cb = cb; 294 ntn->ev = w->ev; 295 ntn->arg = w->arg; 296 ntree = ntn; /* we start from the leaf */ 297 298 do { 299 if (!(ntn = tree_node_new())) { 300 if (ntree) 301 tree_del_recursive(ntree); 302 return NULL; 303 } 304 ntn->ft = w->ft; 305 ntn->ev = w->ev; 306 ntn->arg = w->arg; 307 308 TREE_INSERT_CHILD(ntn, ntree, link); 309 if (!IS_FLOATING_EVENT(ntree)) 310 TREE_INSERT_CHILD(ntn, ntree, invk); 311 312 /* ntn becomes the new tree */ 313 ntree = ntn; 314 } while ((w = TREE_PARENT(w, link))); 315 316 return ntree; 317 } 318 #if 0 319 /*----------------------------------------------------------------------------*/ 320 static stree_t * 321 clone_stree(stree_t * stree) 322 { 323 stree_t *newstree, *walk; 324 tree_node_t *newtree; 325 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack; 326 struct _tree_node_t *walk = NULL, *del = NULL; 327 328 /* clone the tree */ 329 TREE_DFS_FOREACH(walk, stree->root, &stack, link) { 330 new_tree_node_new() 331 if (del) 332 tree_node_del(del); 333 del = walk; 334 } 335 if (del) 336 tree_node_del(del); 337 338 339 TREE_DFS_FOREACH(w, stree->root, &stack, link) { 340 tree_node_t *ntn, *ptn = NULL; 341 } 342 } 343 #endif 344 /*----------------------------------------------------------------------------*/ 345 /** Register a callback 346 * @param [in] store: the key-value-store for subtrees 347 * @param [in, out] pstree: the target subtree pointer 348 * @param [in] ev: the target event 349 * @param [in] cb: the callback to be registered 350 * @return 0 for success, -1 for failure 351 * 352 * For callback registeration, there can be some cases. 353 * 1. A new tree what we want is already there, so we can use it by simply 354 * updating reference counters. 355 * 2. A new tree what we want is not there, so we are going to create a 356 * new subtree. The new subtree will not only have new callback, but also 357 * inherit previous subtree. 358 * 2.1. The reference counter of previous subtree is 1, so we can avoid 359 * new tree allocation by reusing tree nodes of previous subtree. 360 * 2.2. The reference counter of previous subtree is not 1, so we should 361 * copy previous subtree's tree nodes. 362 * 3. Previously, there was no subtree (in other words, this is the first 363 * callback registeration) so we are going to make a new subtree which 364 * has only one callback 365 * */ 366 inline int 367 RegCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb) 368 { 369 #ifdef DBGMSG 370 __PREPARE_DBGLOGGING(); 371 #endif 372 TRACE_DBG("/>>>>>> start <<<<<<\\\n"); 373 stree_t *stree, *nstree; 374 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack; 375 tree_node_t *w, *w2, *ntree; 376 uint64_t id = 0, nid; 377 378 if ((stree = *pstree)) { 379 /* XXX: tree_search() is heavy operation, but it is used for only error 380 * checking. Is there any way to avoid this? 381 * --> We can have a key-value-store for this, but is it too much? */ 382 #if 0 383 if (tree_search(stree->root, ev)) 384 /* Illegal trial of overwriting a previously registerd callback */ 385 return -1; 386 #endif 387 388 id = stree->id; 389 } 390 391 /* calculate new subtree ID */ 392 nid = NEWID(id, ev, cb); 393 394 TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n", nid, id, ev, cb); 395 396 if ((nstree = kvs_search(store, nid))) { /* case 1. */ 397 TRACE_DBG("Case 1\n"); 398 /* update reference counters */ 399 if (stree) 400 stree_dec_ref(store, stree); 401 402 nstree->ref_cnt++; 403 *pstree = nstree; 404 #if 0 405 fprintf(stderr, "sizeof(tcp_stream)=%ld sizeof(tcp_send_vars)=%ld" 406 "sizeof(tcp_recv_vars)=%ld\n", 407 sizeof(tcp_stream), 408 sizeof(struct tcp_send_vars), sizeof(struct tcp_recv_vars)); 409 #endif 410 return 0; 411 } 412 413 /* find the event node with ev and take a snapshot of the ancestors */ 414 w = dforest_search(ev); 415 // w = tree_search(g_evroot, ev); 416 assert(w); 417 if ((ntree = create_spine(w, cb)) == NULL) 418 return (-1); 419 420 if (stree) { /* case 2. */ 421 TRACE_DBG("Case 2\n"); 422 tree_node_t *sptr; 423 424 /* attach flesh: Inherit previous subtree */ 425 sptr = ntree; 426 427 #if 0 428 /* new code */ 429 if (stree->ref_cnt > 1) { 430 /* take a snapshot of the entire forest */ 431 curf = clone_tree(stree); 432 curf->root->ref_cnt = 1; 433 stree->ref_cnt--; 434 } else { 435 assert(stree->ref_cnt == 1); 436 curf = stree; 437 /* remove the current id from hash table */ 438 kvs_remove(store, stree->id); 439 } 440 441 /* merge sptr into curf->root (existing tree) */ 442 #endif 443 444 if (stree->ref_cnt == 1) { 445 /* case 2.1. */ 446 TRACE_DBG("Case 2.1\n"); 447 w = stree->root; 448 while (w) { 449 tree_node_t *next_walk = NULL, *next_walk2 = NULL, 450 *next_sptr = TREE_FIRSTBORN(sptr, link); 451 452 for (w2 = TREE_FIRSTBORN(w, link); 453 w2 != NULL; 454 w2 = next_walk2) { 455 next_walk2 = TREE_YOUNGER(w2, link); 456 if (sptr && w2->ev == next_sptr->ev) { 457 assert(next_walk == NULL); 458 next_walk = w2; 459 if (next_sptr->cb != next_walk->cb) 460 next_sptr->cb = next_walk->cb; 461 } 462 else { 463 TREE_DETACH(w2, link); 464 if (!IS_FLOATING_EVENT(w2)) 465 TREE_DETACH(w2, invk); 466 if (next_walk) { 467 TREE_INSERT_CHILD(sptr, w2, link); 468 if (!IS_FLOATING_EVENT(w2)) 469 TREE_INSERT_CHILD(sptr, w2, invk); 470 } else { 471 tree_node_t *f = TREE_FIRSTBORN(sptr, link); 472 TREE_INSERT_OLDER(f, w2, link); 473 if (!IS_FLOATING_EVENT(w2)) { 474 if ((f = TREE_FIRSTBORN(sptr, invk))) 475 TREE_INSERT_OLDER(f, w2, invk); 476 else 477 TREE_INSERT_CHILD(sptr, w2, invk); 478 } 479 } 480 } 481 } 482 w = next_walk; 483 sptr = next_sptr; 484 next_walk = NULL; 485 } 486 } else { // stree->ref_count != 1 487 /* case 2.2. */ 488 TRACE_DBG("Case 2.2\n"); 489 TREE_DFS_FOREACH(w, stree->root, &stack, link) { 490 tree_node_t *ntn, *ptn = NULL; 491 492 if (sptr && w->ev == sptr->ev) { 493 if (w->cb != sptr->cb) 494 sptr->cb = w->cb; 495 sptr = TREE_FIRSTBORN(sptr, link); 496 continue; 497 } 498 499 if (!(ntn = tree_node_new())) { 500 if (ntree) 501 tree_del_recursive(ntree); 502 return -1; 503 } 504 ntn->ft = w->ft; 505 ntn->cb = w->cb; 506 ntn->ev = w->ev; 507 ntn->arg = w->arg; 508 509 if (TREE_PARENT(w, link)) { 510 ptn = tree_search(ntree, TREE_PARENT(w, link)->ev); 511 assert(ptn); 512 TREE_INSERT_CHILD(ptn, ntn, link); 513 if (!IS_FLOATING_EVENT(ntn)) 514 TREE_INSERT_CHILD(ptn, ntn, invk); 515 } else { 516 if (ntree) 517 tree_del_recursive(ntree); 518 free(ntn); 519 TRACE_ERROR("Can't find parent\n"); 520 assert(0); 521 exit(EXIT_FAILURE); 522 } 523 } 524 } 525 } 526 527 /* case 3 if (stree == NULL) */ 528 nstree = stree_create(nid, ntree); 529 if (nstree == NULL) { 530 TRACE_ERROR("Failed to create stree!\n"); 531 assert(nstree); 532 exit(EXIT_FAILURE); 533 } 534 nstree->ref_cnt = 1; 535 kvs_insert(store, nid, nstree); 536 /* update reference counters */ 537 if (stree) 538 stree_dec_ref(store, stree); 539 *pstree = nstree; 540 541 TRACE_DBG("\\>>>>>> end <<<<<</\n"); 542 return 0; 543 } 544 /*----------------------------------------------------------------------------*/ 545 /** Unregister a callback 546 * @param [in] store: the key-value-store for subtrees 547 * @param [in, out] pstree: the target subtree pointer 548 * @param [in] ev: the target event 549 * @return 0 for success, -1 for failure 550 * 551 * For callback unregisteration, there can be some cases. 552 * 1. No registered callback will be left after this unregistration. 553 * 2. A new tree what we want is already there, so we can use it by simply 554 * updating reference counters. 555 * 3. A new tree what we want is not there, so we create a new subtree by 556 * copying a part of previous subtree 557 * */ 558 inline int 559 UnregCb(kvs_t *store, stree_t **pstree, event_t ev) 560 { 561 #ifdef DBGMSG 562 __PREPARE_DBGLOGGING(); 563 #endif 564 TRACE_DBG("/>>>>>> start <<<<<<\\\n"); 565 stree_t *stree, *nstree; 566 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) stack; 567 tree_node_t *w, *target; 568 uint64_t id = 0, nid; 569 callback_t cb; 570 571 if (!(stree = *pstree) || !(target = tree_search(stree->root, ev)) || 572 !(cb = target->cb)) 573 /* Illegal trial of unregistering a callback which is never registered 574 * before. */ 575 return -1; 576 577 id = stree->id; 578 579 /* Calculate new subtree ID */ 580 nid = NEWID(id, ev, cb); 581 582 TRACE_DBG("nid: 0x%lX, id: 0x%lx, ev: %ld, cb: %p\n", 583 nid, id, ev, cb); 584 585 if (nid == 0) { 586 /* case 1. */ 587 TRACE_DBG("Case 1\n"); 588 /* update reference counters */ 589 if (stree) 590 stree_dec_ref(store, stree); 591 592 *pstree = NULL; 593 } else if ((nstree = kvs_search(store, nid))) { 594 /* case 2. */ 595 TRACE_DBG("Case 2\n"); 596 /* update reference counters */ 597 if (stree) 598 stree_dec_ref(store, stree); 599 600 nstree->ref_cnt++; 601 *pstree = nstree; 602 } else if (stree) { 603 /* case 3. */ 604 TRACE_DBG("Case 3\n"); 605 bool proceed; 606 tree_node_t *ntree = NULL, *sptr; 607 608 sptr = TREE_PARENT(target, link); 609 #define TREE_IS_ONLY_CHILD(root, x, field) \ 610 (TREE_PARENT((x), field) ? \ 611 TREE_PARENT((x), field)->field.tn_first == (x) && \ 612 TREE_PARENT((x), field)->field.tn_last == (x) : \ 613 (x) == (root) && (x)->field.tn_younger == NULL) 614 while (sptr && !sptr->cb && TREE_IS_ONLY_CHILD(stree->root, sptr, link)) 615 sptr = TREE_PARENT(sptr, link); 616 617 assert(sptr); 618 /* 'sptr == NULL' means the tree will lose the only callback. 619 * This case should be handled by 'Case 1' */ 620 621 TREE_DFS_FOREACH_SELECTIVE(w, stree->root, &stack, link, proceed) { 622 tree_node_t *ntn, *ptn; 623 624 if (w == sptr) 625 proceed = false; 626 627 if (!(ntn = tree_node_new())) { 628 /* free incomplete ntree */ 629 if (ntree) 630 tree_del_recursive(ntree); 631 return -1; 632 } 633 ntn->ft = w->ft; 634 ntn->cb = w->cb; 635 ntn->ev = w->ev; 636 ntn->arg = w->arg; 637 638 if (TREE_PARENT(w, link)) { 639 assert(ntree); 640 ptn = tree_search(ntree, TREE_PARENT(w, link)->ev); 641 642 assert(ptn); 643 644 TREE_INSERT_CHILD(ptn, ntn, link); 645 if (!IS_FLOATING_EVENT(ntn)) 646 TREE_INSERT_CHILD(ptn, ntn, invk); 647 } else 648 /* this is the root node */ 649 ntree = ntn; 650 } 651 652 nstree = stree_create(nid, ntree); 653 if (nstree == NULL) { 654 TRACE_ERROR("Failed to create stree!\n"); 655 assert(nstree); 656 exit(EXIT_FAILURE); 657 } 658 659 nstree->ref_cnt = 1; 660 661 kvs_insert(store, nid, nstree); 662 663 /* update reference counters */ 664 stree_dec_ref(store, stree); 665 666 *pstree = nstree; 667 } else /* if (!stree) */ 668 return -1; 669 670 TRACE_DBG("\\>>>>>> end <<<<<</\n"); 671 return 0; 672 } 673 /*----------------------------------------------------------------------------*/ 674 inline int 675 ModCb(kvs_t *store, stree_t **pstree, event_t ev, callback_t cb) 676 { 677 assert(*pstree || cb); 678 679 /* Event ID starts from 1 */ 680 if (ev == 0) 681 return -1; 682 683 if (cb) 684 return RegCb(store, pstree, ev, cb); 685 else 686 return UnregCb(store, pstree, ev); 687 } 688 /*----------------------------------------------------------------------------*/ 689 void 690 GlobInitEvent(void) 691 { 692 int i; 693 694 if (!(g_evroot = tree_node_new())) { 695 perror("FATAL: Failed to allocate essentials for the system\n"); 696 exit(0); 697 } 698 g_evroot->ev = 0; 699 g_evroot->ft = NULL; 700 g_evroot->cb = NULL; 701 TREE_INIT_NODE(g_evroot, link); 702 TREE_INIT_NODE(g_evroot, invk); 703 /* add it to the dforest hash table */ 704 dforest_store(g_evroot->ev, g_evroot); 705 for (i = 0; i < NUM_BEV; i++) { 706 tree_node_t *ntn; 707 if (!(ntn = tree_node_new())) { 708 perror("FATAL: Failed to allocate essentials for the system\n"); 709 exit(0); 710 } 711 ntn->ev = 1 << i; 712 ntn->ft = NULL; 713 ntn->cb = NULL; 714 TREE_INIT_NODE(ntn, link); 715 TREE_INIT_NODE(ntn, invk); 716 TREE_INSERT_CHILD(g_evroot, ntn, link); 717 TREE_INSERT_CHILD(g_evroot, ntn, invk); 718 /* add it to the dforest hash table */ 719 dforest_store(ntn->ev, ntn); 720 } 721 g_evid = 1 << NUM_BEV; 722 } 723 /*----------------------------------------------------------------------------*/ 724 void 725 InitEvent(mtcp_manager_t mtcp) 726 { 727 mtcp->ev_store = kvs_create(KVS_BUCKETS, KVS_ENTRIES); 728 } 729 /*----------------------------------------------------------------------------*/ 730 event_t 731 mtcp_alloc_event(event_t event) 732 { 733 tree_node_t *parent, *new; 734 735 if (!(parent = dforest_search(event))) 736 // if (!(parent = tree_search(g_evroot, event))) 737 return MOS_NULL_EVENT; 738 739 if ((new = tree_node_new()) == NULL) 740 return MOS_NULL_EVENT; 741 742 new->ev = g_evid++; 743 new->ft = NULL; 744 new->cb = NULL; 745 new->arg.arg = NULL; 746 new->arg.len = 0; 747 748 TREE_INIT_NODE(new, link); 749 TREE_INIT_NODE(new, invk); 750 751 TREE_INSERT_CHILD(parent, new, link); 752 753 return new->ev; 754 } 755 /*----------------------------------------------------------------------------*/ 756 event_t 757 mtcp_define_event(event_t event, filter_t filter, struct filter_arg *arg) 758 { 759 #ifdef DBGMSG 760 __PREPARE_DBGLOGGING(); 761 #endif 762 tree_node_t *parent, *new, *walk; 763 764 /* FIX - TODO. This code needs to have proper locking since this 765 deals with global variables that are shared across multiple 766 threads 767 */ 768 769 if (!filter) 770 return MOS_NULL_EVENT; 771 if (arg && arg->len > 0 && !arg->arg) 772 return MOS_NULL_EVENT; 773 if (!(parent = dforest_search(event))) 774 // if (!(parent = tree_search(g_evroot, event))) 775 return MOS_NULL_EVENT; 776 for (walk = TREE_FIRSTBORN(parent, link); 777 walk; 778 walk = TREE_YOUNGER(walk, link)) 779 if ((walk->ft == filter) && 780 ((walk->arg.arg == NULL && (!arg || !arg->arg || arg->len <= 0)) || 781 (walk->arg.len == arg->len && 782 memcmp(walk->arg.arg, arg->arg, arg->len) == 0))) 783 return walk->ev; 784 785 if ((new = tree_node_new()) == NULL) { 786 errno = ENOMEM; 787 return MOS_NULL_EVENT; 788 } 789 new->ev = g_evid++; 790 new->ft = filter; 791 792 /* add it to the dforest hash table */ 793 dforest_store(new->ev, new); 794 795 if (arg && arg->arg && arg->len > 0) { 796 new->arg.arg = malloc(arg->len); 797 if (new->arg.arg == NULL) { 798 errno = ENOMEM; 799 g_evid--; 800 free(new); 801 return MOS_NULL_EVENT; 802 } 803 memcpy(new->arg.arg, arg->arg, arg->len); 804 new->arg.len = arg->len; 805 } 806 807 TRACE_DBG("parent: %ld\n", parent->ev); 808 TREE_INSERT_CHILD(parent, new, link); 809 TREE_INSERT_CHILD(parent, new, invk); 810 811 return new->ev; 812 } 813 /*----------------------------------------------------------------------------*/ 814 #define EVENT_STACK_PUSH(s, var) \ 815 (assert(&(s)->stack[sizeof((s)->stack) / sizeof(void *)] > (s)->sp), \ 816 *(++((s)->sp)) = (var)) 817 #define EVENT_STACK_POP(s) \ 818 (*(((s)->sp)--)) 819 #define EVENT_STACK_CLR(s) \ 820 ((s)->stack[0] = NULL, (s)->sp = (s)->stack) 821 822 inline int 823 RaiseEv(kvs_t *store, event_t event) 824 { 825 tree_node_t *ev = NULL; 826 #if 1 827 struct { 828 event_t ev; 829 uint64_t sid; 830 } instance; 831 832 instance.ev = event; 833 instance.sid = g_cur_stree->id; 834 835 _key_t key = XXH64(&instance, sizeof(instance), XXHSEED); 836 ev = (tree_node_t *)kvs_search(store, key); 837 #else 838 tree_node_t *walk; 839 for (walk = TREE_FIRSTBORN(g_cur_ev, link); 840 walk; 841 walk = TREE_YOUNGER(walk, link)) { 842 if (walk->ev == event && IS_FLOATING_EVENT(walk)) { 843 ev = walk; 844 break; 845 } 846 } 847 #endif 848 849 if (!ev) 850 return -1; 851 852 if (!ev->is_in_raiseq) { 853 ev->is_in_raiseq = true; 854 EVENT_STACK_PUSH(&g_evstack, ev); 855 } 856 857 return 0; 858 } 859 /*----------------------------------------------------------------------------*/ 860 int 861 mtcp_raise_event(mctx_t mctx, event_t event) 862 { 863 mtcp_manager_t mtcp = GetMTCPManager(mctx); 864 if (!mtcp) 865 return -1; 866 867 return RaiseEv(mtcp->ev_store, event); 868 } 869 /*----------------------------------------------------------------------------*/ 870 #define PRINT_EV(event, field) \ 871 printf("[%s](node: %p) ev: %ld, older: %ld, younger: %ld, child: %ld, " \ 872 "last: %ld, parent: %ld, ft: %p, cb: %p\n", \ 873 #field, (event), (event)->ev, \ 874 (event)->field.tn_older ? (event)->field.tn_older->ev : -1, \ 875 (event)->field.tn_younger ? (event)->field.tn_younger->ev : -1, \ 876 (event)->field.tn_first ? (event)->field.tn_first->ev : -1, \ 877 (event)->field.tn_last ? (event)->field.tn_last->ev : -1, \ 878 (event)->field.tn_parent ? (event)->field.tn_parent->ev : -1, \ 879 (event)->ft, (event)->cb) 880 inline void 881 HandleCb(mctx_t const mctx, int sock, int side, 882 stree_t * const stree, event_t events) 883 { 884 int i; 885 bool proceed; 886 tree_node_t *walk, *bev, *ev; 887 TREE_SCRATCH(_tree_node_t, MAX_DEPTH) queue; 888 assert(stree); 889 890 g_cur_stree = stree; 891 892 for (i = 0; i < NUM_BEV; i++) { 893 if (!((1L << i) & events)) 894 continue; 895 896 if (!(bev = stree->bevs[i])) 897 continue; 898 899 EVENT_STACK_CLR(&g_evstack); 900 tree_node_t **ptr = g_evstack.sp; 901 TREE_DFS_FOREACH_SELECTIVE(walk, bev, &queue, invk, proceed) { 902 //PRINT_EV(walk, link); 903 //PRINT_EV(walk, invk); 904 905 g_cur_ev = walk; 906 907 if (walk != bev && walk->ft && 908 walk->ft(mctx, sock, side, walk->ev, &walk->arg) == false) { 909 ptr = g_evstack.sp; 910 proceed = false; 911 continue; 912 } 913 914 if (walk->cb) 915 walk->cb(mctx, sock, side, walk->ev, &walk->arg); 916 917 while (ptr < g_evstack.sp) { 918 ptr++; 919 TREE_INSERT_CHILD(walk, *ptr, invk); 920 } 921 } 922 g_cur_ev = NULL; 923 924 while ((ev = EVENT_STACK_POP(&g_evstack))) { 925 ev->is_in_raiseq = false; 926 TREE_DETACH(ev, invk); 927 } 928 929 if (!(events &= ~(1L << i))) { 930 g_cur_stree = NULL; 931 return; 932 } 933 } 934 935 g_cur_stree = NULL; 936 } 937 /*----------------------------------------------------------------------------*/ 938 int 939 mtcp_register_callback(mctx_t mctx, int sockid, event_t event, 940 int hook_point, callback_t callback) 941 { 942 socket_map_t socket; 943 stree_t **pstree; 944 event_t root_event = event; 945 tree_node_t *ev_node; 946 947 /* (1) check if event variable is NULL 948 * (2) any event within the built-in event range should be 2^N 949 */ 950 if (event == MOS_NULL_EVENT || 951 (event < (1L << NUM_BEV) && !POWER_OF_TWO(event))) 952 return -1; 953 954 /* for UDEs, retrieve the root built-in event */ 955 if (root_event >= (1L << NUM_BEV)) { 956 if ((ev_node = dforest_search(event)) == NULL) 957 return -1; 958 root_event = (ev_node)->link.tn_parent->ev; 959 /* the root built-in event should not be NULL */ 960 if (root_event == MOS_NULL_EVENT) 961 return -1; 962 } 963 964 /* check if there is any invalid event-hook mapping */ 965 if (hook_point == MOS_NULL) { 966 if (root_event & (MOS_ON_CONN_START | MOS_ON_REXMIT | 967 MOS_ON_TCP_STATE_CHANGE | MOS_ON_CONN_END)) { 968 errno = EINVAL; 969 return -1; 970 } 971 } else if (hook_point == MOS_HK_RCV || hook_point == MOS_HK_SND) { 972 if (root_event & (MOS_ON_CONN_NEW_DATA | MOS_ON_ORPHAN | MOS_ON_ERROR)) { 973 errno = EINVAL; 974 return -1; 975 } 976 } else { 977 /* invalid hook point */ 978 errno = EINVAL; 979 return -1; 980 } 981 982 mtcp_manager_t mtcp = GetMTCPManager(mctx); 983 if (!mtcp) 984 return -1; 985 986 socket = &mtcp->msmap[sockid]; 987 988 if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) { 989 if (hook_point == MOS_NULL) 990 pstree = &socket->monitor_stream->stree_dontcare; 991 else if (hook_point == MOS_HK_RCV) 992 pstree = &socket->monitor_stream->stree_pre_rcv; 993 else if (hook_point == MOS_HK_SND) 994 pstree = &socket->monitor_stream->stree_post_snd; 995 else 996 return -1; 997 998 } else if (socket->socktype == MOS_SOCK_MONITOR_STREAM 999 || socket->socktype == MOS_SOCK_MONITOR_RAW) { 1000 if (hook_point == MOS_NULL) 1001 pstree = &socket->monitor_listener->stree_dontcare; 1002 else if (hook_point == MOS_HK_RCV) 1003 pstree = &socket->monitor_listener->stree_pre_rcv; 1004 else if (hook_point == MOS_HK_SND) 1005 pstree = &socket->monitor_listener->stree_post_snd; 1006 else 1007 return -1; 1008 } else { 1009 errno = EINVAL; 1010 return -1; 1011 } 1012 1013 return ModCb(mtcp->ev_store, pstree, event, callback); 1014 } 1015 /*----------------------------------------------------------------------------*/ 1016 int 1017 mtcp_unregister_callback(mctx_t mctx, int sockid, event_t event, 1018 int hook_point) 1019 { 1020 return mtcp_register_callback(mctx, sockid, event, hook_point, NULL); 1021 } 1022 /*----------------------------------------------------------------------------*/ 1023 inline void 1024 HandleCallback(mtcp_manager_t mtcp, uint32_t hook, 1025 socket_map_t socket, int side, struct pkt_ctx *pctx, event_t events) 1026 { 1027 struct sfbpf_program fcode; 1028 1029 if (!socket) 1030 return; 1031 1032 if (!events) 1033 return; 1034 assert(events); 1035 1036 /* if client side monitoring is disabled, then skip */ 1037 if (side == MOS_SIDE_CLI && socket->monitor_stream->client_mon == 0) 1038 return; 1039 /* if server side monitoring is disabled, then skip */ 1040 else if (side == MOS_SIDE_SVR && socket->monitor_stream->server_mon == 0) 1041 return; 1042 1043 #define MSTRM(sock) (sock)->monitor_stream 1044 #define MLSNR(sock) (sock)->monitor_listener 1045 /* We use `?:` notation instead of `if/else` to make `evp` as const */ 1046 stree_t * const stree = 1047 (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) ? 1048 ((hook == MOS_HK_RCV) ? MSTRM(socket)->stree_pre_rcv : 1049 (hook == MOS_HK_SND) ? MSTRM(socket)->stree_post_snd : 1050 MSTRM(socket)->stree_dontcare) 1051 : (socket->socktype == MOS_SOCK_MONITOR_STREAM) 1052 || (socket->socktype == MOS_SOCK_MONITOR_RAW) ? 1053 ((hook == MOS_HK_RCV) ? MLSNR(socket)->stree_pre_rcv : 1054 (hook == MOS_HK_SND) ? MLSNR(socket)->stree_post_snd : 1055 MLSNR(socket)->stree_dontcare) : 1056 NULL; 1057 1058 if (!stree) 1059 return; 1060 1061 /* mtcp_bind_monitor_filter() 1062 - BPF filter is evaluated only for RAW socket and PASSIVE 1063 socket (= orphan filter) 1064 - stream syn filter is moved to and evaluated on socket creation 1065 */ 1066 if (socket->socktype == MOS_SOCK_MONITOR_STREAM) { 1067 fcode = MLSNR(socket)->stream_orphan_fcode; 1068 /* if not match with filter, return */ 1069 if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode, 1070 (uint8_t *)pctx->p.iph - sizeof(struct ethhdr), 1071 pctx->p.ip_len + sizeof(struct ethhdr)) == 0) 1072 return; 1073 } 1074 if (socket->socktype == MOS_SOCK_MONITOR_RAW) { 1075 fcode = MLSNR(socket)->raw_pkt_fcode; 1076 /* if not match with filter, return */ 1077 if (ISSET_BPFFILTER(fcode) && pctx && EVAL_BPFFILTER(fcode, 1078 (uint8_t *)pctx->p.iph - sizeof(struct ethhdr), 1079 pctx->p.ip_len + sizeof(struct ethhdr)) == 0) 1080 return; 1081 } 1082 1083 mctx_t const mctx = g_ctx[mtcp->ctx->cpu]; 1084 mtcp->pctx = pctx; /* for mtcp_getcurpkt() */ 1085 1086 HandleCb(mctx, socket->id, side, stree, events); 1087 } 1088 /*----------------------------------------------------------------------------*/ 1089 #endif 1090