/*
 * Copyright (C) 2017 THL A29 Limited, a Tencent company.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/* Standard headers needed for printf, calloc/free, strlen, strcasecmp,
 * the isdigit/isblank family, errno, ntohs and u_int/bcopy used below. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include <errno.h>
#include <sys/types.h>
#include <arpa/inet.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_config.h>
#include <rte_eal.h>
#include <rte_pci.h>
#include <rte_mbuf.h>
#include <rte_lcore.h>
#include <rte_launch.h>
#include <rte_ethdev.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_timer.h>
#include <rte_thash.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>

#include "ff_dpdk_if.h"
#include "ff_dpdk_pcap.h"
#include "ff_dpdk_kni.h"
#include "ff_config.h"
#include "ff_veth.h"
#include "ff_host_interface.h"
#include "ff_msg.h"
#include "ff_api.h"

#define MEMPOOL_CACHE_SIZE 256

#define ARP_RING_SIZE 2048

#define MSG_RING_SIZE 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RX_QUEUE_SIZE 512
#define TX_QUEUE_SIZE 256

#define MAX_PKT_BURST 32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

/*
 * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
 */
#define MAX_TX_BURST (MAX_PKT_BURST / 2)

#define NB_SOCKETS 8

/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET 3

#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
#define MAX_RX_QUEUE_PER_PORT 128

#define BITS_PER_HEX 4

static int enable_kni;
static int kni_accept;

static struct rte_timer freebsd_clock;

/* Default RSS hash key, taken from Mellanox's Linux driver. */
static uint8_t default_rsskey_40bytes[40] = {
    0xd1, 0x81, 0xc6, 0x2c, 0xf7, 0xf4, 0xdb, 0x5b,
    0x19, 0x83, 0xa2, 0xfc, 0x94, 0x3e, 0x1a, 0xdb,
    0xd9, 0x38, 0x9e, 0x6b, 0xd1, 0x03, 0x9c, 0x2c,
    0xa7, 0x44, 0x99, 0xad, 0x59, 0x3d, 0x56, 0xd9,
    0xf3, 0x25, 0x3c, 0x06, 0x2a, 0xdc, 0x1f, 0xfc
};

static struct rte_eth_conf default_port_conf = {
    .rxmode = {
        .mq_mode = ETH_MQ_RX_RSS,
        .max_rx_pkt_len = ETHER_MAX_LEN,
        .split_hdr_size = 0,  /**< hdr buf size */
        .header_split   = 0,  /**< Header Split disabled */
        .hw_ip_checksum = 0,  /**< IP checksum offload disabled */
        .hw_vlan_filter = 0,  /**< VLAN filtering disabled */
        .hw_vlan_strip  = 0,  /**< VLAN strip disabled */
        .hw_vlan_extend = 0,  /**< Extended VLAN disabled */
        .jumbo_frame    = 0,  /**< Jumbo frame support disabled */
        .hw_strip_crc   = 0,  /**< CRC stripping by hardware disabled */
        .enable_lro     = 0,  /**< LRO disabled */
    },
    .rx_adv_conf = {
        .rss_conf = {
            .rss_key = default_rsskey_40bytes,
            .rss_key_len = 40,
            .rss_hf = ETH_RSS_PROTO_MASK,
        },
    },
    .txmode = {
        .mq_mode = ETH_MQ_TX_NONE,
    },
};

struct mbuf_table {
    uint16_t len;
    struct rte_mbuf *m_table[MAX_PKT_BURST];
};

struct lcore_rx_queue {
    uint8_t port_id;
    uint8_t queue_id;
} __rte_cache_aligned;

struct lcore_conf {
    uint16_t proc_id;
    uint16_t nb_procs;
    uint16_t socket_id;
    uint16_t nb_rx_queue;
    uint16_t *lcore_proc;
    struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
    uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
    struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
    char *pcap[RTE_MAX_ETHPORTS];
} __rte_cache_aligned;

static struct lcore_conf lcore_conf;

static struct rte_mempool *pktmbuf_pool[NB_SOCKETS];

static struct rte_ring **arp_ring[RTE_MAX_LCORE];

struct ff_msg_ring {
    char ring_name[2][RTE_RING_NAMESIZE];
    /* ring[0]: this lcore receives msgs, other processes send */
    /* ring[1]: this lcore sends msgs, other processes read */
    struct rte_ring *ring[2];
} __rte_cache_aligned;

static struct ff_msg_ring msg_ring[RTE_MAX_LCORE];
static struct rte_mempool *message_pool;

struct ff_dpdk_if_context {
    void *sc;
    void *ifp;
    uint16_t port_id;
    struct ff_hw_features hw_features;
} __rte_cache_aligned;

static struct ff_dpdk_if_context *veth_ctx[RTE_MAX_ETHPORTS];

extern void ff_hardclock(void);

static void
freebsd_hardclock_job(__rte_unused struct rte_timer *timer,
    __rte_unused void *arg) {
    ff_hardclock();
}

struct ff_dpdk_if_context *
ff_dpdk_register_if(void *sc, void *ifp, struct ff_port_cfg *cfg)
{
    struct ff_dpdk_if_context *ctx;

    ctx = calloc(1, sizeof(struct ff_dpdk_if_context));
    if (ctx == NULL)
        return NULL;

    ctx->sc = sc;
    ctx->ifp = ifp;
    ctx->port_id = cfg->port_id;
    ctx->hw_features = cfg->hw_features;

    return ctx;
}

void
ff_dpdk_deregister_if(struct ff_dpdk_if_context *ctx)
{
    free(ctx);
}
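/*
 * Typical call order for the public entry points in this file (a sketch
 * inferred from the code below, not a normative contract):
 *
 *     ff_dpdk_init(argc, argv);   // EAL, pools, rings, ports, clock
 *     ff_dpdk_if_up();            // attach one veth context per port
 *     ff_dpdk_run(my_loop, arg);  // spin main_loop on every lcore
 *
 * ff_dpdk_register_if()/ff_dpdk_deregister_if() are presumably invoked
 * from the FreeBSD side of the glue (via ff_veth_attach) rather than by
 * applications directly.
 */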
static void
check_all_ports_link_status(void)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */

    uint8_t count, all_ports_up, print_flag = 0;
    struct rte_eth_link link;

    printf("\nChecking link status");
    fflush(stdout);

    int i, nb_ports;
    nb_ports = ff_global_cfg.dpdk.nb_ports;
    for (count = 0; count <= MAX_CHECK_TIME; count++) {
        all_ports_up = 1;
        for (i = 0; i < nb_ports; i++) {
            uint8_t portid = ff_global_cfg.dpdk.port_cfgs[i].port_id;
            memset(&link, 0, sizeof(link));
            rte_eth_link_get_nowait(portid, &link);

            /* print link status if flag set */
            if (print_flag == 1) {
                if (link.link_status) {
                    printf("Port %d Link Up - speed %u Mbps - %s\n",
                        (int)portid, (unsigned)link.link_speed,
                        (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
                        ("full-duplex") : ("half-duplex"));
                } else {
                    printf("Port %d Link Down\n", (int)portid);
                }
                continue;
            }
            /* clear all_ports_up flag if any link down */
            if (link.link_status == 0) {
                all_ports_up = 0;
                break;
            }
        }

        /* after finally printing all link status, get out */
        if (print_flag == 1)
            break;

        if (all_ports_up == 0) {
            printf(".");
            fflush(stdout);
            rte_delay_ms(CHECK_INTERVAL);
        }

        /* set the print_flag if all ports up or timeout */
        if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
            print_flag = 1;
            printf("done\n");
        }
    }
}

/* Map one hex digit to its numeric value. */
static int
xdigit2val(unsigned char c)
{
    int val;

    if (isdigit(c))
        val = c - '0';
    else if (isupper(c))
        val = c - 'A' + 10;
    else
        val = c - 'a' + 10;
    return val;
}
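/*
 * Worked example (illustrative only): with coremask "f0" and nb_procs 4,
 * parse_lcore_mask() scans the hex string from its least-significant
 * digit. '0' contributes no bits; 'f' covers bit positions 4..7, so
 * lcore_proc becomes {4, 5, 6, 7} and each of the 4 f-stack processes is
 * pinned to one of those lcores. Fewer set bits than nb_procs, a bit on
 * an undetected lcore, or a non-hex character all return -1.
 */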
static int
parse_lcore_mask(const char *coremask, uint16_t *lcore_proc,
    uint16_t nb_procs)
{
    int i, j, idx = 0;
    unsigned count = 0;
    char c;
    int val;

    if (coremask == NULL)
        return -1;

    /* Remove all leading and trailing blank characters.
     * Remove a 0x/0X prefix if present.
     */
    while (isblank(*coremask))
        coremask++;
    if (coremask[0] == '0' && ((coremask[1] == 'x')
        || (coremask[1] == 'X')))
        coremask += 2;

    i = strlen(coremask);
    while ((i > 0) && isblank(coremask[i - 1]))
        i--;

    if (i == 0)
        return -1;

    for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE && count < nb_procs; i--) {
        c = coremask[i];
        if (isxdigit(c) == 0) {
            return -1;
        }
        val = xdigit2val(c);
        for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE && count < nb_procs;
            j++, idx++) {
            if ((1 << j) & val) {
                if (!lcore_config[idx].detected) {
                    RTE_LOG(ERR, EAL, "lcore %u unavailable\n", idx);
                    return -1;
                }
                lcore_proc[count] = idx;
                count++;
            }
        }
    }

    /* Any remaining digits must be zero. */
    for (; i >= 0; i--)
        if (coremask[i] != '0')
            return -1;

    if (count < nb_procs)
        return -1;

    return 0;
}
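/*
 * init_lcore_conf() below relies on a 1:1 mapping between f-stack process
 * id and per-port queue id: with nb_procs = 4, every enabled port is
 * later configured with 4 RX and 4 TX queues (see init_port_start), and
 * the process with proc_id = 2 always polls queue 2 on every port. The
 * numbers here are only an illustration of that invariant.
 */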
static int
init_lcore_conf(void)
{
    uint8_t nb_ports = rte_eth_dev_count();
    if (nb_ports == 0) {
        rte_exit(EXIT_FAILURE, "No probed ethernet devices\n");
    }

    lcore_conf.proc_id = ff_global_cfg.dpdk.proc_id;
    lcore_conf.nb_procs = ff_global_cfg.dpdk.nb_procs;
    lcore_conf.lcore_proc = rte_zmalloc(NULL,
        sizeof(uint16_t) * lcore_conf.nb_procs, 0);
    if (lcore_conf.lcore_proc == NULL) {
        rte_exit(EXIT_FAILURE, "rte_zmalloc lcore_proc failed\n");
    }

    int ret = parse_lcore_mask(ff_global_cfg.dpdk.lcore_mask,
        lcore_conf.lcore_proc, lcore_conf.nb_procs);
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "parse_lcore_mask failed:%s\n",
            ff_global_cfg.dpdk.lcore_mask);
    }

    uint16_t socket_id = 0;
    if (ff_global_cfg.dpdk.numa_on) {
        socket_id = rte_lcore_to_socket_id(rte_lcore_id());
    }

    lcore_conf.socket_id = socket_id;

    /* Currently, proc id maps 1:1 to the rx/tx queue id on each port. */
    uint8_t port_id, enabled_ports = 0;
    for (port_id = 0; port_id < nb_ports; port_id++) {
        if (ff_global_cfg.dpdk.port_mask &&
            (ff_global_cfg.dpdk.port_mask & (1 << port_id)) == 0) {
            printf("\nSkipping disabled port %d\n", port_id);
            continue;
        }

        if (port_id >= ff_global_cfg.dpdk.nb_ports) {
            printf("\nSkipping non-configured port %d\n", port_id);
            break;
        }

        uint16_t nb_rx_queue = lcore_conf.nb_rx_queue;
        lcore_conf.rx_queue_list[nb_rx_queue].port_id = port_id;
        lcore_conf.rx_queue_list[nb_rx_queue].queue_id = lcore_conf.proc_id;
        lcore_conf.nb_rx_queue++;

        lcore_conf.tx_queue_id[port_id] = lcore_conf.proc_id;
        lcore_conf.pcap[port_id] = ff_global_cfg.dpdk.port_cfgs[enabled_ports].pcap;

        ff_global_cfg.dpdk.port_cfgs[enabled_ports].port_id = port_id;

        enabled_ports++;
    }

    ff_global_cfg.dpdk.nb_ports = enabled_ports;

    return 0;
}

static int
init_mem_pool(void)
{
    uint8_t nb_ports = ff_global_cfg.dpdk.nb_ports;
    uint32_t nb_lcores = ff_global_cfg.dpdk.nb_procs;
    uint32_t nb_tx_queue = nb_lcores;
    uint32_t nb_rx_queue = lcore_conf.nb_rx_queue * nb_lcores;

    unsigned nb_mbuf = RTE_MAX(
        (nb_rx_queue * RX_QUEUE_SIZE +
        nb_ports * nb_lcores * MAX_PKT_BURST +
        nb_ports * nb_tx_queue * TX_QUEUE_SIZE +
        nb_lcores * MEMPOOL_CACHE_SIZE),
        (unsigned)8192);

    unsigned socketid = 0;
    uint16_t i, lcore_id;
    char s[64];
    int numa_on = ff_global_cfg.dpdk.numa_on;

    for (i = 0; i < lcore_conf.nb_procs; i++) {
        lcore_id = lcore_conf.lcore_proc[i];
        if (numa_on) {
            socketid = rte_lcore_to_socket_id(lcore_id);
        }

        if (socketid >= NB_SOCKETS) {
            rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
                socketid, lcore_id, NB_SOCKETS);
        }

        if (pktmbuf_pool[socketid] != NULL) {
            continue;
        }

        /* The primary process creates the per-socket pool; secondaries attach. */
        snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
            pktmbuf_pool[socketid] =
                rte_pktmbuf_pool_create(s, nb_mbuf,
                    MEMPOOL_CACHE_SIZE, 0,
                    RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
        } else {
            pktmbuf_pool[socketid] = rte_mempool_lookup(s);
        }

        if (pktmbuf_pool[socketid] == NULL) {
            rte_exit(EXIT_FAILURE, "Cannot create mbuf pool on socket %d\n", socketid);
        } else {
            printf("create mbuf pool on socket %d\n", socketid);
        }
    }

    return 0;
}

static struct rte_ring *
create_ring(const char *name, unsigned count, int socket_id, unsigned flags)
{
    struct rte_ring *ring;

    if (name == NULL)
        return NULL;

    /* If the ring already exists, just attach to it. */
    if (likely((ring = rte_ring_lookup(name)) != NULL))
        return ring;

    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
        return rte_ring_create(name, count, socket_id, flags);
    } else {
        return rte_ring_lookup(name);
    }
}
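/*
 * ARP replies must be visible to every f-stack process, not only to the
 * one whose RSS queue happened to receive them, since each process runs
 * its own FreeBSD stack instance with its own neighbor table. The rings
 * built below implement that fan-out: arp_ring[i][port] is a
 * multi-producer, single-consumer ring (RING_F_SC_DEQ) drained by process
 * i, and process_packets() later clones every received ARP frame into the
 * ring of each peer process.
 */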
static int
init_arp_ring(void)
{
    int i, j;
    char name_buf[RTE_RING_NAMESIZE];
    int nb_procs = ff_global_cfg.dpdk.nb_procs;
    int proc_id = ff_global_cfg.dpdk.proc_id;

    /* Allocate the arp ring pointer arrays according to the eth dev count. */
    int nb_ports = rte_eth_dev_count();
    for (i = 0; i < nb_procs; ++i) {
        snprintf(name_buf, RTE_RING_NAMESIZE, "ring_ptr_%d_%d",
            proc_id, i);

        arp_ring[i] = rte_zmalloc(name_buf,
            sizeof(struct rte_ring *) * nb_ports,
            RTE_CACHE_LINE_SIZE);
        if (arp_ring[i] == NULL) {
            rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) "
                "failed\n", name_buf);
        }
    }

    unsigned socketid = lcore_conf.socket_id;

    /* Create rings according to the ports actually being used. */
    nb_ports = ff_global_cfg.dpdk.nb_ports;
    for (j = 0; j < nb_ports; j++) {
        uint8_t port_id = ff_global_cfg.dpdk.port_cfgs[j].port_id;

        for (i = 0; i < nb_procs; ++i) {
            snprintf(name_buf, RTE_RING_NAMESIZE, "arp_ring_%d_%d", i, port_id);
            arp_ring[i][port_id] = create_ring(name_buf, ARP_RING_SIZE,
                socketid, RING_F_SC_DEQ);

            if (arp_ring[i][port_id] == NULL)
                rte_panic("create ring:%s failed!\n", name_buf);

            printf("create ring:%s success, %u ring entries are now free!\n",
                name_buf, rte_ring_free_count(arp_ring[i][port_id]));
        }
    }

    return 0;
}

/* Per-object init callback: point each ff_msg's buffer just past its header. */
static void
ff_msg_init(struct rte_mempool *mp,
    __attribute__((unused)) void *opaque_arg,
    void *obj, __attribute__((unused)) unsigned i)
{
    struct ff_msg *msg = (struct ff_msg *)obj;
    msg->buf_addr = (char *)msg + sizeof(struct ff_msg);
    msg->buf_len = mp->elt_size - sizeof(struct ff_msg);
}

static int
init_msg_ring(void)
{
    uint16_t i;
    uint16_t nb_procs = ff_global_cfg.dpdk.nb_procs;
    unsigned socketid = lcore_conf.socket_id;

    /* Create the message buffer pool */
    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
        message_pool = rte_mempool_create(FF_MSG_POOL,
            MSG_RING_SIZE * 2 * nb_procs,
            MAX_MSG_BUF_SIZE, MSG_RING_SIZE / 2, 0,
            NULL, NULL, ff_msg_init, NULL,
            socketid, 0);
    } else {
        message_pool = rte_mempool_lookup(FF_MSG_POOL);
    }

    if (message_pool == NULL) {
        rte_panic("Create msg mempool failed\n");
    }

    for (i = 0; i < nb_procs; ++i) {
        snprintf(msg_ring[i].ring_name[0], RTE_RING_NAMESIZE,
            "%s%u", FF_MSG_RING_IN, i);
        snprintf(msg_ring[i].ring_name[1], RTE_RING_NAMESIZE,
            "%s%u", FF_MSG_RING_OUT, i);

        msg_ring[i].ring[0] = create_ring(msg_ring[i].ring_name[0],
            MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ);
        if (msg_ring[i].ring[0] == NULL)
            rte_panic("create ring:%s failed!\n", msg_ring[i].ring_name[0]);

        msg_ring[i].ring[1] = create_ring(msg_ring[i].ring_name[1],
            MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ);
        if (msg_ring[i].ring[1] == NULL)
            rte_panic("create ring:%s failed!\n", msg_ring[i].ring_name[1]);
    }

    return 0;
}

static int
init_kni(void)
{
    int nb_ports = rte_eth_dev_count();
    kni_accept = 0;
    if (strcasecmp(ff_global_cfg.kni.method, "accept") == 0)
        kni_accept = 1;

    ff_kni_init(nb_ports, ff_global_cfg.kni.tcp_port,
        ff_global_cfg.kni.udp_port);

    unsigned socket_id = lcore_conf.socket_id;
    struct rte_mempool *mbuf_pool = pktmbuf_pool[socket_id];

    nb_ports = ff_global_cfg.dpdk.nb_ports;
    int i;
    for (i = 0; i < nb_ports; i++) {
        uint8_t port_id = ff_global_cfg.dpdk.port_cfgs[i].port_id;
        ff_kni_alloc(port_id, socket_id, mbuf_pool);
    }

    return 0;
}
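/*
 * Per-port bring-up: query device capabilities, downgrade any TX offloads
 * the NIC cannot do (via txq_flags), record the supported RX/TX offloads
 * in hw_features, then configure nb_procs RX and TX queues per port
 * (queue id == proc id, matching init_lcore_conf), start the port, and
 * optionally enable promiscuous mode and pcap dumping. Secondary
 * processes return early after recording hardware features, since the
 * primary process owns device configuration.
 */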
static int
init_port_start(void)
{
    int nb_ports = ff_global_cfg.dpdk.nb_ports;
    uint16_t nb_procs = ff_global_cfg.dpdk.nb_procs;
    unsigned socketid = rte_lcore_to_socket_id(rte_lcore_id());
    struct rte_mempool *mbuf_pool = pktmbuf_pool[socketid];
    uint16_t i;

    for (i = 0; i < nb_ports; i++) {
        uint8_t port_id = ff_global_cfg.dpdk.port_cfgs[i].port_id;

        struct rte_eth_dev_info dev_info;
        rte_eth_dev_info_get(port_id, &dev_info);

        if (nb_procs > dev_info.max_rx_queues) {
            rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_rx_queues[%d]\n",
                nb_procs,
                dev_info.max_rx_queues);
        }

        if (nb_procs > dev_info.max_tx_queues) {
            rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_tx_queues[%d]\n",
                nb_procs,
                dev_info.max_tx_queues);
        }

        struct ether_addr addr;
        rte_eth_macaddr_get(port_id, &addr);
        printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
            " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
            (unsigned)port_id,
            addr.addr_bytes[0], addr.addr_bytes[1],
            addr.addr_bytes[2], addr.addr_bytes[3],
            addr.addr_bytes[4], addr.addr_bytes[5]);

        rte_memcpy(ff_global_cfg.dpdk.port_cfgs[i].mac,
            addr.addr_bytes, ETHER_ADDR_LEN);

        /* Clear txq_flags - we do not need multi-mempool and refcnt */
        dev_info.default_txconf.txq_flags = ETH_TXQ_FLAGS_NOMULTMEMP |
            ETH_TXQ_FLAGS_NOREFCOUNT;

        /* Disable features that are not supported by port's HW */
        if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
            dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMUDP;
        }

        if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)) {
            dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMTCP;
        }

        if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM)) {
            dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
        }

        if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)) {
            dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOVLANOFFL;
        }

        if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) &&
            !(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TSO)) {
            dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
        }

        struct rte_eth_conf port_conf = {0};

        /* Set RSS mode */
        port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
        port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_PROTO_MASK;
        port_conf.rx_adv_conf.rss_conf.rss_key = default_rsskey_40bytes;
        port_conf.rx_adv_conf.rss_conf.rss_key_len = 40;

        /* Set Rx VLAN stripping */
        if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP) {
            port_conf.rxmode.hw_vlan_strip = 1;
        }

        /* Enable HW CRC stripping */
        port_conf.rxmode.hw_strip_crc = 1;

        /* FIXME: Enable TCP LRO? */
#if 0
        if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO) {
            printf("LRO is supported\n");
            port_conf.rxmode.enable_lro = 1;
            ff_global_cfg.dpdk.port_cfgs[i].hw_features.rx_lro = 1;
        }
#endif

        /* Set Rx checksum checking */
        if ((dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM) &&
            (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM) &&
            (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)) {
            printf("RX checksum offload supported\n");
            port_conf.rxmode.hw_ip_checksum = 1;
            ff_global_cfg.dpdk.port_cfgs[i].hw_features.rx_csum = 1;
        }

        if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
            printf("TX ip checksum offload supported\n");
            ff_global_cfg.dpdk.port_cfgs[i].hw_features.tx_csum_ip = 1;
        }

        if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) &&
            (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)) {
            printf("TX TCP&UDP checksum offload supported\n");
            ff_global_cfg.dpdk.port_cfgs[i].hw_features.tx_csum_l4 = 1;
        }

        if (ff_global_cfg.dpdk.tso) {
            if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) {
                printf("TSO is supported\n");
                ff_global_cfg.dpdk.port_cfgs[i].hw_features.tx_tso = 1;
            }
        } else {
            printf("TSO is disabled\n");
        }

        if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
            return 0;
        }

        /* Currently, proc id maps 1:1 to queue id per port. */
        int ret = rte_eth_dev_configure(port_id, nb_procs, nb_procs, &port_conf);
        if (ret != 0) {
            return ret;
        }

        uint16_t q;
        for (q = 0; q < nb_procs; q++) {
            ret = rte_eth_tx_queue_setup(port_id, q, TX_QUEUE_SIZE,
                socketid, &dev_info.default_txconf);
            if (ret < 0) {
                return ret;
            }

            ret = rte_eth_rx_queue_setup(port_id, q, RX_QUEUE_SIZE,
                socketid, &dev_info.default_rxconf, mbuf_pool);
            if (ret < 0) {
                return ret;
            }
        }

        ret = rte_eth_dev_start(port_id);
        if (ret < 0) {
            return ret;
        }

        /* Enable RX in promiscuous mode for the Ethernet device. */
        if (ff_global_cfg.dpdk.promiscuous) {
            rte_eth_promiscuous_enable(port_id);
            ret = rte_eth_promiscuous_get(port_id);
            if (ret == 1) {
                printf("set port %u to promiscuous mode ok\n", port_id);
            } else {
                printf("set port %u to promiscuous mode error\n", port_id);
            }
        }

        /* Enable pcap dump */
        if (ff_global_cfg.dpdk.port_cfgs[i].pcap) {
            ff_enable_pcap(ff_global_cfg.dpdk.port_cfgs[i].pcap);
        }
    }

    return 0;
}
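/*
 * Tick arithmetic below, worked through for the common freebsd.hz = 100:
 * the interval is MS_PER_S / hz = 1000 / 100 = 10 ms, and the timer
 * period in TSC ticks is ceil(tsc_hz / 1000) * 10, i.e. ten times the
 * number of TSC ticks per millisecond. Each expiry calls ff_hardclock()
 * so the embedded FreeBSD stack sees its configured clock rate.
 */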
static int
init_freebsd_clock(void)
{
    rte_timer_subsystem_init();
    uint64_t hz = rte_get_timer_hz();
    uint64_t intrs = MS_PER_S / ff_global_cfg.freebsd.hz;
    uint64_t tsc = (hz + MS_PER_S - 1) / MS_PER_S * intrs;

    rte_timer_init(&freebsd_clock);
    rte_timer_reset(&freebsd_clock, tsc, PERIODICAL,
        rte_lcore_id(), &freebsd_hardclock_job, NULL);

    return 0;
}

int
ff_dpdk_init(int argc, char **argv)
{
    if (ff_global_cfg.dpdk.nb_procs < 1 ||
        ff_global_cfg.dpdk.nb_procs > RTE_MAX_LCORE ||
        ff_global_cfg.dpdk.proc_id >= ff_global_cfg.dpdk.nb_procs ||
        ff_global_cfg.dpdk.proc_id < 0) {
        printf("param num_procs[%d] or proc_id[%d] error!\n",
            ff_global_cfg.dpdk.nb_procs,
            ff_global_cfg.dpdk.proc_id);
        exit(1);
    }

    int ret = rte_eal_init(argc, argv);
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
    }

    init_lcore_conf();

    init_mem_pool();

    init_arp_ring();

    init_msg_ring();

    enable_kni = ff_global_cfg.kni.enable;
    if (enable_kni) {
        init_kni();
    }

    ret = init_port_start();
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "init_port_start failed\n");
    }

    check_all_ports_link_status();

    init_freebsd_clock();

    return 0;
}
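/*
 * ff_veth_input() hands a received DPDK mbuf chain to the FreeBSD stack.
 * The first segment becomes the packet-header mbuf via ff_mbuf_gethdr();
 * each further segment is chained on with ff_mbuf_get(). The ff_mbuf_*
 * helpers are presumably expected to reference the DPDK data buffers
 * rather than copy them (the rte_mbuf is later released through
 * ff_dpdk_pktmbuf_free() once the stack is done), keeping the RX path
 * zero-copy.
 */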
static void
ff_veth_input(const struct ff_dpdk_if_context *ctx, struct rte_mbuf *pkt)
{
    uint8_t rx_csum = ctx->hw_features.rx_csum;
    if (rx_csum) {
        if (pkt->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD)) {
            /* Drop packets with bad hardware-verified checksums. */
            rte_pktmbuf_free(pkt);
            return;
        }
    }

    /*
     * FIXME: should we save pkt->vlan_tci
     * if (pkt->ol_flags & PKT_RX_VLAN_PKT)?
     */

    void *data = rte_pktmbuf_mtod(pkt, void *);
    uint16_t len = rte_pktmbuf_data_len(pkt);

    void *hdr = ff_mbuf_gethdr(pkt, pkt->pkt_len, data, len, rx_csum);
    if (hdr == NULL) {
        rte_pktmbuf_free(pkt);
        return;
    }

    pkt = pkt->next;
    void *prev = hdr;
    while (pkt != NULL) {
        data = rte_pktmbuf_mtod(pkt, void *);
        len = rte_pktmbuf_data_len(pkt);

        void *mb = ff_mbuf_get(prev, data, len);
        if (mb == NULL) {
            ff_mbuf_free(hdr);
            return;
        }
        pkt = pkt->next;
        prev = mb;
    }

    ff_veth_process_packet(ctx->ifp, hdr);
}

/* Classify a frame: ARP, a candidate for KNI, or unknown (normal stack input). */
static enum FilterReturn
protocol_filter(const void *data, uint16_t len)
{
    if (len < sizeof(struct ether_hdr))
        return FILTER_UNKNOWN;

    const struct ether_hdr *hdr;
    hdr = (const struct ether_hdr *)data;

    if (ntohs(hdr->ether_type) == ETHER_TYPE_ARP)
        return FILTER_ARP;

    if (!enable_kni) {
        return FILTER_UNKNOWN;
    }

    if (ntohs(hdr->ether_type) != ETHER_TYPE_IPv4)
        return FILTER_UNKNOWN;

    return ff_kni_proto_filter(data + sizeof(struct ether_hdr),
        len - sizeof(struct ether_hdr));
}
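/*
 * Dispatch policy implemented by process_packets(), summarized:
 *
 *   FILTER_ARP     -> deliver to the local stack, clone to every other
 *                     process's arp_ring (unless the packet itself came
 *                     from a ring) and, in the primary process, clone
 *                     to KNI as well.
 *   FILTER_KNI     -> to the kernel via KNI when kni_accept is set,
 *                     otherwise to the local stack.
 *   FILTER_UNKNOWN -> to KNI when kni_accept is clear, otherwise to the
 *                     local stack.
 *
 * kni_accept == 1 means ff_global_cfg.kni.method was "accept".
 */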
static inline void
process_packets(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **bufs,
    uint16_t count, const struct ff_dpdk_if_context *ctx, int pkts_from_ring)
{
    struct lcore_conf *qconf = &lcore_conf;

    uint16_t i;
    for (i = 0; i < count; i++) {
        struct rte_mbuf *rtem = bufs[i];

        if (unlikely(qconf->pcap[port_id] != NULL)) {
            ff_dump_packets(qconf->pcap[port_id], rtem);
        }

        void *data = rte_pktmbuf_mtod(rtem, void *);
        uint16_t len = rte_pktmbuf_data_len(rtem);

        enum FilterReturn filter = protocol_filter(data, len);
        if (filter == FILTER_ARP) {
            struct rte_mempool *mbuf_pool;
            struct rte_mbuf *mbuf_clone;
            if (pkts_from_ring == 0) {
                /* Fan the ARP packet out to every other process's ring. */
                uint16_t j;
                for (j = 0; j < qconf->nb_procs; ++j) {
                    if (j == queue_id)
                        continue;

                    mbuf_pool = pktmbuf_pool[rte_lcore_to_socket_id(qconf->lcore_proc[j])];
                    mbuf_clone = rte_pktmbuf_clone(rtem, mbuf_pool);
                    if (mbuf_clone) {
                        int ret = rte_ring_enqueue(arp_ring[j][port_id], mbuf_clone);
                        if (ret < 0)
                            rte_pktmbuf_free(mbuf_clone);
                    }
                }
            }

            if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) {
                mbuf_pool = pktmbuf_pool[qconf->socket_id];
                mbuf_clone = rte_pktmbuf_clone(rtem, mbuf_pool);
                if (mbuf_clone) {
                    /* Enqueue the clone; rtem itself still goes to the stack below. */
                    ff_kni_enqueue(port_id, mbuf_clone);
                }
            }

            ff_veth_input(ctx, rtem);
        } else if (enable_kni && ((filter == FILTER_KNI && kni_accept) ||
            (filter == FILTER_UNKNOWN && !kni_accept))) {
            ff_kni_enqueue(port_id, rtem);
        } else {
            ff_veth_input(ctx, rtem);
        }
    }
}

static inline int
process_arp_ring(uint8_t port_id, uint16_t queue_id,
    struct rte_mbuf **pkts_burst, const struct ff_dpdk_if_context *ctx)
{
    /* Read packets from the ring buffer and process them. */
    uint16_t nb_rb;
    nb_rb = rte_ring_dequeue_burst(arp_ring[queue_id][port_id],
        (void **)pkts_burst, MAX_PKT_BURST);

    if (nb_rb > 0) {
        process_packets(port_id, queue_id, pkts_burst, nb_rb, ctx, 1);
    }

    return 0;
}

static inline void
handle_sysctl_msg(struct ff_msg *msg, uint16_t proc_id)
{
    int ret = ff_sysctl(msg->sysctl.name, msg->sysctl.namelen,
        msg->sysctl.old, msg->sysctl.oldlenp, msg->sysctl.new,
        msg->sysctl.newlen);

    if (ret < 0) {
        msg->result = errno;
    } else {
        msg->result = 0;
    }

    rte_ring_enqueue(msg_ring[proc_id].ring[1], msg);
}

static inline void
handle_default_msg(struct ff_msg *msg, uint16_t proc_id)
{
    msg->result = EINVAL;
    rte_ring_enqueue(msg_ring[proc_id].ring[1], msg);
}

static inline void
handle_msg(struct ff_msg *msg, uint16_t proc_id)
{
    switch (msg->msg_type) {
        case FF_SYSCTL:
            handle_sysctl_msg(msg, proc_id);
            break;
        default:
            handle_default_msg(msg, proc_id);
            break;
    }
}

static inline int
process_msg_ring(uint16_t proc_id)
{
    void *msg;
    int ret = rte_ring_dequeue(msg_ring[proc_id].ring[0], &msg);

    if (unlikely(ret == 0)) {
        handle_msg((struct ff_msg *)msg, proc_id);
    }

    return 0;
}

/* Send burst of packets on an output interface */
static inline int
send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
{
    struct rte_mbuf **m_table;
    int ret;
    uint16_t queueid;

    queueid = qconf->tx_queue_id[port];
    m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;

    if (unlikely(qconf->pcap[port] != NULL)) {
        uint16_t i;
        for (i = 0; i < n; i++) {
            ff_dump_packets(qconf->pcap[port], m_table[i]);
        }
    }

    ret = rte_eth_tx_burst(port, queueid, m_table, n);
    if (unlikely(ret < n)) {
        /* Free whatever the NIC did not accept. */
        do {
            rte_pktmbuf_free(m_table[ret]);
        } while (++ret < n);
    }

    return 0;
}

/* Enqueue a single packet, and send burst if queue is filled */
static inline int
send_single_packet(struct rte_mbuf *m, uint8_t port)
{
    uint16_t len;
    struct lcore_conf *qconf;

    qconf = &lcore_conf;
    len = qconf->tx_mbufs[port].len;
    qconf->tx_mbufs[port].m_table[len] = m;
    len++;

    /* enough pkts to be sent */
    if (unlikely(len == MAX_PKT_BURST)) {
        send_burst(qconf, MAX_PKT_BURST, port);
        len = 0;
    }

    qconf->tx_mbufs[port].len = len;
    return 0;
}
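/*
 * TX path: copy a FreeBSD mbuf chain into a chain of DPDK mbufs (at most
 * RTE_MBUF_DEFAULT_DATAROOM bytes per segment), apply the checksum/TSO
 * offload flags that the veth context advertises, then hand the head to
 * send_single_packet(). Note that, unlike the RX direction, this path
 * copies payload via ff_mbuf_copydata rather than referencing it.
 */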
int
ff_dpdk_if_send(struct ff_dpdk_if_context *ctx, void *m,
    int total)
{
    struct rte_mempool *mbuf_pool = pktmbuf_pool[lcore_conf.socket_id];
    struct rte_mbuf *head = rte_pktmbuf_alloc(mbuf_pool);
    if (head == NULL) {
        ff_mbuf_free(m);
        return -1;
    }

    head->pkt_len = total;
    head->nb_segs = 0;

    int off = 0;
    struct rte_mbuf *cur = head, *prev = NULL;
    while (total > 0) {
        if (cur == NULL) {
            cur = rte_pktmbuf_alloc(mbuf_pool);
            if (cur == NULL) {
                rte_pktmbuf_free(head);
                ff_mbuf_free(m);
                return -1;
            }
        }

        void *data = rte_pktmbuf_mtod(cur, void *);
        int len = total > RTE_MBUF_DEFAULT_DATAROOM ?
            RTE_MBUF_DEFAULT_DATAROOM : total;
        int ret = ff_mbuf_copydata(m, data, off, len);
        if (ret < 0) {
            rte_pktmbuf_free(head);
            ff_mbuf_free(m);
            return -1;
        }

        if (prev != NULL) {
            prev->next = cur;
        }
        prev = cur;

        cur->data_len = len;
        off += len;
        total -= len;
        head->nb_segs++;
        cur = NULL;
    }

    struct ff_tx_offload offload = {0};
    ff_mbuf_tx_offload(m, &offload);

    if (offload.ip_csum) {
        head->ol_flags |= PKT_TX_IP_CKSUM;
        head->l2_len = sizeof(struct ether_hdr);
        head->l3_len = sizeof(struct ipv4_hdr);
    }

    if (ctx->hw_features.tx_csum_l4) {
        if (offload.tcp_csum) {
            head->ol_flags |= PKT_TX_TCP_CKSUM;
            head->l2_len = sizeof(struct ether_hdr);
            head->l3_len = sizeof(struct ipv4_hdr);
        }

        if (offload.tso_seg_size) {
            head->ol_flags |= PKT_TX_TCP_SEG;
            head->l4_len = sizeof(struct tcp_hdr);
            head->tso_segsz = offload.tso_seg_size;
        }

        if (offload.udp_csum) {
            head->ol_flags |= PKT_TX_UDP_CKSUM;
            head->l2_len = sizeof(struct ether_hdr);
            head->l3_len = sizeof(struct ipv4_hdr);
        }
    }

    ff_mbuf_free(m);

    return send_single_packet(head, ctx->port_id);
}
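/*
 * Per-lcore event loop. Each iteration, in order: run expired rte_timers
 * (the FreeBSD hardclock), drain any TX buffers older than
 * BURST_TX_DRAIN_US, poll KNI (primary process only), drain this
 * process's ARP ring, RX-burst each assigned queue through a software
 * prefetch pipeline PREFETCH_OFFSET packets deep, service the message
 * ring, and finally run the user-supplied loop callback.
 */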
static int
main_loop(void *arg)
{
    struct loop_routine *lr = (struct loop_routine *)arg;

    struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
    unsigned lcore_id;
    uint64_t prev_tsc, diff_tsc, cur_tsc;
    int i, j, nb_rx;
    uint8_t port_id, queue_id;
    struct lcore_conf *qconf;
    const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
        US_PER_S * BURST_TX_DRAIN_US;
    struct ff_dpdk_if_context *ctx;

    prev_tsc = 0;

    lcore_id = rte_lcore_id();
    qconf = &lcore_conf;

    if (qconf->nb_rx_queue == 0) {
        printf("lcore %u has nothing to do\n", lcore_id);
        return 0;
    }

    while (1) {
        cur_tsc = rte_rdtsc();
        if (unlikely(freebsd_clock.expire < cur_tsc)) {
            rte_timer_manage();
        }

        /*
         * TX burst queue drain
         */
        diff_tsc = cur_tsc - prev_tsc;
        if (unlikely(diff_tsc > drain_tsc)) {
            /*
             * This could be optimized (use queueid instead of
             * portid), but it is not called so often
             */
            for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) {
                if (qconf->tx_mbufs[port_id].len == 0)
                    continue;
                send_burst(qconf,
                    qconf->tx_mbufs[port_id].len,
                    port_id);
                qconf->tx_mbufs[port_id].len = 0;
            }

            prev_tsc = cur_tsc;
        }

        /*
         * Read packets from RX queues
         */
        for (i = 0; i < qconf->nb_rx_queue; ++i) {
            port_id = qconf->rx_queue_list[i].port_id;
            queue_id = qconf->rx_queue_list[i].queue_id;
            ctx = veth_ctx[port_id];

            if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) {
                ff_kni_process(port_id, queue_id, pkts_burst, MAX_PKT_BURST);
            }

            process_arp_ring(port_id, queue_id, pkts_burst, ctx);

            nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts_burst,
                MAX_PKT_BURST);
            if (nb_rx == 0)
                continue;

            /* Prefetch first packets */
            for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
                rte_prefetch0(rte_pktmbuf_mtod(
                    pkts_burst[j], void *));
            }

            /* Prefetch and handle already prefetched packets */
            for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
                rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
                    j + PREFETCH_OFFSET], void *));
                process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0);
            }

            /* Handle remaining prefetched packets */
            for (; j < nb_rx; j++) {
                process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0);
            }
        }

        process_msg_ring(qconf->proc_id);

        if (likely(lr->loop != NULL)) {
            lr->loop(lr->arg);
        }
    }
}

int
ff_dpdk_if_up(void) {
    int nb_ports = ff_global_cfg.dpdk.nb_ports;
    int i;
    for (i = 0; i < nb_ports; i++) {
        uint8_t port_id = ff_global_cfg.dpdk.port_cfgs[i].port_id;
        veth_ctx[port_id] = ff_veth_attach(ff_global_cfg.dpdk.port_cfgs + i);
        if (veth_ctx[port_id] == NULL) {
            rte_exit(EXIT_FAILURE, "ff_veth_attach failed\n");
        }
    }

    return 0;
}

void
ff_dpdk_run(loop_func_t loop, void *arg) {
    struct loop_routine *lr = malloc(sizeof(struct loop_routine));
    lr->loop = loop;
    lr->arg = arg;
    rte_eal_mp_remote_launch(main_loop, lr, CALL_MASTER);
    rte_eal_mp_wait_lcore();
    free(lr);
}

void
ff_dpdk_pktmbuf_free(void *m)
{
    rte_pktmbuf_free((struct rte_mbuf *)m);
}

/*
 * Software Toeplitz hash, the same algorithm NICs use for RSS; judging
 * by the XXXRW comment, this appears to be lifted from FreeBSD's
 * toeplitz.c.
 */
static uint32_t
toeplitz_hash(unsigned keylen, const uint8_t *key,
    unsigned datalen, const uint8_t *data)
{
    uint32_t hash = 0, v;
    u_int i, b;

    /* XXXRW: Perhaps an assertion about key length vs. data length? */

    v = (key[0] << 24) + (key[1] << 16) + (key[2] << 8) + key[3];
    for (i = 0; i < datalen; i++) {
        for (b = 0; b < 8; b++) {
            if (data[i] & (1 << (7 - b)))
                hash ^= v;
            v <<= 1;
            if ((i + 4) < keylen &&
                (key[i + 4] & (1 << (7 - b))))
                v |= 1;
        }
    }
    return (hash);
}

/*
 * Return non-zero if a flow with the given 4-tuple belongs to this
 * process. The tuple is hashed in software with the same key the ports
 * were configured with, so the result approximates the RX queue the NIC
 * would pick for the same flow.
 */
int
ff_rss_check(uint32_t saddr, uint32_t daddr, uint16_t sport, uint16_t dport)
{
    struct lcore_conf *qconf = &lcore_conf;

    if (qconf->nb_procs == 1) {
        return 1;
    }

    uint8_t data[sizeof(saddr) + sizeof(daddr) + sizeof(sport) +
        sizeof(dport)];

    unsigned datalen = 0;

    bcopy(&saddr, &data[datalen], sizeof(saddr));
    datalen += sizeof(saddr);

    bcopy(&daddr, &data[datalen], sizeof(daddr));
    datalen += sizeof(daddr);

    bcopy(&sport, &data[datalen], sizeof(sport));
    datalen += sizeof(sport);

    bcopy(&dport, &data[datalen], sizeof(dport));
    datalen += sizeof(dport);

    uint32_t hash = toeplitz_hash(sizeof(default_rsskey_40bytes),
        default_rsskey_40bytes, datalen, data);

    return (hash % qconf->nb_procs) == qconf->proc_id;
}