1 /* 2 * Copyright (C) 2017 THL A29 Limited, a Tencent company. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, this 9 * list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright notice, 11 * this list of conditions and the following disclaimer in the documentation 12 * and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * 25 */ 26 #include <assert.h> 27 #include <unistd.h> 28 #include <sys/mman.h> 29 #include <errno.h> 30 31 #include <rte_common.h> 32 #include <rte_byteorder.h> 33 #include <rte_log.h> 34 #include <rte_memory.h> 35 #include <rte_memcpy.h> 36 #include <rte_memzone.h> 37 #include <rte_config.h> 38 #include <rte_eal.h> 39 #include <rte_pci.h> 40 #include <rte_mbuf.h> 41 #include <rte_memory.h> 42 #include <rte_lcore.h> 43 #include <rte_launch.h> 44 #include <rte_ethdev.h> 45 #include <rte_debug.h> 46 #include <rte_common.h> 47 #include <rte_ether.h> 48 #include <rte_malloc.h> 49 #include <rte_cycles.h> 50 #include <rte_timer.h> 51 #include <rte_thash.h> 52 #include <rte_ip.h> 53 #include <rte_tcp.h> 54 #include <rte_udp.h> 55 #include <rte_eth_bond.h> 56 57 #include "ff_dpdk_if.h" 58 #include "ff_dpdk_pcap.h" 59 #include "ff_dpdk_kni.h" 60 #include "ff_config.h" 61 #include "ff_veth.h" 62 #include "ff_host_interface.h" 63 #include "ff_msg.h" 64 #include "ff_api.h" 65 #include "ff_memory.h" 66 67 #ifdef FF_KNI 68 #define KNI_MBUF_MAX 2048 69 #define KNI_QUEUE_SIZE 2048 70 71 int enable_kni; 72 static int kni_accept; 73 static int knictl_action = FF_KNICTL_ACTION_DEFAULT; 74 #endif 75 76 static int numa_on; 77 78 static unsigned idle_sleep; 79 static unsigned pkt_tx_delay; 80 81 static struct rte_timer freebsd_clock; 82 83 // Mellanox Linux's driver key 84 static uint8_t default_rsskey_40bytes[40] = { 85 0xd1, 0x81, 0xc6, 0x2c, 0xf7, 0xf4, 0xdb, 0x5b, 86 0x19, 0x83, 0xa2, 0xfc, 0x94, 0x3e, 0x1a, 0xdb, 87 0xd9, 0x38, 0x9e, 0x6b, 0xd1, 0x03, 0x9c, 0x2c, 88 0xa7, 0x44, 0x99, 0xad, 0x59, 0x3d, 0x56, 0xd9, 89 0xf3, 0x25, 0x3c, 0x06, 0x2a, 0xdc, 0x1f, 0xfc 90 }; 91 92 static uint8_t default_rsskey_52bytes[52] = { 93 0x44, 0x39, 0x79, 0x6b, 0xb5, 0x4c, 0x50, 0x23, 94 0xb6, 0x75, 0xea, 0x5b, 0x12, 0x4f, 0x9f, 0x30, 95 0xb8, 0xa2, 0xc0, 0x3d, 0xdf, 0xdc, 0x4d, 0x02, 96 0xa0, 0x8c, 0x9b, 0x33, 0x4a, 0xf6, 0x4a, 0x4c, 97 0x05, 0xc6, 0xfa, 0x34, 0x39, 0x58, 0xd8, 0x55, 98 0x7d, 0x99, 0x58, 0x3a, 0xe1, 0x38, 0xc9, 0x2e, 99 0x81, 0x15, 0x03, 0x66 100 }; 101 102 static uint8_t symmetric_rsskey[52] = { 103 0x6d, 0x5a, 
0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 104 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 105 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 106 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 107 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 108 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 109 0x6d, 0x5a, 0x6d, 0x5a 110 }; 111 112 static int rsskey_len = sizeof(default_rsskey_40bytes); 113 static uint8_t *rsskey = default_rsskey_40bytes; 114 115 struct lcore_conf lcore_conf; 116 117 struct rte_mempool *pktmbuf_pool[NB_SOCKETS]; 118 119 static pcblddr_func_t pcblddr_fun; 120 121 static struct rte_ring **dispatch_ring[RTE_MAX_ETHPORTS]; 122 static dispatch_func_t packet_dispatcher; 123 124 static uint16_t rss_reta_size[RTE_MAX_ETHPORTS]; 125 126 #define BOND_DRIVER_NAME "net_bonding" 127 128 static inline int send_single_packet(struct rte_mbuf *m, uint8_t port); 129 130 struct ff_msg_ring { 131 char ring_name[FF_MSG_NUM][RTE_RING_NAMESIZE]; 132 /* ring[0] for lcore recv msg, other send */ 133 /* ring[1] for lcore send msg, other read */ 134 struct rte_ring *ring[FF_MSG_NUM]; 135 } __rte_cache_aligned; 136 137 static struct ff_msg_ring msg_ring[RTE_MAX_LCORE]; 138 static struct rte_mempool *message_pool; 139 static struct ff_dpdk_if_context *veth_ctx[RTE_MAX_ETHPORTS]; 140 141 static struct ff_top_args ff_top_status; 142 static struct ff_traffic_args ff_traffic; 143 extern void ff_hardclock(void); 144 145 static void 146 ff_hardclock_job(__rte_unused struct rte_timer *timer, 147 __rte_unused void *arg) { 148 ff_hardclock(); 149 ff_update_current_ts(); 150 } 151 152 struct ff_dpdk_if_context * 153 ff_dpdk_register_if(void *sc, void *ifp, struct ff_port_cfg *cfg) 154 { 155 struct ff_dpdk_if_context *ctx; 156 157 ctx = calloc(1, sizeof(struct ff_dpdk_if_context)); 158 if (ctx == NULL) 159 return NULL; 160 161 ctx->sc = sc; 162 ctx->ifp = ifp; 163 ctx->port_id = cfg->port_id; 164 ctx->hw_features = cfg->hw_features; 165 166 return ctx; 167 } 168 169 void 170 ff_dpdk_deregister_if(struct ff_dpdk_if_context *ctx) 171 { 172 free(ctx); 173 } 174 175 static void 176 check_all_ports_link_status(void) 177 { 178 #define CHECK_INTERVAL 100 /* 100ms */ 179 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 180 181 uint16_t portid; 182 uint8_t count, all_ports_up, print_flag = 0; 183 struct rte_eth_link link; 184 185 printf("\nChecking link status"); 186 fflush(stdout); 187 188 int i, nb_ports; 189 nb_ports = ff_global_cfg.dpdk.nb_ports; 190 for (count = 0; count <= MAX_CHECK_TIME; count++) { 191 all_ports_up = 1; 192 for (i = 0; i < nb_ports; i++) { 193 uint16_t portid = ff_global_cfg.dpdk.portid_list[i]; 194 memset(&link, 0, sizeof(link)); 195 rte_eth_link_get_nowait(portid, &link); 196 197 /* print link status if flag set */ 198 if (print_flag == 1) { 199 if (link.link_status) { 200 printf("Port %d Link Up - speed %u " 201 "Mbps - %s\n", (int)portid, 202 (unsigned)link.link_speed, 203 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
204 ("full-duplex") : ("half-duplex\n")); 205 } else { 206 printf("Port %d Link Down\n", (int)portid); 207 } 208 continue; 209 } 210 /* clear all_ports_up flag if any link down */ 211 if (link.link_status == 0) { 212 all_ports_up = 0; 213 break; 214 } 215 } 216 217 /* after finally printing all link status, get out */ 218 if (print_flag == 1) 219 break; 220 221 if (all_ports_up == 0) { 222 printf("."); 223 fflush(stdout); 224 rte_delay_ms(CHECK_INTERVAL); 225 } 226 227 /* set the print_flag if all ports up or timeout */ 228 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 229 print_flag = 1; 230 printf("done\n"); 231 } 232 } 233 } 234 235 static int 236 init_lcore_conf(void) 237 { 238 uint8_t nb_dev_ports = rte_eth_dev_count_avail(); 239 if (nb_dev_ports == 0) { 240 rte_exit(EXIT_FAILURE, "No probed ethernet devices\n"); 241 } 242 243 if (ff_global_cfg.dpdk.max_portid >= nb_dev_ports) { 244 rte_exit(EXIT_FAILURE, "this machine doesn't have port %d.\n", 245 ff_global_cfg.dpdk.max_portid); 246 } 247 248 lcore_conf.port_cfgs = ff_global_cfg.dpdk.port_cfgs; 249 lcore_conf.proc_id = ff_global_cfg.dpdk.proc_id; 250 251 uint16_t socket_id = 0; 252 if (numa_on) { 253 socket_id = rte_lcore_to_socket_id(rte_lcore_id()); 254 } 255 256 lcore_conf.socket_id = socket_id; 257 258 uint16_t lcore_id = ff_global_cfg.dpdk.proc_lcore[lcore_conf.proc_id]; 259 if (!rte_lcore_is_enabled(lcore_id)) { 260 rte_exit(EXIT_FAILURE, "lcore %u unavailable\n", lcore_id); 261 } 262 263 int j; 264 for (j = 0; j < ff_global_cfg.dpdk.nb_ports; ++j) { 265 uint16_t port_id = ff_global_cfg.dpdk.portid_list[j]; 266 struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[port_id]; 267 268 int queueid = -1; 269 int i; 270 for (i = 0; i < pconf->nb_lcores; i++) { 271 if (pconf->lcore_list[i] == lcore_id) { 272 queueid = i; 273 } 274 } 275 if (queueid < 0) { 276 continue; 277 } 278 printf("lcore: %u, port: %u, queue: %u\n", lcore_id, port_id, queueid); 279 uint16_t nb_rx_queue = lcore_conf.nb_rx_queue; 280 lcore_conf.rx_queue_list[nb_rx_queue].port_id = port_id; 281 lcore_conf.rx_queue_list[nb_rx_queue].queue_id = queueid; 282 lcore_conf.nb_rx_queue++; 283 284 lcore_conf.tx_queue_id[port_id] = queueid; 285 lcore_conf.tx_port_id[lcore_conf.nb_tx_port] = port_id; 286 lcore_conf.nb_tx_port++; 287 288 /* Enable pcap dump */ 289 if (ff_global_cfg.pcap.enable) { 290 ff_enable_pcap(ff_global_cfg.pcap.save_path, ff_global_cfg.pcap.snap_len); 291 } 292 293 lcore_conf.nb_queue_list[port_id] = pconf->nb_lcores; 294 } 295 296 if (lcore_conf.nb_rx_queue == 0) { 297 rte_exit(EXIT_FAILURE, "lcore %u has nothing to do\n", lcore_id); 298 } 299 300 return 0; 301 } 302 303 static int 304 init_mem_pool(void) 305 { 306 uint8_t nb_ports = ff_global_cfg.dpdk.nb_ports; 307 uint32_t nb_lcores = ff_global_cfg.dpdk.nb_procs; 308 uint32_t nb_tx_queue = nb_lcores; 309 uint32_t nb_rx_queue = lcore_conf.nb_rx_queue * nb_lcores; 310 uint16_t max_portid = ff_global_cfg.dpdk.max_portid; 311 312 unsigned nb_mbuf = RTE_ALIGN_CEIL ( 313 (nb_rx_queue * (max_portid + 1) * 2 * RX_QUEUE_SIZE + 314 nb_ports * (max_portid + 1) * 2 * nb_lcores * MAX_PKT_BURST + 315 nb_ports * (max_portid + 1) * 2 * nb_tx_queue * TX_QUEUE_SIZE + 316 nb_lcores * MEMPOOL_CACHE_SIZE + 317 #ifdef FF_KNI 318 nb_ports * KNI_MBUF_MAX + 319 nb_ports * KNI_QUEUE_SIZE + 320 #endif 321 nb_lcores * nb_ports * DISPATCH_RING_SIZE), 322 (unsigned)8192); 323 324 unsigned socketid = 0; 325 uint16_t i, lcore_id; 326 char s[64]; 327 328 for (i = 0; i < ff_global_cfg.dpdk.nb_procs; i++) { 329 lcore_id 
= ff_global_cfg.dpdk.proc_lcore[i]; 330 if (numa_on) { 331 socketid = rte_lcore_to_socket_id(lcore_id); 332 } 333 334 if (socketid >= NB_SOCKETS) { 335 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n", 336 socketid, i, NB_SOCKETS); 337 } 338 339 if (pktmbuf_pool[socketid] != NULL) { 340 continue; 341 } 342 343 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 344 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 345 pktmbuf_pool[socketid] = 346 rte_pktmbuf_pool_create(s, nb_mbuf, 347 MEMPOOL_CACHE_SIZE, 0, 348 RTE_MBUF_DEFAULT_BUF_SIZE, socketid); 349 } else { 350 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 351 pktmbuf_pool[socketid] = rte_mempool_lookup(s); 352 } 353 354 if (pktmbuf_pool[socketid] == NULL) { 355 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool on socket %d\n", socketid); 356 } else { 357 printf("create mbuf pool on socket %d\n", socketid); 358 } 359 360 #ifdef FF_USE_PAGE_ARRAY 361 nb_mbuf = RTE_ALIGN_CEIL ( 362 nb_ports*nb_lcores*MAX_PKT_BURST + 363 nb_ports*nb_tx_queue*TX_QUEUE_SIZE + 364 nb_lcores*MEMPOOL_CACHE_SIZE, 365 (unsigned)4096); 366 ff_init_ref_pool(nb_mbuf, socketid); 367 #endif 368 } 369 370 return 0; 371 } 372 373 static struct rte_ring * 374 create_ring(const char *name, unsigned count, int socket_id, unsigned flags) 375 { 376 struct rte_ring *ring; 377 378 if (name == NULL) { 379 rte_exit(EXIT_FAILURE, "create ring failed, no name!\n"); 380 } 381 382 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 383 ring = rte_ring_create(name, count, socket_id, flags); 384 } else { 385 ring = rte_ring_lookup(name); 386 } 387 388 if (ring == NULL) { 389 rte_exit(EXIT_FAILURE, "create ring:%s failed!\n", name); 390 } 391 392 return ring; 393 } 394 395 static int 396 init_dispatch_ring(void) 397 { 398 int j; 399 char name_buf[RTE_RING_NAMESIZE]; 400 int queueid; 401 402 unsigned socketid = lcore_conf.socket_id; 403 404 /* Create ring according to ports actually being used. 
*/ 405 int nb_ports = ff_global_cfg.dpdk.nb_ports; 406 for (j = 0; j < nb_ports; j++) { 407 uint16_t portid = ff_global_cfg.dpdk.portid_list[j]; 408 struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[portid]; 409 int nb_queues = pconf->nb_lcores; 410 if (dispatch_ring[portid] == NULL) { 411 snprintf(name_buf, RTE_RING_NAMESIZE, "ring_ptr_p%d", portid); 412 413 dispatch_ring[portid] = rte_zmalloc(name_buf, 414 sizeof(struct rte_ring *) * nb_queues, 415 RTE_CACHE_LINE_SIZE); 416 if (dispatch_ring[portid] == NULL) { 417 rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) " 418 "failed\n", name_buf); 419 } 420 } 421 422 for(queueid = 0; queueid < nb_queues; ++queueid) { 423 snprintf(name_buf, RTE_RING_NAMESIZE, "dispatch_ring_p%d_q%d", 424 portid, queueid); 425 dispatch_ring[portid][queueid] = create_ring(name_buf, 426 DISPATCH_RING_SIZE, socketid, RING_F_SC_DEQ); 427 428 if (dispatch_ring[portid][queueid] == NULL) 429 rte_panic("create ring:%s failed!\n", name_buf); 430 431 printf("create ring:%s success, %u ring entries are now free!\n", 432 name_buf, rte_ring_free_count(dispatch_ring[portid][queueid])); 433 } 434 } 435 436 return 0; 437 } 438 439 static void 440 ff_msg_init(struct rte_mempool *mp, 441 __attribute__((unused)) void *opaque_arg, 442 void *obj, __attribute__((unused)) unsigned i) 443 { 444 struct ff_msg *msg = (struct ff_msg *)obj; 445 msg->msg_type = FF_UNKNOWN; 446 msg->buf_addr = (char *)msg + sizeof(struct ff_msg); 447 msg->buf_len = mp->elt_size - sizeof(struct ff_msg); 448 msg->original_buf = NULL; 449 msg->original_buf_len = 0; 450 } 451 452 static int 453 init_msg_ring(void) 454 { 455 uint16_t i, j; 456 uint16_t nb_procs = ff_global_cfg.dpdk.nb_procs; 457 unsigned socketid = lcore_conf.socket_id; 458 459 /* Create message buffer pool */ 460 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 461 message_pool = rte_mempool_create(FF_MSG_POOL, 462 MSG_RING_SIZE * 2 * nb_procs, 463 MAX_MSG_BUF_SIZE, MSG_RING_SIZE / 2, 0, 464 NULL, NULL, ff_msg_init, NULL, 465 socketid, 0); 466 } else { 467 message_pool = rte_mempool_lookup(FF_MSG_POOL); 468 } 469 470 if (message_pool == NULL) { 471 rte_panic("Create msg mempool failed\n"); 472 } 473 474 for(i = 0; i < nb_procs; ++i) { 475 snprintf(msg_ring[i].ring_name[0], RTE_RING_NAMESIZE, 476 "%s%u", FF_MSG_RING_IN, i); 477 msg_ring[i].ring[0] = create_ring(msg_ring[i].ring_name[0], 478 MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ); 479 if (msg_ring[i].ring[0] == NULL) 480 rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[0]); 481 482 for (j = FF_SYSCTL; j < FF_MSG_NUM; j++) { 483 snprintf(msg_ring[i].ring_name[j], RTE_RING_NAMESIZE, 484 "%s%u_%u", FF_MSG_RING_OUT, i, j); 485 msg_ring[i].ring[j] = create_ring(msg_ring[i].ring_name[j], 486 MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ); 487 if (msg_ring[i].ring[j] == NULL) 488 rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[j]); 489 } 490 } 491 492 return 0; 493 } 494 495 #ifdef FF_KNI 496 497 static enum FF_KNICTL_CMD get_kni_action(const char *c){ 498 if (!c) 499 return FF_KNICTL_ACTION_DEFAULT; 500 if (0 == strcasecmp(c, "alltokni")){ 501 return FF_KNICTL_ACTION_ALL_TO_KNI; 502 } else if (0 == strcasecmp(c, "alltoff")){ 503 return FF_KNICTL_ACTION_ALL_TO_FF; 504 } else if (0 == strcasecmp(c, "default")){ 505 return FF_KNICTL_ACTION_DEFAULT; 506 } else { 507 return FF_KNICTL_ACTION_DEFAULT; 508 } 509 } 510 511 static int 512 init_kni(void) 513 { 514 int nb_ports = rte_eth_dev_count_avail(); 515 kni_accept = 0; 516 
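    /*
     * kni.method "accept": packets classified as FILTER_KNI (matching the
     * configured KNI tcp/udp ports) are handed to the kernel; any other
     * setting sends only FILTER_UNKNOWN packets to KNI (see process_packets()).
     */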
if(strcasecmp(ff_global_cfg.kni.method, "accept") == 0) 517 kni_accept = 1; 518 519 knictl_action = get_kni_action(ff_global_cfg.kni.kni_action); 520 521 ff_kni_init(nb_ports, ff_global_cfg.kni.tcp_port, 522 ff_global_cfg.kni.udp_port); 523 524 unsigned socket_id = lcore_conf.socket_id; 525 struct rte_mempool *mbuf_pool = pktmbuf_pool[socket_id]; 526 527 nb_ports = ff_global_cfg.dpdk.nb_ports; 528 int i, ret; 529 for (i = 0; i < nb_ports; i++) { 530 uint16_t port_id = ff_global_cfg.dpdk.portid_list[i]; 531 ff_kni_alloc(port_id, socket_id, mbuf_pool, KNI_QUEUE_SIZE); 532 } 533 534 return 0; 535 } 536 #endif 537 538 //RSS reta update will failed when enable flow isolate 539 #ifndef FF_FLOW_ISOLATE 540 static void 541 set_rss_table(uint16_t port_id, uint16_t reta_size, uint16_t nb_queues) 542 { 543 if (reta_size == 0) { 544 return; 545 } 546 547 int reta_conf_size = RTE_MAX(1, reta_size / RTE_RETA_GROUP_SIZE); 548 struct rte_eth_rss_reta_entry64 reta_conf[reta_conf_size]; 549 550 /* config HW indirection table */ 551 unsigned i, j, hash=0; 552 for (i = 0; i < reta_conf_size; i++) { 553 reta_conf[i].mask = ~0ULL; 554 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) { 555 reta_conf[i].reta[j] = hash++ % nb_queues; 556 } 557 } 558 559 if (rte_eth_dev_rss_reta_update(port_id, reta_conf, reta_size)) { 560 rte_exit(EXIT_FAILURE, "port[%d], failed to update rss table\n", 561 port_id); 562 } 563 } 564 #endif 565 566 static int 567 init_port_start(void) 568 { 569 int nb_ports = ff_global_cfg.dpdk.nb_ports; 570 unsigned socketid = 0; 571 struct rte_mempool *mbuf_pool; 572 uint16_t i, j; 573 574 for (i = 0; i < nb_ports; i++) { 575 uint16_t port_id, u_port_id = ff_global_cfg.dpdk.portid_list[i]; 576 struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[u_port_id]; 577 uint16_t nb_queues = pconf->nb_lcores; 578 579 for (j=0; j<=pconf->nb_slaves; j++) { 580 if (j < pconf->nb_slaves) { 581 port_id = pconf->slave_portid_list[j]; 582 printf("To init %s's %d'st slave port[%d]\n", 583 ff_global_cfg.dpdk.bond_cfgs->name, 584 j, port_id); 585 } else { 586 port_id = u_port_id; 587 } 588 589 struct rte_eth_dev_info dev_info; 590 struct rte_eth_conf port_conf = {0}; 591 struct rte_eth_rxconf rxq_conf; 592 struct rte_eth_txconf txq_conf; 593 594 int ret = rte_eth_dev_info_get(port_id, &dev_info); 595 if (ret != 0) 596 rte_exit(EXIT_FAILURE, 597 "Error during getting device (port %u) info: %s\n", 598 port_id, strerror(-ret)); 599 600 if (nb_queues > dev_info.max_rx_queues) { 601 rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_rx_queues[%d]\n", 602 nb_queues, 603 dev_info.max_rx_queues); 604 } 605 606 if (nb_queues > dev_info.max_tx_queues) { 607 rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_tx_queues[%d]\n", 608 nb_queues, 609 dev_info.max_tx_queues); 610 } 611 612 struct rte_ether_addr addr; 613 rte_eth_macaddr_get(port_id, &addr); 614 printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8 615 " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n", 616 (unsigned)port_id, 617 addr.addr_bytes[0], addr.addr_bytes[1], 618 addr.addr_bytes[2], addr.addr_bytes[3], 619 addr.addr_bytes[4], addr.addr_bytes[5]); 620 621 rte_memcpy(pconf->mac, 622 addr.addr_bytes, RTE_ETHER_ADDR_LEN); 623 624 /* Set RSS mode */ 625 uint64_t default_rss_hf = ETH_RSS_PROTO_MASK; 626 port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; 627 port_conf.rx_adv_conf.rss_conf.rss_hf = default_rss_hf; 628 if (dev_info.hash_key_size == 52) { 629 rsskey = default_rsskey_52bytes; 630 rsskey_len = 52; 631 } 632 if (ff_global_cfg.dpdk.symmetric_rss) { 633 printf("Use 
symmetric Receive-side Scaling (RSS) key\n");
                rsskey = symmetric_rsskey;
            }
            port_conf.rx_adv_conf.rss_conf.rss_key = rsskey;
            port_conf.rx_adv_conf.rss_conf.rss_key_len = rsskey_len;
            port_conf.rx_adv_conf.rss_conf.rss_hf &= dev_info.flow_type_rss_offloads;
            if (port_conf.rx_adv_conf.rss_conf.rss_hf !=
                ETH_RSS_PROTO_MASK) {
                printf("Port %u modified RSS hash function based on hardware support,"
                    "requested:%#"PRIx64" configured:%#"PRIx64"\n",
                    port_id, default_rss_hf,
                    port_conf.rx_adv_conf.rss_conf.rss_hf);
            }

            if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) {
                port_conf.txmode.offloads |=
                    DEV_TX_OFFLOAD_MBUF_FAST_FREE;
            }

            /* Set Rx VLAN stripping */
            if (ff_global_cfg.dpdk.vlan_strip) {
                if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP) {
                    port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
                }
            }

            /* Enable HW CRC stripping */
            port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_KEEP_CRC;

            /* FIXME: Enable TCP LRO? */
#if 0
            if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO) {
                printf("LRO is supported\n");
                port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_LRO;
                pconf->hw_features.rx_lro = 1;
            }
#endif

            /* Set Rx checksum checking */
            if ((dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM) &&
                (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM) &&
                (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)) {
                printf("RX checksum offload supported\n");
                port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CHECKSUM;
                pconf->hw_features.rx_csum = 1;
            }

            if (ff_global_cfg.dpdk.tx_csum_offoad_skip == 0) {
                if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
                    printf("TX ip checksum offload supported\n");
                    port_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
                    pconf->hw_features.tx_csum_ip = 1;
                }

                if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) &&
                    (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)) {
                    printf("TX TCP&UDP checksum offload supported\n");
                    port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM;
                    pconf->hw_features.tx_csum_l4 = 1;
                }
            } else {
                printf("TX checksum offload is disabled\n");
            }

            if (ff_global_cfg.dpdk.tso) {
                if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) {
                    printf("TSO is supported\n");
                    port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO;
                    pconf->hw_features.tx_tso = 1;
                }
            } else {
                printf("TSO is disabled\n");
            }

            if (dev_info.reta_size) {
                /* reta size must be a power of 2 */
                assert((dev_info.reta_size & (dev_info.reta_size - 1)) == 0);

                rss_reta_size[port_id] = dev_info.reta_size;
                printf("port[%d]: rss table size: %d\n", port_id,
                    dev_info.reta_size);
            }

            if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
                continue;
            }

            ret = rte_eth_dev_configure(port_id, nb_queues, nb_queues, &port_conf);
            if (ret != 0) {
                return ret;
            }

            static uint16_t nb_rxd = RX_QUEUE_SIZE;
            static uint16_t nb_txd = TX_QUEUE_SIZE;
            ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd);
            if (ret < 0)
                printf("Could not adjust number of descriptors "
                    "for port%u (%d)\n", (unsigned)port_id, ret);

            uint16_t q;
            for (q = 0; q < nb_queues; q++) {
                if (numa_on) {
                    uint16_t lcore_id = lcore_conf.port_cfgs[port_id].lcore_list[q];
                    socketid = rte_lcore_to_socket_id(lcore_id);
                }
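                /* Use the mbuf pool created on the same NUMA socket as this queue's lcore. */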
mbuf_pool = pktmbuf_pool[socketid]; 739 740 txq_conf = dev_info.default_txconf; 741 txq_conf.offloads = port_conf.txmode.offloads; 742 ret = rte_eth_tx_queue_setup(port_id, q, nb_txd, 743 socketid, &txq_conf); 744 if (ret < 0) { 745 return ret; 746 } 747 748 rxq_conf = dev_info.default_rxconf; 749 rxq_conf.offloads = port_conf.rxmode.offloads; 750 ret = rte_eth_rx_queue_setup(port_id, q, nb_rxd, 751 socketid, &rxq_conf, mbuf_pool); 752 if (ret < 0) { 753 return ret; 754 } 755 } 756 757 758 if (strncmp(dev_info.driver_name, BOND_DRIVER_NAME, 759 strlen(dev_info.driver_name)) == 0) { 760 761 rte_eth_macaddr_get(port_id, &addr); 762 printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8 763 " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n", 764 (unsigned)port_id, 765 addr.addr_bytes[0], addr.addr_bytes[1], 766 addr.addr_bytes[2], addr.addr_bytes[3], 767 addr.addr_bytes[4], addr.addr_bytes[5]); 768 769 rte_memcpy(pconf->mac, 770 addr.addr_bytes, RTE_ETHER_ADDR_LEN); 771 772 int mode, count, x; 773 uint16_t slaves[RTE_MAX_ETHPORTS], len = RTE_MAX_ETHPORTS; 774 775 mode = rte_eth_bond_mode_get(port_id); 776 printf("Port %u, bond mode:%d\n", port_id, mode); 777 778 count = rte_eth_bond_slaves_get(port_id, slaves, len); 779 printf("Port %u, %s's slave ports count:%d\n", port_id, 780 ff_global_cfg.dpdk.bond_cfgs->name, count); 781 for (x=0; x<count; x++) { 782 printf("Port %u, %s's slave port[%u]\n", port_id, 783 ff_global_cfg.dpdk.bond_cfgs->name, slaves[x]); 784 } 785 } 786 787 ret = rte_eth_dev_start(port_id); 788 if (ret < 0) { 789 return ret; 790 } 791 //RSS reta update will failed when enable flow isolate 792 #ifndef FF_FLOW_ISOLATE 793 if (nb_queues > 1) { 794 /* set HW rss hash function to Toeplitz. */ 795 if (!rte_eth_dev_filter_supported(port_id, RTE_ETH_FILTER_HASH)) { 796 struct rte_eth_hash_filter_info info = {0}; 797 info.info_type = RTE_ETH_HASH_FILTER_GLOBAL_CONFIG; 798 info.info.global_conf.hash_func = RTE_ETH_HASH_FUNCTION_TOEPLITZ; 799 800 if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_HASH, 801 RTE_ETH_FILTER_SET, &info) < 0) { 802 rte_exit(EXIT_FAILURE, "port[%d] set hash func failed\n", 803 port_id); 804 } 805 } 806 807 set_rss_table(port_id, dev_info.reta_size, nb_queues); 808 } 809 #endif 810 811 /* Enable RX in promiscuous mode for the Ethernet device. */ 812 if (ff_global_cfg.dpdk.promiscuous) { 813 ret = rte_eth_promiscuous_enable(port_id); 814 if (ret == 0) { 815 printf("set port %u to promiscuous mode ok\n", port_id); 816 } else { 817 printf("set port %u to promiscuous mode error\n", port_id); 818 } 819 } 820 } 821 } 822 823 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 824 check_all_ports_link_status(); 825 } 826 827 return 0; 828 } 829 830 static int 831 init_clock(void) 832 { 833 rte_timer_subsystem_init(); 834 uint64_t hz = rte_get_timer_hz(); 835 uint64_t intrs = MS_PER_S/ff_global_cfg.freebsd.hz; 836 uint64_t tsc = (hz + MS_PER_S - 1) / MS_PER_S*intrs; 837 838 rte_timer_init(&freebsd_clock); 839 rte_timer_reset(&freebsd_clock, tsc, PERIODICAL, 840 rte_lcore_id(), &ff_hardclock_job, NULL); 841 842 ff_update_current_ts(); 843 844 return 0; 845 } 846 847 #ifdef FF_FLOW_ISOLATE 848 /** Print a message out of a flow error. 
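 * Returns the negated rte_errno value so callers can propagate the failure.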
*/ 849 static int 850 port_flow_complain(struct rte_flow_error *error) 851 { 852 static const char *const errstrlist[] = { 853 [RTE_FLOW_ERROR_TYPE_NONE] = "no error", 854 [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified", 855 [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)", 856 [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field", 857 [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field", 858 [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field", 859 [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field", 860 [RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER] = "transfer field", 861 [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure", 862 [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length", 863 [RTE_FLOW_ERROR_TYPE_ITEM_SPEC] = "item specification", 864 [RTE_FLOW_ERROR_TYPE_ITEM_LAST] = "item specification range", 865 [RTE_FLOW_ERROR_TYPE_ITEM_MASK] = "item specification mask", 866 [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item", 867 [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions", 868 [RTE_FLOW_ERROR_TYPE_ACTION_CONF] = "action configuration", 869 [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action", 870 }; 871 const char *errstr; 872 char buf[32]; 873 int err = rte_errno; 874 875 if ((unsigned int)error->type >= RTE_DIM(errstrlist) || 876 !errstrlist[error->type]) 877 errstr = "unknown type"; 878 else 879 errstr = errstrlist[error->type]; 880 printf("Caught error type %d (%s): %s%s: %s\n", 881 error->type, errstr, 882 error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ", 883 error->cause), buf) : "", 884 error->message ? error->message : "(no stated reason)", 885 rte_strerror(err)); 886 return -err; 887 } 888 889 static int 890 port_flow_isolate(uint16_t port_id, int set) 891 { 892 struct rte_flow_error error; 893 894 /* Poisoning to make sure PMDs update it in case of error. */ 895 memset(&error, 0x66, sizeof(error)); 896 if (rte_flow_isolate(port_id, set, &error)) 897 return port_flow_complain(&error); 898 printf("Ingress traffic on port %u is %s to the defined flow rules\n", 899 port_id, 900 set ? 
"now restricted" : "not restricted anymore"); 901 return 0; 902 } 903 904 static int 905 create_tcp_flow(uint16_t port_id, uint16_t tcp_port) { 906 struct rte_flow_attr attr = {.ingress = 1}; 907 struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[port_id]; 908 int nb_queues = pconf->nb_lcores; 909 uint16_t queue[RTE_MAX_QUEUES_PER_PORT]; 910 int i = 0, j = 0; 911 for (i = 0, j = 0; i < nb_queues; ++i) 912 queue[j++] = i; 913 struct rte_flow_action_rss rss = { 914 .types = ETH_RSS_NONFRAG_IPV4_TCP, 915 .key_len = rsskey_len, 916 .key = rsskey, 917 .queue_num = j, 918 .queue = queue, 919 }; 920 921 struct rte_eth_dev_info dev_info; 922 int ret = rte_eth_dev_info_get(port_id, &dev_info); 923 if (ret != 0) 924 rte_exit(EXIT_FAILURE, "Error during getting device (port %u) info: %s\n", port_id, strerror(-ret)); 925 926 struct rte_flow_item pattern[3]; 927 struct rte_flow_action action[2]; 928 struct rte_flow_item_tcp tcp_spec; 929 struct rte_flow_item_tcp tcp_mask = { 930 .hdr = { 931 .src_port = RTE_BE16(0x0000), 932 .dst_port = RTE_BE16(0xffff), 933 }, 934 }; 935 struct rte_flow_error error; 936 937 memset(pattern, 0, sizeof(pattern)); 938 memset(action, 0, sizeof(action)); 939 940 /* set the dst ipv4 packet to the required value */ 941 pattern[0].type = RTE_FLOW_ITEM_TYPE_IPV4; 942 943 memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp)); 944 tcp_spec.hdr.dst_port = rte_cpu_to_be_16(tcp_port); 945 pattern[1].type = RTE_FLOW_ITEM_TYPE_TCP; 946 pattern[1].spec = &tcp_spec; 947 pattern[1].mask = &tcp_mask; 948 949 /* end the pattern array */ 950 pattern[2].type = RTE_FLOW_ITEM_TYPE_END; 951 952 /* create the action */ 953 action[0].type = RTE_FLOW_ACTION_TYPE_RSS; 954 action[0].conf = &rss; 955 action[1].type = RTE_FLOW_ACTION_TYPE_END; 956 957 struct rte_flow *flow; 958 /* validate and create the flow rule */ 959 if (!rte_flow_validate(port_id, &attr, pattern, action, &error)) { 960 flow = rte_flow_create(port_id, &attr, pattern, action, &error); 961 if (!flow) { 962 return port_flow_complain(&error); 963 } 964 } 965 966 memset(pattern, 0, sizeof(pattern)); 967 968 /* set the dst ipv4 packet to the required value */ 969 pattern[0].type = RTE_FLOW_ITEM_TYPE_IPV4; 970 971 struct rte_flow_item_tcp tcp_src_mask = { 972 .hdr = { 973 .src_port = RTE_BE16(0xffff), 974 .dst_port = RTE_BE16(0x0000), 975 }, 976 }; 977 978 memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp)); 979 tcp_spec.hdr.src_port = rte_cpu_to_be_16(tcp_port); 980 pattern[1].type = RTE_FLOW_ITEM_TYPE_TCP; 981 pattern[1].spec = &tcp_spec; 982 pattern[1].mask = &tcp_src_mask; 983 984 /* end the pattern array */ 985 pattern[2].type = RTE_FLOW_ITEM_TYPE_END; 986 987 /* validate and create the flow rule */ 988 if (!rte_flow_validate(port_id, &attr, pattern, action, &error)) { 989 flow = rte_flow_create(port_id, &attr, pattern, action, &error); 990 if (!flow) { 991 return port_flow_complain(&error); 992 } 993 } 994 995 return 1; 996 } 997 998 static int 999 init_flow(uint16_t port_id, uint16_t tcp_port) { 1000 // struct ff_flow_cfg fcfg = ff_global_cfg.dpdk.flow_cfgs[0]; 1001 1002 // int i; 1003 // for (i = 0; i < fcfg.nb_port; i++) { 1004 // if(!create_tcp_flow(fcfg.port_id, fcfg.tcp_ports[i])) { 1005 // return 0; 1006 // } 1007 // } 1008 1009 if(!create_tcp_flow(port_id, tcp_port)) { 1010 rte_exit(EXIT_FAILURE, "create tcp flow failed\n"); 1011 return -1; 1012 } 1013 1014 /* ARP rule */ 1015 struct rte_flow_attr attr = {.ingress = 1}; 1016 struct rte_flow_action_queue queue = {.index = 0}; 1017 1018 struct rte_flow_item 
    pattern_[2];
    struct rte_flow_action action[2];
    struct rte_flow_item_eth eth_type = {.type = RTE_BE16(0x0806)};
    struct rte_flow_item_eth eth_mask = {
        .type = RTE_BE16(0xffff)
    };

    memset(pattern_, 0, sizeof(pattern_));
    memset(action, 0, sizeof(action));

    pattern_[0].type = RTE_FLOW_ITEM_TYPE_ETH;
    pattern_[0].spec = &eth_type;
    pattern_[0].mask = &eth_mask;

    pattern_[1].type = RTE_FLOW_ITEM_TYPE_END;

    /* create the action */
    action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
    action[0].conf = &queue;
    action[1].type = RTE_FLOW_ACTION_TYPE_END;

    struct rte_flow *flow;
    struct rte_flow_error error;
    /* validate and create the flow rule */
    if (!rte_flow_validate(port_id, &attr, pattern_, action, &error)) {
        flow = rte_flow_create(port_id, &attr, pattern_, action, &error);
        if (!flow) {
            return port_flow_complain(&error);
        }
    }

    return 1;
}

#endif

int
ff_dpdk_init(int argc, char **argv)
{
    if (ff_global_cfg.dpdk.nb_procs < 1 ||
        ff_global_cfg.dpdk.nb_procs > RTE_MAX_LCORE ||
        ff_global_cfg.dpdk.proc_id >= ff_global_cfg.dpdk.nb_procs ||
        ff_global_cfg.dpdk.proc_id < 0) {
        printf("param num_procs[%d] or proc_id[%d] error!\n",
            ff_global_cfg.dpdk.nb_procs,
            ff_global_cfg.dpdk.proc_id);
        exit(1);
    }

    int ret = rte_eal_init(argc, argv);
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
    }

    numa_on = ff_global_cfg.dpdk.numa_on;

    idle_sleep = ff_global_cfg.dpdk.idle_sleep;
    pkt_tx_delay = ff_global_cfg.dpdk.pkt_tx_delay > BURST_TX_DRAIN_US ?
        BURST_TX_DRAIN_US : ff_global_cfg.dpdk.pkt_tx_delay;

    init_lcore_conf();

    init_mem_pool();

    init_dispatch_ring();

    init_msg_ring();

#ifdef FF_KNI
    enable_kni = ff_global_cfg.kni.enable;
    if (enable_kni) {
        init_kni();
    }
#endif

#ifdef FF_USE_PAGE_ARRAY
    ff_mmap_init();
#endif

#ifdef FF_FLOW_ISOLATE
    // run once in the primary process
    if (0 == lcore_conf.tx_queue_id[0]) {
        ret = port_flow_isolate(0, 1);
        if (ret < 0)
            rte_exit(EXIT_FAILURE, "init_port_isolate failed\n");
    }
#endif

    ret = init_port_start();
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "init_port_start failed\n");
    }

    init_clock();
#ifdef FF_FLOW_ISOLATE
    // Only gives an example usage here: port_id = 0, tcp_port = 80.
    // Recommend:
    // 1. init_flow should replace `set_rss_table` in the `init_port_start` loop; this can cover every port in port_id_list instead of only device 0 (port_id).
    // 2.
using config options `tcp_port` replace magic number of 80 1117 ret = init_flow(0, 80); 1118 if (ret < 0) { 1119 rte_exit(EXIT_FAILURE, "init_port_flow failed\n"); 1120 } 1121 #endif 1122 return 0; 1123 } 1124 1125 static void 1126 ff_veth_input(const struct ff_dpdk_if_context *ctx, struct rte_mbuf *pkt) 1127 { 1128 uint8_t rx_csum = ctx->hw_features.rx_csum; 1129 if (rx_csum) { 1130 if (pkt->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD)) { 1131 rte_pktmbuf_free(pkt); 1132 return; 1133 } 1134 } 1135 1136 void *data = rte_pktmbuf_mtod(pkt, void*); 1137 uint16_t len = rte_pktmbuf_data_len(pkt); 1138 1139 void *hdr = ff_mbuf_gethdr(pkt, pkt->pkt_len, data, len, rx_csum); 1140 if (hdr == NULL) { 1141 rte_pktmbuf_free(pkt); 1142 return; 1143 } 1144 1145 if (pkt->ol_flags & PKT_RX_VLAN_STRIPPED) { 1146 ff_mbuf_set_vlan_info(hdr, pkt->vlan_tci); 1147 } 1148 1149 struct rte_mbuf *pn = pkt->next; 1150 void *prev = hdr; 1151 while(pn != NULL) { 1152 data = rte_pktmbuf_mtod(pn, void*); 1153 len = rte_pktmbuf_data_len(pn); 1154 1155 void *mb = ff_mbuf_get(prev, pn, data, len); 1156 if (mb == NULL) { 1157 ff_mbuf_free(hdr); 1158 rte_pktmbuf_free(pkt); 1159 return; 1160 } 1161 pn = pn->next; 1162 prev = mb; 1163 } 1164 1165 ff_veth_process_packet(ctx->ifp, hdr); 1166 } 1167 1168 static enum FilterReturn 1169 protocol_filter(const void *data, uint16_t len) 1170 { 1171 if(len < RTE_ETHER_ADDR_LEN) 1172 return FILTER_UNKNOWN; 1173 1174 const struct rte_ether_hdr *hdr; 1175 const struct rte_vlan_hdr *vlanhdr; 1176 hdr = (const struct rte_ether_hdr *)data; 1177 uint16_t ether_type = rte_be_to_cpu_16(hdr->ether_type); 1178 data += RTE_ETHER_HDR_LEN; 1179 len -= RTE_ETHER_HDR_LEN; 1180 1181 if (ether_type == RTE_ETHER_TYPE_VLAN) { 1182 vlanhdr = (struct rte_vlan_hdr *)data; 1183 ether_type = rte_be_to_cpu_16(vlanhdr->eth_proto); 1184 data += sizeof(struct rte_vlan_hdr); 1185 len -= sizeof(struct rte_vlan_hdr); 1186 } 1187 1188 if(ether_type == RTE_ETHER_TYPE_ARP) 1189 return FILTER_ARP; 1190 1191 #ifdef INET6 1192 if (ether_type == RTE_ETHER_TYPE_IPV6) { 1193 return ff_kni_proto_filter(data, 1194 len, ether_type); 1195 } 1196 #endif 1197 1198 #ifndef FF_KNI 1199 return FILTER_UNKNOWN; 1200 #else 1201 if (!enable_kni) { 1202 return FILTER_UNKNOWN; 1203 } 1204 1205 if(ether_type != RTE_ETHER_TYPE_IPV4) 1206 return FILTER_UNKNOWN; 1207 1208 return ff_kni_proto_filter(data, 1209 len, ether_type); 1210 #endif 1211 } 1212 1213 static inline void 1214 pktmbuf_deep_attach(struct rte_mbuf *mi, const struct rte_mbuf *m) 1215 { 1216 struct rte_mbuf *md; 1217 void *src, *dst; 1218 1219 dst = rte_pktmbuf_mtod(mi, void *); 1220 src = rte_pktmbuf_mtod(m, void *); 1221 1222 mi->data_len = m->data_len; 1223 rte_memcpy(dst, src, m->data_len); 1224 1225 mi->port = m->port; 1226 mi->vlan_tci = m->vlan_tci; 1227 mi->vlan_tci_outer = m->vlan_tci_outer; 1228 mi->tx_offload = m->tx_offload; 1229 mi->hash = m->hash; 1230 mi->ol_flags = m->ol_flags; 1231 mi->packet_type = m->packet_type; 1232 } 1233 1234 /* copied from rte_pktmbuf_clone */ 1235 static inline struct rte_mbuf * 1236 pktmbuf_deep_clone(const struct rte_mbuf *md, 1237 struct rte_mempool *mp) 1238 { 1239 struct rte_mbuf *mc, *mi, **prev; 1240 uint32_t pktlen; 1241 uint8_t nseg; 1242 1243 if (unlikely ((mc = rte_pktmbuf_alloc(mp)) == NULL)) 1244 return NULL; 1245 1246 mi = mc; 1247 prev = &mi->next; 1248 pktlen = md->pkt_len; 1249 nseg = 0; 1250 1251 do { 1252 nseg++; 1253 pktmbuf_deep_attach(mi, md); 1254 *prev = mi; 1255 prev = &mi->next; 1256 } while ((md = 
md->next) != NULL && 1257 (mi = rte_pktmbuf_alloc(mp)) != NULL); 1258 1259 *prev = NULL; 1260 mc->nb_segs = nseg; 1261 mc->pkt_len = pktlen; 1262 1263 /* Allocation of new indirect segment failed */ 1264 if (unlikely (mi == NULL)) { 1265 rte_pktmbuf_free(mc); 1266 return NULL; 1267 } 1268 1269 __rte_mbuf_sanity_check(mc, 1); 1270 return mc; 1271 } 1272 1273 static inline void 1274 process_packets(uint16_t port_id, uint16_t queue_id, struct rte_mbuf **bufs, 1275 uint16_t count, const struct ff_dpdk_if_context *ctx, int pkts_from_ring) 1276 { 1277 struct lcore_conf *qconf = &lcore_conf; 1278 uint16_t nb_queues = qconf->nb_queue_list[port_id]; 1279 1280 uint16_t i; 1281 for (i = 0; i < count; i++) { 1282 struct rte_mbuf *rtem = bufs[i]; 1283 1284 if (unlikely( ff_global_cfg.pcap.enable)) { 1285 if (!pkts_from_ring) { 1286 ff_dump_packets( ff_global_cfg.pcap.save_path, rtem, ff_global_cfg.pcap.snap_len, ff_global_cfg.pcap.save_len); 1287 } 1288 } 1289 1290 void *data = rte_pktmbuf_mtod(rtem, void*); 1291 uint16_t len = rte_pktmbuf_data_len(rtem); 1292 1293 if (!pkts_from_ring) { 1294 ff_traffic.rx_packets++; 1295 ff_traffic.rx_bytes += len; 1296 } 1297 1298 if (!pkts_from_ring && packet_dispatcher) { 1299 int ret = (*packet_dispatcher)(data, &len, queue_id, nb_queues); 1300 if (ret == FF_DISPATCH_RESPONSE) { 1301 rte_pktmbuf_pkt_len(rtem) = rte_pktmbuf_data_len(rtem) = len; 1302 1303 /* 1304 * We have not support vlan out strip 1305 */ 1306 if (rtem->vlan_tci) { 1307 data = rte_pktmbuf_prepend(rtem, sizeof(struct rte_vlan_hdr)); 1308 if (data != NULL) { 1309 memmove(data, data + sizeof(struct rte_vlan_hdr), RTE_ETHER_HDR_LEN); 1310 struct rte_ether_hdr *etherhdr = (struct rte_ether_hdr *)data; 1311 struct rte_vlan_hdr *vlanhdr = (struct rte_vlan_hdr *)(data + RTE_ETHER_HDR_LEN); 1312 vlanhdr->vlan_tci = rte_cpu_to_be_16(rtem->vlan_tci); 1313 vlanhdr->eth_proto = etherhdr->ether_type; 1314 etherhdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN); 1315 } 1316 } 1317 send_single_packet(rtem, port_id); 1318 continue; 1319 } 1320 1321 if (ret == FF_DISPATCH_ERROR || ret >= nb_queues) { 1322 rte_pktmbuf_free(rtem); 1323 continue; 1324 } 1325 1326 if (ret != queue_id) { 1327 ret = rte_ring_enqueue(dispatch_ring[port_id][ret], rtem); 1328 if (ret < 0) 1329 rte_pktmbuf_free(rtem); 1330 1331 continue; 1332 } 1333 } 1334 1335 enum FilterReturn filter = protocol_filter(data, len); 1336 #ifdef INET6 1337 if (filter == FILTER_ARP || filter == FILTER_NDP) { 1338 #else 1339 if (filter == FILTER_ARP) { 1340 #endif 1341 struct rte_mempool *mbuf_pool; 1342 struct rte_mbuf *mbuf_clone; 1343 if (!pkts_from_ring) { 1344 uint16_t j; 1345 for(j = 0; j < nb_queues; ++j) { 1346 if(j == queue_id) 1347 continue; 1348 1349 unsigned socket_id = 0; 1350 if (numa_on) { 1351 uint16_t lcore_id = qconf->port_cfgs[port_id].lcore_list[j]; 1352 socket_id = rte_lcore_to_socket_id(lcore_id); 1353 } 1354 mbuf_pool = pktmbuf_pool[socket_id]; 1355 mbuf_clone = pktmbuf_deep_clone(rtem, mbuf_pool); 1356 if(mbuf_clone) { 1357 int ret = rte_ring_enqueue(dispatch_ring[port_id][j], 1358 mbuf_clone); 1359 if (ret < 0) 1360 rte_pktmbuf_free(mbuf_clone); 1361 } 1362 } 1363 } 1364 1365 #ifdef FF_KNI 1366 if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) { 1367 mbuf_pool = pktmbuf_pool[qconf->socket_id]; 1368 mbuf_clone = pktmbuf_deep_clone(rtem, mbuf_pool); 1369 if(mbuf_clone) { 1370 ff_kni_enqueue(port_id, mbuf_clone); 1371 } 1372 } 1373 #endif 1374 ff_veth_input(ctx, rtem); 1375 #ifdef FF_KNI 1376 } else if (enable_kni) { 1377 
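            /*
             * Not an ARP/NDP packet: route it according to the current knictl
             * action; the default action falls back to the accept/reject
             * method configured for KNI.
             */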
if (knictl_action == FF_KNICTL_ACTION_ALL_TO_KNI){ 1378 ff_kni_enqueue(port_id, rtem); 1379 } else if (knictl_action == FF_KNICTL_ACTION_ALL_TO_FF){ 1380 ff_veth_input(ctx, rtem); 1381 } else if (knictl_action == FF_KNICTL_ACTION_DEFAULT){ 1382 if (enable_kni && 1383 ((filter == FILTER_KNI && kni_accept) || 1384 (filter == FILTER_UNKNOWN && !kni_accept)) ) { 1385 ff_kni_enqueue(port_id, rtem); 1386 } else { 1387 ff_veth_input(ctx, rtem); 1388 } 1389 } else { 1390 ff_veth_input(ctx, rtem); 1391 } 1392 #endif 1393 } else { 1394 ff_veth_input(ctx, rtem); 1395 } 1396 } 1397 } 1398 1399 static inline int 1400 process_dispatch_ring(uint16_t port_id, uint16_t queue_id, 1401 struct rte_mbuf **pkts_burst, const struct ff_dpdk_if_context *ctx) 1402 { 1403 /* read packet from ring buf and to process */ 1404 uint16_t nb_rb; 1405 nb_rb = rte_ring_dequeue_burst(dispatch_ring[port_id][queue_id], 1406 (void **)pkts_burst, MAX_PKT_BURST, NULL); 1407 1408 if(nb_rb > 0) { 1409 process_packets(port_id, queue_id, pkts_burst, nb_rb, ctx, 1); 1410 } 1411 1412 return 0; 1413 } 1414 1415 static inline void 1416 handle_sysctl_msg(struct ff_msg *msg) 1417 { 1418 int ret = ff_sysctl(msg->sysctl.name, msg->sysctl.namelen, 1419 msg->sysctl.old, msg->sysctl.oldlenp, msg->sysctl.new, 1420 msg->sysctl.newlen); 1421 1422 if (ret < 0) { 1423 msg->result = errno; 1424 } else { 1425 msg->result = 0; 1426 } 1427 } 1428 1429 static inline void 1430 handle_ioctl_msg(struct ff_msg *msg) 1431 { 1432 int fd, ret; 1433 #ifdef INET6 1434 if (msg->msg_type == FF_IOCTL6) { 1435 fd = ff_socket(AF_INET6, SOCK_DGRAM, 0); 1436 } else 1437 #endif 1438 fd = ff_socket(AF_INET, SOCK_DGRAM, 0); 1439 1440 if (fd < 0) { 1441 ret = -1; 1442 goto done; 1443 } 1444 1445 ret = ff_ioctl_freebsd(fd, msg->ioctl.cmd, msg->ioctl.data); 1446 1447 ff_close(fd); 1448 1449 done: 1450 if (ret < 0) { 1451 msg->result = errno; 1452 } else { 1453 msg->result = 0; 1454 } 1455 } 1456 1457 static inline void 1458 handle_route_msg(struct ff_msg *msg) 1459 { 1460 int ret = ff_rtioctl(msg->route.fib, msg->route.data, 1461 &msg->route.len, msg->route.maxlen); 1462 if (ret < 0) { 1463 msg->result = errno; 1464 } else { 1465 msg->result = 0; 1466 } 1467 } 1468 1469 static inline void 1470 handle_top_msg(struct ff_msg *msg) 1471 { 1472 msg->top = ff_top_status; 1473 msg->result = 0; 1474 } 1475 1476 #ifdef FF_NETGRAPH 1477 static inline void 1478 handle_ngctl_msg(struct ff_msg *msg) 1479 { 1480 int ret = ff_ngctl(msg->ngctl.cmd, msg->ngctl.data); 1481 if (ret < 0) { 1482 msg->result = errno; 1483 } else { 1484 msg->result = 0; 1485 msg->ngctl.ret = ret; 1486 } 1487 } 1488 #endif 1489 1490 #ifdef FF_IPFW 1491 static inline void 1492 handle_ipfw_msg(struct ff_msg *msg) 1493 { 1494 int fd, ret; 1495 fd = ff_socket(AF_INET, SOCK_RAW, IPPROTO_RAW); 1496 if (fd < 0) { 1497 ret = -1; 1498 goto done; 1499 } 1500 1501 switch (msg->ipfw.cmd) { 1502 case FF_IPFW_GET: 1503 ret = ff_getsockopt_freebsd(fd, msg->ipfw.level, 1504 msg->ipfw.optname, msg->ipfw.optval, 1505 msg->ipfw.optlen); 1506 break; 1507 case FF_IPFW_SET: 1508 ret = ff_setsockopt_freebsd(fd, msg->ipfw.level, 1509 msg->ipfw.optname, msg->ipfw.optval, 1510 *(msg->ipfw.optlen)); 1511 break; 1512 default: 1513 ret = -1; 1514 errno = ENOTSUP; 1515 break; 1516 } 1517 1518 ff_close(fd); 1519 1520 done: 1521 if (ret < 0) { 1522 msg->result = errno; 1523 } else { 1524 msg->result = 0; 1525 } 1526 } 1527 #endif 1528 1529 static inline void 1530 handle_traffic_msg(struct ff_msg *msg) 1531 { 1532 msg->traffic = ff_traffic; 1533 
msg->result = 0; 1534 } 1535 1536 #ifdef FF_KNI 1537 static inline void 1538 handle_knictl_msg(struct ff_msg *msg) 1539 { 1540 if (msg->knictl.kni_cmd == FF_KNICTL_CMD_SET){ 1541 switch (msg->knictl.kni_action){ 1542 case FF_KNICTL_ACTION_ALL_TO_FF: knictl_action = FF_KNICTL_ACTION_ALL_TO_FF; msg->result = 0; printf("new kni action: alltoff\n"); break; 1543 case FF_KNICTL_ACTION_ALL_TO_KNI: knictl_action = FF_KNICTL_ACTION_ALL_TO_KNI; msg->result = 0; printf("new kni action: alltokni\n"); break; 1544 case FF_KNICTL_ACTION_DEFAULT: knictl_action = FF_KNICTL_ACTION_DEFAULT; msg->result = 0; printf("new kni action: default\n"); break; 1545 default: msg->result = -1; 1546 } 1547 } 1548 else if (msg->knictl.kni_cmd == FF_KNICTL_CMD_GET){ 1549 msg->knictl.kni_action = knictl_action; 1550 } else { 1551 msg->result = -2; 1552 } 1553 } 1554 #endif 1555 1556 static inline void 1557 handle_default_msg(struct ff_msg *msg) 1558 { 1559 msg->result = ENOTSUP; 1560 } 1561 1562 static inline void 1563 handle_msg(struct ff_msg *msg, uint16_t proc_id) 1564 { 1565 switch (msg->msg_type) { 1566 case FF_SYSCTL: 1567 handle_sysctl_msg(msg); 1568 break; 1569 case FF_IOCTL: 1570 #ifdef INET6 1571 case FF_IOCTL6: 1572 #endif 1573 handle_ioctl_msg(msg); 1574 break; 1575 case FF_ROUTE: 1576 handle_route_msg(msg); 1577 break; 1578 case FF_TOP: 1579 handle_top_msg(msg); 1580 break; 1581 #ifdef FF_NETGRAPH 1582 case FF_NGCTL: 1583 handle_ngctl_msg(msg); 1584 break; 1585 #endif 1586 #ifdef FF_IPFW 1587 case FF_IPFW_CTL: 1588 handle_ipfw_msg(msg); 1589 break; 1590 #endif 1591 case FF_TRAFFIC: 1592 handle_traffic_msg(msg); 1593 break; 1594 #ifdef FF_KNI 1595 case FF_KNICTL: 1596 handle_knictl_msg(msg); 1597 break; 1598 #endif 1599 default: 1600 handle_default_msg(msg); 1601 break; 1602 } 1603 rte_ring_enqueue(msg_ring[proc_id].ring[msg->msg_type], msg); 1604 } 1605 1606 static inline int 1607 process_msg_ring(uint16_t proc_id) 1608 { 1609 void *msg; 1610 int ret = rte_ring_dequeue(msg_ring[proc_id].ring[0], &msg); 1611 1612 if (unlikely(ret == 0)) { 1613 handle_msg((struct ff_msg *)msg, proc_id); 1614 } 1615 1616 return 0; 1617 } 1618 1619 /* Send burst of packets on an output interface */ 1620 static inline int 1621 send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port) 1622 { 1623 struct rte_mbuf **m_table; 1624 int ret; 1625 uint16_t queueid; 1626 1627 queueid = qconf->tx_queue_id[port]; 1628 m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; 1629 1630 if (unlikely(ff_global_cfg.pcap.enable)) { 1631 uint16_t i; 1632 for (i = 0; i < n; i++) { 1633 ff_dump_packets( ff_global_cfg.pcap.save_path, m_table[i], 1634 ff_global_cfg.pcap.snap_len, ff_global_cfg.pcap.save_len); 1635 } 1636 } 1637 1638 ret = rte_eth_tx_burst(port, queueid, m_table, n); 1639 ff_traffic.tx_packets += ret; 1640 uint16_t i; 1641 for (i = 0; i < ret; i++) { 1642 ff_traffic.tx_bytes += rte_pktmbuf_pkt_len(m_table[i]); 1643 #ifdef FF_USE_PAGE_ARRAY 1644 if (qconf->tx_mbufs[port].bsd_m_table[i]) 1645 ff_enq_tx_bsdmbuf(port, qconf->tx_mbufs[port].bsd_m_table[i], m_table[i]->nb_segs); 1646 #endif 1647 } 1648 if (unlikely(ret < n)) { 1649 do { 1650 rte_pktmbuf_free(m_table[ret]); 1651 #ifdef FF_USE_PAGE_ARRAY 1652 if ( qconf->tx_mbufs[port].bsd_m_table[ret] ) 1653 ff_mbuf_free(qconf->tx_mbufs[port].bsd_m_table[ret]); 1654 #endif 1655 } while (++ret < n); 1656 } 1657 return 0; 1658 } 1659 1660 /* Enqueue a single packet, and send burst if queue is filled */ 1661 static inline int 1662 send_single_packet(struct rte_mbuf *m, uint8_t port) 1663 
{ 1664 uint16_t len; 1665 struct lcore_conf *qconf; 1666 1667 qconf = &lcore_conf; 1668 len = qconf->tx_mbufs[port].len; 1669 qconf->tx_mbufs[port].m_table[len] = m; 1670 len++; 1671 1672 /* enough pkts to be sent */ 1673 if (unlikely(len == MAX_PKT_BURST)) { 1674 send_burst(qconf, MAX_PKT_BURST, port); 1675 len = 0; 1676 } 1677 1678 qconf->tx_mbufs[port].len = len; 1679 return 0; 1680 } 1681 1682 int 1683 ff_dpdk_if_send(struct ff_dpdk_if_context *ctx, void *m, 1684 int total) 1685 { 1686 #ifdef FF_USE_PAGE_ARRAY 1687 struct lcore_conf *qconf = &lcore_conf; 1688 int len = 0; 1689 1690 len = ff_if_send_onepkt(ctx, m,total); 1691 if (unlikely(len == MAX_PKT_BURST)) { 1692 send_burst(qconf, MAX_PKT_BURST, ctx->port_id); 1693 len = 0; 1694 } 1695 qconf->tx_mbufs[ctx->port_id].len = len; 1696 return 0; 1697 #endif 1698 struct rte_mempool *mbuf_pool = pktmbuf_pool[lcore_conf.socket_id]; 1699 struct rte_mbuf *head = rte_pktmbuf_alloc(mbuf_pool); 1700 if (head == NULL) { 1701 ff_mbuf_free(m); 1702 return -1; 1703 } 1704 1705 head->pkt_len = total; 1706 head->nb_segs = 0; 1707 1708 int off = 0; 1709 struct rte_mbuf *cur = head, *prev = NULL; 1710 while(total > 0) { 1711 if (cur == NULL) { 1712 cur = rte_pktmbuf_alloc(mbuf_pool); 1713 if (cur == NULL) { 1714 rte_pktmbuf_free(head); 1715 ff_mbuf_free(m); 1716 return -1; 1717 } 1718 } 1719 1720 if (prev != NULL) { 1721 prev->next = cur; 1722 } 1723 head->nb_segs++; 1724 1725 prev = cur; 1726 void *data = rte_pktmbuf_mtod(cur, void*); 1727 int len = total > RTE_MBUF_DEFAULT_DATAROOM ? RTE_MBUF_DEFAULT_DATAROOM : total; 1728 int ret = ff_mbuf_copydata(m, data, off, len); 1729 if (ret < 0) { 1730 rte_pktmbuf_free(head); 1731 ff_mbuf_free(m); 1732 return -1; 1733 } 1734 1735 1736 cur->data_len = len; 1737 off += len; 1738 total -= len; 1739 cur = NULL; 1740 } 1741 1742 struct ff_tx_offload offload = {0}; 1743 ff_mbuf_tx_offload(m, &offload); 1744 1745 void *data = rte_pktmbuf_mtod(head, void*); 1746 1747 if (offload.ip_csum) { 1748 /* ipv6 not supported yet */ 1749 struct rte_ipv4_hdr *iph; 1750 int iph_len; 1751 iph = (struct rte_ipv4_hdr *)(data + RTE_ETHER_HDR_LEN); 1752 iph_len = (iph->version_ihl & 0x0f) << 2; 1753 1754 head->ol_flags |= PKT_TX_IP_CKSUM | PKT_TX_IPV4; 1755 head->l2_len = RTE_ETHER_HDR_LEN; 1756 head->l3_len = iph_len; 1757 } 1758 1759 if (ctx->hw_features.tx_csum_l4) { 1760 struct rte_ipv4_hdr *iph; 1761 int iph_len; 1762 iph = (struct rte_ipv4_hdr *)(data + RTE_ETHER_HDR_LEN); 1763 iph_len = (iph->version_ihl & 0x0f) << 2; 1764 1765 if (offload.tcp_csum) { 1766 head->ol_flags |= PKT_TX_TCP_CKSUM; 1767 head->l2_len = RTE_ETHER_HDR_LEN; 1768 head->l3_len = iph_len; 1769 } 1770 1771 /* 1772 * TCP segmentation offload. 1773 * 1774 * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag 1775 * implies PKT_TX_TCP_CKSUM) 1776 * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 1777 * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and 1778 * write the IP checksum to 0 in the packet 1779 * - fill the mbuf offload information: l2_len, 1780 * l3_len, l4_len, tso_segsz 1781 * - calculate the pseudo header checksum without taking ip_len 1782 * in account, and set it in the TCP header. Refer to 1783 * rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum() that can be 1784 * used as helpers. 
1785 */ 1786 if (offload.tso_seg_size) { 1787 struct rte_tcp_hdr *tcph; 1788 int tcph_len; 1789 tcph = (struct rte_tcp_hdr *)((char *)iph + iph_len); 1790 tcph_len = (tcph->data_off & 0xf0) >> 2; 1791 tcph->cksum = rte_ipv4_phdr_cksum(iph, PKT_TX_TCP_SEG); 1792 1793 head->ol_flags |= PKT_TX_TCP_SEG; 1794 head->l4_len = tcph_len; 1795 head->tso_segsz = offload.tso_seg_size; 1796 } 1797 1798 if (offload.udp_csum) { 1799 head->ol_flags |= PKT_TX_UDP_CKSUM; 1800 head->l2_len = RTE_ETHER_HDR_LEN; 1801 head->l3_len = iph_len; 1802 } 1803 } 1804 1805 ff_mbuf_free(m); 1806 1807 return send_single_packet(head, ctx->port_id); 1808 } 1809 1810 static int 1811 main_loop(void *arg) 1812 { 1813 struct loop_routine *lr = (struct loop_routine *)arg; 1814 1815 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1816 uint64_t prev_tsc, diff_tsc, cur_tsc, usch_tsc, div_tsc, usr_tsc, sys_tsc, end_tsc, idle_sleep_tsc; 1817 int i, j, nb_rx, idle; 1818 uint16_t port_id, queue_id; 1819 struct lcore_conf *qconf; 1820 uint64_t drain_tsc = 0; 1821 struct ff_dpdk_if_context *ctx; 1822 1823 if (pkt_tx_delay) { 1824 drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * pkt_tx_delay; 1825 } 1826 1827 prev_tsc = 0; 1828 usch_tsc = 0; 1829 1830 qconf = &lcore_conf; 1831 1832 while (1) { 1833 cur_tsc = rte_rdtsc(); 1834 if (unlikely(freebsd_clock.expire < cur_tsc)) { 1835 rte_timer_manage(); 1836 } 1837 1838 idle = 1; 1839 sys_tsc = 0; 1840 usr_tsc = 0; 1841 1842 /* 1843 * TX burst queue drain 1844 */ 1845 diff_tsc = cur_tsc - prev_tsc; 1846 if (unlikely(diff_tsc >= drain_tsc)) { 1847 for (i = 0; i < qconf->nb_tx_port; i++) { 1848 port_id = qconf->tx_port_id[i]; 1849 if (qconf->tx_mbufs[port_id].len == 0) 1850 continue; 1851 1852 idle = 0; 1853 1854 send_burst(qconf, 1855 qconf->tx_mbufs[port_id].len, 1856 port_id); 1857 qconf->tx_mbufs[port_id].len = 0; 1858 } 1859 1860 prev_tsc = cur_tsc; 1861 } 1862 1863 /* 1864 * Read packet from RX queues 1865 */ 1866 for (i = 0; i < qconf->nb_rx_queue; ++i) { 1867 port_id = qconf->rx_queue_list[i].port_id; 1868 queue_id = qconf->rx_queue_list[i].queue_id; 1869 ctx = veth_ctx[port_id]; 1870 1871 #ifdef FF_KNI 1872 if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) { 1873 ff_kni_process(port_id, queue_id, pkts_burst, MAX_PKT_BURST); 1874 } 1875 #endif 1876 1877 process_dispatch_ring(port_id, queue_id, pkts_burst, ctx); 1878 1879 nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts_burst, 1880 MAX_PKT_BURST); 1881 if (nb_rx == 0) 1882 continue; 1883 1884 idle = 0; 1885 1886 /* Prefetch first packets */ 1887 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1888 rte_prefetch0(rte_pktmbuf_mtod( 1889 pkts_burst[j], void *)); 1890 } 1891 1892 /* Prefetch and handle already prefetched packets */ 1893 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1894 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1895 j + PREFETCH_OFFSET], void *)); 1896 process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0); 1897 } 1898 1899 /* Handle remaining prefetched packets */ 1900 for (; j < nb_rx; j++) { 1901 process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0); 1902 } 1903 } 1904 1905 process_msg_ring(qconf->proc_id); 1906 1907 div_tsc = rte_rdtsc(); 1908 1909 if (likely(lr->loop != NULL && (!idle || cur_tsc - usch_tsc >= drain_tsc))) { 1910 usch_tsc = cur_tsc; 1911 lr->loop(lr->arg); 1912 } 1913 1914 idle_sleep_tsc = rte_rdtsc(); 1915 if (likely(idle && idle_sleep)) { 1916 usleep(idle_sleep); 1917 end_tsc = rte_rdtsc(); 1918 } else { 1919 end_tsc = idle_sleep_tsc; 1920 } 1921 1922 if 
(usch_tsc == cur_tsc) { 1923 usr_tsc = idle_sleep_tsc - div_tsc; 1924 } 1925 1926 if (!idle) { 1927 sys_tsc = div_tsc - cur_tsc; 1928 ff_top_status.sys_tsc += sys_tsc; 1929 } 1930 1931 ff_top_status.usr_tsc += usr_tsc; 1932 ff_top_status.work_tsc += end_tsc - cur_tsc; 1933 ff_top_status.idle_tsc += end_tsc - cur_tsc - usr_tsc - sys_tsc; 1934 1935 ff_top_status.loops++; 1936 } 1937 1938 return 0; 1939 } 1940 1941 int 1942 ff_dpdk_if_up(void) { 1943 int i; 1944 struct lcore_conf *qconf = &lcore_conf; 1945 for (i = 0; i < qconf->nb_tx_port; i++) { 1946 uint16_t port_id = qconf->tx_port_id[i]; 1947 1948 struct ff_port_cfg *pconf = &qconf->port_cfgs[port_id]; 1949 veth_ctx[port_id] = ff_veth_attach(pconf); 1950 if (veth_ctx[port_id] == NULL) { 1951 rte_exit(EXIT_FAILURE, "ff_veth_attach failed"); 1952 } 1953 } 1954 1955 return 0; 1956 } 1957 1958 void 1959 ff_dpdk_run(loop_func_t loop, void *arg) { 1960 struct loop_routine *lr = rte_malloc(NULL, 1961 sizeof(struct loop_routine), 0); 1962 lr->loop = loop; 1963 lr->arg = arg; 1964 rte_eal_mp_remote_launch(main_loop, lr, CALL_MASTER); 1965 rte_eal_mp_wait_lcore(); 1966 rte_free(lr); 1967 } 1968 1969 void 1970 ff_dpdk_pktmbuf_free(void *m) 1971 { 1972 rte_pktmbuf_free_seg((struct rte_mbuf *)m); 1973 } 1974 1975 static uint32_t 1976 toeplitz_hash(unsigned keylen, const uint8_t *key, 1977 unsigned datalen, const uint8_t *data) 1978 { 1979 uint32_t hash = 0, v; 1980 u_int i, b; 1981 1982 /* XXXRW: Perhaps an assertion about key length vs. data length? */ 1983 1984 v = (key[0]<<24) + (key[1]<<16) + (key[2] <<8) + key[3]; 1985 for (i = 0; i < datalen; i++) { 1986 for (b = 0; b < 8; b++) { 1987 if (data[i] & (1<<(7-b))) 1988 hash ^= v; 1989 v <<= 1; 1990 if ((i + 4) < keylen && 1991 (key[i+4] & (1<<(7-b)))) 1992 v |= 1; 1993 } 1994 } 1995 return (hash); 1996 } 1997 1998 int 1999 ff_in_pcbladdr(uint16_t family, void *faddr, uint16_t fport, void *laddr) 2000 { 2001 int ret = 0; 2002 uint16_t fa; 2003 2004 if (!pcblddr_fun) 2005 return ret; 2006 2007 if (family == AF_INET) 2008 fa = AF_INET; 2009 else if (family == AF_INET6_FREEBSD) 2010 fa = AF_INET6_LINUX; 2011 else 2012 return EADDRNOTAVAIL; 2013 2014 ret = (*pcblddr_fun)(fa, faddr, fport, laddr); 2015 2016 return ret; 2017 } 2018 2019 void 2020 ff_regist_pcblddr_fun(pcblddr_func_t func) 2021 { 2022 pcblddr_fun = func; 2023 } 2024 2025 int 2026 ff_rss_check(void *softc, uint32_t saddr, uint32_t daddr, 2027 uint16_t sport, uint16_t dport) 2028 { 2029 struct lcore_conf *qconf = &lcore_conf; 2030 struct ff_dpdk_if_context *ctx = ff_veth_softc_to_hostc(softc); 2031 uint16_t nb_queues = qconf->nb_queue_list[ctx->port_id]; 2032 2033 if (nb_queues <= 1) { 2034 return 1; 2035 } 2036 2037 uint16_t reta_size = rss_reta_size[ctx->port_id]; 2038 uint16_t queueid = qconf->tx_queue_id[ctx->port_id]; 2039 2040 uint8_t data[sizeof(saddr) + sizeof(daddr) + sizeof(sport) + 2041 sizeof(dport)]; 2042 2043 unsigned datalen = 0; 2044 2045 bcopy(&saddr, &data[datalen], sizeof(saddr)); 2046 datalen += sizeof(saddr); 2047 2048 bcopy(&daddr, &data[datalen], sizeof(daddr)); 2049 datalen += sizeof(daddr); 2050 2051 bcopy(&sport, &data[datalen], sizeof(sport)); 2052 datalen += sizeof(sport); 2053 2054 bcopy(&dport, &data[datalen], sizeof(dport)); 2055 datalen += sizeof(dport); 2056 2057 uint32_t hash = 0; 2058 hash = toeplitz_hash(rsskey_len, rsskey, datalen, data); 2059 2060 return ((hash & (reta_size - 1)) % nb_queues) == queueid; 2061 } 2062 2063 void 2064 ff_regist_packet_dispatcher(dispatch_func_t func) 2065 { 2066 
packet_dispatcher = func; 2067 } 2068 2069 uint64_t 2070 ff_get_tsc_ns() 2071 { 2072 uint64_t cur_tsc = rte_rdtsc(); 2073 uint64_t hz = rte_get_tsc_hz(); 2074 return ((double)cur_tsc/(double)hz) * NS_PER_S; 2075 } 2076 2077
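/*
 * Usage sketch (illustrative, not part of the original sources): an
 * application can steer packets between queues before the stack sees them
 * by registering a dispatch callback with ff_regist_packet_dispatcher().
 * The hypothetical callback below keeps every packet on the queue that
 * received it; returning another valid queue id would move the packet to
 * that queue's dispatch ring, FF_DISPATCH_RESPONSE would send it back out,
 * and FF_DISPATCH_ERROR would drop it (see process_packets() above).
 *
 *     static int
 *     my_dispatcher(void *data, uint16_t *len, uint16_t queue_id,
 *         uint16_t nb_queues)
 *     {
 *         return queue_id;
 *     }
 *
 *     // call before ff_dpdk_run():
 *     // ff_regist_packet_dispatcher(my_dispatcher);
 */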