1 /* 2 * Copyright (C) 2017 THL A29 Limited, a Tencent company. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, this 9 * list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright notice, 11 * this list of conditions and the following disclaimer in the documentation 12 * and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * 25 */ 26 #include <assert.h> 27 #include <unistd.h> 28 #include <sys/mman.h> 29 #include <errno.h> 30 31 #include <rte_common.h> 32 #include <rte_byteorder.h> 33 #include <rte_log.h> 34 #include <rte_memory.h> 35 #include <rte_memcpy.h> 36 #include <rte_memzone.h> 37 #include <rte_config.h> 38 #include <rte_eal.h> 39 #include <rte_pci.h> 40 #include <rte_mbuf.h> 41 #include <rte_memory.h> 42 #include <rte_lcore.h> 43 #include <rte_launch.h> 44 #include <rte_ethdev.h> 45 #include <rte_debug.h> 46 #include <rte_common.h> 47 #include <rte_ether.h> 48 #include <rte_malloc.h> 49 #include <rte_cycles.h> 50 #include <rte_timer.h> 51 #include <rte_thash.h> 52 #include <rte_ip.h> 53 #include <rte_tcp.h> 54 #include <rte_udp.h> 55 #include <rte_eth_bond.h> 56 57 #include "ff_dpdk_if.h" 58 #include "ff_dpdk_pcap.h" 59 #include "ff_dpdk_kni.h" 60 #include "ff_config.h" 61 #include "ff_veth.h" 62 #include "ff_host_interface.h" 63 #include "ff_msg.h" 64 #include "ff_api.h" 65 #include "ff_memory.h" 66 67 #ifdef FF_KNI 68 #define KNI_MBUF_MAX 2048 69 #define KNI_QUEUE_SIZE 2048 70 71 int enable_kni; 72 static int kni_accept; 73 static int knictl_action = FF_KNICTL_ACTION_DEFAULT; 74 #endif 75 76 static int numa_on; 77 78 static unsigned idle_sleep; 79 static unsigned pkt_tx_delay; 80 81 static struct rte_timer freebsd_clock; 82 83 // Mellanox Linux's driver key 84 static uint8_t default_rsskey_40bytes[40] = { 85 0xd1, 0x81, 0xc6, 0x2c, 0xf7, 0xf4, 0xdb, 0x5b, 86 0x19, 0x83, 0xa2, 0xfc, 0x94, 0x3e, 0x1a, 0xdb, 87 0xd9, 0x38, 0x9e, 0x6b, 0xd1, 0x03, 0x9c, 0x2c, 88 0xa7, 0x44, 0x99, 0xad, 0x59, 0x3d, 0x56, 0xd9, 89 0xf3, 0x25, 0x3c, 0x06, 0x2a, 0xdc, 0x1f, 0xfc 90 }; 91 92 static uint8_t default_rsskey_52bytes[52] = { 93 0x44, 0x39, 0x79, 0x6b, 0xb5, 0x4c, 0x50, 0x23, 94 0xb6, 0x75, 0xea, 0x5b, 0x12, 0x4f, 0x9f, 0x30, 95 0xb8, 0xa2, 0xc0, 0x3d, 0xdf, 0xdc, 0x4d, 0x02, 96 0xa0, 0x8c, 0x9b, 0x33, 0x4a, 0xf6, 0x4a, 0x4c, 97 0x05, 0xc6, 0xfa, 0x34, 0x39, 0x58, 0xd8, 0x55, 98 0x7d, 0x99, 0x58, 0x3a, 0xe1, 0x38, 0xc9, 0x2e, 99 0x81, 0x15, 0x03, 0x66 100 }; 101 102 static uint8_t symmetric_rsskey[52] = { 103 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 104 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 105 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 106 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 107 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 108 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 109 0x6d, 0x5a, 0x6d, 0x5a 110 }; 111 112 static int rsskey_len = sizeof(default_rsskey_40bytes); 113 static uint8_t *rsskey = default_rsskey_40bytes; 114 115 struct lcore_conf lcore_conf; 116 117 struct rte_mempool *pktmbuf_pool[NB_SOCKETS]; 118 119 static pcblddr_func_t pcblddr_fun; 120 121 static struct rte_ring **dispatch_ring[RTE_MAX_ETHPORTS]; 122 static dispatch_func_t packet_dispatcher; 123 124 static uint16_t rss_reta_size[RTE_MAX_ETHPORTS]; 125 126 #define BOND_DRIVER_NAME "net_bonding" 127 128 static inline int send_single_packet(struct rte_mbuf *m, uint8_t port); 129 130 struct ff_msg_ring { 131 char ring_name[FF_MSG_NUM][RTE_RING_NAMESIZE]; 132 /* ring[0] for lcore recv msg, other send */ 133 /* ring[1] for lcore send msg, other read */ 134 struct rte_ring *ring[FF_MSG_NUM]; 135 } __rte_cache_aligned; 136 137 static struct ff_msg_ring msg_ring[RTE_MAX_LCORE]; 138 static struct rte_mempool *message_pool; 139 static struct ff_dpdk_if_context *veth_ctx[RTE_MAX_ETHPORTS]; 140 141 static struct ff_top_args ff_top_status; 142 static struct ff_traffic_args ff_traffic; 143 extern void ff_hardclock(void); 144 145 static void 146 ff_hardclock_job(__rte_unused struct rte_timer *timer, 147 __rte_unused void *arg) { 148 ff_hardclock(); 149 ff_update_current_ts(); 150 } 151 152 struct ff_dpdk_if_context * 153 ff_dpdk_register_if(void *sc, void *ifp, struct ff_port_cfg *cfg) 154 { 155 struct ff_dpdk_if_context *ctx; 156 157 ctx = calloc(1, sizeof(struct ff_dpdk_if_context)); 158 if (ctx == NULL) 159 return NULL; 160 161 ctx->sc = sc; 162 ctx->ifp = ifp; 163 ctx->port_id = cfg->port_id; 164 ctx->hw_features = cfg->hw_features; 165 166 return ctx; 167 } 168 169 void 170 ff_dpdk_deregister_if(struct ff_dpdk_if_context *ctx) 171 { 172 free(ctx); 173 } 174 175 static void 176 check_all_ports_link_status(void) 177 { 178 #define CHECK_INTERVAL 100 /* 100ms */ 179 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 180 181 uint16_t portid; 182 uint8_t count, all_ports_up, print_flag = 0; 183 struct rte_eth_link link; 184 185 printf("\nChecking link status"); 186 fflush(stdout); 187 188 int i, nb_ports; 189 nb_ports = ff_global_cfg.dpdk.nb_ports; 190 for (count = 0; count <= MAX_CHECK_TIME; count++) { 191 all_ports_up = 1; 192 for (i = 0; i < nb_ports; i++) { 193 uint16_t portid = ff_global_cfg.dpdk.portid_list[i]; 194 memset(&link, 0, sizeof(link)); 195 rte_eth_link_get_nowait(portid, &link); 196 197 /* print link status if flag set */ 198 if (print_flag == 1) { 199 if (link.link_status) { 200 printf("Port %d Link Up - speed %u " 201 "Mbps - %s\n", (int)portid, 202 (unsigned)link.link_speed, 203 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 204 ("full-duplex") : ("half-duplex\n")); 205 } else { 206 printf("Port %d Link Down\n", (int)portid); 207 } 208 continue; 209 } 210 /* clear all_ports_up flag if any link down */ 211 if (link.link_status == 0) { 212 all_ports_up = 0; 213 break; 214 } 215 } 216 217 /* after finally printing all link status, get out */ 218 if (print_flag == 1) 219 break; 220 221 if (all_ports_up == 0) { 222 printf("."); 223 fflush(stdout); 224 rte_delay_ms(CHECK_INTERVAL); 225 } 226 227 /* set the print_flag if all ports up or timeout */ 228 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 229 print_flag = 1; 230 printf("done\n"); 231 } 232 } 233 } 234 235 static int 236 init_lcore_conf(void) 237 { 238 uint8_t nb_dev_ports = rte_eth_dev_count_avail(); 239 if (nb_dev_ports == 0) { 240 rte_exit(EXIT_FAILURE, "No probed ethernet devices\n"); 241 } 242 243 if (ff_global_cfg.dpdk.max_portid >= nb_dev_ports) { 244 rte_exit(EXIT_FAILURE, "this machine doesn't have port %d.\n", 245 ff_global_cfg.dpdk.max_portid); 246 } 247 248 lcore_conf.port_cfgs = ff_global_cfg.dpdk.port_cfgs; 249 lcore_conf.proc_id = ff_global_cfg.dpdk.proc_id; 250 251 uint16_t socket_id = 0; 252 if (numa_on) { 253 socket_id = rte_lcore_to_socket_id(rte_lcore_id()); 254 } 255 256 lcore_conf.socket_id = socket_id; 257 258 uint16_t lcore_id = ff_global_cfg.dpdk.proc_lcore[lcore_conf.proc_id]; 259 if (!rte_lcore_is_enabled(lcore_id)) { 260 rte_exit(EXIT_FAILURE, "lcore %u unavailable\n", lcore_id); 261 } 262 263 int j; 264 for (j = 0; j < ff_global_cfg.dpdk.nb_ports; ++j) { 265 uint16_t port_id = ff_global_cfg.dpdk.portid_list[j]; 266 struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[port_id]; 267 268 int queueid = -1; 269 int i; 270 for (i = 0; i < pconf->nb_lcores; i++) { 271 if (pconf->lcore_list[i] == lcore_id) { 272 queueid = i; 273 } 274 } 275 if (queueid < 0) { 276 continue; 277 } 278 printf("lcore: %u, port: %u, queue: %u\n", lcore_id, port_id, queueid); 279 uint16_t nb_rx_queue = lcore_conf.nb_rx_queue; 280 lcore_conf.rx_queue_list[nb_rx_queue].port_id = port_id; 281 lcore_conf.rx_queue_list[nb_rx_queue].queue_id = queueid; 282 lcore_conf.nb_rx_queue++; 283 284 lcore_conf.tx_queue_id[port_id] = queueid; 285 lcore_conf.tx_port_id[lcore_conf.nb_tx_port] = port_id; 286 lcore_conf.nb_tx_port++; 287 288 /* Enable pcap dump */ 289 if (ff_global_cfg.pcap.enable) { 290 ff_enable_pcap(ff_global_cfg.pcap.save_path, ff_global_cfg.pcap.snap_len); 291 } 292 293 lcore_conf.nb_queue_list[port_id] = pconf->nb_lcores; 294 } 295 296 if (lcore_conf.nb_rx_queue == 0) { 297 rte_exit(EXIT_FAILURE, "lcore %u has nothing to do\n", lcore_id); 298 } 299 300 return 0; 301 } 302 303 static int 304 init_mem_pool(void) 305 { 306 uint8_t nb_ports = ff_global_cfg.dpdk.nb_ports; 307 uint32_t nb_lcores = ff_global_cfg.dpdk.nb_procs; 308 uint32_t nb_tx_queue = nb_lcores; 309 uint32_t nb_rx_queue = lcore_conf.nb_rx_queue * nb_lcores; 310 uint16_t max_portid = ff_global_cfg.dpdk.max_portid; 311 312 unsigned nb_mbuf = RTE_ALIGN_CEIL ( 313 (nb_rx_queue * (max_portid + 1) * 2 * RX_QUEUE_SIZE + 314 nb_ports * (max_portid + 1) * 2 * nb_lcores * MAX_PKT_BURST + 315 nb_ports * (max_portid + 1) * 2 * nb_tx_queue * TX_QUEUE_SIZE + 316 nb_lcores * MEMPOOL_CACHE_SIZE + 317 #ifdef FF_KNI 318 nb_ports * KNI_MBUF_MAX + 319 nb_ports * KNI_QUEUE_SIZE + 320 #endif 321 nb_lcores * nb_ports * DISPATCH_RING_SIZE), 322 (unsigned)8192); 323 324 unsigned socketid = 0; 325 uint16_t i, lcore_id; 326 char s[64]; 327 328 for (i = 0; i < ff_global_cfg.dpdk.nb_procs; i++) { 329 lcore_id = ff_global_cfg.dpdk.proc_lcore[i]; 330 if (numa_on) { 331 socketid = rte_lcore_to_socket_id(lcore_id); 332 } 333 334 if (socketid >= NB_SOCKETS) { 335 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n", 336 socketid, i, NB_SOCKETS); 337 } 338 339 if (pktmbuf_pool[socketid] != NULL) { 340 continue; 341 } 342 343 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 344 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 345 pktmbuf_pool[socketid] = 346 rte_pktmbuf_pool_create(s, nb_mbuf, 347 MEMPOOL_CACHE_SIZE, 0, 348 RTE_MBUF_DEFAULT_BUF_SIZE, socketid); 349 } else { 350 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 351 pktmbuf_pool[socketid] = rte_mempool_lookup(s); 352 } 353 354 if (pktmbuf_pool[socketid] == NULL) { 355 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool on socket %d\n", socketid); 356 } else { 357 printf("create mbuf pool on socket %d\n", socketid); 358 } 359 360 #ifdef FF_USE_PAGE_ARRAY 361 nb_mbuf = RTE_ALIGN_CEIL ( 362 nb_ports*nb_lcores*MAX_PKT_BURST + 363 nb_ports*nb_tx_queue*TX_QUEUE_SIZE + 364 nb_lcores*MEMPOOL_CACHE_SIZE, 365 (unsigned)4096); 366 ff_init_ref_pool(nb_mbuf, socketid); 367 #endif 368 } 369 370 return 0; 371 } 372 373 static struct rte_ring * 374 create_ring(const char *name, unsigned count, int socket_id, unsigned flags) 375 { 376 struct rte_ring *ring; 377 378 if (name == NULL) { 379 rte_exit(EXIT_FAILURE, "create ring failed, no name!\n"); 380 } 381 382 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 383 ring = rte_ring_create(name, count, socket_id, flags); 384 } else { 385 ring = rte_ring_lookup(name); 386 } 387 388 if (ring == NULL) { 389 rte_exit(EXIT_FAILURE, "create ring:%s failed!\n", name); 390 } 391 392 return ring; 393 } 394 395 static int 396 init_dispatch_ring(void) 397 { 398 int j; 399 char name_buf[RTE_RING_NAMESIZE]; 400 int queueid; 401 402 unsigned socketid = lcore_conf.socket_id; 403 404 /* Create ring according to ports actually being used. */ 405 int nb_ports = ff_global_cfg.dpdk.nb_ports; 406 for (j = 0; j < nb_ports; j++) { 407 uint16_t portid = ff_global_cfg.dpdk.portid_list[j]; 408 struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[portid]; 409 int nb_queues = pconf->nb_lcores; 410 if (dispatch_ring[portid] == NULL) { 411 snprintf(name_buf, RTE_RING_NAMESIZE, "ring_ptr_p%d", portid); 412 413 dispatch_ring[portid] = rte_zmalloc(name_buf, 414 sizeof(struct rte_ring *) * nb_queues, 415 RTE_CACHE_LINE_SIZE); 416 if (dispatch_ring[portid] == NULL) { 417 rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) " 418 "failed\n", name_buf); 419 } 420 } 421 422 for(queueid = 0; queueid < nb_queues; ++queueid) { 423 snprintf(name_buf, RTE_RING_NAMESIZE, "dispatch_ring_p%d_q%d", 424 portid, queueid); 425 dispatch_ring[portid][queueid] = create_ring(name_buf, 426 DISPATCH_RING_SIZE, socketid, RING_F_SC_DEQ); 427 428 if (dispatch_ring[portid][queueid] == NULL) 429 rte_panic("create ring:%s failed!\n", name_buf); 430 431 printf("create ring:%s success, %u ring entries are now free!\n", 432 name_buf, rte_ring_free_count(dispatch_ring[portid][queueid])); 433 } 434 } 435 436 return 0; 437 } 438 439 static void 440 ff_msg_init(struct rte_mempool *mp, 441 __attribute__((unused)) void *opaque_arg, 442 void *obj, __attribute__((unused)) unsigned i) 443 { 444 struct ff_msg *msg = (struct ff_msg *)obj; 445 msg->msg_type = FF_UNKNOWN; 446 msg->buf_addr = (char *)msg + sizeof(struct ff_msg); 447 msg->buf_len = mp->elt_size - sizeof(struct ff_msg); 448 msg->original_buf = NULL; 449 msg->original_buf_len = 0; 450 } 451 452 static int 453 init_msg_ring(void) 454 { 455 uint16_t i, j; 456 uint16_t nb_procs = ff_global_cfg.dpdk.nb_procs; 457 unsigned socketid = lcore_conf.socket_id; 458 459 /* Create message buffer pool */ 460 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 461 message_pool = rte_mempool_create(FF_MSG_POOL, 462 MSG_RING_SIZE * 2 * nb_procs, 463 MAX_MSG_BUF_SIZE, MSG_RING_SIZE / 2, 0, 464 NULL, NULL, ff_msg_init, NULL, 465 socketid, 0); 466 } else { 467 message_pool = rte_mempool_lookup(FF_MSG_POOL); 468 } 469 470 if (message_pool == NULL) { 471 rte_panic("Create msg mempool failed\n"); 472 } 473 474 for(i = 0; i < nb_procs; ++i) { 475 snprintf(msg_ring[i].ring_name[0], RTE_RING_NAMESIZE, 476 "%s%u", FF_MSG_RING_IN, i); 477 msg_ring[i].ring[0] = create_ring(msg_ring[i].ring_name[0], 478 MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ); 479 if (msg_ring[i].ring[0] == NULL) 480 rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[0]); 481 482 for (j = FF_SYSCTL; j < FF_MSG_NUM; j++) { 483 snprintf(msg_ring[i].ring_name[j], RTE_RING_NAMESIZE, 484 "%s%u_%u", FF_MSG_RING_OUT, i, j); 485 msg_ring[i].ring[j] = create_ring(msg_ring[i].ring_name[j], 486 MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ); 487 if (msg_ring[i].ring[j] == NULL) 488 rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[j]); 489 } 490 } 491 492 return 0; 493 } 494 495 #ifdef FF_KNI 496 497 static enum FF_KNICTL_CMD get_kni_action(const char *c){ 498 if (!c) 499 return FF_KNICTL_ACTION_DEFAULT; 500 if (0 == strcasecmp(c, "alltokni")){ 501 return FF_KNICTL_ACTION_ALL_TO_KNI; 502 } else if (0 == strcasecmp(c, "alltoff")){ 503 return FF_KNICTL_ACTION_ALL_TO_FF; 504 } else if (0 == strcasecmp(c, "default")){ 505 return FF_KNICTL_ACTION_DEFAULT; 506 } else { 507 return FF_KNICTL_ACTION_DEFAULT; 508 } 509 } 510 511 static int 512 init_kni(void) 513 { 514 int nb_ports = rte_eth_dev_count_avail(); 515 kni_accept = 0; 516 if(strcasecmp(ff_global_cfg.kni.method, "accept") == 0) 517 kni_accept = 1; 518 519 knictl_action = get_kni_action(ff_global_cfg.kni.kni_action); 520 521 ff_kni_init(nb_ports, ff_global_cfg.kni.tcp_port, 522 ff_global_cfg.kni.udp_port); 523 524 unsigned socket_id = lcore_conf.socket_id; 525 struct rte_mempool *mbuf_pool = pktmbuf_pool[socket_id]; 526 527 nb_ports = ff_global_cfg.dpdk.nb_ports; 528 int i, ret; 529 for (i = 0; i < nb_ports; i++) { 530 uint16_t port_id = ff_global_cfg.dpdk.portid_list[i]; 531 ff_kni_alloc(port_id, socket_id, mbuf_pool, KNI_QUEUE_SIZE); 532 } 533 534 return 0; 535 } 536 #endif 537 538 //RSS reta update will failed when enable flow isolate 539 #ifndef FF_FLOW_ISOLATE 540 static void 541 set_rss_table(uint16_t port_id, uint16_t reta_size, uint16_t nb_queues) 542 { 543 if (reta_size == 0) { 544 return; 545 } 546 547 int reta_conf_size = RTE_MAX(1, reta_size / RTE_RETA_GROUP_SIZE); 548 struct rte_eth_rss_reta_entry64 reta_conf[reta_conf_size]; 549 550 /* config HW indirection table */ 551 unsigned i, j, hash=0; 552 for (i = 0; i < reta_conf_size; i++) { 553 reta_conf[i].mask = ~0ULL; 554 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) { 555 reta_conf[i].reta[j] = hash++ % nb_queues; 556 } 557 } 558 559 if (rte_eth_dev_rss_reta_update(port_id, reta_conf, reta_size)) { 560 rte_exit(EXIT_FAILURE, "port[%d], failed to update rss table\n", 561 port_id); 562 } 563 } 564 #endif 565 566 static int 567 init_port_start(void) 568 { 569 int nb_ports = ff_global_cfg.dpdk.nb_ports; 570 unsigned socketid = 0; 571 struct rte_mempool *mbuf_pool; 572 uint16_t i, j; 573 574 for (i = 0; i < nb_ports; i++) { 575 uint16_t port_id, u_port_id = ff_global_cfg.dpdk.portid_list[i]; 576 struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[u_port_id]; 577 uint16_t nb_queues = pconf->nb_lcores; 578 579 for (j=0; j<=pconf->nb_slaves; j++) { 580 if (j < pconf->nb_slaves) { 581 port_id = pconf->slave_portid_list[j]; 582 printf("To init %s's %d'st slave port[%d]\n", 583 ff_global_cfg.dpdk.bond_cfgs->name, 584 j, port_id); 585 } else { 586 port_id = u_port_id; 587 } 588 589 struct rte_eth_dev_info dev_info; 590 struct rte_eth_conf port_conf = {0}; 591 struct rte_eth_rxconf rxq_conf; 592 struct rte_eth_txconf txq_conf; 593 594 int ret = rte_eth_dev_info_get(port_id, &dev_info); 595 if (ret != 0) 596 rte_exit(EXIT_FAILURE, 597 "Error during getting device (port %u) info: %s\n", 598 port_id, strerror(-ret)); 599 600 if (nb_queues > dev_info.max_rx_queues) { 601 rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_rx_queues[%d]\n", 602 nb_queues, 603 dev_info.max_rx_queues); 604 } 605 606 if (nb_queues > dev_info.max_tx_queues) { 607 rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_tx_queues[%d]\n", 608 nb_queues, 609 dev_info.max_tx_queues); 610 } 611 612 struct rte_ether_addr addr; 613 rte_eth_macaddr_get(port_id, &addr); 614 printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8 615 " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n", 616 (unsigned)port_id, 617 addr.addr_bytes[0], addr.addr_bytes[1], 618 addr.addr_bytes[2], addr.addr_bytes[3], 619 addr.addr_bytes[4], addr.addr_bytes[5]); 620 621 rte_memcpy(pconf->mac, 622 addr.addr_bytes, RTE_ETHER_ADDR_LEN); 623 624 /* Set RSS mode */ 625 uint64_t default_rss_hf = ETH_RSS_PROTO_MASK; 626 port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; 627 port_conf.rx_adv_conf.rss_conf.rss_hf = default_rss_hf; 628 if (dev_info.hash_key_size == 52) { 629 rsskey = default_rsskey_52bytes; 630 rsskey_len = 52; 631 } 632 if (ff_global_cfg.dpdk.symmetric_rss) { 633 printf("Use symmetric Receive-side Scaling(RSS) key\n"); 634 rsskey = symmetric_rsskey; 635 } 636 port_conf.rx_adv_conf.rss_conf.rss_key = rsskey; 637 port_conf.rx_adv_conf.rss_conf.rss_key_len = rsskey_len; 638 port_conf.rx_adv_conf.rss_conf.rss_hf &= dev_info.flow_type_rss_offloads; 639 if (port_conf.rx_adv_conf.rss_conf.rss_hf != 640 ETH_RSS_PROTO_MASK) { 641 printf("Port %u modified RSS hash function based on hardware support," 642 "requested:%#"PRIx64" configured:%#"PRIx64"\n", 643 port_id, default_rss_hf, 644 port_conf.rx_adv_conf.rss_conf.rss_hf); 645 } 646 647 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) { 648 port_conf.txmode.offloads |= 649 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 650 } 651 652 /* Set Rx VLAN stripping */ 653 if (ff_global_cfg.dpdk.vlan_strip) { 654 if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP) { 655 port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; 656 } 657 } 658 659 /* Enable HW CRC stripping */ 660 port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_KEEP_CRC; 661 662 /* FIXME: Enable TCP LRO ?*/ 663 #if 0 664 if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO) { 665 printf("LRO is supported\n"); 666 port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_LRO; 667 pconf->hw_features.rx_lro = 1; 668 } 669 #endif 670 671 /* Set Rx checksum checking */ 672 if ((dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM) && 673 (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM) && 674 (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)) { 675 printf("RX checksum offload supported\n"); 676 port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CHECKSUM; 677 pconf->hw_features.rx_csum = 1; 678 } 679 680 if (ff_global_cfg.dpdk.tx_csum_offoad_skip == 0) { 681 if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) { 682 printf("TX ip checksum offload supported\n"); 683 port_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM; 684 pconf->hw_features.tx_csum_ip = 1; 685 } 686 687 if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) && 688 (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)) { 689 printf("TX TCP&UDP checksum offload supported\n"); 690 port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM; 691 pconf->hw_features.tx_csum_l4 = 1; 692 } 693 } else { 694 printf("TX checksum offoad is disabled\n"); 695 } 696 697 if (ff_global_cfg.dpdk.tso) { 698 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) { 699 printf("TSO is supported\n"); 700 port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO; 701 pconf->hw_features.tx_tso = 1; 702 } 703 } else { 704 printf("TSO is disabled\n"); 705 } 706 707 if (dev_info.reta_size) { 708 /* reta size must be power of 2 */ 709 assert((dev_info.reta_size & (dev_info.reta_size - 1)) == 0); 710 711 rss_reta_size[port_id] = dev_info.reta_size; 712 printf("port[%d]: rss table size: %d\n", port_id, 713 dev_info.reta_size); 714 } 715 716 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 717 continue; 718 } 719 720 ret = rte_eth_dev_configure(port_id, nb_queues, nb_queues, &port_conf); 721 if (ret != 0) { 722 return ret; 723 } 724 725 static uint16_t nb_rxd = RX_QUEUE_SIZE; 726 static uint16_t nb_txd = TX_QUEUE_SIZE; 727 ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd); 728 if (ret < 0) 729 printf("Could not adjust number of descriptors " 730 "for port%u (%d)\n", (unsigned)port_id, ret); 731 732 uint16_t q; 733 for (q = 0; q < nb_queues; q++) { 734 if (numa_on) { 735 uint16_t lcore_id = lcore_conf.port_cfgs[port_id].lcore_list[q]; 736 socketid = rte_lcore_to_socket_id(lcore_id); 737 } 738 mbuf_pool = pktmbuf_pool[socketid]; 739 740 txq_conf = dev_info.default_txconf; 741 txq_conf.offloads = port_conf.txmode.offloads; 742 ret = rte_eth_tx_queue_setup(port_id, q, nb_txd, 743 socketid, &txq_conf); 744 if (ret < 0) { 745 return ret; 746 } 747 748 rxq_conf = dev_info.default_rxconf; 749 rxq_conf.offloads = port_conf.rxmode.offloads; 750 ret = rte_eth_rx_queue_setup(port_id, q, nb_rxd, 751 socketid, &rxq_conf, mbuf_pool); 752 if (ret < 0) { 753 return ret; 754 } 755 } 756 757 758 if (strncmp(dev_info.driver_name, BOND_DRIVER_NAME, 759 strlen(dev_info.driver_name)) == 0) { 760 761 rte_eth_macaddr_get(port_id, &addr); 762 printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8 763 " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n", 764 (unsigned)port_id, 765 addr.addr_bytes[0], addr.addr_bytes[1], 766 addr.addr_bytes[2], addr.addr_bytes[3], 767 addr.addr_bytes[4], addr.addr_bytes[5]); 768 769 rte_memcpy(pconf->mac, 770 addr.addr_bytes, RTE_ETHER_ADDR_LEN); 771 772 int mode, count, x; 773 uint16_t slaves[RTE_MAX_ETHPORTS], len = RTE_MAX_ETHPORTS; 774 775 mode = rte_eth_bond_mode_get(port_id); 776 printf("Port %u, bond mode:%d\n", port_id, mode); 777 778 count = rte_eth_bond_slaves_get(port_id, slaves, len); 779 printf("Port %u, %s's slave ports count:%d\n", port_id, 780 ff_global_cfg.dpdk.bond_cfgs->name, count); 781 for (x=0; x<count; x++) { 782 printf("Port %u, %s's slave port[%u]\n", port_id, 783 ff_global_cfg.dpdk.bond_cfgs->name, slaves[x]); 784 } 785 } 786 787 ret = rte_eth_dev_start(port_id); 788 if (ret < 0) { 789 return ret; 790 } 791 //RSS reta update will failed when enable flow isolate 792 #ifndef FF_FLOW_ISOLATE 793 if (nb_queues > 1) { 794 /* 795 * FIXME: modify RSS set to FDIR 796 */ 797 set_rss_table(port_id, dev_info.reta_size, nb_queues); 798 } 799 #endif 800 801 /* Enable RX in promiscuous mode for the Ethernet device. */ 802 if (ff_global_cfg.dpdk.promiscuous) { 803 ret = rte_eth_promiscuous_enable(port_id); 804 if (ret == 0) { 805 printf("set port %u to promiscuous mode ok\n", port_id); 806 } else { 807 printf("set port %u to promiscuous mode error\n", port_id); 808 } 809 } 810 } 811 } 812 813 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 814 check_all_ports_link_status(); 815 } 816 817 return 0; 818 } 819 820 static int 821 init_clock(void) 822 { 823 rte_timer_subsystem_init(); 824 uint64_t hz = rte_get_timer_hz(); 825 uint64_t intrs = MS_PER_S/ff_global_cfg.freebsd.hz; 826 uint64_t tsc = (hz + MS_PER_S - 1) / MS_PER_S*intrs; 827 828 rte_timer_init(&freebsd_clock); 829 rte_timer_reset(&freebsd_clock, tsc, PERIODICAL, 830 rte_lcore_id(), &ff_hardclock_job, NULL); 831 832 ff_update_current_ts(); 833 834 return 0; 835 } 836 837 #ifdef FF_FLOW_ISOLATE 838 /** Print a message out of a flow error. */ 839 static int 840 port_flow_complain(struct rte_flow_error *error) 841 { 842 static const char *const errstrlist[] = { 843 [RTE_FLOW_ERROR_TYPE_NONE] = "no error", 844 [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified", 845 [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)", 846 [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field", 847 [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field", 848 [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field", 849 [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field", 850 [RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER] = "transfer field", 851 [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure", 852 [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length", 853 [RTE_FLOW_ERROR_TYPE_ITEM_SPEC] = "item specification", 854 [RTE_FLOW_ERROR_TYPE_ITEM_LAST] = "item specification range", 855 [RTE_FLOW_ERROR_TYPE_ITEM_MASK] = "item specification mask", 856 [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item", 857 [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions", 858 [RTE_FLOW_ERROR_TYPE_ACTION_CONF] = "action configuration", 859 [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action", 860 }; 861 const char *errstr; 862 char buf[32]; 863 int err = rte_errno; 864 865 if ((unsigned int)error->type >= RTE_DIM(errstrlist) || 866 !errstrlist[error->type]) 867 errstr = "unknown type"; 868 else 869 errstr = errstrlist[error->type]; 870 printf("Caught error type %d (%s): %s%s: %s\n", 871 error->type, errstr, 872 error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ", 873 error->cause), buf) : "", 874 error->message ? error->message : "(no stated reason)", 875 rte_strerror(err)); 876 return -err; 877 } 878 879 static int 880 port_flow_isolate(uint16_t port_id, int set) 881 { 882 struct rte_flow_error error; 883 884 /* Poisoning to make sure PMDs update it in case of error. */ 885 memset(&error, 0x66, sizeof(error)); 886 if (rte_flow_isolate(port_id, set, &error)) 887 return port_flow_complain(&error); 888 printf("Ingress traffic on port %u is %s to the defined flow rules\n", 889 port_id, 890 set ? "now restricted" : "not restricted anymore"); 891 return 0; 892 } 893 894 static int 895 create_tcp_flow(uint16_t port_id, uint16_t tcp_port) { 896 struct rte_flow_attr attr = {.ingress = 1}; 897 struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[port_id]; 898 int nb_queues = pconf->nb_lcores; 899 uint16_t queue[RTE_MAX_QUEUES_PER_PORT]; 900 int i = 0, j = 0; 901 for (i = 0, j = 0; i < nb_queues; ++i) 902 queue[j++] = i; 903 struct rte_flow_action_rss rss = { 904 .types = ETH_RSS_NONFRAG_IPV4_TCP, 905 .key_len = rsskey_len, 906 .key = rsskey, 907 .queue_num = j, 908 .queue = queue, 909 }; 910 911 struct rte_eth_dev_info dev_info; 912 int ret = rte_eth_dev_info_get(port_id, &dev_info); 913 if (ret != 0) 914 rte_exit(EXIT_FAILURE, "Error during getting device (port %u) info: %s\n", port_id, strerror(-ret)); 915 916 struct rte_flow_item pattern[3]; 917 struct rte_flow_action action[2]; 918 struct rte_flow_item_tcp tcp_spec; 919 struct rte_flow_item_tcp tcp_mask = { 920 .hdr = { 921 .src_port = RTE_BE16(0x0000), 922 .dst_port = RTE_BE16(0xffff), 923 }, 924 }; 925 struct rte_flow_error error; 926 927 memset(pattern, 0, sizeof(pattern)); 928 memset(action, 0, sizeof(action)); 929 930 /* set the dst ipv4 packet to the required value */ 931 pattern[0].type = RTE_FLOW_ITEM_TYPE_IPV4; 932 933 memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp)); 934 tcp_spec.hdr.dst_port = rte_cpu_to_be_16(tcp_port); 935 pattern[1].type = RTE_FLOW_ITEM_TYPE_TCP; 936 pattern[1].spec = &tcp_spec; 937 pattern[1].mask = &tcp_mask; 938 939 /* end the pattern array */ 940 pattern[2].type = RTE_FLOW_ITEM_TYPE_END; 941 942 /* create the action */ 943 action[0].type = RTE_FLOW_ACTION_TYPE_RSS; 944 action[0].conf = &rss; 945 action[1].type = RTE_FLOW_ACTION_TYPE_END; 946 947 struct rte_flow *flow; 948 /* validate and create the flow rule */ 949 if (!rte_flow_validate(port_id, &attr, pattern, action, &error)) { 950 flow = rte_flow_create(port_id, &attr, pattern, action, &error); 951 if (!flow) { 952 return port_flow_complain(&error); 953 } 954 } 955 956 memset(pattern, 0, sizeof(pattern)); 957 958 /* set the dst ipv4 packet to the required value */ 959 pattern[0].type = RTE_FLOW_ITEM_TYPE_IPV4; 960 961 struct rte_flow_item_tcp tcp_src_mask = { 962 .hdr = { 963 .src_port = RTE_BE16(0xffff), 964 .dst_port = RTE_BE16(0x0000), 965 }, 966 }; 967 968 memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp)); 969 tcp_spec.hdr.src_port = rte_cpu_to_be_16(tcp_port); 970 pattern[1].type = RTE_FLOW_ITEM_TYPE_TCP; 971 pattern[1].spec = &tcp_spec; 972 pattern[1].mask = &tcp_src_mask; 973 974 /* end the pattern array */ 975 pattern[2].type = RTE_FLOW_ITEM_TYPE_END; 976 977 /* validate and create the flow rule */ 978 if (!rte_flow_validate(port_id, &attr, pattern, action, &error)) { 979 flow = rte_flow_create(port_id, &attr, pattern, action, &error); 980 if (!flow) { 981 return port_flow_complain(&error); 982 } 983 } 984 985 return 1; 986 } 987 988 static int 989 init_flow(uint16_t port_id, uint16_t tcp_port) { 990 // struct ff_flow_cfg fcfg = ff_global_cfg.dpdk.flow_cfgs[0]; 991 992 // int i; 993 // for (i = 0; i < fcfg.nb_port; i++) { 994 // if(!create_tcp_flow(fcfg.port_id, fcfg.tcp_ports[i])) { 995 // return 0; 996 // } 997 // } 998 999 if(!create_tcp_flow(port_id, tcp_port)) { 1000 rte_exit(EXIT_FAILURE, "create tcp flow failed\n"); 1001 return -1; 1002 } 1003 1004 /* ARP rule */ 1005 struct rte_flow_attr attr = {.ingress = 1}; 1006 struct rte_flow_action_queue queue = {.index = 0}; 1007 1008 struct rte_flow_item pattern_[2]; 1009 struct rte_flow_action action[2]; 1010 struct rte_flow_item_eth eth_type = {.type = RTE_BE16(0x0806)}; 1011 struct rte_flow_item_eth eth_mask = { 1012 .type = RTE_BE16(0xffff) 1013 }; 1014 1015 memset(pattern_, 0, sizeof(pattern_)); 1016 memset(action, 0, sizeof(action)); 1017 1018 pattern_[0].type = RTE_FLOW_ITEM_TYPE_ETH; 1019 pattern_[0].spec = ð_type; 1020 pattern_[0].mask = ð_mask; 1021 1022 pattern_[1].type = RTE_FLOW_ITEM_TYPE_END; 1023 1024 /* create the action */ 1025 action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE; 1026 action[0].conf = &queue; 1027 action[1].type = RTE_FLOW_ACTION_TYPE_END; 1028 1029 struct rte_flow *flow; 1030 struct rte_flow_error error; 1031 /* validate and create the flow rule */ 1032 if (!rte_flow_validate(port_id, &attr, pattern_, action, &error)) { 1033 flow = rte_flow_create(port_id, &attr, pattern_, action, &error); 1034 if (!flow) { 1035 return port_flow_complain(&error); 1036 } 1037 } 1038 1039 return 1; 1040 } 1041 1042 #endif 1043 1044 int 1045 ff_dpdk_init(int argc, char **argv) 1046 { 1047 if (ff_global_cfg.dpdk.nb_procs < 1 || 1048 ff_global_cfg.dpdk.nb_procs > RTE_MAX_LCORE || 1049 ff_global_cfg.dpdk.proc_id >= ff_global_cfg.dpdk.nb_procs || 1050 ff_global_cfg.dpdk.proc_id < 0) { 1051 printf("param num_procs[%d] or proc_id[%d] error!\n", 1052 ff_global_cfg.dpdk.nb_procs, 1053 ff_global_cfg.dpdk.proc_id); 1054 exit(1); 1055 } 1056 1057 int ret = rte_eal_init(argc, argv); 1058 if (ret < 0) { 1059 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); 1060 } 1061 1062 numa_on = ff_global_cfg.dpdk.numa_on; 1063 1064 idle_sleep = ff_global_cfg.dpdk.idle_sleep; 1065 pkt_tx_delay = ff_global_cfg.dpdk.pkt_tx_delay > BURST_TX_DRAIN_US ? \ 1066 BURST_TX_DRAIN_US : ff_global_cfg.dpdk.pkt_tx_delay; 1067 1068 init_lcore_conf(); 1069 1070 init_mem_pool(); 1071 1072 init_dispatch_ring(); 1073 1074 init_msg_ring(); 1075 1076 #ifdef FF_KNI 1077 enable_kni = ff_global_cfg.kni.enable; 1078 if (enable_kni) { 1079 init_kni(); 1080 } 1081 #endif 1082 1083 #ifdef FF_USE_PAGE_ARRAY 1084 ff_mmap_init(); 1085 #endif 1086 1087 #ifdef FF_FLOW_ISOLATE 1088 // run once in primary process 1089 if (0 == lcore_conf.tx_queue_id[0]){ 1090 ret = port_flow_isolate(0, 1); 1091 if (ret < 0) 1092 rte_exit(EXIT_FAILURE, "init_port_isolate failed\n"); 1093 } 1094 #endif 1095 1096 ret = init_port_start(); 1097 if (ret < 0) { 1098 rte_exit(EXIT_FAILURE, "init_port_start failed\n"); 1099 } 1100 1101 init_clock(); 1102 #ifdef FF_FLOW_ISOLATE 1103 //Only give a example usage: port_id=0, tcp_port= 80. 1104 //Recommend: 1105 //1. init_flow should replace `set_rss_table` in `init_port_start` loop, This can set all NIC's port_id_list instead only 0 device(port_id). 1106 //2. using config options `tcp_port` replace magic number of 80 1107 ret = init_flow(0, 80); 1108 if (ret < 0) { 1109 rte_exit(EXIT_FAILURE, "init_port_flow failed\n"); 1110 } 1111 #endif 1112 return 0; 1113 } 1114 1115 static void 1116 ff_veth_input(const struct ff_dpdk_if_context *ctx, struct rte_mbuf *pkt) 1117 { 1118 uint8_t rx_csum = ctx->hw_features.rx_csum; 1119 if (rx_csum) { 1120 if (pkt->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD)) { 1121 rte_pktmbuf_free(pkt); 1122 return; 1123 } 1124 } 1125 1126 void *data = rte_pktmbuf_mtod(pkt, void*); 1127 uint16_t len = rte_pktmbuf_data_len(pkt); 1128 1129 void *hdr = ff_mbuf_gethdr(pkt, pkt->pkt_len, data, len, rx_csum); 1130 if (hdr == NULL) { 1131 rte_pktmbuf_free(pkt); 1132 return; 1133 } 1134 1135 if (pkt->ol_flags & PKT_RX_VLAN_STRIPPED) { 1136 ff_mbuf_set_vlan_info(hdr, pkt->vlan_tci); 1137 } 1138 1139 struct rte_mbuf *pn = pkt->next; 1140 void *prev = hdr; 1141 while(pn != NULL) { 1142 data = rte_pktmbuf_mtod(pn, void*); 1143 len = rte_pktmbuf_data_len(pn); 1144 1145 void *mb = ff_mbuf_get(prev, pn, data, len); 1146 if (mb == NULL) { 1147 ff_mbuf_free(hdr); 1148 rte_pktmbuf_free(pkt); 1149 return; 1150 } 1151 pn = pn->next; 1152 prev = mb; 1153 } 1154 1155 ff_veth_process_packet(ctx->ifp, hdr); 1156 } 1157 1158 static enum FilterReturn 1159 protocol_filter(const void *data, uint16_t len) 1160 { 1161 if(len < RTE_ETHER_ADDR_LEN) 1162 return FILTER_UNKNOWN; 1163 1164 const struct rte_ether_hdr *hdr; 1165 const struct rte_vlan_hdr *vlanhdr; 1166 hdr = (const struct rte_ether_hdr *)data; 1167 uint16_t ether_type = rte_be_to_cpu_16(hdr->ether_type); 1168 data += RTE_ETHER_HDR_LEN; 1169 len -= RTE_ETHER_HDR_LEN; 1170 1171 if (ether_type == RTE_ETHER_TYPE_VLAN) { 1172 vlanhdr = (struct rte_vlan_hdr *)data; 1173 ether_type = rte_be_to_cpu_16(vlanhdr->eth_proto); 1174 data += sizeof(struct rte_vlan_hdr); 1175 len -= sizeof(struct rte_vlan_hdr); 1176 } 1177 1178 if(ether_type == RTE_ETHER_TYPE_ARP) 1179 return FILTER_ARP; 1180 1181 #ifdef INET6 1182 if (ether_type == RTE_ETHER_TYPE_IPV6) { 1183 return ff_kni_proto_filter(data, 1184 len, ether_type); 1185 } 1186 #endif 1187 1188 #ifndef FF_KNI 1189 return FILTER_UNKNOWN; 1190 #else 1191 if (!enable_kni) { 1192 return FILTER_UNKNOWN; 1193 } 1194 1195 if(ether_type != RTE_ETHER_TYPE_IPV4) 1196 return FILTER_UNKNOWN; 1197 1198 return ff_kni_proto_filter(data, 1199 len, ether_type); 1200 #endif 1201 } 1202 1203 static inline void 1204 pktmbuf_deep_attach(struct rte_mbuf *mi, const struct rte_mbuf *m) 1205 { 1206 struct rte_mbuf *md; 1207 void *src, *dst; 1208 1209 dst = rte_pktmbuf_mtod(mi, void *); 1210 src = rte_pktmbuf_mtod(m, void *); 1211 1212 mi->data_len = m->data_len; 1213 rte_memcpy(dst, src, m->data_len); 1214 1215 mi->port = m->port; 1216 mi->vlan_tci = m->vlan_tci; 1217 mi->vlan_tci_outer = m->vlan_tci_outer; 1218 mi->tx_offload = m->tx_offload; 1219 mi->hash = m->hash; 1220 mi->ol_flags = m->ol_flags; 1221 mi->packet_type = m->packet_type; 1222 } 1223 1224 /* copied from rte_pktmbuf_clone */ 1225 static inline struct rte_mbuf * 1226 pktmbuf_deep_clone(const struct rte_mbuf *md, 1227 struct rte_mempool *mp) 1228 { 1229 struct rte_mbuf *mc, *mi, **prev; 1230 uint32_t pktlen; 1231 uint8_t nseg; 1232 1233 if (unlikely ((mc = rte_pktmbuf_alloc(mp)) == NULL)) 1234 return NULL; 1235 1236 mi = mc; 1237 prev = &mi->next; 1238 pktlen = md->pkt_len; 1239 nseg = 0; 1240 1241 do { 1242 nseg++; 1243 pktmbuf_deep_attach(mi, md); 1244 *prev = mi; 1245 prev = &mi->next; 1246 } while ((md = md->next) != NULL && 1247 (mi = rte_pktmbuf_alloc(mp)) != NULL); 1248 1249 *prev = NULL; 1250 mc->nb_segs = nseg; 1251 mc->pkt_len = pktlen; 1252 1253 /* Allocation of new indirect segment failed */ 1254 if (unlikely (mi == NULL)) { 1255 rte_pktmbuf_free(mc); 1256 return NULL; 1257 } 1258 1259 __rte_mbuf_sanity_check(mc, 1); 1260 return mc; 1261 } 1262 1263 static inline void 1264 process_packets(uint16_t port_id, uint16_t queue_id, struct rte_mbuf **bufs, 1265 uint16_t count, const struct ff_dpdk_if_context *ctx, int pkts_from_ring) 1266 { 1267 struct lcore_conf *qconf = &lcore_conf; 1268 uint16_t nb_queues = qconf->nb_queue_list[port_id]; 1269 1270 uint16_t i; 1271 for (i = 0; i < count; i++) { 1272 struct rte_mbuf *rtem = bufs[i]; 1273 1274 if (unlikely( ff_global_cfg.pcap.enable)) { 1275 if (!pkts_from_ring) { 1276 ff_dump_packets( ff_global_cfg.pcap.save_path, rtem, ff_global_cfg.pcap.snap_len, ff_global_cfg.pcap.save_len); 1277 } 1278 } 1279 1280 void *data = rte_pktmbuf_mtod(rtem, void*); 1281 uint16_t len = rte_pktmbuf_data_len(rtem); 1282 1283 if (!pkts_from_ring) { 1284 ff_traffic.rx_packets++; 1285 ff_traffic.rx_bytes += len; 1286 } 1287 1288 if (!pkts_from_ring && packet_dispatcher) { 1289 int ret = (*packet_dispatcher)(data, &len, queue_id, nb_queues); 1290 if (ret == FF_DISPATCH_RESPONSE) { 1291 rte_pktmbuf_pkt_len(rtem) = rte_pktmbuf_data_len(rtem) = len; 1292 1293 /* 1294 * We have not support vlan out strip 1295 */ 1296 if (rtem->vlan_tci) { 1297 data = rte_pktmbuf_prepend(rtem, sizeof(struct rte_vlan_hdr)); 1298 if (data != NULL) { 1299 memmove(data, data + sizeof(struct rte_vlan_hdr), RTE_ETHER_HDR_LEN); 1300 struct rte_ether_hdr *etherhdr = (struct rte_ether_hdr *)data; 1301 struct rte_vlan_hdr *vlanhdr = (struct rte_vlan_hdr *)(data + RTE_ETHER_HDR_LEN); 1302 vlanhdr->vlan_tci = rte_cpu_to_be_16(rtem->vlan_tci); 1303 vlanhdr->eth_proto = etherhdr->ether_type; 1304 etherhdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN); 1305 } 1306 } 1307 send_single_packet(rtem, port_id); 1308 continue; 1309 } 1310 1311 if (ret == FF_DISPATCH_ERROR || ret >= nb_queues) { 1312 rte_pktmbuf_free(rtem); 1313 continue; 1314 } 1315 1316 if (ret != queue_id) { 1317 ret = rte_ring_enqueue(dispatch_ring[port_id][ret], rtem); 1318 if (ret < 0) 1319 rte_pktmbuf_free(rtem); 1320 1321 continue; 1322 } 1323 } 1324 1325 enum FilterReturn filter = protocol_filter(data, len); 1326 #ifdef INET6 1327 if (filter == FILTER_ARP || filter == FILTER_NDP) { 1328 #else 1329 if (filter == FILTER_ARP) { 1330 #endif 1331 struct rte_mempool *mbuf_pool; 1332 struct rte_mbuf *mbuf_clone; 1333 if (!pkts_from_ring) { 1334 uint16_t j; 1335 for(j = 0; j < nb_queues; ++j) { 1336 if(j == queue_id) 1337 continue; 1338 1339 unsigned socket_id = 0; 1340 if (numa_on) { 1341 uint16_t lcore_id = qconf->port_cfgs[port_id].lcore_list[j]; 1342 socket_id = rte_lcore_to_socket_id(lcore_id); 1343 } 1344 mbuf_pool = pktmbuf_pool[socket_id]; 1345 mbuf_clone = pktmbuf_deep_clone(rtem, mbuf_pool); 1346 if(mbuf_clone) { 1347 int ret = rte_ring_enqueue(dispatch_ring[port_id][j], 1348 mbuf_clone); 1349 if (ret < 0) 1350 rte_pktmbuf_free(mbuf_clone); 1351 } 1352 } 1353 } 1354 1355 #ifdef FF_KNI 1356 if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) { 1357 mbuf_pool = pktmbuf_pool[qconf->socket_id]; 1358 mbuf_clone = pktmbuf_deep_clone(rtem, mbuf_pool); 1359 if(mbuf_clone) { 1360 ff_kni_enqueue(port_id, mbuf_clone); 1361 } 1362 } 1363 #endif 1364 ff_veth_input(ctx, rtem); 1365 #ifdef FF_KNI 1366 } else if (enable_kni) { 1367 if (knictl_action == FF_KNICTL_ACTION_ALL_TO_KNI){ 1368 ff_kni_enqueue(port_id, rtem); 1369 } else if (knictl_action == FF_KNICTL_ACTION_ALL_TO_FF){ 1370 ff_veth_input(ctx, rtem); 1371 } else if (knictl_action == FF_KNICTL_ACTION_DEFAULT){ 1372 if (enable_kni && 1373 ((filter == FILTER_KNI && kni_accept) || 1374 (filter == FILTER_UNKNOWN && !kni_accept)) ) { 1375 ff_kni_enqueue(port_id, rtem); 1376 } else { 1377 ff_veth_input(ctx, rtem); 1378 } 1379 } else { 1380 ff_veth_input(ctx, rtem); 1381 } 1382 #endif 1383 } else { 1384 ff_veth_input(ctx, rtem); 1385 } 1386 } 1387 } 1388 1389 static inline int 1390 process_dispatch_ring(uint16_t port_id, uint16_t queue_id, 1391 struct rte_mbuf **pkts_burst, const struct ff_dpdk_if_context *ctx) 1392 { 1393 /* read packet from ring buf and to process */ 1394 uint16_t nb_rb; 1395 nb_rb = rte_ring_dequeue_burst(dispatch_ring[port_id][queue_id], 1396 (void **)pkts_burst, MAX_PKT_BURST, NULL); 1397 1398 if(nb_rb > 0) { 1399 process_packets(port_id, queue_id, pkts_burst, nb_rb, ctx, 1); 1400 } 1401 1402 return 0; 1403 } 1404 1405 static inline void 1406 handle_sysctl_msg(struct ff_msg *msg) 1407 { 1408 int ret = ff_sysctl(msg->sysctl.name, msg->sysctl.namelen, 1409 msg->sysctl.old, msg->sysctl.oldlenp, msg->sysctl.new, 1410 msg->sysctl.newlen); 1411 1412 if (ret < 0) { 1413 msg->result = errno; 1414 } else { 1415 msg->result = 0; 1416 } 1417 } 1418 1419 static inline void 1420 handle_ioctl_msg(struct ff_msg *msg) 1421 { 1422 int fd, ret; 1423 #ifdef INET6 1424 if (msg->msg_type == FF_IOCTL6) { 1425 fd = ff_socket(AF_INET6, SOCK_DGRAM, 0); 1426 } else 1427 #endif 1428 fd = ff_socket(AF_INET, SOCK_DGRAM, 0); 1429 1430 if (fd < 0) { 1431 ret = -1; 1432 goto done; 1433 } 1434 1435 ret = ff_ioctl_freebsd(fd, msg->ioctl.cmd, msg->ioctl.data); 1436 1437 ff_close(fd); 1438 1439 done: 1440 if (ret < 0) { 1441 msg->result = errno; 1442 } else { 1443 msg->result = 0; 1444 } 1445 } 1446 1447 static inline void 1448 handle_route_msg(struct ff_msg *msg) 1449 { 1450 int ret = ff_rtioctl(msg->route.fib, msg->route.data, 1451 &msg->route.len, msg->route.maxlen); 1452 if (ret < 0) { 1453 msg->result = errno; 1454 } else { 1455 msg->result = 0; 1456 } 1457 } 1458 1459 static inline void 1460 handle_top_msg(struct ff_msg *msg) 1461 { 1462 msg->top = ff_top_status; 1463 msg->result = 0; 1464 } 1465 1466 #ifdef FF_NETGRAPH 1467 static inline void 1468 handle_ngctl_msg(struct ff_msg *msg) 1469 { 1470 int ret = ff_ngctl(msg->ngctl.cmd, msg->ngctl.data); 1471 if (ret < 0) { 1472 msg->result = errno; 1473 } else { 1474 msg->result = 0; 1475 msg->ngctl.ret = ret; 1476 } 1477 } 1478 #endif 1479 1480 #ifdef FF_IPFW 1481 static inline void 1482 handle_ipfw_msg(struct ff_msg *msg) 1483 { 1484 int fd, ret; 1485 fd = ff_socket(AF_INET, SOCK_RAW, IPPROTO_RAW); 1486 if (fd < 0) { 1487 ret = -1; 1488 goto done; 1489 } 1490 1491 switch (msg->ipfw.cmd) { 1492 case FF_IPFW_GET: 1493 ret = ff_getsockopt_freebsd(fd, msg->ipfw.level, 1494 msg->ipfw.optname, msg->ipfw.optval, 1495 msg->ipfw.optlen); 1496 break; 1497 case FF_IPFW_SET: 1498 ret = ff_setsockopt_freebsd(fd, msg->ipfw.level, 1499 msg->ipfw.optname, msg->ipfw.optval, 1500 *(msg->ipfw.optlen)); 1501 break; 1502 default: 1503 ret = -1; 1504 errno = ENOTSUP; 1505 break; 1506 } 1507 1508 ff_close(fd); 1509 1510 done: 1511 if (ret < 0) { 1512 msg->result = errno; 1513 } else { 1514 msg->result = 0; 1515 } 1516 } 1517 #endif 1518 1519 static inline void 1520 handle_traffic_msg(struct ff_msg *msg) 1521 { 1522 msg->traffic = ff_traffic; 1523 msg->result = 0; 1524 } 1525 1526 #ifdef FF_KNI 1527 static inline void 1528 handle_knictl_msg(struct ff_msg *msg) 1529 { 1530 if (msg->knictl.kni_cmd == FF_KNICTL_CMD_SET){ 1531 switch (msg->knictl.kni_action){ 1532 case FF_KNICTL_ACTION_ALL_TO_FF: knictl_action = FF_KNICTL_ACTION_ALL_TO_FF; msg->result = 0; printf("new kni action: alltoff\n"); break; 1533 case FF_KNICTL_ACTION_ALL_TO_KNI: knictl_action = FF_KNICTL_ACTION_ALL_TO_KNI; msg->result = 0; printf("new kni action: alltokni\n"); break; 1534 case FF_KNICTL_ACTION_DEFAULT: knictl_action = FF_KNICTL_ACTION_DEFAULT; msg->result = 0; printf("new kni action: default\n"); break; 1535 default: msg->result = -1; 1536 } 1537 } 1538 else if (msg->knictl.kni_cmd == FF_KNICTL_CMD_GET){ 1539 msg->knictl.kni_action = knictl_action; 1540 } else { 1541 msg->result = -2; 1542 } 1543 } 1544 #endif 1545 1546 static inline void 1547 handle_default_msg(struct ff_msg *msg) 1548 { 1549 msg->result = ENOTSUP; 1550 } 1551 1552 static inline void 1553 handle_msg(struct ff_msg *msg, uint16_t proc_id) 1554 { 1555 switch (msg->msg_type) { 1556 case FF_SYSCTL: 1557 handle_sysctl_msg(msg); 1558 break; 1559 case FF_IOCTL: 1560 #ifdef INET6 1561 case FF_IOCTL6: 1562 #endif 1563 handle_ioctl_msg(msg); 1564 break; 1565 case FF_ROUTE: 1566 handle_route_msg(msg); 1567 break; 1568 case FF_TOP: 1569 handle_top_msg(msg); 1570 break; 1571 #ifdef FF_NETGRAPH 1572 case FF_NGCTL: 1573 handle_ngctl_msg(msg); 1574 break; 1575 #endif 1576 #ifdef FF_IPFW 1577 case FF_IPFW_CTL: 1578 handle_ipfw_msg(msg); 1579 break; 1580 #endif 1581 case FF_TRAFFIC: 1582 handle_traffic_msg(msg); 1583 break; 1584 #ifdef FF_KNI 1585 case FF_KNICTL: 1586 handle_knictl_msg(msg); 1587 break; 1588 #endif 1589 default: 1590 handle_default_msg(msg); 1591 break; 1592 } 1593 if (rte_ring_enqueue(msg_ring[proc_id].ring[msg->msg_type], msg) < 0) { 1594 if (msg->original_buf) { 1595 rte_free(msg->buf_addr); 1596 msg->buf_addr = msg->original_buf; 1597 msg->buf_len = msg->original_buf_len; 1598 msg->original_buf = NULL; 1599 } 1600 1601 rte_mempool_put(message_pool, msg); 1602 } 1603 } 1604 1605 static inline int 1606 process_msg_ring(uint16_t proc_id, struct rte_mbuf **pkts_burst) 1607 { 1608 /* read msg from ring buf and to process */ 1609 uint16_t nb_rb; 1610 int i; 1611 1612 nb_rb = rte_ring_dequeue_burst(msg_ring[proc_id].ring[0], 1613 (void **)pkts_burst, MAX_PKT_BURST, NULL); 1614 1615 if (likely(nb_rb == 0)) 1616 return 0; 1617 1618 for (i = 0; i < nb_rb; ++i) { 1619 handle_msg((struct ff_msg *)pkts_burst[i], proc_id); 1620 } 1621 1622 return 0; 1623 } 1624 1625 /* Send burst of packets on an output interface */ 1626 static inline int 1627 send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port) 1628 { 1629 struct rte_mbuf **m_table; 1630 int ret; 1631 uint16_t queueid; 1632 1633 queueid = qconf->tx_queue_id[port]; 1634 m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; 1635 1636 if (unlikely(ff_global_cfg.pcap.enable)) { 1637 uint16_t i; 1638 for (i = 0; i < n; i++) { 1639 ff_dump_packets( ff_global_cfg.pcap.save_path, m_table[i], 1640 ff_global_cfg.pcap.snap_len, ff_global_cfg.pcap.save_len); 1641 } 1642 } 1643 1644 ret = rte_eth_tx_burst(port, queueid, m_table, n); 1645 ff_traffic.tx_packets += ret; 1646 uint16_t i; 1647 for (i = 0; i < ret; i++) { 1648 ff_traffic.tx_bytes += rte_pktmbuf_pkt_len(m_table[i]); 1649 #ifdef FF_USE_PAGE_ARRAY 1650 if (qconf->tx_mbufs[port].bsd_m_table[i]) 1651 ff_enq_tx_bsdmbuf(port, qconf->tx_mbufs[port].bsd_m_table[i], m_table[i]->nb_segs); 1652 #endif 1653 } 1654 if (unlikely(ret < n)) { 1655 do { 1656 rte_pktmbuf_free(m_table[ret]); 1657 #ifdef FF_USE_PAGE_ARRAY 1658 if ( qconf->tx_mbufs[port].bsd_m_table[ret] ) 1659 ff_mbuf_free(qconf->tx_mbufs[port].bsd_m_table[ret]); 1660 #endif 1661 } while (++ret < n); 1662 } 1663 return 0; 1664 } 1665 1666 /* Enqueue a single packet, and send burst if queue is filled */ 1667 static inline int 1668 send_single_packet(struct rte_mbuf *m, uint8_t port) 1669 { 1670 uint16_t len; 1671 struct lcore_conf *qconf; 1672 1673 qconf = &lcore_conf; 1674 len = qconf->tx_mbufs[port].len; 1675 qconf->tx_mbufs[port].m_table[len] = m; 1676 len++; 1677 1678 /* enough pkts to be sent */ 1679 if (unlikely(len == MAX_PKT_BURST)) { 1680 send_burst(qconf, MAX_PKT_BURST, port); 1681 len = 0; 1682 } 1683 1684 qconf->tx_mbufs[port].len = len; 1685 return 0; 1686 } 1687 1688 int 1689 ff_dpdk_if_send(struct ff_dpdk_if_context *ctx, void *m, 1690 int total) 1691 { 1692 #ifdef FF_USE_PAGE_ARRAY 1693 struct lcore_conf *qconf = &lcore_conf; 1694 int len = 0; 1695 1696 len = ff_if_send_onepkt(ctx, m,total); 1697 if (unlikely(len == MAX_PKT_BURST)) { 1698 send_burst(qconf, MAX_PKT_BURST, ctx->port_id); 1699 len = 0; 1700 } 1701 qconf->tx_mbufs[ctx->port_id].len = len; 1702 return 0; 1703 #endif 1704 struct rte_mempool *mbuf_pool = pktmbuf_pool[lcore_conf.socket_id]; 1705 struct rte_mbuf *head = rte_pktmbuf_alloc(mbuf_pool); 1706 if (head == NULL) { 1707 ff_mbuf_free(m); 1708 return -1; 1709 } 1710 1711 head->pkt_len = total; 1712 head->nb_segs = 0; 1713 1714 int off = 0; 1715 struct rte_mbuf *cur = head, *prev = NULL; 1716 while(total > 0) { 1717 if (cur == NULL) { 1718 cur = rte_pktmbuf_alloc(mbuf_pool); 1719 if (cur == NULL) { 1720 rte_pktmbuf_free(head); 1721 ff_mbuf_free(m); 1722 return -1; 1723 } 1724 } 1725 1726 if (prev != NULL) { 1727 prev->next = cur; 1728 } 1729 head->nb_segs++; 1730 1731 prev = cur; 1732 void *data = rte_pktmbuf_mtod(cur, void*); 1733 int len = total > RTE_MBUF_DEFAULT_DATAROOM ? RTE_MBUF_DEFAULT_DATAROOM : total; 1734 int ret = ff_mbuf_copydata(m, data, off, len); 1735 if (ret < 0) { 1736 rte_pktmbuf_free(head); 1737 ff_mbuf_free(m); 1738 return -1; 1739 } 1740 1741 1742 cur->data_len = len; 1743 off += len; 1744 total -= len; 1745 cur = NULL; 1746 } 1747 1748 struct ff_tx_offload offload = {0}; 1749 ff_mbuf_tx_offload(m, &offload); 1750 1751 void *data = rte_pktmbuf_mtod(head, void*); 1752 1753 if (offload.ip_csum) { 1754 /* ipv6 not supported yet */ 1755 struct rte_ipv4_hdr *iph; 1756 int iph_len; 1757 iph = (struct rte_ipv4_hdr *)(data + RTE_ETHER_HDR_LEN); 1758 iph_len = (iph->version_ihl & 0x0f) << 2; 1759 1760 head->ol_flags |= PKT_TX_IP_CKSUM | PKT_TX_IPV4; 1761 head->l2_len = RTE_ETHER_HDR_LEN; 1762 head->l3_len = iph_len; 1763 } 1764 1765 if (ctx->hw_features.tx_csum_l4) { 1766 struct rte_ipv4_hdr *iph; 1767 int iph_len; 1768 iph = (struct rte_ipv4_hdr *)(data + RTE_ETHER_HDR_LEN); 1769 iph_len = (iph->version_ihl & 0x0f) << 2; 1770 1771 if (offload.tcp_csum) { 1772 head->ol_flags |= PKT_TX_TCP_CKSUM; 1773 head->l2_len = RTE_ETHER_HDR_LEN; 1774 head->l3_len = iph_len; 1775 } 1776 1777 /* 1778 * TCP segmentation offload. 1779 * 1780 * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag 1781 * implies PKT_TX_TCP_CKSUM) 1782 * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 1783 * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and 1784 * write the IP checksum to 0 in the packet 1785 * - fill the mbuf offload information: l2_len, 1786 * l3_len, l4_len, tso_segsz 1787 * - calculate the pseudo header checksum without taking ip_len 1788 * in account, and set it in the TCP header. Refer to 1789 * rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum() that can be 1790 * used as helpers. 1791 */ 1792 if (offload.tso_seg_size) { 1793 struct rte_tcp_hdr *tcph; 1794 int tcph_len; 1795 tcph = (struct rte_tcp_hdr *)((char *)iph + iph_len); 1796 tcph_len = (tcph->data_off & 0xf0) >> 2; 1797 tcph->cksum = rte_ipv4_phdr_cksum(iph, PKT_TX_TCP_SEG); 1798 1799 head->ol_flags |= PKT_TX_TCP_SEG; 1800 head->l4_len = tcph_len; 1801 head->tso_segsz = offload.tso_seg_size; 1802 } 1803 1804 if (offload.udp_csum) { 1805 head->ol_flags |= PKT_TX_UDP_CKSUM; 1806 head->l2_len = RTE_ETHER_HDR_LEN; 1807 head->l3_len = iph_len; 1808 } 1809 } 1810 1811 ff_mbuf_free(m); 1812 1813 return send_single_packet(head, ctx->port_id); 1814 } 1815 1816 static int 1817 main_loop(void *arg) 1818 { 1819 struct loop_routine *lr = (struct loop_routine *)arg; 1820 1821 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1822 uint64_t prev_tsc, diff_tsc, cur_tsc, usch_tsc, div_tsc, usr_tsc, sys_tsc, end_tsc, idle_sleep_tsc; 1823 int i, j, nb_rx, idle; 1824 uint16_t port_id, queue_id; 1825 struct lcore_conf *qconf; 1826 uint64_t drain_tsc = 0; 1827 struct ff_dpdk_if_context *ctx; 1828 1829 if (pkt_tx_delay) { 1830 drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * pkt_tx_delay; 1831 } 1832 1833 prev_tsc = 0; 1834 usch_tsc = 0; 1835 1836 qconf = &lcore_conf; 1837 1838 while (1) { 1839 cur_tsc = rte_rdtsc(); 1840 if (unlikely(freebsd_clock.expire < cur_tsc)) { 1841 rte_timer_manage(); 1842 } 1843 1844 idle = 1; 1845 sys_tsc = 0; 1846 usr_tsc = 0; 1847 1848 /* 1849 * TX burst queue drain 1850 */ 1851 diff_tsc = cur_tsc - prev_tsc; 1852 if (unlikely(diff_tsc >= drain_tsc)) { 1853 for (i = 0; i < qconf->nb_tx_port; i++) { 1854 port_id = qconf->tx_port_id[i]; 1855 if (qconf->tx_mbufs[port_id].len == 0) 1856 continue; 1857 1858 idle = 0; 1859 1860 send_burst(qconf, 1861 qconf->tx_mbufs[port_id].len, 1862 port_id); 1863 qconf->tx_mbufs[port_id].len = 0; 1864 } 1865 1866 prev_tsc = cur_tsc; 1867 } 1868 1869 /* 1870 * Read packet from RX queues 1871 */ 1872 for (i = 0; i < qconf->nb_rx_queue; ++i) { 1873 port_id = qconf->rx_queue_list[i].port_id; 1874 queue_id = qconf->rx_queue_list[i].queue_id; 1875 ctx = veth_ctx[port_id]; 1876 1877 #ifdef FF_KNI 1878 if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) { 1879 ff_kni_process(port_id, queue_id, pkts_burst, MAX_PKT_BURST); 1880 } 1881 #endif 1882 1883 process_dispatch_ring(port_id, queue_id, pkts_burst, ctx); 1884 1885 nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts_burst, 1886 MAX_PKT_BURST); 1887 if (nb_rx == 0) 1888 continue; 1889 1890 idle = 0; 1891 1892 /* Prefetch first packets */ 1893 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1894 rte_prefetch0(rte_pktmbuf_mtod( 1895 pkts_burst[j], void *)); 1896 } 1897 1898 /* Prefetch and handle already prefetched packets */ 1899 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1900 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1901 j + PREFETCH_OFFSET], void *)); 1902 process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0); 1903 } 1904 1905 /* Handle remaining prefetched packets */ 1906 for (; j < nb_rx; j++) { 1907 process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0); 1908 } 1909 } 1910 1911 process_msg_ring(qconf->proc_id, pkts_burst); 1912 1913 div_tsc = rte_rdtsc(); 1914 1915 if (likely(lr->loop != NULL && (!idle || cur_tsc - usch_tsc >= drain_tsc))) { 1916 usch_tsc = cur_tsc; 1917 lr->loop(lr->arg); 1918 } 1919 1920 idle_sleep_tsc = rte_rdtsc(); 1921 if (likely(idle && idle_sleep)) { 1922 usleep(idle_sleep); 1923 end_tsc = rte_rdtsc(); 1924 } else { 1925 end_tsc = idle_sleep_tsc; 1926 } 1927 1928 if (usch_tsc == cur_tsc) { 1929 usr_tsc = idle_sleep_tsc - div_tsc; 1930 } 1931 1932 if (!idle) { 1933 sys_tsc = div_tsc - cur_tsc; 1934 ff_top_status.sys_tsc += sys_tsc; 1935 } 1936 1937 ff_top_status.usr_tsc += usr_tsc; 1938 ff_top_status.work_tsc += end_tsc - cur_tsc; 1939 ff_top_status.idle_tsc += end_tsc - cur_tsc - usr_tsc - sys_tsc; 1940 1941 ff_top_status.loops++; 1942 } 1943 1944 return 0; 1945 } 1946 1947 int 1948 ff_dpdk_if_up(void) { 1949 int i; 1950 struct lcore_conf *qconf = &lcore_conf; 1951 for (i = 0; i < qconf->nb_tx_port; i++) { 1952 uint16_t port_id = qconf->tx_port_id[i]; 1953 1954 struct ff_port_cfg *pconf = &qconf->port_cfgs[port_id]; 1955 veth_ctx[port_id] = ff_veth_attach(pconf); 1956 if (veth_ctx[port_id] == NULL) { 1957 rte_exit(EXIT_FAILURE, "ff_veth_attach failed"); 1958 } 1959 } 1960 1961 return 0; 1962 } 1963 1964 void 1965 ff_dpdk_run(loop_func_t loop, void *arg) { 1966 struct loop_routine *lr = rte_malloc(NULL, 1967 sizeof(struct loop_routine), 0); 1968 lr->loop = loop; 1969 lr->arg = arg; 1970 rte_eal_mp_remote_launch(main_loop, lr, CALL_MAIN); 1971 rte_eal_mp_wait_lcore(); 1972 rte_free(lr); 1973 } 1974 1975 void 1976 ff_dpdk_pktmbuf_free(void *m) 1977 { 1978 rte_pktmbuf_free_seg((struct rte_mbuf *)m); 1979 } 1980 1981 static uint32_t 1982 toeplitz_hash(unsigned keylen, const uint8_t *key, 1983 unsigned datalen, const uint8_t *data) 1984 { 1985 uint32_t hash = 0, v; 1986 u_int i, b; 1987 1988 /* XXXRW: Perhaps an assertion about key length vs. data length? */ 1989 1990 v = (key[0]<<24) + (key[1]<<16) + (key[2] <<8) + key[3]; 1991 for (i = 0; i < datalen; i++) { 1992 for (b = 0; b < 8; b++) { 1993 if (data[i] & (1<<(7-b))) 1994 hash ^= v; 1995 v <<= 1; 1996 if ((i + 4) < keylen && 1997 (key[i+4] & (1<<(7-b)))) 1998 v |= 1; 1999 } 2000 } 2001 return (hash); 2002 } 2003 2004 int 2005 ff_in_pcbladdr(uint16_t family, void *faddr, uint16_t fport, void *laddr) 2006 { 2007 int ret = 0; 2008 uint16_t fa; 2009 2010 if (!pcblddr_fun) 2011 return ret; 2012 2013 if (family == AF_INET) 2014 fa = AF_INET; 2015 else if (family == AF_INET6_FREEBSD) 2016 fa = AF_INET6_LINUX; 2017 else 2018 return EADDRNOTAVAIL; 2019 2020 ret = (*pcblddr_fun)(fa, faddr, fport, laddr); 2021 2022 return ret; 2023 } 2024 2025 void 2026 ff_regist_pcblddr_fun(pcblddr_func_t func) 2027 { 2028 pcblddr_fun = func; 2029 } 2030 2031 int 2032 ff_rss_check(void *softc, uint32_t saddr, uint32_t daddr, 2033 uint16_t sport, uint16_t dport) 2034 { 2035 struct lcore_conf *qconf = &lcore_conf; 2036 struct ff_dpdk_if_context *ctx = ff_veth_softc_to_hostc(softc); 2037 uint16_t nb_queues = qconf->nb_queue_list[ctx->port_id]; 2038 2039 if (nb_queues <= 1) { 2040 return 1; 2041 } 2042 2043 uint16_t reta_size = rss_reta_size[ctx->port_id]; 2044 uint16_t queueid = qconf->tx_queue_id[ctx->port_id]; 2045 2046 uint8_t data[sizeof(saddr) + sizeof(daddr) + sizeof(sport) + 2047 sizeof(dport)]; 2048 2049 unsigned datalen = 0; 2050 2051 bcopy(&saddr, &data[datalen], sizeof(saddr)); 2052 datalen += sizeof(saddr); 2053 2054 bcopy(&daddr, &data[datalen], sizeof(daddr)); 2055 datalen += sizeof(daddr); 2056 2057 bcopy(&sport, &data[datalen], sizeof(sport)); 2058 datalen += sizeof(sport); 2059 2060 bcopy(&dport, &data[datalen], sizeof(dport)); 2061 datalen += sizeof(dport); 2062 2063 uint32_t hash = 0; 2064 hash = toeplitz_hash(rsskey_len, rsskey, datalen, data); 2065 2066 return ((hash & (reta_size - 1)) % nb_queues) == queueid; 2067 } 2068 2069 void 2070 ff_regist_packet_dispatcher(dispatch_func_t func) 2071 { 2072 packet_dispatcher = func; 2073 } 2074 2075 uint64_t 2076 ff_get_tsc_ns() 2077 { 2078 uint64_t cur_tsc = rte_rdtsc(); 2079 uint64_t hz = rte_get_tsc_hz(); 2080 return ((double)cur_tsc/(double)hz) * NS_PER_S; 2081 } 2082 2083