1 /* 2 * Copyright (C) 2017 THL A29 Limited, a Tencent company. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, this 9 * list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright notice, 11 * this list of conditions and the following disclaimer in the documentation 12 * and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * 25 */ 26 #include <assert.h> 27 #include <unistd.h> 28 #include <sys/mman.h> 29 #include <errno.h> 30 31 #include <rte_common.h> 32 #include <rte_byteorder.h> 33 #include <rte_log.h> 34 #include <rte_memory.h> 35 #include <rte_memcpy.h> 36 #include <rte_memzone.h> 37 #include <rte_config.h> 38 #include <rte_eal.h> 39 #include <rte_pci.h> 40 #include <rte_mbuf.h> 41 #include <rte_memory.h> 42 #include <rte_lcore.h> 43 #include <rte_launch.h> 44 #include <rte_ethdev.h> 45 #include <rte_debug.h> 46 #include <rte_common.h> 47 #include <rte_ether.h> 48 #include <rte_malloc.h> 49 #include <rte_cycles.h> 50 #include <rte_timer.h> 51 #include <rte_thash.h> 52 #include <rte_ip.h> 53 #include <rte_tcp.h> 54 #include <rte_udp.h> 55 #include <rte_eth_bond.h> 56 57 #include "ff_dpdk_if.h" 58 #include "ff_dpdk_pcap.h" 59 #include "ff_dpdk_kni.h" 60 #include "ff_config.h" 61 #include "ff_veth.h" 62 #include "ff_host_interface.h" 63 #include "ff_msg.h" 64 #include "ff_api.h" 65 #include "ff_memory.h" 66 67 #ifdef FF_KNI 68 #define KNI_MBUF_MAX 2048 69 #define KNI_QUEUE_SIZE 2048 70 71 int enable_kni; 72 static int kni_accept; 73 static int knictl_action = FF_KNICTL_ACTION_DEFAULT; 74 #endif 75 76 static int numa_on; 77 78 static unsigned idle_sleep; 79 static unsigned pkt_tx_delay; 80 81 static struct rte_timer freebsd_clock; 82 83 // Mellanox Linux's driver key 84 static uint8_t default_rsskey_40bytes[40] = { 85 0xd1, 0x81, 0xc6, 0x2c, 0xf7, 0xf4, 0xdb, 0x5b, 86 0x19, 0x83, 0xa2, 0xfc, 0x94, 0x3e, 0x1a, 0xdb, 87 0xd9, 0x38, 0x9e, 0x6b, 0xd1, 0x03, 0x9c, 0x2c, 88 0xa7, 0x44, 0x99, 0xad, 0x59, 0x3d, 0x56, 0xd9, 89 0xf3, 0x25, 0x3c, 0x06, 0x2a, 0xdc, 0x1f, 0xfc 90 }; 91 92 static uint8_t default_rsskey_52bytes[52] = { 93 0x44, 0x39, 0x79, 0x6b, 0xb5, 0x4c, 0x50, 0x23, 94 0xb6, 0x75, 0xea, 0x5b, 0x12, 0x4f, 0x9f, 0x30, 95 0xb8, 0xa2, 0xc0, 0x3d, 0xdf, 0xdc, 0x4d, 0x02, 96 0xa0, 0x8c, 0x9b, 0x33, 0x4a, 0xf6, 0x4a, 0x4c, 97 0x05, 0xc6, 0xfa, 0x34, 0x39, 0x58, 0xd8, 0x55, 98 0x7d, 0x99, 0x58, 0x3a, 0xe1, 0x38, 0xc9, 0x2e, 99 0x81, 0x15, 0x03, 0x66 100 }; 101 102 static uint8_t symmetric_rsskey[52] = { 103 0x6d, 0x5a, 
0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 104 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 105 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 106 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 107 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 108 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 109 0x6d, 0x5a, 0x6d, 0x5a 110 }; 111 112 static int rsskey_len = sizeof(default_rsskey_40bytes); 113 static uint8_t *rsskey = default_rsskey_40bytes; 114 115 struct lcore_conf lcore_conf; 116 117 struct rte_mempool *pktmbuf_pool[NB_SOCKETS]; 118 119 static pcblddr_func_t pcblddr_fun; 120 121 static struct rte_ring **dispatch_ring[RTE_MAX_ETHPORTS]; 122 static dispatch_func_t packet_dispatcher; 123 124 static uint16_t rss_reta_size[RTE_MAX_ETHPORTS]; 125 126 #define BOND_DRIVER_NAME "net_bonding" 127 128 static inline int send_single_packet(struct rte_mbuf *m, uint8_t port); 129 130 struct ff_msg_ring { 131 char ring_name[FF_MSG_NUM][RTE_RING_NAMESIZE]; 132 /* ring[0] for lcore recv msg, other send */ 133 /* ring[1] for lcore send msg, other read */ 134 struct rte_ring *ring[FF_MSG_NUM]; 135 } __rte_cache_aligned; 136 137 static struct ff_msg_ring msg_ring[RTE_MAX_LCORE]; 138 static struct rte_mempool *message_pool; 139 static struct ff_dpdk_if_context *veth_ctx[RTE_MAX_ETHPORTS]; 140 141 static struct ff_top_args ff_top_status; 142 static struct ff_traffic_args ff_traffic; 143 extern void ff_hardclock(void); 144 145 static void 146 ff_hardclock_job(__rte_unused struct rte_timer *timer, 147 __rte_unused void *arg) { 148 ff_hardclock(); 149 ff_update_current_ts(); 150 } 151 152 struct ff_dpdk_if_context * 153 ff_dpdk_register_if(void *sc, void *ifp, struct ff_port_cfg *cfg) 154 { 155 struct ff_dpdk_if_context *ctx; 156 157 ctx = calloc(1, sizeof(struct ff_dpdk_if_context)); 158 if (ctx == NULL) 159 return NULL; 160 161 ctx->sc = sc; 162 ctx->ifp = ifp; 163 ctx->port_id = cfg->port_id; 164 ctx->hw_features = cfg->hw_features; 165 166 return ctx; 167 } 168 169 void 170 ff_dpdk_deregister_if(struct ff_dpdk_if_context *ctx) 171 { 172 free(ctx); 173 } 174 175 static void 176 check_all_ports_link_status(void) 177 { 178 #define CHECK_INTERVAL 100 /* 100ms */ 179 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 180 181 uint16_t portid; 182 uint8_t count, all_ports_up, print_flag = 0; 183 struct rte_eth_link link; 184 185 printf("\nChecking link status"); 186 fflush(stdout); 187 188 int i, nb_ports; 189 nb_ports = ff_global_cfg.dpdk.nb_ports; 190 for (count = 0; count <= MAX_CHECK_TIME; count++) { 191 all_ports_up = 1; 192 for (i = 0; i < nb_ports; i++) { 193 uint16_t portid = ff_global_cfg.dpdk.portid_list[i]; 194 memset(&link, 0, sizeof(link)); 195 rte_eth_link_get_nowait(portid, &link); 196 197 /* print link status if flag set */ 198 if (print_flag == 1) { 199 if (link.link_status) { 200 printf("Port %d Link Up - speed %u " 201 "Mbps - %s\n", (int)portid, 202 (unsigned)link.link_speed, 203 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
204 ("full-duplex") : ("half-duplex\n")); 205 } else { 206 printf("Port %d Link Down\n", (int)portid); 207 } 208 continue; 209 } 210 /* clear all_ports_up flag if any link down */ 211 if (link.link_status == 0) { 212 all_ports_up = 0; 213 break; 214 } 215 } 216 217 /* after finally printing all link status, get out */ 218 if (print_flag == 1) 219 break; 220 221 if (all_ports_up == 0) { 222 printf("."); 223 fflush(stdout); 224 rte_delay_ms(CHECK_INTERVAL); 225 } 226 227 /* set the print_flag if all ports up or timeout */ 228 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 229 print_flag = 1; 230 printf("done\n"); 231 } 232 } 233 } 234 235 static int 236 init_lcore_conf(void) 237 { 238 uint8_t nb_dev_ports = rte_eth_dev_count_avail(); 239 if (nb_dev_ports == 0) { 240 rte_exit(EXIT_FAILURE, "No probed ethernet devices\n"); 241 } 242 243 if (ff_global_cfg.dpdk.max_portid >= nb_dev_ports) { 244 rte_exit(EXIT_FAILURE, "this machine doesn't have port %d.\n", 245 ff_global_cfg.dpdk.max_portid); 246 } 247 248 lcore_conf.port_cfgs = ff_global_cfg.dpdk.port_cfgs; 249 lcore_conf.proc_id = ff_global_cfg.dpdk.proc_id; 250 251 uint16_t socket_id = 0; 252 if (numa_on) { 253 socket_id = rte_lcore_to_socket_id(rte_lcore_id()); 254 } 255 256 lcore_conf.socket_id = socket_id; 257 258 uint16_t lcore_id = ff_global_cfg.dpdk.proc_lcore[lcore_conf.proc_id]; 259 if (!rte_lcore_is_enabled(lcore_id)) { 260 rte_exit(EXIT_FAILURE, "lcore %u unavailable\n", lcore_id); 261 } 262 263 int j; 264 for (j = 0; j < ff_global_cfg.dpdk.nb_ports; ++j) { 265 uint16_t port_id = ff_global_cfg.dpdk.portid_list[j]; 266 struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[port_id]; 267 268 int queueid = -1; 269 int i; 270 for (i = 0; i < pconf->nb_lcores; i++) { 271 if (pconf->lcore_list[i] == lcore_id) { 272 queueid = i; 273 } 274 } 275 if (queueid < 0) { 276 continue; 277 } 278 printf("lcore: %u, port: %u, queue: %u\n", lcore_id, port_id, queueid); 279 uint16_t nb_rx_queue = lcore_conf.nb_rx_queue; 280 lcore_conf.rx_queue_list[nb_rx_queue].port_id = port_id; 281 lcore_conf.rx_queue_list[nb_rx_queue].queue_id = queueid; 282 lcore_conf.nb_rx_queue++; 283 284 lcore_conf.tx_queue_id[port_id] = queueid; 285 lcore_conf.tx_port_id[lcore_conf.nb_tx_port] = port_id; 286 lcore_conf.nb_tx_port++; 287 288 /* Enable pcap dump */ 289 if (ff_global_cfg.pcap.enable) { 290 ff_enable_pcap(ff_global_cfg.pcap.save_path, ff_global_cfg.pcap.snap_len); 291 } 292 293 lcore_conf.nb_queue_list[port_id] = pconf->nb_lcores; 294 } 295 296 if (lcore_conf.nb_rx_queue == 0) { 297 rte_exit(EXIT_FAILURE, "lcore %u has nothing to do\n", lcore_id); 298 } 299 300 return 0; 301 } 302 303 static int 304 init_mem_pool(void) 305 { 306 uint8_t nb_ports = ff_global_cfg.dpdk.nb_ports; 307 uint32_t nb_lcores = ff_global_cfg.dpdk.nb_procs; 308 uint32_t nb_tx_queue = nb_lcores; 309 uint32_t nb_rx_queue = lcore_conf.nb_rx_queue * nb_lcores; 310 uint16_t max_portid = ff_global_cfg.dpdk.max_portid; 311 312 unsigned nb_mbuf = RTE_ALIGN_CEIL ( 313 (nb_rx_queue * (max_portid + 1) * 2 * RX_QUEUE_SIZE + 314 nb_ports * (max_portid + 1) * 2 * nb_lcores * MAX_PKT_BURST + 315 nb_ports * (max_portid + 1) * 2 * nb_tx_queue * TX_QUEUE_SIZE + 316 nb_lcores * MEMPOOL_CACHE_SIZE + 317 #ifdef FF_KNI 318 nb_ports * KNI_MBUF_MAX + 319 nb_ports * KNI_QUEUE_SIZE + 320 #endif 321 nb_lcores * nb_ports * DISPATCH_RING_SIZE), 322 (unsigned)8192); 323 324 unsigned socketid = 0; 325 uint16_t i, lcore_id; 326 char s[64]; 327 328 for (i = 0; i < ff_global_cfg.dpdk.nb_procs; i++) { 329 lcore_id 
= ff_global_cfg.dpdk.proc_lcore[i]; 330 if (numa_on) { 331 socketid = rte_lcore_to_socket_id(lcore_id); 332 } 333 334 if (socketid >= NB_SOCKETS) { 335 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n", 336 socketid, i, NB_SOCKETS); 337 } 338 339 if (pktmbuf_pool[socketid] != NULL) { 340 continue; 341 } 342 343 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 344 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 345 pktmbuf_pool[socketid] = 346 rte_pktmbuf_pool_create(s, nb_mbuf, 347 MEMPOOL_CACHE_SIZE, 0, 348 RTE_MBUF_DEFAULT_BUF_SIZE, socketid); 349 } else { 350 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 351 pktmbuf_pool[socketid] = rte_mempool_lookup(s); 352 } 353 354 if (pktmbuf_pool[socketid] == NULL) { 355 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool on socket %d\n", socketid); 356 } else { 357 printf("create mbuf pool on socket %d\n", socketid); 358 } 359 360 #ifdef FF_USE_PAGE_ARRAY 361 nb_mbuf = RTE_ALIGN_CEIL ( 362 nb_ports*nb_lcores*MAX_PKT_BURST + 363 nb_ports*nb_tx_queue*TX_QUEUE_SIZE + 364 nb_lcores*MEMPOOL_CACHE_SIZE, 365 (unsigned)4096); 366 ff_init_ref_pool(nb_mbuf, socketid); 367 #endif 368 } 369 370 return 0; 371 } 372 373 static struct rte_ring * 374 create_ring(const char *name, unsigned count, int socket_id, unsigned flags) 375 { 376 struct rte_ring *ring; 377 378 if (name == NULL) { 379 rte_exit(EXIT_FAILURE, "create ring failed, no name!\n"); 380 } 381 382 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 383 ring = rte_ring_create(name, count, socket_id, flags); 384 } else { 385 ring = rte_ring_lookup(name); 386 } 387 388 if (ring == NULL) { 389 rte_exit(EXIT_FAILURE, "create ring:%s failed!\n", name); 390 } 391 392 return ring; 393 } 394 395 static int 396 init_dispatch_ring(void) 397 { 398 int j; 399 char name_buf[RTE_RING_NAMESIZE]; 400 int queueid; 401 402 unsigned socketid = lcore_conf.socket_id; 403 404 /* Create ring according to ports actually being used. 
*/ 405 int nb_ports = ff_global_cfg.dpdk.nb_ports; 406 for (j = 0; j < nb_ports; j++) { 407 uint16_t portid = ff_global_cfg.dpdk.portid_list[j]; 408 struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[portid]; 409 int nb_queues = pconf->nb_lcores; 410 if (dispatch_ring[portid] == NULL) { 411 snprintf(name_buf, RTE_RING_NAMESIZE, "ring_ptr_p%d", portid); 412 413 dispatch_ring[portid] = rte_zmalloc(name_buf, 414 sizeof(struct rte_ring *) * nb_queues, 415 RTE_CACHE_LINE_SIZE); 416 if (dispatch_ring[portid] == NULL) { 417 rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) " 418 "failed\n", name_buf); 419 } 420 } 421 422 for(queueid = 0; queueid < nb_queues; ++queueid) { 423 snprintf(name_buf, RTE_RING_NAMESIZE, "dispatch_ring_p%d_q%d", 424 portid, queueid); 425 dispatch_ring[portid][queueid] = create_ring(name_buf, 426 DISPATCH_RING_SIZE, socketid, RING_F_SC_DEQ); 427 428 if (dispatch_ring[portid][queueid] == NULL) 429 rte_panic("create ring:%s failed!\n", name_buf); 430 431 printf("create ring:%s success, %u ring entries are now free!\n", 432 name_buf, rte_ring_free_count(dispatch_ring[portid][queueid])); 433 } 434 } 435 436 return 0; 437 } 438 439 static void 440 ff_msg_init(struct rte_mempool *mp, 441 __attribute__((unused)) void *opaque_arg, 442 void *obj, __attribute__((unused)) unsigned i) 443 { 444 struct ff_msg *msg = (struct ff_msg *)obj; 445 msg->msg_type = FF_UNKNOWN; 446 msg->buf_addr = (char *)msg + sizeof(struct ff_msg); 447 msg->buf_len = mp->elt_size - sizeof(struct ff_msg); 448 msg->original_buf = NULL; 449 msg->original_buf_len = 0; 450 } 451 452 static int 453 init_msg_ring(void) 454 { 455 uint16_t i, j; 456 uint16_t nb_procs = ff_global_cfg.dpdk.nb_procs; 457 unsigned socketid = lcore_conf.socket_id; 458 459 /* Create message buffer pool */ 460 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 461 message_pool = rte_mempool_create(FF_MSG_POOL, 462 MSG_RING_SIZE * 2 * nb_procs, 463 MAX_MSG_BUF_SIZE, MSG_RING_SIZE / 2, 0, 464 NULL, NULL, ff_msg_init, NULL, 465 socketid, 0); 466 } else { 467 message_pool = rte_mempool_lookup(FF_MSG_POOL); 468 } 469 470 if (message_pool == NULL) { 471 rte_panic("Create msg mempool failed\n"); 472 } 473 474 for(i = 0; i < nb_procs; ++i) { 475 snprintf(msg_ring[i].ring_name[0], RTE_RING_NAMESIZE, 476 "%s%u", FF_MSG_RING_IN, i); 477 msg_ring[i].ring[0] = create_ring(msg_ring[i].ring_name[0], 478 MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ); 479 if (msg_ring[i].ring[0] == NULL) 480 rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[0]); 481 482 for (j = FF_SYSCTL; j < FF_MSG_NUM; j++) { 483 snprintf(msg_ring[i].ring_name[j], RTE_RING_NAMESIZE, 484 "%s%u_%u", FF_MSG_RING_OUT, i, j); 485 msg_ring[i].ring[j] = create_ring(msg_ring[i].ring_name[j], 486 MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ); 487 if (msg_ring[i].ring[j] == NULL) 488 rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[j]); 489 } 490 } 491 492 return 0; 493 } 494 495 #ifdef FF_KNI 496 497 static enum FF_KNICTL_CMD get_kni_action(const char *c){ 498 if (!c) 499 return FF_KNICTL_ACTION_DEFAULT; 500 if (0 == strcasecmp(c, "alltokni")){ 501 return FF_KNICTL_ACTION_ALL_TO_KNI; 502 } else if (0 == strcasecmp(c, "alltoff")){ 503 return FF_KNICTL_ACTION_ALL_TO_FF; 504 } else if (0 == strcasecmp(c, "default")){ 505 return FF_KNICTL_ACTION_DEFAULT; 506 } else { 507 return FF_KNICTL_ACTION_DEFAULT; 508 } 509 } 510 511 static int 512 init_kni(void) 513 { 514 int nb_ports = rte_eth_dev_count_avail(); 515 kni_accept = 0; 516 
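    /*
     * Descriptive note (added): kni.method selects what happens to packets the
     * stack itself does not consume. With "accept", only traffic explicitly
     * matched for the kernel (FILTER_KNI) is handed to KNI; with any other
     * value, every packet the filter does not recognize (FILTER_UNKNOWN) is
     * forwarded to KNI instead. See the FF_KNICTL_ACTION_DEFAULT branch in
     * process_packets().
     */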
if(strcasecmp(ff_global_cfg.kni.method, "accept") == 0) 517 kni_accept = 1; 518 519 knictl_action = get_kni_action(ff_global_cfg.kni.kni_action); 520 521 ff_kni_init(nb_ports, ff_global_cfg.kni.tcp_port, 522 ff_global_cfg.kni.udp_port); 523 524 unsigned socket_id = lcore_conf.socket_id; 525 struct rte_mempool *mbuf_pool = pktmbuf_pool[socket_id]; 526 527 nb_ports = ff_global_cfg.dpdk.nb_ports; 528 int i, ret; 529 for (i = 0; i < nb_ports; i++) { 530 uint16_t port_id = ff_global_cfg.dpdk.portid_list[i]; 531 ff_kni_alloc(port_id, socket_id, mbuf_pool, KNI_QUEUE_SIZE); 532 } 533 534 return 0; 535 } 536 #endif 537 538 //RSS reta update will failed when enable flow isolate 539 #ifndef FF_FLOW_ISOLATE 540 static void 541 set_rss_table(uint16_t port_id, uint16_t reta_size, uint16_t nb_queues) 542 { 543 if (reta_size == 0) { 544 return; 545 } 546 547 int reta_conf_size = RTE_MAX(1, reta_size / RTE_RETA_GROUP_SIZE); 548 struct rte_eth_rss_reta_entry64 reta_conf[reta_conf_size]; 549 550 /* config HW indirection table */ 551 unsigned i, j, hash=0; 552 for (i = 0; i < reta_conf_size; i++) { 553 reta_conf[i].mask = ~0ULL; 554 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) { 555 reta_conf[i].reta[j] = hash++ % nb_queues; 556 } 557 } 558 559 if (rte_eth_dev_rss_reta_update(port_id, reta_conf, reta_size)) { 560 rte_exit(EXIT_FAILURE, "port[%d], failed to update rss table\n", 561 port_id); 562 } 563 } 564 #endif 565 566 static int 567 init_port_start(void) 568 { 569 int nb_ports = ff_global_cfg.dpdk.nb_ports; 570 unsigned socketid = 0; 571 struct rte_mempool *mbuf_pool; 572 uint16_t i, j; 573 574 for (i = 0; i < nb_ports; i++) { 575 uint16_t port_id, u_port_id = ff_global_cfg.dpdk.portid_list[i]; 576 struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[u_port_id]; 577 uint16_t nb_queues = pconf->nb_lcores; 578 579 for (j=0; j<=pconf->nb_slaves; j++) { 580 if (j < pconf->nb_slaves) { 581 port_id = pconf->slave_portid_list[j]; 582 printf("To init %s's %d'st slave port[%d]\n", 583 ff_global_cfg.dpdk.bond_cfgs->name, 584 j, port_id); 585 } else { 586 port_id = u_port_id; 587 } 588 589 struct rte_eth_dev_info dev_info; 590 struct rte_eth_conf port_conf = {0}; 591 struct rte_eth_rxconf rxq_conf; 592 struct rte_eth_txconf txq_conf; 593 594 int ret = rte_eth_dev_info_get(port_id, &dev_info); 595 if (ret != 0) 596 rte_exit(EXIT_FAILURE, 597 "Error during getting device (port %u) info: %s\n", 598 port_id, strerror(-ret)); 599 600 if (nb_queues > dev_info.max_rx_queues) { 601 rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_rx_queues[%d]\n", 602 nb_queues, 603 dev_info.max_rx_queues); 604 } 605 606 if (nb_queues > dev_info.max_tx_queues) { 607 rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_tx_queues[%d]\n", 608 nb_queues, 609 dev_info.max_tx_queues); 610 } 611 612 struct rte_ether_addr addr; 613 rte_eth_macaddr_get(port_id, &addr); 614 printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8 615 " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n", 616 (unsigned)port_id, 617 addr.addr_bytes[0], addr.addr_bytes[1], 618 addr.addr_bytes[2], addr.addr_bytes[3], 619 addr.addr_bytes[4], addr.addr_bytes[5]); 620 621 rte_memcpy(pconf->mac, 622 addr.addr_bytes, RTE_ETHER_ADDR_LEN); 623 624 /* Set RSS mode */ 625 uint64_t default_rss_hf = ETH_RSS_PROTO_MASK; 626 port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; 627 port_conf.rx_adv_conf.rss_conf.rss_hf = default_rss_hf; 628 if (dev_info.hash_key_size == 52) { 629 rsskey = default_rsskey_52bytes; 630 rsskey_len = 52; 631 } 632 if (ff_global_cfg.dpdk.symmetric_rss) { 633 printf("Use 
symmetric Receive-side Scaling(RSS) key\n"); 634 rsskey = symmetric_rsskey; 635 } 636 port_conf.rx_adv_conf.rss_conf.rss_key = rsskey; 637 port_conf.rx_adv_conf.rss_conf.rss_key_len = rsskey_len; 638 port_conf.rx_adv_conf.rss_conf.rss_hf &= dev_info.flow_type_rss_offloads; 639 if (port_conf.rx_adv_conf.rss_conf.rss_hf != 640 ETH_RSS_PROTO_MASK) { 641 printf("Port %u modified RSS hash function based on hardware support," 642 "requested:%#"PRIx64" configured:%#"PRIx64"\n", 643 port_id, default_rss_hf, 644 port_conf.rx_adv_conf.rss_conf.rss_hf); 645 } 646 647 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) { 648 port_conf.txmode.offloads |= 649 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 650 } 651 652 /* Set Rx VLAN stripping */ 653 if (ff_global_cfg.dpdk.vlan_strip) { 654 if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP) { 655 port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; 656 } 657 } 658 659 /* Enable HW CRC stripping */ 660 port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_KEEP_CRC; 661 662 /* FIXME: Enable TCP LRO ?*/ 663 #if 0 664 if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO) { 665 printf("LRO is supported\n"); 666 port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_LRO; 667 pconf->hw_features.rx_lro = 1; 668 } 669 #endif 670 671 /* Set Rx checksum checking */ 672 if ((dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM) && 673 (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM) && 674 (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)) { 675 printf("RX checksum offload supported\n"); 676 port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CHECKSUM; 677 pconf->hw_features.rx_csum = 1; 678 } 679 680 if (ff_global_cfg.dpdk.tx_csum_offoad_skip == 0) { 681 if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) { 682 printf("TX ip checksum offload supported\n"); 683 port_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM; 684 pconf->hw_features.tx_csum_ip = 1; 685 } 686 687 if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) && 688 (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)) { 689 printf("TX TCP&UDP checksum offload supported\n"); 690 port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM; 691 pconf->hw_features.tx_csum_l4 = 1; 692 } 693 } else { 694 printf("TX checksum offoad is disabled\n"); 695 } 696 697 if (ff_global_cfg.dpdk.tso) { 698 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) { 699 printf("TSO is supported\n"); 700 port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO; 701 pconf->hw_features.tx_tso = 1; 702 } 703 } else { 704 printf("TSO is disabled\n"); 705 } 706 707 if (dev_info.reta_size) { 708 /* reta size must be power of 2 */ 709 assert((dev_info.reta_size & (dev_info.reta_size - 1)) == 0); 710 711 rss_reta_size[port_id] = dev_info.reta_size; 712 printf("port[%d]: rss table size: %d\n", port_id, 713 dev_info.reta_size); 714 } 715 716 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 717 continue; 718 } 719 720 ret = rte_eth_dev_configure(port_id, nb_queues, nb_queues, &port_conf); 721 if (ret != 0) { 722 return ret; 723 } 724 725 static uint16_t nb_rxd = RX_QUEUE_SIZE; 726 static uint16_t nb_txd = TX_QUEUE_SIZE; 727 ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd); 728 if (ret < 0) 729 printf("Could not adjust number of descriptors " 730 "for port%u (%d)\n", (unsigned)port_id, ret); 731 732 uint16_t q; 733 for (q = 0; q < nb_queues; q++) { 734 if (numa_on) { 735 uint16_t lcore_id = lcore_conf.port_cfgs[port_id].lcore_list[q]; 736 socketid = rte_lcore_to_socket_id(lcore_id); 737 } 738 
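            /*
             * Descriptive note (added): with numa_on, this queue's descriptors
             * and mbufs come from the socket of the lcore that will poll it;
             * otherwise socket 0 is used for every queue.
             */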
mbuf_pool = pktmbuf_pool[socketid]; 739 740 txq_conf = dev_info.default_txconf; 741 txq_conf.offloads = port_conf.txmode.offloads; 742 ret = rte_eth_tx_queue_setup(port_id, q, nb_txd, 743 socketid, &txq_conf); 744 if (ret < 0) { 745 return ret; 746 } 747 748 rxq_conf = dev_info.default_rxconf; 749 rxq_conf.offloads = port_conf.rxmode.offloads; 750 ret = rte_eth_rx_queue_setup(port_id, q, nb_rxd, 751 socketid, &rxq_conf, mbuf_pool); 752 if (ret < 0) { 753 return ret; 754 } 755 } 756 757 758 if (strncmp(dev_info.driver_name, BOND_DRIVER_NAME, 759 strlen(dev_info.driver_name)) == 0) { 760 761 rte_eth_macaddr_get(port_id, &addr); 762 printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8 763 " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n", 764 (unsigned)port_id, 765 addr.addr_bytes[0], addr.addr_bytes[1], 766 addr.addr_bytes[2], addr.addr_bytes[3], 767 addr.addr_bytes[4], addr.addr_bytes[5]); 768 769 rte_memcpy(pconf->mac, 770 addr.addr_bytes, RTE_ETHER_ADDR_LEN); 771 772 int mode, count, x; 773 uint16_t slaves[RTE_MAX_ETHPORTS], len = RTE_MAX_ETHPORTS; 774 775 mode = rte_eth_bond_mode_get(port_id); 776 printf("Port %u, bond mode:%d\n", port_id, mode); 777 778 count = rte_eth_bond_slaves_get(port_id, slaves, len); 779 printf("Port %u, %s's slave ports count:%d\n", port_id, 780 ff_global_cfg.dpdk.bond_cfgs->name, count); 781 for (x=0; x<count; x++) { 782 printf("Port %u, %s's slave port[%u]\n", port_id, 783 ff_global_cfg.dpdk.bond_cfgs->name, slaves[x]); 784 } 785 } 786 787 ret = rte_eth_dev_start(port_id); 788 if (ret < 0) { 789 return ret; 790 } 791 //RSS reta update will failed when enable flow isolate 792 #ifndef FF_FLOW_ISOLATE 793 if (nb_queues > 1) { 794 /* set HW rss hash function to Toeplitz. */ 795 if (!rte_eth_dev_filter_supported(port_id, RTE_ETH_FILTER_HASH)) { 796 struct rte_eth_hash_filter_info info = {0}; 797 info.info_type = RTE_ETH_HASH_FILTER_GLOBAL_CONFIG; 798 info.info.global_conf.hash_func = RTE_ETH_HASH_FUNCTION_TOEPLITZ; 799 800 if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_HASH, 801 RTE_ETH_FILTER_SET, &info) < 0) { 802 rte_exit(EXIT_FAILURE, "port[%d] set hash func failed\n", 803 port_id); 804 } 805 } 806 807 set_rss_table(port_id, dev_info.reta_size, nb_queues); 808 } 809 #endif 810 811 /* Enable RX in promiscuous mode for the Ethernet device. */ 812 if (ff_global_cfg.dpdk.promiscuous) { 813 ret = rte_eth_promiscuous_enable(port_id); 814 if (ret == 0) { 815 printf("set port %u to promiscuous mode ok\n", port_id); 816 } else { 817 printf("set port %u to promiscuous mode error\n", port_id); 818 } 819 } 820 } 821 } 822 823 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 824 check_all_ports_link_status(); 825 } 826 827 return 0; 828 } 829 830 static int 831 init_clock(void) 832 { 833 rte_timer_subsystem_init(); 834 uint64_t hz = rte_get_timer_hz(); 835 uint64_t intrs = MS_PER_S/ff_global_cfg.freebsd.hz; 836 uint64_t tsc = (hz + MS_PER_S - 1) / MS_PER_S*intrs; 837 838 rte_timer_init(&freebsd_clock); 839 rte_timer_reset(&freebsd_clock, tsc, PERIODICAL, 840 rte_lcore_id(), &ff_hardclock_job, NULL); 841 842 ff_update_current_ts(); 843 844 return 0; 845 } 846 847 #ifdef FF_FLOW_ISOLATE 848 /** Print a message out of a flow error. 
*/ 849 static int 850 port_flow_complain(struct rte_flow_error *error) 851 { 852 static const char *const errstrlist[] = { 853 [RTE_FLOW_ERROR_TYPE_NONE] = "no error", 854 [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified", 855 [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)", 856 [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field", 857 [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field", 858 [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field", 859 [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field", 860 [RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER] = "transfer field", 861 [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure", 862 [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length", 863 [RTE_FLOW_ERROR_TYPE_ITEM_SPEC] = "item specification", 864 [RTE_FLOW_ERROR_TYPE_ITEM_LAST] = "item specification range", 865 [RTE_FLOW_ERROR_TYPE_ITEM_MASK] = "item specification mask", 866 [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item", 867 [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions", 868 [RTE_FLOW_ERROR_TYPE_ACTION_CONF] = "action configuration", 869 [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action", 870 }; 871 const char *errstr; 872 char buf[32]; 873 int err = rte_errno; 874 875 if ((unsigned int)error->type >= RTE_DIM(errstrlist) || 876 !errstrlist[error->type]) 877 errstr = "unknown type"; 878 else 879 errstr = errstrlist[error->type]; 880 printf("Caught error type %d (%s): %s%s: %s\n", 881 error->type, errstr, 882 error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ", 883 error->cause), buf) : "", 884 error->message ? error->message : "(no stated reason)", 885 rte_strerror(err)); 886 return -err; 887 } 888 889 static int 890 port_flow_isolate(uint16_t port_id, int set) 891 { 892 struct rte_flow_error error; 893 894 /* Poisoning to make sure PMDs update it in case of error. */ 895 memset(&error, 0x66, sizeof(error)); 896 if (rte_flow_isolate(port_id, set, &error)) 897 return port_flow_complain(&error); 898 printf("Ingress traffic on port %u is %s to the defined flow rules\n", 899 port_id, 900 set ? 
"now restricted" : "not restricted anymore"); 901 return 0; 902 } 903 904 static int 905 create_tcp_flow(uint16_t port_id, uint16_t tcp_port) { 906 struct rte_flow_attr attr = {.ingress = 1}; 907 struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[port_id]; 908 int nb_queues = pconf->nb_lcores; 909 uint16_t queue[RTE_MAX_QUEUES_PER_PORT]; 910 int i = 0, j = 0; 911 for (i = 0, j = 0; i < nb_queues; ++i) 912 queue[j++] = i; 913 struct rte_flow_action_rss rss = { 914 .types = ETH_RSS_NONFRAG_IPV4_TCP, 915 .key_len = rsskey_len, 916 .key = rsskey, 917 .queue_num = j, 918 .queue = queue, 919 }; 920 921 struct rte_eth_dev_info dev_info; 922 int ret = rte_eth_dev_info_get(port_id, &dev_info); 923 if (ret != 0) 924 rte_exit(EXIT_FAILURE, "Error during getting device (port %u) info: %s\n", port_id, strerror(-ret)); 925 926 struct rte_flow_item pattern[3]; 927 struct rte_flow_action action[2]; 928 struct rte_flow_item_tcp tcp_spec; 929 struct rte_flow_item_tcp tcp_mask = { 930 .hdr = { 931 .src_port = RTE_BE16(0x0000), 932 .dst_port = RTE_BE16(0xffff), 933 }, 934 }; 935 struct rte_flow_error error; 936 937 memset(pattern, 0, sizeof(pattern)); 938 memset(action, 0, sizeof(action)); 939 940 /* set the dst ipv4 packet to the required value */ 941 pattern[0].type = RTE_FLOW_ITEM_TYPE_IPV4; 942 943 memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp)); 944 tcp_spec.hdr.dst_port = rte_cpu_to_be_16(tcp_port); 945 pattern[1].type = RTE_FLOW_ITEM_TYPE_TCP; 946 pattern[1].spec = &tcp_spec; 947 pattern[1].mask = &tcp_mask; 948 949 /* end the pattern array */ 950 pattern[2].type = RTE_FLOW_ITEM_TYPE_END; 951 952 /* create the action */ 953 action[0].type = RTE_FLOW_ACTION_TYPE_RSS; 954 action[0].conf = &rss; 955 action[1].type = RTE_FLOW_ACTION_TYPE_END; 956 957 struct rte_flow *flow; 958 /* validate and create the flow rule */ 959 if (!rte_flow_validate(port_id, &attr, pattern, action, &error)) { 960 flow = rte_flow_create(port_id, &attr, pattern, action, &error); 961 if (!flow) { 962 return port_flow_complain(&error); 963 } 964 } 965 966 memset(pattern, 0, sizeof(pattern)); 967 968 /* set the dst ipv4 packet to the required value */ 969 pattern[0].type = RTE_FLOW_ITEM_TYPE_IPV4; 970 971 struct rte_flow_item_tcp tcp_src_mask = { 972 .hdr = { 973 .src_port = RTE_BE16(0xffff), 974 .dst_port = RTE_BE16(0x0000), 975 }, 976 }; 977 978 memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp)); 979 tcp_spec.hdr.src_port = rte_cpu_to_be_16(tcp_port); 980 pattern[1].type = RTE_FLOW_ITEM_TYPE_TCP; 981 pattern[1].spec = &tcp_spec; 982 pattern[1].mask = &tcp_src_mask; 983 984 /* end the pattern array */ 985 pattern[2].type = RTE_FLOW_ITEM_TYPE_END; 986 987 /* validate and create the flow rule */ 988 if (!rte_flow_validate(port_id, &attr, pattern, action, &error)) { 989 flow = rte_flow_create(port_id, &attr, pattern, action, &error); 990 if (!flow) { 991 return port_flow_complain(&error); 992 } 993 } 994 995 return 1; 996 } 997 998 static int 999 init_flow(uint16_t port_id, uint16_t tcp_port) { 1000 // struct ff_flow_cfg fcfg = ff_global_cfg.dpdk.flow_cfgs[0]; 1001 1002 // int i; 1003 // for (i = 0; i < fcfg.nb_port; i++) { 1004 // if(!create_tcp_flow(fcfg.port_id, fcfg.tcp_ports[i])) { 1005 // return 0; 1006 // } 1007 // } 1008 1009 if(!create_tcp_flow(port_id, tcp_port)) { 1010 rte_exit(EXIT_FAILURE, "create tcp flow failed\n"); 1011 return -1; 1012 } 1013 1014 /* ARP rule */ 1015 struct rte_flow_attr attr = {.ingress = 1}; 1016 struct rte_flow_action_queue queue = {.index = 0}; 1017 1018 struct rte_flow_item 
pattern_[2];
    struct rte_flow_action action[2];
    struct rte_flow_item_eth eth_type = {.type = RTE_BE16(0x0806)};
    struct rte_flow_item_eth eth_mask = {
        .type = RTE_BE16(0xffff)
    };

    memset(pattern_, 0, sizeof(pattern_));
    memset(action, 0, sizeof(action));

    pattern_[0].type = RTE_FLOW_ITEM_TYPE_ETH;
    pattern_[0].spec = &eth_type;
    pattern_[0].mask = &eth_mask;

    pattern_[1].type = RTE_FLOW_ITEM_TYPE_END;

    /* create the action */
    action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
    action[0].conf = &queue;
    action[1].type = RTE_FLOW_ACTION_TYPE_END;

    struct rte_flow *flow;
    struct rte_flow_error error;
    /* validate and create the flow rule */
    if (!rte_flow_validate(port_id, &attr, pattern_, action, &error)) {
        flow = rte_flow_create(port_id, &attr, pattern_, action, &error);
        if (!flow) {
            return port_flow_complain(&error);
        }
    }

    return 1;
}

#endif

int
ff_dpdk_init(int argc, char **argv)
{
    if (ff_global_cfg.dpdk.nb_procs < 1 ||
        ff_global_cfg.dpdk.nb_procs > RTE_MAX_LCORE ||
        ff_global_cfg.dpdk.proc_id >= ff_global_cfg.dpdk.nb_procs ||
        ff_global_cfg.dpdk.proc_id < 0) {
        printf("param num_procs[%d] or proc_id[%d] error!\n",
            ff_global_cfg.dpdk.nb_procs,
            ff_global_cfg.dpdk.proc_id);
        exit(1);
    }

    int ret = rte_eal_init(argc, argv);
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
    }

    numa_on = ff_global_cfg.dpdk.numa_on;

    idle_sleep = ff_global_cfg.dpdk.idle_sleep;
    pkt_tx_delay = ff_global_cfg.dpdk.pkt_tx_delay > BURST_TX_DRAIN_US ? \
        BURST_TX_DRAIN_US : ff_global_cfg.dpdk.pkt_tx_delay;

    init_lcore_conf();

    init_mem_pool();

    init_dispatch_ring();

    init_msg_ring();

#ifdef FF_KNI
    enable_kni = ff_global_cfg.kni.enable;
    if (enable_kni) {
        init_kni();
    }
#endif

#ifdef FF_USE_PAGE_ARRAY
    ff_mmap_init();
#endif

#ifdef FF_FLOW_ISOLATE
    // run once in the primary process
    if (0 == lcore_conf.tx_queue_id[0]){
        ret = port_flow_isolate(0, 1);
        if (ret < 0)
            rte_exit(EXIT_FAILURE, "init_port_isolate failed\n");
    }
#endif

    ret = init_port_start();
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "init_port_start failed\n");
    }

    init_clock();
#ifdef FF_FLOW_ISOLATE
    // Example usage only: port_id=0, tcp_port=80.
    // Recommendations:
    // 1. init_flow should replace `set_rss_table` in the `init_port_start` loop,
    //    so that every port in port_id_list is configured instead of only port 0 (port_id).
    // 2.
using config options `tcp_port` replace magic number of 80 1117 ret = init_flow(0, 80); 1118 if (ret < 0) { 1119 rte_exit(EXIT_FAILURE, "init_port_flow failed\n"); 1120 } 1121 #endif 1122 return 0; 1123 } 1124 1125 static void 1126 ff_veth_input(const struct ff_dpdk_if_context *ctx, struct rte_mbuf *pkt) 1127 { 1128 uint8_t rx_csum = ctx->hw_features.rx_csum; 1129 if (rx_csum) { 1130 if (pkt->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD)) { 1131 rte_pktmbuf_free(pkt); 1132 return; 1133 } 1134 } 1135 1136 void *data = rte_pktmbuf_mtod(pkt, void*); 1137 uint16_t len = rte_pktmbuf_data_len(pkt); 1138 1139 void *hdr = ff_mbuf_gethdr(pkt, pkt->pkt_len, data, len, rx_csum); 1140 if (hdr == NULL) { 1141 rte_pktmbuf_free(pkt); 1142 return; 1143 } 1144 1145 if (pkt->ol_flags & PKT_RX_VLAN_STRIPPED) { 1146 ff_mbuf_set_vlan_info(hdr, pkt->vlan_tci); 1147 } 1148 1149 struct rte_mbuf *pn = pkt->next; 1150 void *prev = hdr; 1151 while(pn != NULL) { 1152 data = rte_pktmbuf_mtod(pn, void*); 1153 len = rte_pktmbuf_data_len(pn); 1154 1155 void *mb = ff_mbuf_get(prev, pn, data, len); 1156 if (mb == NULL) { 1157 ff_mbuf_free(hdr); 1158 rte_pktmbuf_free(pkt); 1159 return; 1160 } 1161 pn = pn->next; 1162 prev = mb; 1163 } 1164 1165 ff_veth_process_packet(ctx->ifp, hdr); 1166 } 1167 1168 static enum FilterReturn 1169 protocol_filter(const void *data, uint16_t len) 1170 { 1171 if(len < RTE_ETHER_ADDR_LEN) 1172 return FILTER_UNKNOWN; 1173 1174 const struct rte_ether_hdr *hdr; 1175 const struct rte_vlan_hdr *vlanhdr; 1176 hdr = (const struct rte_ether_hdr *)data; 1177 uint16_t ether_type = rte_be_to_cpu_16(hdr->ether_type); 1178 data += RTE_ETHER_HDR_LEN; 1179 len -= RTE_ETHER_HDR_LEN; 1180 1181 if (ether_type == RTE_ETHER_TYPE_VLAN) { 1182 vlanhdr = (struct rte_vlan_hdr *)data; 1183 ether_type = rte_be_to_cpu_16(vlanhdr->eth_proto); 1184 data += sizeof(struct rte_vlan_hdr); 1185 len -= sizeof(struct rte_vlan_hdr); 1186 } 1187 1188 if(ether_type == RTE_ETHER_TYPE_ARP) 1189 return FILTER_ARP; 1190 1191 #ifdef INET6 1192 if (ether_type == RTE_ETHER_TYPE_IPV6) { 1193 return ff_kni_proto_filter(data, 1194 len, ether_type); 1195 } 1196 #endif 1197 1198 #ifndef FF_KNI 1199 return FILTER_UNKNOWN; 1200 #else 1201 if (!enable_kni) { 1202 return FILTER_UNKNOWN; 1203 } 1204 1205 if(ether_type != RTE_ETHER_TYPE_IPV4) 1206 return FILTER_UNKNOWN; 1207 1208 return ff_kni_proto_filter(data, 1209 len, ether_type); 1210 #endif 1211 } 1212 1213 static inline void 1214 pktmbuf_deep_attach(struct rte_mbuf *mi, const struct rte_mbuf *m) 1215 { 1216 struct rte_mbuf *md; 1217 void *src, *dst; 1218 1219 dst = rte_pktmbuf_mtod(mi, void *); 1220 src = rte_pktmbuf_mtod(m, void *); 1221 1222 mi->data_len = m->data_len; 1223 rte_memcpy(dst, src, m->data_len); 1224 1225 mi->port = m->port; 1226 mi->vlan_tci = m->vlan_tci; 1227 mi->vlan_tci_outer = m->vlan_tci_outer; 1228 mi->tx_offload = m->tx_offload; 1229 mi->hash = m->hash; 1230 mi->ol_flags = m->ol_flags; 1231 mi->packet_type = m->packet_type; 1232 } 1233 1234 /* copied from rte_pktmbuf_clone */ 1235 static inline struct rte_mbuf * 1236 pktmbuf_deep_clone(const struct rte_mbuf *md, 1237 struct rte_mempool *mp) 1238 { 1239 struct rte_mbuf *mc, *mi, **prev; 1240 uint32_t pktlen; 1241 uint8_t nseg; 1242 1243 if (unlikely ((mc = rte_pktmbuf_alloc(mp)) == NULL)) 1244 return NULL; 1245 1246 mi = mc; 1247 prev = &mi->next; 1248 pktlen = md->pkt_len; 1249 nseg = 0; 1250 1251 do { 1252 nseg++; 1253 pktmbuf_deep_attach(mi, md); 1254 *prev = mi; 1255 prev = &mi->next; 1256 } while ((md = 
md->next) != NULL && 1257 (mi = rte_pktmbuf_alloc(mp)) != NULL); 1258 1259 *prev = NULL; 1260 mc->nb_segs = nseg; 1261 mc->pkt_len = pktlen; 1262 1263 /* Allocation of new indirect segment failed */ 1264 if (unlikely (mi == NULL)) { 1265 rte_pktmbuf_free(mc); 1266 return NULL; 1267 } 1268 1269 __rte_mbuf_sanity_check(mc, 1); 1270 return mc; 1271 } 1272 1273 static inline void 1274 process_packets(uint16_t port_id, uint16_t queue_id, struct rte_mbuf **bufs, 1275 uint16_t count, const struct ff_dpdk_if_context *ctx, int pkts_from_ring) 1276 { 1277 struct lcore_conf *qconf = &lcore_conf; 1278 uint16_t nb_queues = qconf->nb_queue_list[port_id]; 1279 1280 uint16_t i; 1281 for (i = 0; i < count; i++) { 1282 struct rte_mbuf *rtem = bufs[i]; 1283 1284 if (unlikely( ff_global_cfg.pcap.enable)) { 1285 if (!pkts_from_ring) { 1286 ff_dump_packets( ff_global_cfg.pcap.save_path, rtem, ff_global_cfg.pcap.snap_len, ff_global_cfg.pcap.save_len); 1287 } 1288 } 1289 1290 void *data = rte_pktmbuf_mtod(rtem, void*); 1291 uint16_t len = rte_pktmbuf_data_len(rtem); 1292 1293 if (!pkts_from_ring) { 1294 ff_traffic.rx_packets++; 1295 ff_traffic.rx_bytes += len; 1296 } 1297 1298 if (!pkts_from_ring && packet_dispatcher) { 1299 int ret = (*packet_dispatcher)(data, &len, queue_id, nb_queues); 1300 if (ret == FF_DISPATCH_RESPONSE) { 1301 rte_pktmbuf_pkt_len(rtem) = rte_pktmbuf_data_len(rtem) = len; 1302 1303 /* 1304 * We have not support vlan out strip 1305 */ 1306 if (rtem->vlan_tci) { 1307 data = rte_pktmbuf_prepend(rtem, sizeof(struct rte_vlan_hdr)); 1308 if (data != NULL) { 1309 memmove(data, data + sizeof(struct rte_vlan_hdr), RTE_ETHER_HDR_LEN); 1310 struct rte_ether_hdr *etherhdr = (struct rte_ether_hdr *)data; 1311 struct rte_vlan_hdr *vlanhdr = (struct rte_vlan_hdr *)(data + RTE_ETHER_HDR_LEN); 1312 vlanhdr->vlan_tci = rte_cpu_to_be_16(rtem->vlan_tci); 1313 vlanhdr->eth_proto = etherhdr->ether_type; 1314 etherhdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN); 1315 } 1316 } 1317 send_single_packet(rtem, port_id); 1318 continue; 1319 } 1320 1321 if (ret == FF_DISPATCH_ERROR || ret >= nb_queues) { 1322 rte_pktmbuf_free(rtem); 1323 continue; 1324 } 1325 1326 if (ret != queue_id) { 1327 ret = rte_ring_enqueue(dispatch_ring[port_id][ret], rtem); 1328 if (ret < 0) 1329 rte_pktmbuf_free(rtem); 1330 1331 continue; 1332 } 1333 } 1334 1335 enum FilterReturn filter = protocol_filter(data, len); 1336 #ifdef INET6 1337 if (filter == FILTER_ARP || filter == FILTER_NDP) { 1338 #else 1339 if (filter == FILTER_ARP) { 1340 #endif 1341 struct rte_mempool *mbuf_pool; 1342 struct rte_mbuf *mbuf_clone; 1343 if (!pkts_from_ring) { 1344 uint16_t j; 1345 for(j = 0; j < nb_queues; ++j) { 1346 if(j == queue_id) 1347 continue; 1348 1349 unsigned socket_id = 0; 1350 if (numa_on) { 1351 uint16_t lcore_id = qconf->port_cfgs[port_id].lcore_list[j]; 1352 socket_id = rte_lcore_to_socket_id(lcore_id); 1353 } 1354 mbuf_pool = pktmbuf_pool[socket_id]; 1355 mbuf_clone = pktmbuf_deep_clone(rtem, mbuf_pool); 1356 if(mbuf_clone) { 1357 int ret = rte_ring_enqueue(dispatch_ring[port_id][j], 1358 mbuf_clone); 1359 if (ret < 0) 1360 rte_pktmbuf_free(mbuf_clone); 1361 } 1362 } 1363 } 1364 1365 #ifdef FF_KNI 1366 if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) { 1367 mbuf_pool = pktmbuf_pool[qconf->socket_id]; 1368 mbuf_clone = pktmbuf_deep_clone(rtem, mbuf_pool); 1369 if(mbuf_clone) { 1370 ff_kni_enqueue(port_id, mbuf_clone); 1371 } 1372 } 1373 #endif 1374 ff_veth_input(ctx, rtem); 1375 #ifdef FF_KNI 1376 } else if (enable_kni) { 1377 
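                /*
                 * Descriptive note (added): for packets that are not ARP/NDP,
                 * the knictl policy decides where they go. ALL_TO_KNI pushes
                 * everything to the kernel via KNI, ALL_TO_FF keeps everything
                 * in the F-Stack stack, and DEFAULT falls back to the protocol
                 * filter plus the kni_accept setting chosen in init_kni().
                 */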
if (knictl_action == FF_KNICTL_ACTION_ALL_TO_KNI){ 1378 ff_kni_enqueue(port_id, rtem); 1379 } else if (knictl_action == FF_KNICTL_ACTION_ALL_TO_FF){ 1380 ff_veth_input(ctx, rtem); 1381 } else if (knictl_action == FF_KNICTL_ACTION_DEFAULT){ 1382 if (enable_kni && 1383 ((filter == FILTER_KNI && kni_accept) || 1384 (filter == FILTER_UNKNOWN && !kni_accept)) ) { 1385 ff_kni_enqueue(port_id, rtem); 1386 } else { 1387 ff_veth_input(ctx, rtem); 1388 } 1389 } else { 1390 ff_veth_input(ctx, rtem); 1391 } 1392 #endif 1393 } else { 1394 ff_veth_input(ctx, rtem); 1395 } 1396 } 1397 } 1398 1399 static inline int 1400 process_dispatch_ring(uint16_t port_id, uint16_t queue_id, 1401 struct rte_mbuf **pkts_burst, const struct ff_dpdk_if_context *ctx) 1402 { 1403 /* read packet from ring buf and to process */ 1404 uint16_t nb_rb; 1405 nb_rb = rte_ring_dequeue_burst(dispatch_ring[port_id][queue_id], 1406 (void **)pkts_burst, MAX_PKT_BURST, NULL); 1407 1408 if(nb_rb > 0) { 1409 process_packets(port_id, queue_id, pkts_burst, nb_rb, ctx, 1); 1410 } 1411 1412 return 0; 1413 } 1414 1415 static inline void 1416 handle_sysctl_msg(struct ff_msg *msg) 1417 { 1418 int ret = ff_sysctl(msg->sysctl.name, msg->sysctl.namelen, 1419 msg->sysctl.old, msg->sysctl.oldlenp, msg->sysctl.new, 1420 msg->sysctl.newlen); 1421 1422 if (ret < 0) { 1423 msg->result = errno; 1424 } else { 1425 msg->result = 0; 1426 } 1427 } 1428 1429 static inline void 1430 handle_ioctl_msg(struct ff_msg *msg) 1431 { 1432 int fd, ret; 1433 #ifdef INET6 1434 if (msg->msg_type == FF_IOCTL6) { 1435 fd = ff_socket(AF_INET6, SOCK_DGRAM, 0); 1436 } else 1437 #endif 1438 fd = ff_socket(AF_INET, SOCK_DGRAM, 0); 1439 1440 if (fd < 0) { 1441 ret = -1; 1442 goto done; 1443 } 1444 1445 ret = ff_ioctl_freebsd(fd, msg->ioctl.cmd, msg->ioctl.data); 1446 1447 ff_close(fd); 1448 1449 done: 1450 if (ret < 0) { 1451 msg->result = errno; 1452 } else { 1453 msg->result = 0; 1454 } 1455 } 1456 1457 static inline void 1458 handle_route_msg(struct ff_msg *msg) 1459 { 1460 int ret = ff_rtioctl(msg->route.fib, msg->route.data, 1461 &msg->route.len, msg->route.maxlen); 1462 if (ret < 0) { 1463 msg->result = errno; 1464 } else { 1465 msg->result = 0; 1466 } 1467 } 1468 1469 static inline void 1470 handle_top_msg(struct ff_msg *msg) 1471 { 1472 msg->top = ff_top_status; 1473 msg->result = 0; 1474 } 1475 1476 #ifdef FF_NETGRAPH 1477 static inline void 1478 handle_ngctl_msg(struct ff_msg *msg) 1479 { 1480 int ret = ff_ngctl(msg->ngctl.cmd, msg->ngctl.data); 1481 if (ret < 0) { 1482 msg->result = errno; 1483 } else { 1484 msg->result = 0; 1485 msg->ngctl.ret = ret; 1486 } 1487 } 1488 #endif 1489 1490 #ifdef FF_IPFW 1491 static inline void 1492 handle_ipfw_msg(struct ff_msg *msg) 1493 { 1494 int fd, ret; 1495 fd = ff_socket(AF_INET, SOCK_RAW, IPPROTO_RAW); 1496 if (fd < 0) { 1497 ret = -1; 1498 goto done; 1499 } 1500 1501 switch (msg->ipfw.cmd) { 1502 case FF_IPFW_GET: 1503 ret = ff_getsockopt_freebsd(fd, msg->ipfw.level, 1504 msg->ipfw.optname, msg->ipfw.optval, 1505 msg->ipfw.optlen); 1506 break; 1507 case FF_IPFW_SET: 1508 ret = ff_setsockopt_freebsd(fd, msg->ipfw.level, 1509 msg->ipfw.optname, msg->ipfw.optval, 1510 *(msg->ipfw.optlen)); 1511 break; 1512 default: 1513 ret = -1; 1514 errno = ENOTSUP; 1515 break; 1516 } 1517 1518 ff_close(fd); 1519 1520 done: 1521 if (ret < 0) { 1522 msg->result = errno; 1523 } else { 1524 msg->result = 0; 1525 } 1526 } 1527 #endif 1528 1529 static inline void 1530 handle_traffic_msg(struct ff_msg *msg) 1531 { 1532 msg->traffic = ff_traffic; 1533 
msg->result = 0; 1534 } 1535 1536 #ifdef FF_KNI 1537 static inline void 1538 handle_knictl_msg(struct ff_msg *msg) 1539 { 1540 if (msg->knictl.kni_cmd == FF_KNICTL_CMD_SET){ 1541 switch (msg->knictl.kni_action){ 1542 case FF_KNICTL_ACTION_ALL_TO_FF: knictl_action = FF_KNICTL_ACTION_ALL_TO_FF; msg->result = 0; printf("new kni action: alltoff\n"); break; 1543 case FF_KNICTL_ACTION_ALL_TO_KNI: knictl_action = FF_KNICTL_ACTION_ALL_TO_KNI; msg->result = 0; printf("new kni action: alltokni\n"); break; 1544 case FF_KNICTL_ACTION_DEFAULT: knictl_action = FF_KNICTL_ACTION_DEFAULT; msg->result = 0; printf("new kni action: default\n"); break; 1545 default: msg->result = -1; 1546 } 1547 } 1548 else if (msg->knictl.kni_cmd == FF_KNICTL_CMD_GET){ 1549 msg->knictl.kni_action = knictl_action; 1550 } else { 1551 msg->result = -2; 1552 } 1553 } 1554 #endif 1555 1556 static inline void 1557 handle_default_msg(struct ff_msg *msg) 1558 { 1559 msg->result = ENOTSUP; 1560 } 1561 1562 static inline void 1563 handle_msg(struct ff_msg *msg, uint16_t proc_id) 1564 { 1565 switch (msg->msg_type) { 1566 case FF_SYSCTL: 1567 handle_sysctl_msg(msg); 1568 break; 1569 case FF_IOCTL: 1570 #ifdef INET6 1571 case FF_IOCTL6: 1572 #endif 1573 handle_ioctl_msg(msg); 1574 break; 1575 case FF_ROUTE: 1576 handle_route_msg(msg); 1577 break; 1578 case FF_TOP: 1579 handle_top_msg(msg); 1580 break; 1581 #ifdef FF_NETGRAPH 1582 case FF_NGCTL: 1583 handle_ngctl_msg(msg); 1584 break; 1585 #endif 1586 #ifdef FF_IPFW 1587 case FF_IPFW_CTL: 1588 handle_ipfw_msg(msg); 1589 break; 1590 #endif 1591 case FF_TRAFFIC: 1592 handle_traffic_msg(msg); 1593 break; 1594 #ifdef FF_KNI 1595 case FF_KNICTL: 1596 handle_knictl_msg(msg); 1597 break; 1598 #endif 1599 default: 1600 handle_default_msg(msg); 1601 break; 1602 } 1603 if (rte_ring_enqueue(msg_ring[proc_id].ring[msg->msg_type], msg) < 0) { 1604 if (msg->original_buf) { 1605 rte_free(msg->buf_addr); 1606 msg->buf_addr = msg->original_buf; 1607 msg->buf_len = msg->original_buf_len; 1608 msg->original_buf = NULL; 1609 } 1610 1611 rte_mempool_put(message_pool, msg); 1612 } 1613 } 1614 1615 static inline int 1616 process_msg_ring(uint16_t proc_id, struct rte_mbuf **pkts_burst) 1617 { 1618 /* read msg from ring buf and to process */ 1619 uint16_t nb_rb; 1620 int i; 1621 1622 nb_rb = rte_ring_dequeue_burst(msg_ring[proc_id].ring[0], 1623 (void **)pkts_burst, MAX_PKT_BURST, NULL); 1624 1625 if (likely(nb_rb == 0)) 1626 return 0; 1627 1628 for (i = 0; i < nb_rb; ++i) { 1629 handle_msg((struct ff_msg *)pkts_burst[i], proc_id); 1630 } 1631 1632 return 0; 1633 } 1634 1635 /* Send burst of packets on an output interface */ 1636 static inline int 1637 send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port) 1638 { 1639 struct rte_mbuf **m_table; 1640 int ret; 1641 uint16_t queueid; 1642 1643 queueid = qconf->tx_queue_id[port]; 1644 m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; 1645 1646 if (unlikely(ff_global_cfg.pcap.enable)) { 1647 uint16_t i; 1648 for (i = 0; i < n; i++) { 1649 ff_dump_packets( ff_global_cfg.pcap.save_path, m_table[i], 1650 ff_global_cfg.pcap.snap_len, ff_global_cfg.pcap.save_len); 1651 } 1652 } 1653 1654 ret = rte_eth_tx_burst(port, queueid, m_table, n); 1655 ff_traffic.tx_packets += ret; 1656 uint16_t i; 1657 for (i = 0; i < ret; i++) { 1658 ff_traffic.tx_bytes += rte_pktmbuf_pkt_len(m_table[i]); 1659 #ifdef FF_USE_PAGE_ARRAY 1660 if (qconf->tx_mbufs[port].bsd_m_table[i]) 1661 ff_enq_tx_bsdmbuf(port, qconf->tx_mbufs[port].bsd_m_table[i], m_table[i]->nb_segs); 1662 #endif 
1663 } 1664 if (unlikely(ret < n)) { 1665 do { 1666 rte_pktmbuf_free(m_table[ret]); 1667 #ifdef FF_USE_PAGE_ARRAY 1668 if ( qconf->tx_mbufs[port].bsd_m_table[ret] ) 1669 ff_mbuf_free(qconf->tx_mbufs[port].bsd_m_table[ret]); 1670 #endif 1671 } while (++ret < n); 1672 } 1673 return 0; 1674 } 1675 1676 /* Enqueue a single packet, and send burst if queue is filled */ 1677 static inline int 1678 send_single_packet(struct rte_mbuf *m, uint8_t port) 1679 { 1680 uint16_t len; 1681 struct lcore_conf *qconf; 1682 1683 qconf = &lcore_conf; 1684 len = qconf->tx_mbufs[port].len; 1685 qconf->tx_mbufs[port].m_table[len] = m; 1686 len++; 1687 1688 /* enough pkts to be sent */ 1689 if (unlikely(len == MAX_PKT_BURST)) { 1690 send_burst(qconf, MAX_PKT_BURST, port); 1691 len = 0; 1692 } 1693 1694 qconf->tx_mbufs[port].len = len; 1695 return 0; 1696 } 1697 1698 int 1699 ff_dpdk_if_send(struct ff_dpdk_if_context *ctx, void *m, 1700 int total) 1701 { 1702 #ifdef FF_USE_PAGE_ARRAY 1703 struct lcore_conf *qconf = &lcore_conf; 1704 int len = 0; 1705 1706 len = ff_if_send_onepkt(ctx, m,total); 1707 if (unlikely(len == MAX_PKT_BURST)) { 1708 send_burst(qconf, MAX_PKT_BURST, ctx->port_id); 1709 len = 0; 1710 } 1711 qconf->tx_mbufs[ctx->port_id].len = len; 1712 return 0; 1713 #endif 1714 struct rte_mempool *mbuf_pool = pktmbuf_pool[lcore_conf.socket_id]; 1715 struct rte_mbuf *head = rte_pktmbuf_alloc(mbuf_pool); 1716 if (head == NULL) { 1717 ff_mbuf_free(m); 1718 return -1; 1719 } 1720 1721 head->pkt_len = total; 1722 head->nb_segs = 0; 1723 1724 int off = 0; 1725 struct rte_mbuf *cur = head, *prev = NULL; 1726 while(total > 0) { 1727 if (cur == NULL) { 1728 cur = rte_pktmbuf_alloc(mbuf_pool); 1729 if (cur == NULL) { 1730 rte_pktmbuf_free(head); 1731 ff_mbuf_free(m); 1732 return -1; 1733 } 1734 } 1735 1736 if (prev != NULL) { 1737 prev->next = cur; 1738 } 1739 head->nb_segs++; 1740 1741 prev = cur; 1742 void *data = rte_pktmbuf_mtod(cur, void*); 1743 int len = total > RTE_MBUF_DEFAULT_DATAROOM ? RTE_MBUF_DEFAULT_DATAROOM : total; 1744 int ret = ff_mbuf_copydata(m, data, off, len); 1745 if (ret < 0) { 1746 rte_pktmbuf_free(head); 1747 ff_mbuf_free(m); 1748 return -1; 1749 } 1750 1751 1752 cur->data_len = len; 1753 off += len; 1754 total -= len; 1755 cur = NULL; 1756 } 1757 1758 struct ff_tx_offload offload = {0}; 1759 ff_mbuf_tx_offload(m, &offload); 1760 1761 void *data = rte_pktmbuf_mtod(head, void*); 1762 1763 if (offload.ip_csum) { 1764 /* ipv6 not supported yet */ 1765 struct rte_ipv4_hdr *iph; 1766 int iph_len; 1767 iph = (struct rte_ipv4_hdr *)(data + RTE_ETHER_HDR_LEN); 1768 iph_len = (iph->version_ihl & 0x0f) << 2; 1769 1770 head->ol_flags |= PKT_TX_IP_CKSUM | PKT_TX_IPV4; 1771 head->l2_len = RTE_ETHER_HDR_LEN; 1772 head->l3_len = iph_len; 1773 } 1774 1775 if (ctx->hw_features.tx_csum_l4) { 1776 struct rte_ipv4_hdr *iph; 1777 int iph_len; 1778 iph = (struct rte_ipv4_hdr *)(data + RTE_ETHER_HDR_LEN); 1779 iph_len = (iph->version_ihl & 0x0f) << 2; 1780 1781 if (offload.tcp_csum) { 1782 head->ol_flags |= PKT_TX_TCP_CKSUM; 1783 head->l2_len = RTE_ETHER_HDR_LEN; 1784 head->l3_len = iph_len; 1785 } 1786 1787 /* 1788 * TCP segmentation offload. 
1789 * 1790 * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag 1791 * implies PKT_TX_TCP_CKSUM) 1792 * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 1793 * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and 1794 * write the IP checksum to 0 in the packet 1795 * - fill the mbuf offload information: l2_len, 1796 * l3_len, l4_len, tso_segsz 1797 * - calculate the pseudo header checksum without taking ip_len 1798 * in account, and set it in the TCP header. Refer to 1799 * rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum() that can be 1800 * used as helpers. 1801 */ 1802 if (offload.tso_seg_size) { 1803 struct rte_tcp_hdr *tcph; 1804 int tcph_len; 1805 tcph = (struct rte_tcp_hdr *)((char *)iph + iph_len); 1806 tcph_len = (tcph->data_off & 0xf0) >> 2; 1807 tcph->cksum = rte_ipv4_phdr_cksum(iph, PKT_TX_TCP_SEG); 1808 1809 head->ol_flags |= PKT_TX_TCP_SEG; 1810 head->l4_len = tcph_len; 1811 head->tso_segsz = offload.tso_seg_size; 1812 } 1813 1814 if (offload.udp_csum) { 1815 head->ol_flags |= PKT_TX_UDP_CKSUM; 1816 head->l2_len = RTE_ETHER_HDR_LEN; 1817 head->l3_len = iph_len; 1818 } 1819 } 1820 1821 ff_mbuf_free(m); 1822 1823 return send_single_packet(head, ctx->port_id); 1824 } 1825 1826 static int 1827 main_loop(void *arg) 1828 { 1829 struct loop_routine *lr = (struct loop_routine *)arg; 1830 1831 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1832 uint64_t prev_tsc, diff_tsc, cur_tsc, usch_tsc, div_tsc, usr_tsc, sys_tsc, end_tsc, idle_sleep_tsc; 1833 int i, j, nb_rx, idle; 1834 uint16_t port_id, queue_id; 1835 struct lcore_conf *qconf; 1836 uint64_t drain_tsc = 0; 1837 struct ff_dpdk_if_context *ctx; 1838 1839 if (pkt_tx_delay) { 1840 drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * pkt_tx_delay; 1841 } 1842 1843 prev_tsc = 0; 1844 usch_tsc = 0; 1845 1846 qconf = &lcore_conf; 1847 1848 while (1) { 1849 cur_tsc = rte_rdtsc(); 1850 if (unlikely(freebsd_clock.expire < cur_tsc)) { 1851 rte_timer_manage(); 1852 } 1853 1854 idle = 1; 1855 sys_tsc = 0; 1856 usr_tsc = 0; 1857 1858 /* 1859 * TX burst queue drain 1860 */ 1861 diff_tsc = cur_tsc - prev_tsc; 1862 if (unlikely(diff_tsc >= drain_tsc)) { 1863 for (i = 0; i < qconf->nb_tx_port; i++) { 1864 port_id = qconf->tx_port_id[i]; 1865 if (qconf->tx_mbufs[port_id].len == 0) 1866 continue; 1867 1868 idle = 0; 1869 1870 send_burst(qconf, 1871 qconf->tx_mbufs[port_id].len, 1872 port_id); 1873 qconf->tx_mbufs[port_id].len = 0; 1874 } 1875 1876 prev_tsc = cur_tsc; 1877 } 1878 1879 /* 1880 * Read packet from RX queues 1881 */ 1882 for (i = 0; i < qconf->nb_rx_queue; ++i) { 1883 port_id = qconf->rx_queue_list[i].port_id; 1884 queue_id = qconf->rx_queue_list[i].queue_id; 1885 ctx = veth_ctx[port_id]; 1886 1887 #ifdef FF_KNI 1888 if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) { 1889 ff_kni_process(port_id, queue_id, pkts_burst, MAX_PKT_BURST); 1890 } 1891 #endif 1892 1893 process_dispatch_ring(port_id, queue_id, pkts_burst, ctx); 1894 1895 nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts_burst, 1896 MAX_PKT_BURST); 1897 if (nb_rx == 0) 1898 continue; 1899 1900 idle = 0; 1901 1902 /* Prefetch first packets */ 1903 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1904 rte_prefetch0(rte_pktmbuf_mtod( 1905 pkts_burst[j], void *)); 1906 } 1907 1908 /* Prefetch and handle already prefetched packets */ 1909 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1910 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1911 j + PREFETCH_OFFSET], void *)); 1912 process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0); 1913 } 1914 1915 /* Handle 
remaining prefetched packets */ 1916 for (; j < nb_rx; j++) { 1917 process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0); 1918 } 1919 } 1920 1921 process_msg_ring(qconf->proc_id, pkts_burst); 1922 1923 div_tsc = rte_rdtsc(); 1924 1925 if (likely(lr->loop != NULL && (!idle || cur_tsc - usch_tsc >= drain_tsc))) { 1926 usch_tsc = cur_tsc; 1927 lr->loop(lr->arg); 1928 } 1929 1930 idle_sleep_tsc = rte_rdtsc(); 1931 if (likely(idle && idle_sleep)) { 1932 usleep(idle_sleep); 1933 end_tsc = rte_rdtsc(); 1934 } else { 1935 end_tsc = idle_sleep_tsc; 1936 } 1937 1938 if (usch_tsc == cur_tsc) { 1939 usr_tsc = idle_sleep_tsc - div_tsc; 1940 } 1941 1942 if (!idle) { 1943 sys_tsc = div_tsc - cur_tsc; 1944 ff_top_status.sys_tsc += sys_tsc; 1945 } 1946 1947 ff_top_status.usr_tsc += usr_tsc; 1948 ff_top_status.work_tsc += end_tsc - cur_tsc; 1949 ff_top_status.idle_tsc += end_tsc - cur_tsc - usr_tsc - sys_tsc; 1950 1951 ff_top_status.loops++; 1952 } 1953 1954 return 0; 1955 } 1956 1957 int 1958 ff_dpdk_if_up(void) { 1959 int i; 1960 struct lcore_conf *qconf = &lcore_conf; 1961 for (i = 0; i < qconf->nb_tx_port; i++) { 1962 uint16_t port_id = qconf->tx_port_id[i]; 1963 1964 struct ff_port_cfg *pconf = &qconf->port_cfgs[port_id]; 1965 veth_ctx[port_id] = ff_veth_attach(pconf); 1966 if (veth_ctx[port_id] == NULL) { 1967 rte_exit(EXIT_FAILURE, "ff_veth_attach failed"); 1968 } 1969 } 1970 1971 return 0; 1972 } 1973 1974 void 1975 ff_dpdk_run(loop_func_t loop, void *arg) { 1976 struct loop_routine *lr = rte_malloc(NULL, 1977 sizeof(struct loop_routine), 0); 1978 lr->loop = loop; 1979 lr->arg = arg; 1980 rte_eal_mp_remote_launch(main_loop, lr, CALL_MASTER); 1981 rte_eal_mp_wait_lcore(); 1982 rte_free(lr); 1983 } 1984 1985 void 1986 ff_dpdk_pktmbuf_free(void *m) 1987 { 1988 rte_pktmbuf_free_seg((struct rte_mbuf *)m); 1989 } 1990 1991 static uint32_t 1992 toeplitz_hash(unsigned keylen, const uint8_t *key, 1993 unsigned datalen, const uint8_t *data) 1994 { 1995 uint32_t hash = 0, v; 1996 u_int i, b; 1997 1998 /* XXXRW: Perhaps an assertion about key length vs. data length? 
*/ 1999 2000 v = (key[0]<<24) + (key[1]<<16) + (key[2] <<8) + key[3]; 2001 for (i = 0; i < datalen; i++) { 2002 for (b = 0; b < 8; b++) { 2003 if (data[i] & (1<<(7-b))) 2004 hash ^= v; 2005 v <<= 1; 2006 if ((i + 4) < keylen && 2007 (key[i+4] & (1<<(7-b)))) 2008 v |= 1; 2009 } 2010 } 2011 return (hash); 2012 } 2013 2014 int 2015 ff_in_pcbladdr(uint16_t family, void *faddr, uint16_t fport, void *laddr) 2016 { 2017 int ret = 0; 2018 uint16_t fa; 2019 2020 if (!pcblddr_fun) 2021 return ret; 2022 2023 if (family == AF_INET) 2024 fa = AF_INET; 2025 else if (family == AF_INET6_FREEBSD) 2026 fa = AF_INET6_LINUX; 2027 else 2028 return EADDRNOTAVAIL; 2029 2030 ret = (*pcblddr_fun)(fa, faddr, fport, laddr); 2031 2032 return ret; 2033 } 2034 2035 void 2036 ff_regist_pcblddr_fun(pcblddr_func_t func) 2037 { 2038 pcblddr_fun = func; 2039 } 2040 2041 int 2042 ff_rss_check(void *softc, uint32_t saddr, uint32_t daddr, 2043 uint16_t sport, uint16_t dport) 2044 { 2045 struct lcore_conf *qconf = &lcore_conf; 2046 struct ff_dpdk_if_context *ctx = ff_veth_softc_to_hostc(softc); 2047 uint16_t nb_queues = qconf->nb_queue_list[ctx->port_id]; 2048 2049 if (nb_queues <= 1) { 2050 return 1; 2051 } 2052 2053 uint16_t reta_size = rss_reta_size[ctx->port_id]; 2054 uint16_t queueid = qconf->tx_queue_id[ctx->port_id]; 2055 2056 uint8_t data[sizeof(saddr) + sizeof(daddr) + sizeof(sport) + 2057 sizeof(dport)]; 2058 2059 unsigned datalen = 0; 2060 2061 bcopy(&saddr, &data[datalen], sizeof(saddr)); 2062 datalen += sizeof(saddr); 2063 2064 bcopy(&daddr, &data[datalen], sizeof(daddr)); 2065 datalen += sizeof(daddr); 2066 2067 bcopy(&sport, &data[datalen], sizeof(sport)); 2068 datalen += sizeof(sport); 2069 2070 bcopy(&dport, &data[datalen], sizeof(dport)); 2071 datalen += sizeof(dport); 2072 2073 uint32_t hash = 0; 2074 hash = toeplitz_hash(rsskey_len, rsskey, datalen, data); 2075 2076 return ((hash & (reta_size - 1)) % nb_queues) == queueid; 2077 } 2078 2079 void 2080 ff_regist_packet_dispatcher(dispatch_func_t func) 2081 { 2082 packet_dispatcher = func; 2083 } 2084 2085 uint64_t 2086 ff_get_tsc_ns() 2087 { 2088 uint64_t cur_tsc = rte_rdtsc(); 2089 uint64_t hz = rte_get_tsc_hz(); 2090 return ((double)cur_tsc/(double)hz) * NS_PER_S; 2091 } 2092 2093
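
/*
 * Usage sketch (added, not built): registering a custom dispatch callback via
 * ff_regist_packet_dispatcher(). The function names below are hypothetical;
 * the return-value contract follows process_packets(): returning a queue id
 * steers the packet to that queue, FF_DISPATCH_RESPONSE transmits it straight
 * back out, and FF_DISPATCH_ERROR (or a value >= nb_queues) drops it.
 */
#if 0
static int
example_dispatch(void *data, uint16_t *len, uint16_t queue_id,
    uint16_t nb_queues)
{
    /* Keep the packet on the queue that received it. */
    return queue_id;
}

static void
example_register(void)
{
    /* Call once after ff_dpdk_init(), before ff_dpdk_run(). */
    ff_regist_packet_dispatcher(example_dispatch);
}
#endif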