1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #ifndef _GNU_SOURCE 6 #define _GNU_SOURCE 7 #endif 8 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <stdint.h> 12 #include <inttypes.h> 13 #include <sys/types.h> 14 #include <string.h> 15 #include <sys/queue.h> 16 #include <stdarg.h> 17 #include <errno.h> 18 #include <getopt.h> 19 #include <sched.h> 20 21 #include <rte_common.h> 22 #include <rte_vect.h> 23 #include <rte_byteorder.h> 24 #include <rte_log.h> 25 #include <rte_memory.h> 26 #include <rte_memcpy.h> 27 #include <rte_eal.h> 28 #include <rte_launch.h> 29 #include <rte_atomic.h> 30 #include <rte_cycles.h> 31 #include <rte_prefetch.h> 32 #include <rte_lcore.h> 33 #include <rte_per_lcore.h> 34 #include <rte_branch_prediction.h> 35 #include <rte_interrupts.h> 36 #include <rte_random.h> 37 #include <rte_debug.h> 38 #include <rte_ether.h> 39 #include <rte_ethdev.h> 40 #include <rte_ring.h> 41 #include <rte_mempool.h> 42 #include <rte_mbuf.h> 43 #include <rte_ip.h> 44 #include <rte_tcp.h> 45 #include <rte_udp.h> 46 #include <rte_string_fns.h> 47 #include <rte_pause.h> 48 #include <rte_timer.h> 49 50 #include <cmdline_parse.h> 51 #include <cmdline_parse_etheraddr.h> 52 53 #include <lthread_api.h> 54 55 #define APP_LOOKUP_EXACT_MATCH 0 56 #define APP_LOOKUP_LPM 1 57 #define DO_RFC_1812_CHECKS 58 59 /* Enable cpu-load stats 0-off, 1-on */ 60 #define APP_CPU_LOAD 1 61 62 #ifndef APP_LOOKUP_METHOD 63 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 64 #endif 65 66 #ifndef __GLIBC__ /* sched_getcpu() is glibc specific */ 67 #define sched_getcpu() rte_lcore_id() 68 #endif 69 70 static int 71 check_ptype(int portid) 72 { 73 int i, ret; 74 int ipv4 = 0, ipv6 = 0; 75 76 ret = rte_eth_dev_get_supported_ptypes(portid, RTE_PTYPE_L3_MASK, NULL, 77 0); 78 if (ret <= 0) 79 return 0; 80 81 uint32_t ptypes[ret]; 82 83 ret = rte_eth_dev_get_supported_ptypes(portid, RTE_PTYPE_L3_MASK, 84 ptypes, ret); 85 for (i = 0; i < ret; ++i) { 86 if (ptypes[i] & RTE_PTYPE_L3_IPV4) 87 ipv4 = 1; 88 if (ptypes[i] & RTE_PTYPE_L3_IPV6) 89 ipv6 = 1; 90 } 91 92 if (ipv4 && ipv6) 93 return 1; 94 95 return 0; 96 } 97 98 static inline void 99 parse_ptype(struct rte_mbuf *m) 100 { 101 struct rte_ether_hdr *eth_hdr; 102 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 103 uint16_t ether_type; 104 105 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 106 ether_type = eth_hdr->ether_type; 107 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) 108 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 109 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) 110 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 111 112 m->packet_type = packet_type; 113 } 114 115 static uint16_t 116 cb_parse_ptype(__rte_unused uint16_t port, __rte_unused uint16_t queue, 117 struct rte_mbuf *pkts[], uint16_t nb_pkts, 118 __rte_unused uint16_t max_pkts, __rte_unused void *user_param) 119 { 120 unsigned int i; 121 122 for (i = 0; i < nb_pkts; i++) 123 parse_ptype(pkts[i]); 124 125 return nb_pkts; 126 } 127 128 /* 129 * When set to zero, the simple forwarding path is enabled. 130 * When set to one, the optimized forwarding path is enabled. 131 * Note that the LPM optimisation path uses SSE4.1 instructions.
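 * (With the optimized path, the code below handles packets in groups:
 * eight at a time for exact-match and FWDSTEP at a time for LPM, using
 * SSE intrinsics; the simple path hands each packet to
 * l3fwd_simple_forward() instead.)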
132 */ 133 #define ENABLE_MULTI_BUFFER_OPTIMIZE 1 134 135 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 136 #include <rte_hash.h> 137 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 138 #include <rte_lpm.h> 139 #include <rte_lpm6.h> 140 #else 141 #error "APP_LOOKUP_METHOD set to incorrect value" 142 #endif 143 144 #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 145 146 #define MAX_JUMBO_PKT_LEN 9600 147 148 #define IPV6_ADDR_LEN 16 149 150 #define MEMPOOL_CACHE_SIZE 256 151 152 /* 153 * This expression is used to calculate the number of mbufs needed depending on 154 * user input, taking into account memory for rx and tx hardware rings, cache 155 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 156 * NB_MBUF never goes below a minimum value of 8192 157 */ 158 159 #define NB_MBUF RTE_MAX(\ 160 (nb_ports*nb_rx_queue*nb_rxd + \ 161 nb_ports*nb_lcores*MAX_PKT_BURST + \ 162 nb_ports*n_tx_queue*nb_txd + \ 163 nb_lcores*MEMPOOL_CACHE_SIZE), \ 164 (unsigned)8192) 165 166 #define MAX_PKT_BURST 32 167 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 168 169 /* 170 * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send. 171 */ 172 #define MAX_TX_BURST (MAX_PKT_BURST / 2) 173 #define BURST_SIZE MAX_TX_BURST 174 175 #define NB_SOCKETS 8 176 177 /* Configure how many packets ahead to prefetch, when reading packets */ 178 #define PREFETCH_OFFSET 3 179 180 /* Used to mark destination port as 'invalid'. */ 181 #define BAD_PORT ((uint16_t)-1) 182 183 #define FWDSTEP 4 184 185 /* 186 * Configurable number of RX/TX ring descriptors 187 */ 188 #define RTE_TEST_RX_DESC_DEFAULT 1024 189 #define RTE_TEST_TX_DESC_DEFAULT 1024 190 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 191 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 192 193 /* ethernet addresses of ports */ 194 static uint64_t dest_eth_addr[RTE_MAX_ETHPORTS]; 195 static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 196 197 static xmm_t val_eth[RTE_MAX_ETHPORTS]; 198 199 /* replace first 12B of the ethernet header. */ 200 #define MASK_ETH 0x3f 201 202 /* mask of enabled ports */ 203 static uint32_t enabled_port_mask; 204 static int promiscuous_on; /**< Set in promiscuous mode off by default. */ 205 static int numa_on = 1; /**< NUMA is enabled by default. */ 206 static int parse_ptype_on; 207 208 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 209 static int ipv6; /**< ipv6 is false by default. 
*/ 210 #endif 211 212 #if (APP_CPU_LOAD == 1) 213 214 #define MAX_CPU RTE_MAX_LCORE 215 #define CPU_LOAD_TIMEOUT_US (5 * 1000 * 1000) /**< Timeout for collecting 5s */ 216 217 #define CPU_PROCESS 0 218 #define CPU_POLL 1 219 #define MAX_CPU_COUNTER 2 220 221 struct cpu_load { 222 uint16_t n_cpu; 223 uint64_t counter; 224 uint64_t hits[MAX_CPU_COUNTER][MAX_CPU]; 225 } __rte_cache_aligned; 226 227 static struct cpu_load cpu_load; 228 static int cpu_load_lcore_id = -1; 229 230 #define SET_CPU_BUSY(thread, counter) \ 231 thread->conf.busy[counter] = 1 232 233 #define SET_CPU_IDLE(thread, counter) \ 234 thread->conf.busy[counter] = 0 235 236 #define IS_CPU_BUSY(thread, counter) \ 237 (thread->conf.busy[counter] > 0) 238 239 #else 240 241 #define SET_CPU_BUSY(thread, counter) 242 #define SET_CPU_IDLE(thread, counter) 243 #define IS_CPU_BUSY(thread, counter) 0 244 245 #endif 246 247 struct mbuf_table { 248 uint16_t len; 249 struct rte_mbuf *m_table[MAX_PKT_BURST]; 250 }; 251 252 struct lcore_rx_queue { 253 uint16_t port_id; 254 uint8_t queue_id; 255 } __rte_cache_aligned; 256 257 #define MAX_RX_QUEUE_PER_LCORE 16 258 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 259 #define MAX_RX_QUEUE_PER_PORT 128 260 261 #define MAX_LCORE_PARAMS 1024 262 struct rx_thread_params { 263 uint16_t port_id; 264 uint8_t queue_id; 265 uint8_t lcore_id; 266 uint8_t thread_id; 267 } __rte_cache_aligned; 268 269 static struct rx_thread_params rx_thread_params_array[MAX_LCORE_PARAMS]; 270 static struct rx_thread_params rx_thread_params_array_default[] = { 271 {0, 0, 2, 0}, 272 {0, 1, 2, 1}, 273 {0, 2, 2, 2}, 274 {1, 0, 2, 3}, 275 {1, 1, 2, 4}, 276 {1, 2, 2, 5}, 277 {2, 0, 2, 6}, 278 {3, 0, 3, 7}, 279 {3, 1, 3, 8}, 280 }; 281 282 static struct rx_thread_params *rx_thread_params = 283 rx_thread_params_array_default; 284 static uint16_t nb_rx_thread_params = RTE_DIM(rx_thread_params_array_default); 285 286 struct tx_thread_params { 287 uint8_t lcore_id; 288 uint8_t thread_id; 289 } __rte_cache_aligned; 290 291 static struct tx_thread_params tx_thread_params_array[MAX_LCORE_PARAMS]; 292 static struct tx_thread_params tx_thread_params_array_default[] = { 293 {4, 0}, 294 {5, 1}, 295 {6, 2}, 296 {7, 3}, 297 {8, 4}, 298 {9, 5}, 299 {10, 6}, 300 {11, 7}, 301 {12, 8}, 302 }; 303 304 static struct tx_thread_params *tx_thread_params = 305 tx_thread_params_array_default; 306 static uint16_t nb_tx_thread_params = RTE_DIM(tx_thread_params_array_default); 307 308 static struct rte_eth_conf port_conf = { 309 .rxmode = { 310 .mq_mode = ETH_MQ_RX_RSS, 311 .max_rx_pkt_len = RTE_ETHER_MAX_LEN, 312 .split_hdr_size = 0, 313 .offloads = DEV_RX_OFFLOAD_CHECKSUM, 314 }, 315 .rx_adv_conf = { 316 .rss_conf = { 317 .rss_key = NULL, 318 .rss_hf = ETH_RSS_TCP, 319 }, 320 }, 321 .txmode = { 322 .mq_mode = ETH_MQ_TX_NONE, 323 }, 324 }; 325 326 static struct rte_mempool *pktmbuf_pool[NB_SOCKETS]; 327 328 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 329 330 #include <rte_hash_crc.h> 331 #define DEFAULT_HASH_FUNC rte_hash_crc 332 333 struct ipv4_5tuple { 334 uint32_t ip_dst; 335 uint32_t ip_src; 336 uint16_t port_dst; 337 uint16_t port_src; 338 uint8_t proto; 339 } __rte_packed; 340 341 union ipv4_5tuple_host { 342 struct { 343 uint8_t pad0; 344 uint8_t proto; 345 uint16_t pad1; 346 uint32_t ip_src; 347 uint32_t ip_dst; 348 uint16_t port_src; 349 uint16_t port_dst; 350 }; 351 __m128i xmm; 352 }; 353 354 #define XMM_NUM_IN_IPV6_5TUPLE 3 355 356 struct ipv6_5tuple { 357 uint8_t ip_dst[IPV6_ADDR_LEN]; 358 uint8_t ip_src[IPV6_ADDR_LEN]; 359 uint16_t 
port_dst; 360 uint16_t port_src; 361 uint8_t proto; 362 } __rte_packed; 363 364 union ipv6_5tuple_host { 365 struct { 366 uint16_t pad0; 367 uint8_t proto; 368 uint8_t pad1; 369 uint8_t ip_src[IPV6_ADDR_LEN]; 370 uint8_t ip_dst[IPV6_ADDR_LEN]; 371 uint16_t port_src; 372 uint16_t port_dst; 373 uint64_t reserve; 374 }; 375 __m128i xmm[XMM_NUM_IN_IPV6_5TUPLE]; 376 }; 377 378 struct ipv4_l3fwd_route { 379 struct ipv4_5tuple key; 380 uint8_t if_out; 381 }; 382 383 struct ipv6_l3fwd_route { 384 struct ipv6_5tuple key; 385 uint8_t if_out; 386 }; 387 388 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 389 {{RTE_IPV4(101, 0, 0, 0), RTE_IPV4(100, 10, 0, 1), 101, 11, IPPROTO_TCP}, 0}, 390 {{RTE_IPV4(201, 0, 0, 0), RTE_IPV4(200, 20, 0, 1), 102, 12, IPPROTO_TCP}, 1}, 391 {{RTE_IPV4(111, 0, 0, 0), RTE_IPV4(100, 30, 0, 1), 101, 11, IPPROTO_TCP}, 2}, 392 {{RTE_IPV4(211, 0, 0, 0), RTE_IPV4(200, 40, 0, 1), 102, 12, IPPROTO_TCP}, 3}, 393 }; 394 395 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 396 {{ 397 {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, 398 {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 399 0x05}, 400 101, 11, IPPROTO_TCP}, 0}, 401 402 {{ 403 {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, 404 {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 405 0x05}, 406 102, 12, IPPROTO_TCP}, 1}, 407 408 {{ 409 {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, 410 {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 411 0x05}, 412 101, 11, IPPROTO_TCP}, 2}, 413 414 {{ 415 {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, 416 {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 417 0x05}, 418 102, 12, IPPROTO_TCP}, 3}, 419 }; 420 421 typedef struct rte_hash lookup_struct_t; 422 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 423 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 424 425 #ifdef RTE_ARCH_X86_64 426 /* default to 4 million hash entries (approx) */ 427 #define L3FWD_HASH_ENTRIES (1024*1024*4) 428 #else 429 /* 32-bit has less address-space for hugepage memory, limit to 1M entries */ 430 #define L3FWD_HASH_ENTRIES (1024*1024*1) 431 #endif 432 #define HASH_ENTRY_NUMBER_DEFAULT 4 433 434 static uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT; 435 436 static inline uint32_t 437 ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len, 438 uint32_t init_val) 439 { 440 const union ipv4_5tuple_host *k; 441 uint32_t t; 442 const uint32_t *p; 443 444 k = data; 445 t = k->proto; 446 p = (const uint32_t *)&k->port_src; 447 448 init_val = rte_hash_crc_4byte(t, init_val); 449 init_val = rte_hash_crc_4byte(k->ip_src, init_val); 450 init_val = rte_hash_crc_4byte(k->ip_dst, init_val); 451 init_val = rte_hash_crc_4byte(*p, init_val); 452 return init_val; 453 } 454 455 static inline uint32_t 456 ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len, 457 uint32_t init_val) 458 { 459 const union ipv6_5tuple_host *k; 460 uint32_t t; 461 const uint32_t *p; 462 const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3; 463 const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3; 464 465 k = data; 466 t = k->proto; 467 p = (const uint32_t *)&k->port_src; 468 469 ip_src0 = (const uint32_t *) k->ip_src; 470 ip_src1 = (const uint32_t *)(k->ip_src + 4); 471 ip_src2 = (const uint32_t *)(k->ip_src + 8); 472 ip_src3 = (const uint32_t *)(k->ip_src + 12); 473 ip_dst0 = (const uint32_t *) 
k->ip_dst; 474 ip_dst1 = (const uint32_t *)(k->ip_dst + 4); 475 ip_dst2 = (const uint32_t *)(k->ip_dst + 8); 476 ip_dst3 = (const uint32_t *)(k->ip_dst + 12); 477 init_val = rte_hash_crc_4byte(t, init_val); 478 init_val = rte_hash_crc_4byte(*ip_src0, init_val); 479 init_val = rte_hash_crc_4byte(*ip_src1, init_val); 480 init_val = rte_hash_crc_4byte(*ip_src2, init_val); 481 init_val = rte_hash_crc_4byte(*ip_src3, init_val); 482 init_val = rte_hash_crc_4byte(*ip_dst0, init_val); 483 init_val = rte_hash_crc_4byte(*ip_dst1, init_val); 484 init_val = rte_hash_crc_4byte(*ip_dst2, init_val); 485 init_val = rte_hash_crc_4byte(*ip_dst3, init_val); 486 init_val = rte_hash_crc_4byte(*p, init_val); 487 return init_val; 488 } 489 490 #define IPV4_L3FWD_NUM_ROUTES RTE_DIM(ipv4_l3fwd_route_array) 491 #define IPV6_L3FWD_NUM_ROUTES RTE_DIM(ipv6_l3fwd_route_array) 492 493 static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 494 static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 495 496 #endif 497 498 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 499 struct ipv4_l3fwd_route { 500 uint32_t ip; 501 uint8_t depth; 502 uint8_t if_out; 503 }; 504 505 struct ipv6_l3fwd_route { 506 uint8_t ip[16]; 507 uint8_t depth; 508 uint8_t if_out; 509 }; 510 511 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 512 {RTE_IPV4(1, 1, 1, 0), 24, 0}, 513 {RTE_IPV4(2, 1, 1, 0), 24, 1}, 514 {RTE_IPV4(3, 1, 1, 0), 24, 2}, 515 {RTE_IPV4(4, 1, 1, 0), 24, 3}, 516 {RTE_IPV4(5, 1, 1, 0), 24, 4}, 517 {RTE_IPV4(6, 1, 1, 0), 24, 5}, 518 {RTE_IPV4(7, 1, 1, 0), 24, 6}, 519 {RTE_IPV4(8, 1, 1, 0), 24, 7}, 520 }; 521 522 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 523 {{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 0}, 524 {{2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 1}, 525 {{3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 2}, 526 {{4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 3}, 527 {{5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 4}, 528 {{6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 5}, 529 {{7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 6}, 530 {{8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 7}, 531 }; 532 533 #define IPV4_L3FWD_NUM_ROUTES RTE_DIM(ipv4_l3fwd_route_array) 534 #define IPV6_L3FWD_NUM_ROUTES RTE_DIM(ipv6_l3fwd_route_array) 535 536 #define IPV4_L3FWD_LPM_MAX_RULES 1024 537 #define IPV6_L3FWD_LPM_MAX_RULES 1024 538 #define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16) 539 540 typedef struct rte_lpm lookup_struct_t; 541 typedef struct rte_lpm6 lookup6_struct_t; 542 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 543 static lookup6_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 544 #endif 545 546 struct lcore_conf { 547 lookup_struct_t *ipv4_lookup_struct; 548 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 549 lookup6_struct_t *ipv6_lookup_struct; 550 #else 551 lookup_struct_t *ipv6_lookup_struct; 552 #endif 553 void *data; 554 } __rte_cache_aligned; 555 556 static struct lcore_conf lcore_conf[RTE_MAX_LCORE]; 557 RTE_DEFINE_PER_LCORE(struct lcore_conf *, lcore_conf); 558 559 #define MAX_RX_QUEUE_PER_THREAD 16 560 #define MAX_TX_PORT_PER_THREAD RTE_MAX_ETHPORTS 561 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 562 #define MAX_RX_QUEUE_PER_PORT 128 563 564 #define MAX_RX_THREAD 1024 565 #define MAX_TX_THREAD 1024 566 #define MAX_THREAD (MAX_RX_THREAD + MAX_TX_THREAD) 567 568 /** 569 * Producers and consumers threads configuration 570 */ 571 static int lthreads_on = 1; /**< Use lthreads for processing*/ 572 573 
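/*
 * Roughly, the data path built on the structures below: each rx thread polls
 * the queues in its rx_queue_list and enqueues the received mbufs into its
 * output rings (one ring per tx worker it feeds); on the tx side each ring
 * is drained with rte_ring_sc_dequeue_burst(), the burst is forwarded by
 * process_burst() and finally transmitted with rte_eth_tx_burst(). The two
 * counters below only record how many of those threads have started, so
 * that the stats collector can wait for all of them.
 */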
rte_atomic16_t rx_counter; /**< Number of spawned rx threads */ 574 rte_atomic16_t tx_counter; /**< Number of spawned tx threads */ 575 576 struct thread_conf { 577 uint16_t lcore_id; /**< Initial lcore for rx thread */ 578 uint16_t cpu_id; /**< Cpu id for cpu load stats counter */ 579 uint16_t thread_id; /**< Thread ID */ 580 581 #if (APP_CPU_LOAD > 0) 582 int busy[MAX_CPU_COUNTER]; 583 #endif 584 }; 585 586 struct thread_rx_conf { 587 struct thread_conf conf; 588 589 uint16_t n_rx_queue; 590 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 591 592 uint16_t n_ring; /**< Number of output rings */ 593 struct rte_ring *ring[RTE_MAX_LCORE]; 594 struct lthread_cond *ready[RTE_MAX_LCORE]; 595 596 #if (APP_CPU_LOAD > 0) 597 int busy[MAX_CPU_COUNTER]; 598 #endif 599 } __rte_cache_aligned; 600 601 uint16_t n_rx_thread; 602 struct thread_rx_conf rx_thread[MAX_RX_THREAD]; 603 604 struct thread_tx_conf { 605 struct thread_conf conf; 606 607 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 608 struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS]; 609 610 struct rte_ring *ring; 611 struct lthread_cond **ready; 612 613 } __rte_cache_aligned; 614 615 uint16_t n_tx_thread; 616 struct thread_tx_conf tx_thread[MAX_TX_THREAD]; 617 618 /* Send burst of packets on an output interface */ 619 static inline int 620 send_burst(struct thread_tx_conf *qconf, uint16_t n, uint16_t port) 621 { 622 struct rte_mbuf **m_table; 623 int ret; 624 uint16_t queueid; 625 626 queueid = qconf->tx_queue_id[port]; 627 m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; 628 629 ret = rte_eth_tx_burst(port, queueid, m_table, n); 630 if (unlikely(ret < n)) { 631 do { 632 rte_pktmbuf_free(m_table[ret]); 633 } while (++ret < n); 634 } 635 636 return 0; 637 } 638 639 /* Enqueue a single packet, and send burst if queue is filled */ 640 static inline int 641 send_single_packet(struct rte_mbuf *m, uint16_t port) 642 { 643 uint16_t len; 644 struct thread_tx_conf *qconf; 645 646 if (lthreads_on) 647 qconf = (struct thread_tx_conf *)lthread_get_data(); 648 else 649 qconf = (struct thread_tx_conf *)RTE_PER_LCORE(lcore_conf)->data; 650 651 len = qconf->tx_mbufs[port].len; 652 qconf->tx_mbufs[port].m_table[len] = m; 653 len++; 654 655 /* enough pkts to be sent */ 656 if (unlikely(len == MAX_PKT_BURST)) { 657 send_burst(qconf, MAX_PKT_BURST, port); 658 len = 0; 659 } 660 661 qconf->tx_mbufs[port].len = len; 662 return 0; 663 } 664 665 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ 666 (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) 667 static __rte_always_inline void 668 send_packetsx4(uint16_t port, 669 struct rte_mbuf *m[], uint32_t num) 670 { 671 uint32_t len, j, n; 672 struct thread_tx_conf *qconf; 673 674 if (lthreads_on) 675 qconf = (struct thread_tx_conf *)lthread_get_data(); 676 else 677 qconf = (struct thread_tx_conf *)RTE_PER_LCORE(lcore_conf)->data; 678 679 len = qconf->tx_mbufs[port].len; 680 681 /* 682 * If TX buffer for that queue is empty, and we have enough packets, 683 * then send them straightway. 684 */ 685 if (num >= MAX_TX_BURST && len == 0) { 686 n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num); 687 if (unlikely(n < num)) { 688 do { 689 rte_pktmbuf_free(m[n]); 690 } while (++n < num); 691 } 692 return; 693 } 694 695 /* 696 * Put packets into TX buffer for that queue. 697 */ 698 699 n = len + num; 700 n = (n > MAX_PKT_BURST) ? 
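/* the buffer cannot hold them all: only top it up here, the rest is copied after the flush below */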
MAX_PKT_BURST - len : num; 701 702 j = 0; 703 switch (n % FWDSTEP) { 704 while (j < n) { 705 case 0: 706 qconf->tx_mbufs[port].m_table[len + j] = m[j]; 707 j++; 708 /* fall-through */ 709 case 3: 710 qconf->tx_mbufs[port].m_table[len + j] = m[j]; 711 j++; 712 /* fall-through */ 713 case 2: 714 qconf->tx_mbufs[port].m_table[len + j] = m[j]; 715 j++; 716 /* fall-through */ 717 case 1: 718 qconf->tx_mbufs[port].m_table[len + j] = m[j]; 719 j++; 720 } 721 } 722 723 len += n; 724 725 /* enough pkts to be sent */ 726 if (unlikely(len == MAX_PKT_BURST)) { 727 728 send_burst(qconf, MAX_PKT_BURST, port); 729 730 /* copy rest of the packets into the TX buffer. */ 731 len = num - n; 732 j = 0; 733 switch (len % FWDSTEP) { 734 while (j < len) { 735 case 0: 736 qconf->tx_mbufs[port].m_table[j] = m[n + j]; 737 j++; 738 /* fall-through */ 739 case 3: 740 qconf->tx_mbufs[port].m_table[j] = m[n + j]; 741 j++; 742 /* fall-through */ 743 case 2: 744 qconf->tx_mbufs[port].m_table[j] = m[n + j]; 745 j++; 746 /* fall-through */ 747 case 1: 748 qconf->tx_mbufs[port].m_table[j] = m[n + j]; 749 j++; 750 } 751 } 752 } 753 754 qconf->tx_mbufs[port].len = len; 755 } 756 #endif /* APP_LOOKUP_LPM */ 757 758 #ifdef DO_RFC_1812_CHECKS 759 static inline int 760 is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len) 761 { 762 /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ 763 /* 764 * 1. The packet length reported by the Link Layer must be large 765 * enough to hold the minimum length legal IP datagram (20 bytes). 766 */ 767 if (link_len < sizeof(struct rte_ipv4_hdr)) 768 return -1; 769 770 /* 2. The IP checksum must be correct. */ 771 /* this is checked in H/W */ 772 773 /* 774 * 3. The IP version number must be 4. If the version number is not 4 775 * then the packet may be another version of IP, such as IPng or 776 * ST-II. 777 */ 778 if (((pkt->version_ihl) >> 4) != 4) 779 return -3; 780 /* 781 * 4. The IP header length field must be large enough to hold the 782 * minimum length legal IP datagram (20 bytes = 5 words). 783 */ 784 if ((pkt->version_ihl & 0xf) < 5) 785 return -4; 786 787 /* 788 * 5. The IP total length field must be large enough to hold the IP 789 * datagram header, whose length is specified in the IP header length 790 * field. 791 */ 792 if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr)) 793 return -5; 794 795 return 0; 796 } 797 #endif 798 799 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 800 801 static __m128i mask0; 802 static __m128i mask1; 803 static __m128i mask2; 804 static inline uint16_t 805 get_ipv4_dst_port(void *ipv4_hdr, uint16_t portid, 806 lookup_struct_t *ipv4_l3fwd_lookup_struct) 807 { 808 int ret = 0; 809 union ipv4_5tuple_host key; 810 811 ipv4_hdr = (uint8_t *)ipv4_hdr + 812 offsetof(struct rte_ipv4_hdr, time_to_live); 813 __m128i data = _mm_loadu_si128((__m128i *)(ipv4_hdr)); 814 /* Get 5 tuple: dst port, src port, dst IP address, src IP address and 815 protocol */ 816 key.xmm = _mm_and_si128(data, mask0); 817 /* Find destination port */ 818 ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); 819 return ((ret < 0) ? 
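/* on a lookup miss keep the packet on its input port */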
portid : ipv4_l3fwd_out_if[ret]); 820 } 821 822 static inline uint16_t 823 get_ipv6_dst_port(void *ipv6_hdr, uint16_t portid, 824 lookup_struct_t *ipv6_l3fwd_lookup_struct) 825 { 826 int ret = 0; 827 union ipv6_5tuple_host key; 828 829 ipv6_hdr = (uint8_t *)ipv6_hdr + 830 offsetof(struct rte_ipv6_hdr, payload_len); 831 __m128i data0 = _mm_loadu_si128((__m128i *)(ipv6_hdr)); 832 __m128i data1 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) + 833 sizeof(__m128i))); 834 __m128i data2 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) + 835 sizeof(__m128i) + sizeof(__m128i))); 836 /* Get part of 5 tuple: src IP address lower 96 bits and protocol */ 837 key.xmm[0] = _mm_and_si128(data0, mask1); 838 /* Get part of 5 tuple: dst IP address lower 96 bits and src IP address 839 higher 32 bits */ 840 key.xmm[1] = data1; 841 /* Get part of 5 tuple: dst port and src port and dst IP address higher 842 32 bits */ 843 key.xmm[2] = _mm_and_si128(data2, mask2); 844 845 /* Find destination port */ 846 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 847 return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); 848 } 849 #endif 850 851 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 852 853 static inline uint16_t 854 get_ipv4_dst_port(void *ipv4_hdr, uint16_t portid, 855 lookup_struct_t *ipv4_l3fwd_lookup_struct) 856 { 857 uint32_t next_hop; 858 859 return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, 860 rte_be_to_cpu_32(((struct rte_ipv4_hdr *)ipv4_hdr)->dst_addr), 861 &next_hop) == 0) ? next_hop : portid); 862 } 863 864 static inline uint16_t 865 get_ipv6_dst_port(void *ipv6_hdr, uint16_t portid, 866 lookup6_struct_t *ipv6_l3fwd_lookup_struct) 867 { 868 uint32_t next_hop; 869 870 return ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct, 871 ((struct rte_ipv6_hdr *)ipv6_hdr)->dst_addr, &next_hop) == 0) ? 
872 next_hop : portid); 873 } 874 #endif 875 876 static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid) 877 __rte_unused; 878 879 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \ 880 (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) 881 882 #define MASK_ALL_PKTS 0xff 883 #define EXCLUDE_1ST_PKT 0xfe 884 #define EXCLUDE_2ND_PKT 0xfd 885 #define EXCLUDE_3RD_PKT 0xfb 886 #define EXCLUDE_4TH_PKT 0xf7 887 #define EXCLUDE_5TH_PKT 0xef 888 #define EXCLUDE_6TH_PKT 0xdf 889 #define EXCLUDE_7TH_PKT 0xbf 890 #define EXCLUDE_8TH_PKT 0x7f 891 892 static inline void 893 simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint16_t portid) 894 { 895 struct rte_ether_hdr *eth_hdr[8]; 896 struct rte_ipv4_hdr *ipv4_hdr[8]; 897 uint16_t dst_port[8]; 898 int32_t ret[8]; 899 union ipv4_5tuple_host key[8]; 900 __m128i data[8]; 901 902 eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct rte_ether_hdr *); 903 eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct rte_ether_hdr *); 904 eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct rte_ether_hdr *); 905 eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct rte_ether_hdr *); 906 eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct rte_ether_hdr *); 907 eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct rte_ether_hdr *); 908 eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct rte_ether_hdr *); 909 eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct rte_ether_hdr *); 910 911 /* Handle IPv4 headers.*/ 912 ipv4_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct rte_ipv4_hdr *, 913 sizeof(struct rte_ether_hdr)); 914 ipv4_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct rte_ipv4_hdr *, 915 sizeof(struct rte_ether_hdr)); 916 ipv4_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct rte_ipv4_hdr *, 917 sizeof(struct rte_ether_hdr)); 918 ipv4_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct rte_ipv4_hdr *, 919 sizeof(struct rte_ether_hdr)); 920 ipv4_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct rte_ipv4_hdr *, 921 sizeof(struct rte_ether_hdr)); 922 ipv4_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct rte_ipv4_hdr *, 923 sizeof(struct rte_ether_hdr)); 924 ipv4_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct rte_ipv4_hdr *, 925 sizeof(struct rte_ether_hdr)); 926 ipv4_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct rte_ipv4_hdr *, 927 sizeof(struct rte_ether_hdr)); 928 929 #ifdef DO_RFC_1812_CHECKS 930 /* Check to make sure the packet is valid (RFC1812) */ 931 uint8_t valid_mask = MASK_ALL_PKTS; 932 933 if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt_len) < 0) { 934 rte_pktmbuf_free(m[0]); 935 valid_mask &= EXCLUDE_1ST_PKT; 936 } 937 if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt_len) < 0) { 938 rte_pktmbuf_free(m[1]); 939 valid_mask &= EXCLUDE_2ND_PKT; 940 } 941 if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt_len) < 0) { 942 rte_pktmbuf_free(m[2]); 943 valid_mask &= EXCLUDE_3RD_PKT; 944 } 945 if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt_len) < 0) { 946 rte_pktmbuf_free(m[3]); 947 valid_mask &= EXCLUDE_4TH_PKT; 948 } 949 if (is_valid_ipv4_pkt(ipv4_hdr[4], m[4]->pkt_len) < 0) { 950 rte_pktmbuf_free(m[4]); 951 valid_mask &= EXCLUDE_5TH_PKT; 952 } 953 if (is_valid_ipv4_pkt(ipv4_hdr[5], m[5]->pkt_len) < 0) { 954 rte_pktmbuf_free(m[5]); 955 valid_mask &= EXCLUDE_6TH_PKT; 956 } 957 if (is_valid_ipv4_pkt(ipv4_hdr[6], m[6]->pkt_len) < 0) { 958 rte_pktmbuf_free(m[6]); 959 valid_mask &= EXCLUDE_7TH_PKT; 960 } 961 if (is_valid_ipv4_pkt(ipv4_hdr[7], m[7]->pkt_len) < 0) { 962 rte_pktmbuf_free(m[7]); 963 valid_mask &= EXCLUDE_8TH_PKT; 964 } 965 if (unlikely(valid_mask != MASK_ALL_PKTS)) { 966 if (valid_mask == 0) 967 return; 968 969 uint8_t i = 0; 970 971 for (i = 0; i < 8; i++) 972 
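/* any packet that survived the checks is forwarded on the scalar slow path */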
if ((0x1 << i) & valid_mask) 973 l3fwd_simple_forward(m[i], portid); 974 } 975 #endif /* End of #ifdef DO_RFC_1812_CHECKS */ 976 977 data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *, 978 sizeof(struct rte_ether_hdr) + 979 offsetof(struct rte_ipv4_hdr, time_to_live))); 980 data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *, 981 sizeof(struct rte_ether_hdr) + 982 offsetof(struct rte_ipv4_hdr, time_to_live))); 983 data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *, 984 sizeof(struct rte_ether_hdr) + 985 offsetof(struct rte_ipv4_hdr, time_to_live))); 986 data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *, 987 sizeof(struct rte_ether_hdr) + 988 offsetof(struct rte_ipv4_hdr, time_to_live))); 989 data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], __m128i *, 990 sizeof(struct rte_ether_hdr) + 991 offsetof(struct rte_ipv4_hdr, time_to_live))); 992 data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *, 993 sizeof(struct rte_ether_hdr) + 994 offsetof(struct rte_ipv4_hdr, time_to_live))); 995 data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *, 996 sizeof(struct rte_ether_hdr) + 997 offsetof(struct rte_ipv4_hdr, time_to_live))); 998 data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *, 999 sizeof(struct rte_ether_hdr) + 1000 offsetof(struct rte_ipv4_hdr, time_to_live))); 1001 1002 key[0].xmm = _mm_and_si128(data[0], mask0); 1003 key[1].xmm = _mm_and_si128(data[1], mask0); 1004 key[2].xmm = _mm_and_si128(data[2], mask0); 1005 key[3].xmm = _mm_and_si128(data[3], mask0); 1006 key[4].xmm = _mm_and_si128(data[4], mask0); 1007 key[5].xmm = _mm_and_si128(data[5], mask0); 1008 key[6].xmm = _mm_and_si128(data[6], mask0); 1009 key[7].xmm = _mm_and_si128(data[7], mask0); 1010 1011 const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3], 1012 &key[4], &key[5], &key[6], &key[7]}; 1013 1014 rte_hash_lookup_bulk(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, 1015 &key_array[0], 8, ret); 1016 dst_port[0] = ((ret[0] < 0) ? portid : ipv4_l3fwd_out_if[ret[0]]); 1017 dst_port[1] = ((ret[1] < 0) ? portid : ipv4_l3fwd_out_if[ret[1]]); 1018 dst_port[2] = ((ret[2] < 0) ? portid : ipv4_l3fwd_out_if[ret[2]]); 1019 dst_port[3] = ((ret[3] < 0) ? portid : ipv4_l3fwd_out_if[ret[3]]); 1020 dst_port[4] = ((ret[4] < 0) ? portid : ipv4_l3fwd_out_if[ret[4]]); 1021 dst_port[5] = ((ret[5] < 0) ? portid : ipv4_l3fwd_out_if[ret[5]]); 1022 dst_port[6] = ((ret[6] < 0) ? portid : ipv4_l3fwd_out_if[ret[6]]); 1023 dst_port[7] = ((ret[7] < 0) ? 
portid : ipv4_l3fwd_out_if[ret[7]]); 1024 1025 if (dst_port[0] >= RTE_MAX_ETHPORTS || 1026 (enabled_port_mask & 1 << dst_port[0]) == 0) 1027 dst_port[0] = portid; 1028 if (dst_port[1] >= RTE_MAX_ETHPORTS || 1029 (enabled_port_mask & 1 << dst_port[1]) == 0) 1030 dst_port[1] = portid; 1031 if (dst_port[2] >= RTE_MAX_ETHPORTS || 1032 (enabled_port_mask & 1 << dst_port[2]) == 0) 1033 dst_port[2] = portid; 1034 if (dst_port[3] >= RTE_MAX_ETHPORTS || 1035 (enabled_port_mask & 1 << dst_port[3]) == 0) 1036 dst_port[3] = portid; 1037 if (dst_port[4] >= RTE_MAX_ETHPORTS || 1038 (enabled_port_mask & 1 << dst_port[4]) == 0) 1039 dst_port[4] = portid; 1040 if (dst_port[5] >= RTE_MAX_ETHPORTS || 1041 (enabled_port_mask & 1 << dst_port[5]) == 0) 1042 dst_port[5] = portid; 1043 if (dst_port[6] >= RTE_MAX_ETHPORTS || 1044 (enabled_port_mask & 1 << dst_port[6]) == 0) 1045 dst_port[6] = portid; 1046 if (dst_port[7] >= RTE_MAX_ETHPORTS || 1047 (enabled_port_mask & 1 << dst_port[7]) == 0) 1048 dst_port[7] = portid; 1049 1050 #ifdef DO_RFC_1812_CHECKS 1051 /* Update time to live and header checksum */ 1052 --(ipv4_hdr[0]->time_to_live); 1053 --(ipv4_hdr[1]->time_to_live); 1054 --(ipv4_hdr[2]->time_to_live); 1055 --(ipv4_hdr[3]->time_to_live); 1056 ++(ipv4_hdr[0]->hdr_checksum); 1057 ++(ipv4_hdr[1]->hdr_checksum); 1058 ++(ipv4_hdr[2]->hdr_checksum); 1059 ++(ipv4_hdr[3]->hdr_checksum); 1060 --(ipv4_hdr[4]->time_to_live); 1061 --(ipv4_hdr[5]->time_to_live); 1062 --(ipv4_hdr[6]->time_to_live); 1063 --(ipv4_hdr[7]->time_to_live); 1064 ++(ipv4_hdr[4]->hdr_checksum); 1065 ++(ipv4_hdr[5]->hdr_checksum); 1066 ++(ipv4_hdr[6]->hdr_checksum); 1067 ++(ipv4_hdr[7]->hdr_checksum); 1068 #endif 1069 1070 /* dst addr */ 1071 *(uint64_t *)ð_hdr[0]->d_addr = dest_eth_addr[dst_port[0]]; 1072 *(uint64_t *)ð_hdr[1]->d_addr = dest_eth_addr[dst_port[1]]; 1073 *(uint64_t *)ð_hdr[2]->d_addr = dest_eth_addr[dst_port[2]]; 1074 *(uint64_t *)ð_hdr[3]->d_addr = dest_eth_addr[dst_port[3]]; 1075 *(uint64_t *)ð_hdr[4]->d_addr = dest_eth_addr[dst_port[4]]; 1076 *(uint64_t *)ð_hdr[5]->d_addr = dest_eth_addr[dst_port[5]]; 1077 *(uint64_t *)ð_hdr[6]->d_addr = dest_eth_addr[dst_port[6]]; 1078 *(uint64_t *)ð_hdr[7]->d_addr = dest_eth_addr[dst_port[7]]; 1079 1080 /* src addr */ 1081 rte_ether_addr_copy(&ports_eth_addr[dst_port[0]], ð_hdr[0]->s_addr); 1082 rte_ether_addr_copy(&ports_eth_addr[dst_port[1]], ð_hdr[1]->s_addr); 1083 rte_ether_addr_copy(&ports_eth_addr[dst_port[2]], ð_hdr[2]->s_addr); 1084 rte_ether_addr_copy(&ports_eth_addr[dst_port[3]], ð_hdr[3]->s_addr); 1085 rte_ether_addr_copy(&ports_eth_addr[dst_port[4]], ð_hdr[4]->s_addr); 1086 rte_ether_addr_copy(&ports_eth_addr[dst_port[5]], ð_hdr[5]->s_addr); 1087 rte_ether_addr_copy(&ports_eth_addr[dst_port[6]], ð_hdr[6]->s_addr); 1088 rte_ether_addr_copy(&ports_eth_addr[dst_port[7]], ð_hdr[7]->s_addr); 1089 1090 send_single_packet(m[0], (uint8_t)dst_port[0]); 1091 send_single_packet(m[1], (uint8_t)dst_port[1]); 1092 send_single_packet(m[2], (uint8_t)dst_port[2]); 1093 send_single_packet(m[3], (uint8_t)dst_port[3]); 1094 send_single_packet(m[4], (uint8_t)dst_port[4]); 1095 send_single_packet(m[5], (uint8_t)dst_port[5]); 1096 send_single_packet(m[6], (uint8_t)dst_port[6]); 1097 send_single_packet(m[7], (uint8_t)dst_port[7]); 1098 1099 } 1100 1101 static inline void get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0, 1102 __m128i mask1, union ipv6_5tuple_host *key) 1103 { 1104 __m128i tmpdata0 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0, 1105 __m128i *, sizeof(struct rte_ether_hdr) + 1106 
offsetof(struct rte_ipv6_hdr, payload_len))); 1107 __m128i tmpdata1 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0, 1108 __m128i *, sizeof(struct rte_ether_hdr) + 1109 offsetof(struct rte_ipv6_hdr, payload_len) + 1110 sizeof(__m128i))); 1111 __m128i tmpdata2 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0, 1112 __m128i *, sizeof(struct rte_ether_hdr) + 1113 offsetof(struct rte_ipv6_hdr, payload_len) + 1114 sizeof(__m128i) + sizeof(__m128i))); 1115 key->xmm[0] = _mm_and_si128(tmpdata0, mask0); 1116 key->xmm[1] = tmpdata1; 1117 key->xmm[2] = _mm_and_si128(tmpdata2, mask1); 1118 } 1119 1120 static inline void 1121 simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint16_t portid) 1122 { 1123 int32_t ret[8]; 1124 uint16_t dst_port[8]; 1125 struct rte_ether_hdr *eth_hdr[8]; 1126 union ipv6_5tuple_host key[8]; 1127 1128 __rte_unused struct rte_ipv6_hdr *ipv6_hdr[8]; 1129 1130 eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct rte_ether_hdr *); 1131 eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct rte_ether_hdr *); 1132 eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct rte_ether_hdr *); 1133 eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct rte_ether_hdr *); 1134 eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct rte_ether_hdr *); 1135 eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct rte_ether_hdr *); 1136 eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct rte_ether_hdr *); 1137 eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct rte_ether_hdr *); 1138 1139 /* Handle IPv6 headers.*/ 1140 ipv6_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct rte_ipv6_hdr *, 1141 sizeof(struct rte_ether_hdr)); 1142 ipv6_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct rte_ipv6_hdr *, 1143 sizeof(struct rte_ether_hdr)); 1144 ipv6_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct rte_ipv6_hdr *, 1145 sizeof(struct rte_ether_hdr)); 1146 ipv6_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct rte_ipv6_hdr *, 1147 sizeof(struct rte_ether_hdr)); 1148 ipv6_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct rte_ipv6_hdr *, 1149 sizeof(struct rte_ether_hdr)); 1150 ipv6_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct rte_ipv6_hdr *, 1151 sizeof(struct rte_ether_hdr)); 1152 ipv6_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct rte_ipv6_hdr *, 1153 sizeof(struct rte_ether_hdr)); 1154 ipv6_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct rte_ipv6_hdr *, 1155 sizeof(struct rte_ether_hdr)); 1156 1157 get_ipv6_5tuple(m[0], mask1, mask2, &key[0]); 1158 get_ipv6_5tuple(m[1], mask1, mask2, &key[1]); 1159 get_ipv6_5tuple(m[2], mask1, mask2, &key[2]); 1160 get_ipv6_5tuple(m[3], mask1, mask2, &key[3]); 1161 get_ipv6_5tuple(m[4], mask1, mask2, &key[4]); 1162 get_ipv6_5tuple(m[5], mask1, mask2, &key[5]); 1163 get_ipv6_5tuple(m[6], mask1, mask2, &key[6]); 1164 get_ipv6_5tuple(m[7], mask1, mask2, &key[7]); 1165 1166 const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3], 1167 &key[4], &key[5], &key[6], &key[7]}; 1168 1169 rte_hash_lookup_bulk(RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct, 1170 &key_array[0], 4, ret); 1171 dst_port[0] = ((ret[0] < 0) ? portid : ipv6_l3fwd_out_if[ret[0]]); 1172 dst_port[1] = ((ret[1] < 0) ? portid : ipv6_l3fwd_out_if[ret[1]]); 1173 dst_port[2] = ((ret[2] < 0) ? portid : ipv6_l3fwd_out_if[ret[2]]); 1174 dst_port[3] = ((ret[3] < 0) ? portid : ipv6_l3fwd_out_if[ret[3]]); 1175 dst_port[4] = ((ret[4] < 0) ? portid : ipv6_l3fwd_out_if[ret[4]]); 1176 dst_port[5] = ((ret[5] < 0) ? portid : ipv6_l3fwd_out_if[ret[5]]); 1177 dst_port[6] = ((ret[6] < 0) ? portid : ipv6_l3fwd_out_if[ret[6]]); 1178 dst_port[7] = ((ret[7] < 0) ? 
portid : ipv6_l3fwd_out_if[ret[7]]); 1179 1180 if (dst_port[0] >= RTE_MAX_ETHPORTS || 1181 (enabled_port_mask & 1 << dst_port[0]) == 0) 1182 dst_port[0] = portid; 1183 if (dst_port[1] >= RTE_MAX_ETHPORTS || 1184 (enabled_port_mask & 1 << dst_port[1]) == 0) 1185 dst_port[1] = portid; 1186 if (dst_port[2] >= RTE_MAX_ETHPORTS || 1187 (enabled_port_mask & 1 << dst_port[2]) == 0) 1188 dst_port[2] = portid; 1189 if (dst_port[3] >= RTE_MAX_ETHPORTS || 1190 (enabled_port_mask & 1 << dst_port[3]) == 0) 1191 dst_port[3] = portid; 1192 if (dst_port[4] >= RTE_MAX_ETHPORTS || 1193 (enabled_port_mask & 1 << dst_port[4]) == 0) 1194 dst_port[4] = portid; 1195 if (dst_port[5] >= RTE_MAX_ETHPORTS || 1196 (enabled_port_mask & 1 << dst_port[5]) == 0) 1197 dst_port[5] = portid; 1198 if (dst_port[6] >= RTE_MAX_ETHPORTS || 1199 (enabled_port_mask & 1 << dst_port[6]) == 0) 1200 dst_port[6] = portid; 1201 if (dst_port[7] >= RTE_MAX_ETHPORTS || 1202 (enabled_port_mask & 1 << dst_port[7]) == 0) 1203 dst_port[7] = portid; 1204 1205 /* dst addr */ 1206 *(uint64_t *)ð_hdr[0]->d_addr = dest_eth_addr[dst_port[0]]; 1207 *(uint64_t *)ð_hdr[1]->d_addr = dest_eth_addr[dst_port[1]]; 1208 *(uint64_t *)ð_hdr[2]->d_addr = dest_eth_addr[dst_port[2]]; 1209 *(uint64_t *)ð_hdr[3]->d_addr = dest_eth_addr[dst_port[3]]; 1210 *(uint64_t *)ð_hdr[4]->d_addr = dest_eth_addr[dst_port[4]]; 1211 *(uint64_t *)ð_hdr[5]->d_addr = dest_eth_addr[dst_port[5]]; 1212 *(uint64_t *)ð_hdr[6]->d_addr = dest_eth_addr[dst_port[6]]; 1213 *(uint64_t *)ð_hdr[7]->d_addr = dest_eth_addr[dst_port[7]]; 1214 1215 /* src addr */ 1216 rte_ether_addr_copy(&ports_eth_addr[dst_port[0]], ð_hdr[0]->s_addr); 1217 rte_ether_addr_copy(&ports_eth_addr[dst_port[1]], ð_hdr[1]->s_addr); 1218 rte_ether_addr_copy(&ports_eth_addr[dst_port[2]], ð_hdr[2]->s_addr); 1219 rte_ether_addr_copy(&ports_eth_addr[dst_port[3]], ð_hdr[3]->s_addr); 1220 rte_ether_addr_copy(&ports_eth_addr[dst_port[4]], ð_hdr[4]->s_addr); 1221 rte_ether_addr_copy(&ports_eth_addr[dst_port[5]], ð_hdr[5]->s_addr); 1222 rte_ether_addr_copy(&ports_eth_addr[dst_port[6]], ð_hdr[6]->s_addr); 1223 rte_ether_addr_copy(&ports_eth_addr[dst_port[7]], ð_hdr[7]->s_addr); 1224 1225 send_single_packet(m[0], dst_port[0]); 1226 send_single_packet(m[1], dst_port[1]); 1227 send_single_packet(m[2], dst_port[2]); 1228 send_single_packet(m[3], dst_port[3]); 1229 send_single_packet(m[4], dst_port[4]); 1230 send_single_packet(m[5], dst_port[5]); 1231 send_single_packet(m[6], dst_port[6]); 1232 send_single_packet(m[7], dst_port[7]); 1233 1234 } 1235 #endif /* APP_LOOKUP_METHOD */ 1236 1237 static __rte_always_inline void 1238 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid) 1239 { 1240 struct rte_ether_hdr *eth_hdr; 1241 struct rte_ipv4_hdr *ipv4_hdr; 1242 uint16_t dst_port; 1243 1244 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 1245 1246 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 1247 /* Handle IPv4 headers.*/ 1248 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 1249 sizeof(struct rte_ether_hdr)); 1250 1251 #ifdef DO_RFC_1812_CHECKS 1252 /* Check to make sure the packet is valid (RFC1812) */ 1253 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { 1254 rte_pktmbuf_free(m); 1255 return; 1256 } 1257 #endif 1258 1259 dst_port = get_ipv4_dst_port(ipv4_hdr, portid, 1260 RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct); 1261 if (dst_port >= RTE_MAX_ETHPORTS || 1262 (enabled_port_mask & 1 << dst_port) == 0) 1263 dst_port = portid; 1264 1265 #ifdef DO_RFC_1812_CHECKS 1266 /* Update time to live and header checksum */ 
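/*
 * (The usual l3fwd shortcut: taking one off the TTL lowers the big-endian
 * 16-bit word holding TTL and protocol by 0x0100, so the checksum has to go
 * up by the same amount; on a little-endian CPU the plain increment of the
 * stored field does exactly that, except in rare carry/wrap corner cases.
 * It is not a full RFC 1624 incremental update.)
 */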
1267 --(ipv4_hdr->time_to_live); 1268 ++(ipv4_hdr->hdr_checksum); 1269 #endif 1270 /* dst addr */ 1271 *(uint64_t *)ð_hdr->d_addr = dest_eth_addr[dst_port]; 1272 1273 /* src addr */ 1274 rte_ether_addr_copy(&ports_eth_addr[dst_port], 1275 ð_hdr->s_addr); 1276 1277 send_single_packet(m, dst_port); 1278 } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { 1279 /* Handle IPv6 headers.*/ 1280 struct rte_ipv6_hdr *ipv6_hdr; 1281 1282 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 1283 sizeof(struct rte_ether_hdr)); 1284 1285 dst_port = get_ipv6_dst_port(ipv6_hdr, portid, 1286 RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct); 1287 1288 if (dst_port >= RTE_MAX_ETHPORTS || 1289 (enabled_port_mask & 1 << dst_port) == 0) 1290 dst_port = portid; 1291 1292 /* dst addr */ 1293 *(uint64_t *)ð_hdr->d_addr = dest_eth_addr[dst_port]; 1294 1295 /* src addr */ 1296 rte_ether_addr_copy(&ports_eth_addr[dst_port], 1297 ð_hdr->s_addr); 1298 1299 send_single_packet(m, dst_port); 1300 } else 1301 /* Free the mbuf that contains non-IPV4/IPV6 packet */ 1302 rte_pktmbuf_free(m); 1303 } 1304 1305 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ 1306 (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) 1307 #ifdef DO_RFC_1812_CHECKS 1308 1309 #define IPV4_MIN_VER_IHL 0x45 1310 #define IPV4_MAX_VER_IHL 0x4f 1311 #define IPV4_MAX_VER_IHL_DIFF (IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL) 1312 1313 /* Minimum value of IPV4 total length (20B) in network byte order. */ 1314 #define IPV4_MIN_LEN_BE (sizeof(struct rte_ipv4_hdr) << 8) 1315 1316 /* 1317 * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2: 1318 * - The IP version number must be 4. 1319 * - The IP header length field must be large enough to hold the 1320 * minimum length legal IP datagram (20 bytes = 5 words). 1321 * - The IP total length field must be large enough to hold the IP 1322 * datagram header, whose length is specified in the IP header length 1323 * field. 1324 * If we encounter invalid IPV4 packet, then set destination port for it 1325 * to BAD_PORT value. 1326 */ 1327 static __rte_always_inline void 1328 rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) 1329 { 1330 uint8_t ihl; 1331 1332 if (RTE_ETH_IS_IPV4_HDR(ptype)) { 1333 ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL; 1334 1335 ipv4_hdr->time_to_live--; 1336 ipv4_hdr->hdr_checksum++; 1337 1338 if (ihl > IPV4_MAX_VER_IHL_DIFF || 1339 ((uint8_t)ipv4_hdr->total_length == 0 && 1340 ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) { 1341 dp[0] = BAD_PORT; 1342 } 1343 } 1344 } 1345 1346 #else 1347 #define rfc1812_process(mb, dp, ptype) do { } while (0) 1348 #endif /* DO_RFC_1812_CHECKS */ 1349 #endif /* APP_LOOKUP_LPM && ENABLE_MULTI_BUFFER_OPTIMIZE */ 1350 1351 1352 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ 1353 (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) 1354 1355 static __rte_always_inline uint16_t 1356 get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, uint16_t portid) 1357 { 1358 uint32_t next_hop; 1359 struct rte_ipv6_hdr *ipv6_hdr; 1360 struct rte_ether_hdr *eth_hdr; 1361 1362 if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { 1363 return (uint16_t) ((rte_lpm_lookup( 1364 RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dst_ipv4, 1365 &next_hop) == 0) ? 
next_hop : portid); 1366 1367 } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) { 1368 1369 eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); 1370 ipv6_hdr = (struct rte_ipv6_hdr *)(eth_hdr + 1); 1371 1372 return (uint16_t) ((rte_lpm6_lookup( 1373 RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct, 1374 ipv6_hdr->dst_addr, &next_hop) == 0) ? 1375 next_hop : portid); 1376 1377 } 1378 1379 return portid; 1380 } 1381 1382 static inline void 1383 process_packet(struct rte_mbuf *pkt, uint16_t *dst_port, uint16_t portid) 1384 { 1385 struct rte_ether_hdr *eth_hdr; 1386 struct rte_ipv4_hdr *ipv4_hdr; 1387 uint32_t dst_ipv4; 1388 uint16_t dp; 1389 __m128i te, ve; 1390 1391 eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); 1392 ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); 1393 1394 dst_ipv4 = ipv4_hdr->dst_addr; 1395 dst_ipv4 = rte_be_to_cpu_32(dst_ipv4); 1396 dp = get_dst_port(pkt, dst_ipv4, portid); 1397 1398 te = _mm_load_si128((__m128i *)eth_hdr); 1399 ve = val_eth[dp]; 1400 1401 dst_port[0] = dp; 1402 rfc1812_process(ipv4_hdr, dst_port, pkt->packet_type); 1403 1404 te = _mm_blend_epi16(te, ve, MASK_ETH); 1405 _mm_store_si128((__m128i *)eth_hdr, te); 1406 } 1407 1408 /* 1409 * Read packet_type and destination IPV4 addresses from 4 mbufs. 1410 */ 1411 static inline void 1412 processx4_step1(struct rte_mbuf *pkt[FWDSTEP], 1413 __m128i *dip, 1414 uint32_t *ipv4_flag) 1415 { 1416 struct rte_ipv4_hdr *ipv4_hdr; 1417 struct rte_ether_hdr *eth_hdr; 1418 uint32_t x0, x1, x2, x3; 1419 1420 eth_hdr = rte_pktmbuf_mtod(pkt[0], struct rte_ether_hdr *); 1421 ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); 1422 x0 = ipv4_hdr->dst_addr; 1423 ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4; 1424 1425 eth_hdr = rte_pktmbuf_mtod(pkt[1], struct rte_ether_hdr *); 1426 ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); 1427 x1 = ipv4_hdr->dst_addr; 1428 ipv4_flag[0] &= pkt[1]->packet_type; 1429 1430 eth_hdr = rte_pktmbuf_mtod(pkt[2], struct rte_ether_hdr *); 1431 ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); 1432 x2 = ipv4_hdr->dst_addr; 1433 ipv4_flag[0] &= pkt[2]->packet_type; 1434 1435 eth_hdr = rte_pktmbuf_mtod(pkt[3], struct rte_ether_hdr *); 1436 ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); 1437 x3 = ipv4_hdr->dst_addr; 1438 ipv4_flag[0] &= pkt[3]->packet_type; 1439 1440 dip[0] = _mm_set_epi32(x3, x2, x1, x0); 1441 } 1442 1443 /* 1444 * Lookup into LPM for destination port. 1445 * If lookup fails, use incoming port (portid) as destination port. 1446 */ 1447 static inline void 1448 processx4_step2(__m128i dip, 1449 uint32_t ipv4_flag, 1450 uint16_t portid, 1451 struct rte_mbuf *pkt[FWDSTEP], 1452 uint16_t dprt[FWDSTEP]) 1453 { 1454 rte_xmm_t dst; 1455 const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 1456 4, 5, 6, 7, 0, 1, 2, 3); 1457 1458 /* Byte swap 4 IPV4 addresses. */ 1459 dip = _mm_shuffle_epi8(dip, bswap_mask); 1460 1461 /* if all 4 packets are IPV4. */ 1462 if (likely(ipv4_flag)) { 1463 rte_lpm_lookupx4(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dip, 1464 dst.u32, portid); 1465 1466 /* get rid of unused upper 16 bit for each dport. 
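 * (_mm_packs_epi32 narrows the four 32-bit next hops to 16-bit lanes in the
 * low half of the register; port numbers are small, so the signed saturation
 * never triggers)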
*/ 1467 dst.x = _mm_packs_epi32(dst.x, dst.x); 1468 *(uint64_t *)dprt = dst.u64[0]; 1469 } else { 1470 dst.x = dip; 1471 dprt[0] = get_dst_port(pkt[0], dst.u32[0], portid); 1472 dprt[1] = get_dst_port(pkt[1], dst.u32[1], portid); 1473 dprt[2] = get_dst_port(pkt[2], dst.u32[2], portid); 1474 dprt[3] = get_dst_port(pkt[3], dst.u32[3], portid); 1475 } 1476 } 1477 1478 /* 1479 * Update source and destination MAC addresses in the ethernet header. 1480 * Perform RFC1812 checks and updates for IPV4 packets. 1481 */ 1482 static inline void 1483 processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) 1484 { 1485 __m128i te[FWDSTEP]; 1486 __m128i ve[FWDSTEP]; 1487 __m128i *p[FWDSTEP]; 1488 1489 p[0] = rte_pktmbuf_mtod(pkt[0], __m128i *); 1490 p[1] = rte_pktmbuf_mtod(pkt[1], __m128i *); 1491 p[2] = rte_pktmbuf_mtod(pkt[2], __m128i *); 1492 p[3] = rte_pktmbuf_mtod(pkt[3], __m128i *); 1493 1494 ve[0] = val_eth[dst_port[0]]; 1495 te[0] = _mm_load_si128(p[0]); 1496 1497 ve[1] = val_eth[dst_port[1]]; 1498 te[1] = _mm_load_si128(p[1]); 1499 1500 ve[2] = val_eth[dst_port[2]]; 1501 te[2] = _mm_load_si128(p[2]); 1502 1503 ve[3] = val_eth[dst_port[3]]; 1504 te[3] = _mm_load_si128(p[3]); 1505 1506 /* Update first 12 bytes, keep rest bytes intact. */ 1507 te[0] = _mm_blend_epi16(te[0], ve[0], MASK_ETH); 1508 te[1] = _mm_blend_epi16(te[1], ve[1], MASK_ETH); 1509 te[2] = _mm_blend_epi16(te[2], ve[2], MASK_ETH); 1510 te[3] = _mm_blend_epi16(te[3], ve[3], MASK_ETH); 1511 1512 _mm_store_si128(p[0], te[0]); 1513 _mm_store_si128(p[1], te[1]); 1514 _mm_store_si128(p[2], te[2]); 1515 _mm_store_si128(p[3], te[3]); 1516 1517 rfc1812_process((struct rte_ipv4_hdr *) 1518 ((struct rte_ether_hdr *)p[0] + 1), 1519 &dst_port[0], pkt[0]->packet_type); 1520 rfc1812_process((struct rte_ipv4_hdr *) 1521 ((struct rte_ether_hdr *)p[1] + 1), 1522 &dst_port[1], pkt[1]->packet_type); 1523 rfc1812_process((struct rte_ipv4_hdr *) 1524 ((struct rte_ether_hdr *)p[2] + 1), 1525 &dst_port[2], pkt[2]->packet_type); 1526 rfc1812_process((struct rte_ipv4_hdr *) 1527 ((struct rte_ether_hdr *)p[3] + 1), 1528 &dst_port[3], pkt[3]->packet_type); 1529 } 1530 1531 /* 1532 * We group consecutive packets with the same destination port into one burst. 1533 * To avoid extra latency, this is done together with some other packet 1534 * processing, but only after we have made a final decision about the packet's destination. 1535 * To do this we maintain: 1536 * pnum - array of number of consecutive packets with the same dest port for 1537 * each packet in the input burst. 1538 * lp - pointer to the last updated element in the pnum. 1539 * dlp - dest port value lp corresponds to. 1540 */ 1541 1542 #define GRPSZ (1 << FWDSTEP) 1543 #define GRPMSK (GRPSZ - 1) 1544 1545 #define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ 1546 if (likely((dlp) == (dcp)[(idx)])) { \ 1547 (lp)[0]++; \ 1548 } else { \ 1549 (dlp) = (dcp)[idx]; \ 1550 (lp) = (pn) + (idx); \ 1551 (lp)[0] = 1; \ 1552 } \ 1553 } while (0) 1554 1555 /* 1556 * Group consecutive packets with the same destination port in bursts of 4. 1557 * Suppose we have an array of destination ports: 1558 * dst_port[] = {a, b, c, d, e, ... } 1559 * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. 1560 * We do 4 comparisons at once and the result is a 4-bit mask. 1561 * This mask is used as an index into a prebuilt array of pnum values.
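 * For example, with dst_port[] = {1, 1, 2, 2} followed by e = 3, the
 * comparisons <a==b, b!=c, c==d, d!=e> give v = 5, and gptbl[5].pnum
 * unpacks to {2, 1, 2, 1}: one group of two packets starts at index 0
 * and another group of two starts at index 2.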
1562 */ 1563 static inline uint16_t * 1564 port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2) 1565 { 1566 static const struct { 1567 uint64_t pnum; /* prebuilt 4 values for pnum[]. */ 1568 int32_t idx; /* index of the new last updated element. */ 1569 uint16_t lpv; /* value to add to the last updated element. */ 1570 } gptbl[GRPSZ] = { 1571 { 1572 /* 0: a != b, b != c, c != d, d != e */ 1573 .pnum = UINT64_C(0x0001000100010001), 1574 .idx = 4, 1575 .lpv = 0, 1576 }, 1577 { 1578 /* 1: a == b, b != c, c != d, d != e */ 1579 .pnum = UINT64_C(0x0001000100010002), 1580 .idx = 4, 1581 .lpv = 1, 1582 }, 1583 { 1584 /* 2: a != b, b == c, c != d, d != e */ 1585 .pnum = UINT64_C(0x0001000100020001), 1586 .idx = 4, 1587 .lpv = 0, 1588 }, 1589 { 1590 /* 3: a == b, b == c, c != d, d != e */ 1591 .pnum = UINT64_C(0x0001000100020003), 1592 .idx = 4, 1593 .lpv = 2, 1594 }, 1595 { 1596 /* 4: a != b, b != c, c == d, d != e */ 1597 .pnum = UINT64_C(0x0001000200010001), 1598 .idx = 4, 1599 .lpv = 0, 1600 }, 1601 { 1602 /* 5: a == b, b != c, c == d, d != e */ 1603 .pnum = UINT64_C(0x0001000200010002), 1604 .idx = 4, 1605 .lpv = 1, 1606 }, 1607 { 1608 /* 6: a != b, b == c, c == d, d != e */ 1609 .pnum = UINT64_C(0x0001000200030001), 1610 .idx = 4, 1611 .lpv = 0, 1612 }, 1613 { 1614 /* 7: a == b, b == c, c == d, d != e */ 1615 .pnum = UINT64_C(0x0001000200030004), 1616 .idx = 4, 1617 .lpv = 3, 1618 }, 1619 { 1620 /* 8: a != b, b != c, c != d, d == e */ 1621 .pnum = UINT64_C(0x0002000100010001), 1622 .idx = 3, 1623 .lpv = 0, 1624 }, 1625 { 1626 /* 9: a == b, b != c, c != d, d == e */ 1627 .pnum = UINT64_C(0x0002000100010002), 1628 .idx = 3, 1629 .lpv = 1, 1630 }, 1631 { 1632 /* 0xa: a != b, b == c, c != d, d == e */ 1633 .pnum = UINT64_C(0x0002000100020001), 1634 .idx = 3, 1635 .lpv = 0, 1636 }, 1637 { 1638 /* 0xb: a == b, b == c, c != d, d == e */ 1639 .pnum = UINT64_C(0x0002000100020003), 1640 .idx = 3, 1641 .lpv = 2, 1642 }, 1643 { 1644 /* 0xc: a != b, b != c, c == d, d == e */ 1645 .pnum = UINT64_C(0x0002000300010001), 1646 .idx = 2, 1647 .lpv = 0, 1648 }, 1649 { 1650 /* 0xd: a == b, b != c, c == d, d == e */ 1651 .pnum = UINT64_C(0x0002000300010002), 1652 .idx = 2, 1653 .lpv = 1, 1654 }, 1655 { 1656 /* 0xe: a != b, b == c, c == d, d == e */ 1657 .pnum = UINT64_C(0x0002000300040001), 1658 .idx = 1, 1659 .lpv = 0, 1660 }, 1661 { 1662 /* 0xf: a == b, b == c, c == d, d == e */ 1663 .pnum = UINT64_C(0x0002000300040005), 1664 .idx = 0, 1665 .lpv = 4, 1666 }, 1667 }; 1668 1669 union { 1670 uint16_t u16[FWDSTEP + 1]; 1671 uint64_t u64; 1672 } *pnum = (void *)pn; 1673 1674 int32_t v; 1675 1676 dp1 = _mm_cmpeq_epi16(dp1, dp2); 1677 dp1 = _mm_unpacklo_epi16(dp1, dp1); 1678 v = _mm_movemask_ps((__m128)dp1); 1679 1680 /* update last port counter. */ 1681 lp[0] += gptbl[v].lpv; 1682 1683 /* if dest port value has changed.
*/ 1684 if (v != GRPMSK) { 1685 pnum->u64 = gptbl[v].pnum; 1686 pnum->u16[FWDSTEP] = 1; 1687 lp = pnum->u16 + gptbl[v].idx; 1688 } 1689 1690 return lp; 1691 } 1692 1693 #endif /* APP_LOOKUP_METHOD */ 1694 1695 static void 1696 process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx, 1697 uint16_t portid) 1698 { 1699 1700 int j; 1701 1702 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ 1703 (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) 1704 int32_t k; 1705 uint16_t dlp; 1706 uint16_t *lp; 1707 uint16_t dst_port[MAX_PKT_BURST]; 1708 __m128i dip[MAX_PKT_BURST / FWDSTEP]; 1709 uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; 1710 uint16_t pnum[MAX_PKT_BURST + 1]; 1711 #endif 1712 1713 1714 #if (ENABLE_MULTI_BUFFER_OPTIMIZE == 1) 1715 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1716 { 1717 /* 1718 * Send nb_rx - nb_rx%8 packets 1719 * in groups of 8. 1720 */ 1721 int32_t n = RTE_ALIGN_FLOOR(nb_rx, 8); 1722 1723 for (j = 0; j < n; j += 8) { 1724 uint32_t pkt_type = 1725 pkts_burst[j]->packet_type & 1726 pkts_burst[j+1]->packet_type & 1727 pkts_burst[j+2]->packet_type & 1728 pkts_burst[j+3]->packet_type & 1729 pkts_burst[j+4]->packet_type & 1730 pkts_burst[j+5]->packet_type & 1731 pkts_burst[j+6]->packet_type & 1732 pkts_burst[j+7]->packet_type; 1733 if (pkt_type & RTE_PTYPE_L3_IPV4) { 1734 simple_ipv4_fwd_8pkts(&pkts_burst[j], portid); 1735 } else if (pkt_type & 1736 RTE_PTYPE_L3_IPV6) { 1737 simple_ipv6_fwd_8pkts(&pkts_burst[j], portid); 1738 } else { 1739 l3fwd_simple_forward(pkts_burst[j], portid); 1740 l3fwd_simple_forward(pkts_burst[j+1], portid); 1741 l3fwd_simple_forward(pkts_burst[j+2], portid); 1742 l3fwd_simple_forward(pkts_burst[j+3], portid); 1743 l3fwd_simple_forward(pkts_burst[j+4], portid); 1744 l3fwd_simple_forward(pkts_burst[j+5], portid); 1745 l3fwd_simple_forward(pkts_burst[j+6], portid); 1746 l3fwd_simple_forward(pkts_burst[j+7], portid); 1747 } 1748 } 1749 for (; j < nb_rx ; j++) 1750 l3fwd_simple_forward(pkts_burst[j], portid); 1751 } 1752 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 1753 1754 k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); 1755 for (j = 0; j != k; j += FWDSTEP) 1756 processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP], 1757 &ipv4_flag[j / FWDSTEP]); 1758 1759 k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); 1760 for (j = 0; j != k; j += FWDSTEP) 1761 processx4_step2(dip[j / FWDSTEP], ipv4_flag[j / FWDSTEP], 1762 portid, &pkts_burst[j], &dst_port[j]); 1763 1764 /* 1765 * Finish packet processing and group consecutive 1766 * packets with the same destination port. 1767 */ 1768 k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); 1769 if (k != 0) { 1770 __m128i dp1, dp2; 1771 1772 lp = pnum; 1773 lp[0] = 1; 1774 1775 processx4_step3(pkts_burst, dst_port); 1776 1777 /* dp1: <d[0], d[1], d[2], d[3], ... > */ 1778 dp1 = _mm_loadu_si128((__m128i *)dst_port); 1779 1780 for (j = FWDSTEP; j != k; j += FWDSTEP) { 1781 processx4_step3(&pkts_burst[j], &dst_port[j]); 1782 1783 /* 1784 * dp2: 1785 * <d[j-3], d[j-2], d[j-1], d[j], ... > 1786 */ 1787 dp2 = _mm_loadu_si128( 1788 (__m128i *)&dst_port[j - FWDSTEP + 1]); 1789 lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); 1790 1791 /* 1792 * dp1: 1793 * <d[j], d[j+1], d[j+2], d[j+3], ... > 1794 */ 1795 dp1 = _mm_srli_si128(dp2, (FWDSTEP - 1) * 1796 sizeof(dst_port[0])); 1797 } 1798 1799 /* 1800 * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... > 1801 */ 1802 dp2 = _mm_shufflelo_epi16(dp1, 0xf9); 1803 lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); 1804 1805 /* 1806 * remove values added by the last repeated 1807 * dst port. 
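 * (the artificial d[j-1] == d[j-1] comparison above always matches, so the
 * last group was credited with one phantom packet; dlp remembers that port
 * so the scalar tail below can keep extending the same group)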
		 */
		lp[0]--;
		dlp = dst_port[j - 1];
	} else {
		/* set dlp and lp to values that are never used. */
		dlp = BAD_PORT - 1;
		lp = pnum + MAX_PKT_BURST;
	}

	/* Process up to last 3 packets one by one. */
	switch (nb_rx % FWDSTEP) {
	case 3:
		process_packet(pkts_burst[j], dst_port + j, portid);
		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
		j++;
		/* fall-through */
	case 2:
		process_packet(pkts_burst[j], dst_port + j, portid);
		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
		j++;
		/* fall-through */
	case 1:
		process_packet(pkts_burst[j], dst_port + j, portid);
		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
		j++;
	}

	/*
	 * Send packets out through the destination port.
	 * Consecutive packets with the same destination port
	 * are already grouped together.
	 * If the destination port for the packet equals BAD_PORT,
	 * then free the packet without sending it out.
	 */
	for (j = 0; j < nb_rx; j += k) {

		int32_t m;
		uint16_t pn;

		pn = dst_port[j];
		k = pnum[j];

		if (likely(pn != BAD_PORT))
			send_packetsx4(pn, pkts_burst + j, k);
		else
			for (m = j; m != j + k; m++)
				rte_pktmbuf_free(pkts_burst[m]);

	}

#endif /* APP_LOOKUP_METHOD */
#else /* ENABLE_MULTI_BUFFER_OPTIMIZE == 0 */

	/* Prefetch first packets */
	for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++)
		rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *));

	/* Prefetch and forward already prefetched packets */
	for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
		rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
				j + PREFETCH_OFFSET], void *));
		l3fwd_simple_forward(pkts_burst[j], portid);
	}

	/* Forward remaining prefetched packets */
	for (; j < nb_rx; j++)
		l3fwd_simple_forward(pkts_burst[j], portid);

#endif /* ENABLE_MULTI_BUFFER_OPTIMIZE */

}

#if (APP_CPU_LOAD > 0)

/*
 * CPU-load stats collector
 */
static int __rte_noreturn
cpu_load_collector(__rte_unused void *arg) {
	unsigned i, j, k;
	uint64_t hits;
	uint64_t prev_tsc, diff_tsc, cur_tsc;
	uint64_t total[MAX_CPU] = { 0 };
	unsigned min_cpu = MAX_CPU;
	unsigned max_cpu = 0;
	unsigned cpu_id;
	int busy_total = 0;
	int busy_flag = 0;

	unsigned int n_thread_per_cpu[MAX_CPU] = { 0 };
	struct thread_conf *thread_per_cpu[MAX_CPU][MAX_THREAD];

	struct thread_conf *thread_conf;

	const uint64_t interval_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
		US_PER_S * CPU_LOAD_TIMEOUT_US;

	prev_tsc = 0;
	/*
	 * Wait for all threads
	 */

	printf("Waiting for %d rx threads and %d tx threads\n", n_rx_thread,
			n_tx_thread);

	while (rte_atomic16_read(&rx_counter) < n_rx_thread)
		rte_pause();

	while (rte_atomic16_read(&tx_counter) < n_tx_thread)
		rte_pause();

	for (i = 0; i < n_rx_thread; i++) {

		thread_conf = &rx_thread[i].conf;
		cpu_id = thread_conf->cpu_id;
		thread_per_cpu[cpu_id][n_thread_per_cpu[cpu_id]++] = thread_conf;

		if (cpu_id > max_cpu)
			max_cpu = cpu_id;
		if (cpu_id < min_cpu)
			min_cpu = cpu_id;
	}
	for (i = 0; i < n_tx_thread; i++) {

		thread_conf = &tx_thread[i].conf;
		cpu_id = thread_conf->cpu_id;
		thread_per_cpu[cpu_id][n_thread_per_cpu[cpu_id]++] = thread_conf;

		if
(thread_conf->cpu_id > max_cpu) 1937 max_cpu = thread_conf->cpu_id; 1938 if (thread_conf->cpu_id < min_cpu) 1939 min_cpu = thread_conf->cpu_id; 1940 } 1941 1942 while (1) { 1943 1944 cpu_load.counter++; 1945 for (i = min_cpu; i <= max_cpu; i++) { 1946 for (j = 0; j < MAX_CPU_COUNTER; j++) { 1947 for (k = 0; k < n_thread_per_cpu[i]; k++) 1948 if (thread_per_cpu[i][k]->busy[j]) { 1949 busy_flag = 1; 1950 break; 1951 } 1952 if (busy_flag) { 1953 cpu_load.hits[j][i]++; 1954 busy_total = 1; 1955 busy_flag = 0; 1956 } 1957 } 1958 1959 if (busy_total) { 1960 total[i]++; 1961 busy_total = 0; 1962 } 1963 } 1964 1965 cur_tsc = rte_rdtsc(); 1966 1967 diff_tsc = cur_tsc - prev_tsc; 1968 if (unlikely(diff_tsc > interval_tsc)) { 1969 1970 printf("\033c"); 1971 1972 printf("Cpu usage for %d rx threads and %d tx threads:\n\n", 1973 n_rx_thread, n_tx_thread); 1974 1975 printf("cpu# proc%% poll%% overhead%%\n\n"); 1976 1977 for (i = min_cpu; i <= max_cpu; i++) { 1978 hits = 0; 1979 printf("CPU %d:", i); 1980 for (j = 0; j < MAX_CPU_COUNTER; j++) { 1981 printf("%7" PRIu64 "", 1982 cpu_load.hits[j][i] * 100 / cpu_load.counter); 1983 hits += cpu_load.hits[j][i]; 1984 cpu_load.hits[j][i] = 0; 1985 } 1986 printf("%7" PRIu64 "\n", 1987 100 - total[i] * 100 / cpu_load.counter); 1988 total[i] = 0; 1989 } 1990 cpu_load.counter = 0; 1991 1992 prev_tsc = cur_tsc; 1993 } 1994 1995 } 1996 } 1997 #endif /* APP_CPU_LOAD */ 1998 1999 /* 2000 * Null processing lthread loop 2001 * 2002 * This loop is used to start empty scheduler on lcore. 2003 */ 2004 static void * 2005 lthread_null(__rte_unused void *args) 2006 { 2007 int lcore_id = rte_lcore_id(); 2008 2009 RTE_LOG(INFO, L3FWD, "Starting scheduler on lcore %d.\n", lcore_id); 2010 lthread_exit(NULL); 2011 return NULL; 2012 } 2013 2014 /* main processing loop */ 2015 static void * 2016 lthread_tx_per_ring(void *dummy) 2017 { 2018 int nb_rx; 2019 uint16_t portid; 2020 struct rte_ring *ring; 2021 struct thread_tx_conf *tx_conf; 2022 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 2023 struct lthread_cond *ready; 2024 2025 tx_conf = (struct thread_tx_conf *)dummy; 2026 ring = tx_conf->ring; 2027 ready = *tx_conf->ready; 2028 2029 lthread_set_data((void *)tx_conf); 2030 2031 /* 2032 * Move this lthread to lcore 2033 */ 2034 lthread_set_affinity(tx_conf->conf.lcore_id); 2035 2036 RTE_LOG(INFO, L3FWD, "entering main tx loop on lcore %u\n", rte_lcore_id()); 2037 2038 nb_rx = 0; 2039 rte_atomic16_inc(&tx_counter); 2040 while (1) { 2041 2042 /* 2043 * Read packet from ring 2044 */ 2045 SET_CPU_BUSY(tx_conf, CPU_POLL); 2046 nb_rx = rte_ring_sc_dequeue_burst(ring, (void **)pkts_burst, 2047 MAX_PKT_BURST, NULL); 2048 SET_CPU_IDLE(tx_conf, CPU_POLL); 2049 2050 if (nb_rx > 0) { 2051 SET_CPU_BUSY(tx_conf, CPU_PROCESS); 2052 portid = pkts_burst[0]->port; 2053 process_burst(pkts_burst, nb_rx, portid); 2054 SET_CPU_IDLE(tx_conf, CPU_PROCESS); 2055 lthread_yield(); 2056 } else 2057 lthread_cond_wait(ready, 0); 2058 2059 } 2060 return NULL; 2061 } 2062 2063 /* 2064 * Main tx-lthreads spawner lthread. 2065 * 2066 * This lthread is used to spawn one new lthread per ring from producers. 
 *
 */
static void *
lthread_tx(void *args)
{
	struct lthread *lt;

	unsigned lcore_id;
	uint16_t portid;
	struct thread_tx_conf *tx_conf;

	tx_conf = (struct thread_tx_conf *)args;
	lthread_set_data((void *)tx_conf);

	/*
	 * Move this lthread to the selected lcore
	 */
	lthread_set_affinity(tx_conf->conf.lcore_id);

	/*
	 * Spawn tx readers (one per input ring)
	 */
	lthread_create(&lt, tx_conf->conf.lcore_id, lthread_tx_per_ring,
			(void *)tx_conf);

	lcore_id = rte_lcore_id();

	RTE_LOG(INFO, L3FWD, "Entering Tx main loop on lcore %u\n", lcore_id);

	tx_conf->conf.cpu_id = sched_getcpu();
	while (1) {

		lthread_sleep(BURST_TX_DRAIN_US * 1000);

		/*
		 * TX burst queue drain
		 */
		for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
			if (tx_conf->tx_mbufs[portid].len == 0)
				continue;
			SET_CPU_BUSY(tx_conf, CPU_PROCESS);
			send_burst(tx_conf, tx_conf->tx_mbufs[portid].len, portid);
			SET_CPU_IDLE(tx_conf, CPU_PROCESS);
			tx_conf->tx_mbufs[portid].len = 0;
		}

	}
	return NULL;
}

static void *
lthread_rx(void *dummy)
{
	int ret;
	uint16_t nb_rx;
	int i;
	uint16_t portid;
	uint8_t queueid;
	int worker_id;
	int len[RTE_MAX_LCORE] = { 0 };
	int old_len, new_len;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	struct thread_rx_conf *rx_conf;

	rx_conf = (struct thread_rx_conf *)dummy;
	lthread_set_data((void *)rx_conf);

	/*
	 * Move this lthread to lcore
	 */
	lthread_set_affinity(rx_conf->conf.lcore_id);

	if (rx_conf->n_rx_queue == 0) {
		RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", rte_lcore_id());
		return NULL;
	}

	RTE_LOG(INFO, L3FWD, "Entering main Rx loop on lcore %u\n", rte_lcore_id());

	for (i = 0; i < rx_conf->n_rx_queue; i++) {

		portid = rx_conf->rx_queue_list[i].port_id;
		queueid = rx_conf->rx_queue_list[i].queue_id;
		RTE_LOG(INFO, L3FWD,
			" -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
			rte_lcore_id(), portid, queueid);
	}

	/*
	 * Init all condition variables (one per rx thread)
	 */
	for (i = 0; i < rx_conf->n_rx_queue; i++)
		lthread_cond_init(NULL, &rx_conf->ready[i], NULL);

	worker_id = 0;

	rx_conf->conf.cpu_id = sched_getcpu();
	rte_atomic16_inc(&rx_counter);
	while (1) {

		/*
		 * Read packet from RX queues
		 */
		for (i = 0; i < rx_conf->n_rx_queue; ++i) {
			portid = rx_conf->rx_queue_list[i].port_id;
			queueid = rx_conf->rx_queue_list[i].queue_id;

			SET_CPU_BUSY(rx_conf, CPU_POLL);
			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
				MAX_PKT_BURST);
			SET_CPU_IDLE(rx_conf, CPU_POLL);

			if (nb_rx != 0) {
				worker_id = (worker_id + 1) % rx_conf->n_ring;
				old_len = len[worker_id];

				SET_CPU_BUSY(rx_conf, CPU_PROCESS);
				ret = rte_ring_sp_enqueue_burst(
						rx_conf->ring[worker_id],
						(void **) pkts_burst,
						nb_rx, NULL);

				new_len = old_len + ret;

				if (new_len >= BURST_SIZE) {
					lthread_cond_signal(rx_conf->ready[worker_id]);
					new_len = 0;
				}

				len[worker_id] = new_len;

				if (unlikely(ret < nb_rx)) {
					uint32_t k;

					for (k = ret; k < nb_rx; k++) {
						struct rte_mbuf *m = pkts_burst[k];

						rte_pktmbuf_free(m);
					}
				}
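				/*
				 * Note: mbufs that did not fit into the worker ring
				 * are dropped above rather than retried, and the tx
				 * lthread is only signalled once roughly BURST_SIZE
				 * packets have been queued for it.
				 */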
				SET_CPU_IDLE(rx_conf, CPU_PROCESS);
			}

			lthread_yield();
		}
	}
	return NULL;
}

/*
 * Start scheduler with initial lthread on lcore
 *
 * This lthread loop spawns all rx and tx lthreads on main lcore
 */

static void *
lthread_spawner(__rte_unused void *arg)
{
	struct lthread *lt[MAX_THREAD];
	int i;
	int n_thread = 0;

	printf("Entering lthread_spawner\n");

	/*
	 * Create producers (rx threads) on default lcore
	 */
	for (i = 0; i < n_rx_thread; i++) {
		rx_thread[i].conf.thread_id = i;
		lthread_create(&lt[n_thread], -1, lthread_rx,
				(void *)&rx_thread[i]);
		n_thread++;
	}

	/*
	 * Wait for all producers. Until some producers can be started on the same
	 * scheduler as this lthread, yielding is required to let them run and
	 * prevent deadlock here.
	 */
	while (rte_atomic16_read(&rx_counter) < n_rx_thread)
		lthread_sleep(100000);

	/*
	 * Create consumers (tx threads) on default lcore_id
	 */
	for (i = 0; i < n_tx_thread; i++) {
		tx_thread[i].conf.thread_id = i;
		lthread_create(&lt[n_thread], -1, lthread_tx,
				(void *)&tx_thread[i]);
		n_thread++;
	}

	/*
	 * Wait for all threads to finish
	 */
	for (i = 0; i < n_thread; i++)
		lthread_join(lt[i], NULL);

	return NULL;
}

/*
 * Start main scheduler with initial lthread spawning rx and tx lthreads
 * (main_lthread_main).
 */
static int
lthread_main_spawner(__rte_unused void *arg) {
	struct lthread *lt;
	int lcore_id = rte_lcore_id();

	RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
	lthread_create(&lt, -1, lthread_spawner, NULL);
	lthread_run();

	return 0;
}

/*
 * Start scheduler on lcore.
 */
static int
sched_spawner(__rte_unused void *arg) {
	struct lthread *lt;
	int lcore_id = rte_lcore_id();

#if (APP_CPU_LOAD)
	if (lcore_id == cpu_load_lcore_id) {
		cpu_load_collector(arg);
		return 0;
	}
#endif /* APP_CPU_LOAD */

	RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
	lthread_create(&lt, -1, lthread_null, NULL);
	lthread_run();

	return 0;
}

/* main processing loop */
static int __rte_noreturn
pthread_tx(void *dummy)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	uint64_t prev_tsc, diff_tsc, cur_tsc;
	int nb_rx;
	uint16_t portid;
	struct thread_tx_conf *tx_conf;

	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
		US_PER_S * BURST_TX_DRAIN_US;

	prev_tsc = 0;

	tx_conf = (struct thread_tx_conf *)dummy;

	RTE_LOG(INFO, L3FWD, "Entering main Tx loop on lcore %u\n", rte_lcore_id());

	tx_conf->conf.cpu_id = sched_getcpu();
	rte_atomic16_inc(&tx_counter);
	while (1) {

		cur_tsc = rte_rdtsc();

		/*
		 * TX burst queue drain
		 */
		diff_tsc = cur_tsc - prev_tsc;
		if (unlikely(diff_tsc > drain_tsc)) {

			/*
			 * This could be optimized (use queueid instead of
			 * portid), but it is not called so often
			 */
			SET_CPU_BUSY(tx_conf, CPU_PROCESS);
			for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
				if (tx_conf->tx_mbufs[portid].len == 0)
					continue;
				send_burst(tx_conf, tx_conf->tx_mbufs[portid].len, portid);
				tx_conf->tx_mbufs[portid].len = 0;
			}
			SET_CPU_IDLE(tx_conf, CPU_PROCESS);

			prev_tsc = cur_tsc;
		}

		/*
		 * Read packet from ring
		 */
		SET_CPU_BUSY(tx_conf, CPU_POLL);
		nb_rx = rte_ring_sc_dequeue_burst(tx_conf->ring,
				(void **)pkts_burst, MAX_PKT_BURST, NULL);
		SET_CPU_IDLE(tx_conf, CPU_POLL);

		if (unlikely(nb_rx == 0)) {
			sched_yield();
			continue;
		}

		SET_CPU_BUSY(tx_conf, CPU_PROCESS);
		portid = pkts_burst[0]->port;
		process_burst(pkts_burst, nb_rx, portid);
		SET_CPU_IDLE(tx_conf, CPU_PROCESS);

	}
}

static int
pthread_rx(void *dummy)
{
	int i;
	int worker_id;
	uint32_t n;
	uint32_t nb_rx;
	unsigned lcore_id;
	uint8_t queueid;
	uint16_t portid;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];

	struct thread_rx_conf *rx_conf;

	lcore_id = rte_lcore_id();
	rx_conf = (struct thread_rx_conf *)dummy;

	if (rx_conf->n_rx_queue == 0) {
		RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
		return 0;
	}

	RTE_LOG(INFO, L3FWD, "entering main rx loop on lcore %u\n", lcore_id);

	for (i = 0; i < rx_conf->n_rx_queue; i++) {

		portid = rx_conf->rx_queue_list[i].port_id;
		queueid = rx_conf->rx_queue_list[i].queue_id;
		RTE_LOG(INFO, L3FWD,
			" -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
			lcore_id, portid, queueid);
	}

	worker_id = 0;
	rx_conf->conf.cpu_id = sched_getcpu();
	rte_atomic16_inc(&rx_counter);
	while (1) {

		/*
		 * Read packet from RX queues
		 */
		for (i = 0; i < rx_conf->n_rx_queue; ++i) {
			portid = rx_conf->rx_queue_list[i].port_id;
			queueid = rx_conf->rx_queue_list[i].queue_id;

			SET_CPU_BUSY(rx_conf, CPU_POLL);
			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
				MAX_PKT_BURST);
			SET_CPU_IDLE(rx_conf,
CPU_POLL); 2423 2424 if (nb_rx == 0) { 2425 sched_yield(); 2426 continue; 2427 } 2428 2429 SET_CPU_BUSY(rx_conf, CPU_PROCESS); 2430 worker_id = (worker_id + 1) % rx_conf->n_ring; 2431 n = rte_ring_sp_enqueue_burst(rx_conf->ring[worker_id], 2432 (void **)pkts_burst, nb_rx, NULL); 2433 2434 if (unlikely(n != nb_rx)) { 2435 uint32_t k; 2436 2437 for (k = n; k < nb_rx; k++) { 2438 struct rte_mbuf *m = pkts_burst[k]; 2439 2440 rte_pktmbuf_free(m); 2441 } 2442 } 2443 2444 SET_CPU_IDLE(rx_conf, CPU_PROCESS); 2445 2446 } 2447 } 2448 } 2449 2450 /* 2451 * P-Thread spawner. 2452 */ 2453 static int 2454 pthread_run(__rte_unused void *arg) { 2455 int lcore_id = rte_lcore_id(); 2456 int i; 2457 2458 for (i = 0; i < n_rx_thread; i++) 2459 if (rx_thread[i].conf.lcore_id == lcore_id) { 2460 printf("Start rx thread on %d...\n", lcore_id); 2461 RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id]; 2462 RTE_PER_LCORE(lcore_conf)->data = (void *)&rx_thread[i]; 2463 pthread_rx((void *)&rx_thread[i]); 2464 return 0; 2465 } 2466 2467 for (i = 0; i < n_tx_thread; i++) 2468 if (tx_thread[i].conf.lcore_id == lcore_id) { 2469 printf("Start tx thread on %d...\n", lcore_id); 2470 RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id]; 2471 RTE_PER_LCORE(lcore_conf)->data = (void *)&tx_thread[i]; 2472 pthread_tx((void *)&tx_thread[i]); 2473 return 0; 2474 } 2475 2476 #if (APP_CPU_LOAD) 2477 if (lcore_id == cpu_load_lcore_id) 2478 cpu_load_collector(arg); 2479 #endif /* APP_CPU_LOAD */ 2480 2481 return 0; 2482 } 2483 2484 static int 2485 check_lcore_params(void) 2486 { 2487 uint8_t queue, lcore; 2488 uint16_t i; 2489 int socketid; 2490 2491 for (i = 0; i < nb_rx_thread_params; ++i) { 2492 queue = rx_thread_params[i].queue_id; 2493 if (queue >= MAX_RX_QUEUE_PER_PORT) { 2494 printf("invalid queue number: %hhu\n", queue); 2495 return -1; 2496 } 2497 lcore = rx_thread_params[i].lcore_id; 2498 if (!rte_lcore_is_enabled(lcore)) { 2499 printf("error: lcore %hhu is not enabled in lcore mask\n", lcore); 2500 return -1; 2501 } 2502 socketid = rte_lcore_to_socket_id(lcore); 2503 if ((socketid != 0) && (numa_on == 0)) 2504 printf("warning: lcore %hhu is on socket %d with numa off\n", 2505 lcore, socketid); 2506 } 2507 return 0; 2508 } 2509 2510 static int 2511 check_port_config(void) 2512 { 2513 unsigned portid; 2514 uint16_t i; 2515 2516 for (i = 0; i < nb_rx_thread_params; ++i) { 2517 portid = rx_thread_params[i].port_id; 2518 if ((enabled_port_mask & (1 << portid)) == 0) { 2519 printf("port %u is not enabled in port mask\n", portid); 2520 return -1; 2521 } 2522 if (!rte_eth_dev_is_valid_port(portid)) { 2523 printf("port %u is not present on the board\n", portid); 2524 return -1; 2525 } 2526 } 2527 return 0; 2528 } 2529 2530 static uint8_t 2531 get_port_n_rx_queues(const uint16_t port) 2532 { 2533 int queue = -1; 2534 uint16_t i; 2535 2536 for (i = 0; i < nb_rx_thread_params; ++i) 2537 if (rx_thread_params[i].port_id == port && 2538 rx_thread_params[i].queue_id > queue) 2539 queue = rx_thread_params[i].queue_id; 2540 2541 return (uint8_t)(++queue); 2542 } 2543 2544 static int 2545 init_rx_rings(void) 2546 { 2547 unsigned socket_io; 2548 struct thread_rx_conf *rx_conf; 2549 struct thread_tx_conf *tx_conf; 2550 unsigned rx_thread_id, tx_thread_id; 2551 char name[256]; 2552 struct rte_ring *ring = NULL; 2553 2554 for (tx_thread_id = 0; tx_thread_id < n_tx_thread; tx_thread_id++) { 2555 2556 tx_conf = &tx_thread[tx_thread_id]; 2557 2558 printf("Connecting tx-thread %d with rx-thread %d\n", tx_thread_id, 2559 tx_conf->conf.thread_id); 2560 
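		/*
		 * Each tx thread is wired to the rx thread named by the
		 * "thread" field of its --tx entry: a single-producer/
		 * single-consumer ring is created below and registered on
		 * both sides. For example (values purely illustrative),
		 * --rx="(0,0,1,0)" --tx="(2,0)" connects rx thread 0 on
		 * lcore 1 to a tx thread on lcore 2 through one such ring.
		 */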
		rx_thread_id = tx_conf->conf.thread_id;
		if (rx_thread_id > n_tx_thread) {
			printf("connection from tx-thread %u to rx-thread %u fails "
					"(rx-thread not defined)\n", tx_thread_id, rx_thread_id);
			return -1;
		}

		rx_conf = &rx_thread[rx_thread_id];
		socket_io = rte_lcore_to_socket_id(rx_conf->conf.lcore_id);

		snprintf(name, sizeof(name), "app_ring_s%u_rx%u_tx%u",
				socket_io, rx_thread_id, tx_thread_id);

		ring = rte_ring_create(name, 1024 * 4, socket_io,
				RING_F_SP_ENQ | RING_F_SC_DEQ);

		if (ring == NULL) {
			rte_panic("Cannot create ring to connect rx-thread %u "
					"with tx-thread %u\n", rx_thread_id, tx_thread_id);
		}

		rx_conf->ring[rx_conf->n_ring] = ring;

		tx_conf->ring = ring;
		tx_conf->ready = &rx_conf->ready[rx_conf->n_ring];

		rx_conf->n_ring++;
	}
	return 0;
}

static int
init_rx_queues(void)
{
	uint16_t i, nb_rx_queue;
	uint8_t thread;

	n_rx_thread = 0;

	for (i = 0; i < nb_rx_thread_params; ++i) {
		thread = rx_thread_params[i].thread_id;
		nb_rx_queue = rx_thread[thread].n_rx_queue;

		if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
			printf("error: too many queues (%u) for thread: %u\n",
				(unsigned)nb_rx_queue + 1, (unsigned)thread);
			return -1;
		}

		rx_thread[thread].conf.thread_id = thread;
		rx_thread[thread].conf.lcore_id = rx_thread_params[i].lcore_id;
		rx_thread[thread].rx_queue_list[nb_rx_queue].port_id =
			rx_thread_params[i].port_id;
		rx_thread[thread].rx_queue_list[nb_rx_queue].queue_id =
			rx_thread_params[i].queue_id;
		rx_thread[thread].n_rx_queue++;

		if (thread >= n_rx_thread)
			n_rx_thread = thread + 1;

	}
	return 0;
}

static int
init_tx_threads(void)
{
	int i;

	n_tx_thread = 0;
	for (i = 0; i < nb_tx_thread_params; ++i) {
		tx_thread[n_tx_thread].conf.thread_id = tx_thread_params[i].thread_id;
		tx_thread[n_tx_thread].conf.lcore_id = tx_thread_params[i].lcore_id;
		n_tx_thread++;
	}
	return 0;
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK -P"
		" [--rx (port,queue,lcore,thread)[,(port,queue,lcore,thread)]]"
		" [--tx (lcore,thread)[,(lcore,thread)]]"
		" [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
		" [--parse-ptype]\n\n"
		" -p PORTMASK: hexadecimal bitmask of ports to configure\n"
		" -P : enable promiscuous mode\n"
		" --rx (port,queue,lcore,thread): rx queues configuration\n"
		" --tx (lcore,thread): tx threads configuration\n"
		" --stat-lcore LCORE: use lcore for stat collector\n"
		" --eth-dest=X,MM:MM:MM:MM:MM:MM: optional, ethernet destination for port X\n"
		" --no-numa: optional, disable numa awareness\n"
		" --ipv6: optional, specify when handling IPv6 packets\n"
		" --enable-jumbo: enable jumbo frames;"
		" the maximum packet length PKTLEN is given in decimal (64-9600)\n"
		" --hash-entry-num: specify the number of hash entries to set up, in hexadecimal\n"
		" --no-lthreads: turn off lthread model\n"
		" --parse-ptype: use software to analyze packet type\n\n",
		prgname);
}

static int parse_max_pkt_len(const char *pktlen)
{
	char *end = NULL;
	unsigned long len;

	/* parse decimal string */
	len = strtoul(pktlen, &end, 10);
	if ((pktlen[0] == '\0') || (end == NULL) ||
(*end != '\0')) 2672 return -1; 2673 2674 if (len == 0) 2675 return -1; 2676 2677 return len; 2678 } 2679 2680 static int 2681 parse_portmask(const char *portmask) 2682 { 2683 char *end = NULL; 2684 unsigned long pm; 2685 2686 /* parse hexadecimal string */ 2687 pm = strtoul(portmask, &end, 16); 2688 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 2689 return 0; 2690 2691 return pm; 2692 } 2693 2694 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2695 static int 2696 parse_hash_entry_number(const char *hash_entry_num) 2697 { 2698 char *end = NULL; 2699 unsigned long hash_en; 2700 2701 /* parse hexadecimal string */ 2702 hash_en = strtoul(hash_entry_num, &end, 16); 2703 if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0')) 2704 return -1; 2705 2706 if (hash_en == 0) 2707 return -1; 2708 2709 return hash_en; 2710 } 2711 #endif 2712 2713 static int 2714 parse_rx_config(const char *q_arg) 2715 { 2716 char s[256]; 2717 const char *p, *p0 = q_arg; 2718 char *end; 2719 enum fieldnames { 2720 FLD_PORT = 0, 2721 FLD_QUEUE, 2722 FLD_LCORE, 2723 FLD_THREAD, 2724 _NUM_FLD 2725 }; 2726 unsigned long int_fld[_NUM_FLD]; 2727 char *str_fld[_NUM_FLD]; 2728 int i; 2729 unsigned size; 2730 2731 nb_rx_thread_params = 0; 2732 2733 while ((p = strchr(p0, '(')) != NULL) { 2734 ++p; 2735 p0 = strchr(p, ')'); 2736 if (p0 == NULL) 2737 return -1; 2738 2739 size = p0 - p; 2740 if (size >= sizeof(s)) 2741 return -1; 2742 2743 snprintf(s, sizeof(s), "%.*s", size, p); 2744 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) 2745 return -1; 2746 for (i = 0; i < _NUM_FLD; i++) { 2747 errno = 0; 2748 int_fld[i] = strtoul(str_fld[i], &end, 0); 2749 if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) 2750 return -1; 2751 } 2752 if (nb_rx_thread_params >= MAX_LCORE_PARAMS) { 2753 printf("exceeded max number of rx params: %hu\n", 2754 nb_rx_thread_params); 2755 return -1; 2756 } 2757 rx_thread_params_array[nb_rx_thread_params].port_id = 2758 int_fld[FLD_PORT]; 2759 rx_thread_params_array[nb_rx_thread_params].queue_id = 2760 (uint8_t)int_fld[FLD_QUEUE]; 2761 rx_thread_params_array[nb_rx_thread_params].lcore_id = 2762 (uint8_t)int_fld[FLD_LCORE]; 2763 rx_thread_params_array[nb_rx_thread_params].thread_id = 2764 (uint8_t)int_fld[FLD_THREAD]; 2765 ++nb_rx_thread_params; 2766 } 2767 rx_thread_params = rx_thread_params_array; 2768 return 0; 2769 } 2770 2771 static int 2772 parse_tx_config(const char *q_arg) 2773 { 2774 char s[256]; 2775 const char *p, *p0 = q_arg; 2776 char *end; 2777 enum fieldnames { 2778 FLD_LCORE = 0, 2779 FLD_THREAD, 2780 _NUM_FLD 2781 }; 2782 unsigned long int_fld[_NUM_FLD]; 2783 char *str_fld[_NUM_FLD]; 2784 int i; 2785 unsigned size; 2786 2787 nb_tx_thread_params = 0; 2788 2789 while ((p = strchr(p0, '(')) != NULL) { 2790 ++p; 2791 p0 = strchr(p, ')'); 2792 if (p0 == NULL) 2793 return -1; 2794 2795 size = p0 - p; 2796 if (size >= sizeof(s)) 2797 return -1; 2798 2799 snprintf(s, sizeof(s), "%.*s", size, p); 2800 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) 2801 return -1; 2802 for (i = 0; i < _NUM_FLD; i++) { 2803 errno = 0; 2804 int_fld[i] = strtoul(str_fld[i], &end, 0); 2805 if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) 2806 return -1; 2807 } 2808 if (nb_tx_thread_params >= MAX_LCORE_PARAMS) { 2809 printf("exceeded max number of tx params: %hu\n", 2810 nb_tx_thread_params); 2811 return -1; 2812 } 2813 tx_thread_params_array[nb_tx_thread_params].lcore_id = 2814 (uint8_t)int_fld[FLD_LCORE]; 2815 
tx_thread_params_array[nb_tx_thread_params].thread_id = 2816 (uint8_t)int_fld[FLD_THREAD]; 2817 ++nb_tx_thread_params; 2818 } 2819 tx_thread_params = tx_thread_params_array; 2820 2821 return 0; 2822 } 2823 2824 #if (APP_CPU_LOAD > 0) 2825 static int 2826 parse_stat_lcore(const char *stat_lcore) 2827 { 2828 char *end = NULL; 2829 unsigned long lcore_id; 2830 2831 lcore_id = strtoul(stat_lcore, &end, 10); 2832 if ((stat_lcore[0] == '\0') || (end == NULL) || (*end != '\0')) 2833 return -1; 2834 2835 return lcore_id; 2836 } 2837 #endif 2838 2839 static void 2840 parse_eth_dest(const char *optarg) 2841 { 2842 uint16_t portid; 2843 char *port_end; 2844 uint8_t c, *dest, peer_addr[6]; 2845 2846 errno = 0; 2847 portid = strtoul(optarg, &port_end, 10); 2848 if (errno != 0 || port_end == optarg || *port_end++ != ',') 2849 rte_exit(EXIT_FAILURE, 2850 "Invalid eth-dest: %s", optarg); 2851 if (portid >= RTE_MAX_ETHPORTS) 2852 rte_exit(EXIT_FAILURE, 2853 "eth-dest: port %d >= RTE_MAX_ETHPORTS(%d)\n", 2854 portid, RTE_MAX_ETHPORTS); 2855 2856 if (cmdline_parse_etheraddr(NULL, port_end, 2857 &peer_addr, sizeof(peer_addr)) < 0) 2858 rte_exit(EXIT_FAILURE, 2859 "Invalid ethernet address: %s\n", 2860 port_end); 2861 dest = (uint8_t *)&dest_eth_addr[portid]; 2862 for (c = 0; c < 6; c++) 2863 dest[c] = peer_addr[c]; 2864 *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid]; 2865 } 2866 2867 #define CMD_LINE_OPT_RX_CONFIG "rx" 2868 #define CMD_LINE_OPT_TX_CONFIG "tx" 2869 #define CMD_LINE_OPT_STAT_LCORE "stat-lcore" 2870 #define CMD_LINE_OPT_ETH_DEST "eth-dest" 2871 #define CMD_LINE_OPT_NO_NUMA "no-numa" 2872 #define CMD_LINE_OPT_IPV6 "ipv6" 2873 #define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo" 2874 #define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num" 2875 #define CMD_LINE_OPT_NO_LTHREADS "no-lthreads" 2876 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" 2877 2878 /* Parse the argument given in the command line of the application */ 2879 static int 2880 parse_args(int argc, char **argv) 2881 { 2882 int opt, ret; 2883 char **argvopt; 2884 int option_index; 2885 char *prgname = argv[0]; 2886 static struct option lgopts[] = { 2887 {CMD_LINE_OPT_RX_CONFIG, 1, 0, 0}, 2888 {CMD_LINE_OPT_TX_CONFIG, 1, 0, 0}, 2889 {CMD_LINE_OPT_STAT_LCORE, 1, 0, 0}, 2890 {CMD_LINE_OPT_ETH_DEST, 1, 0, 0}, 2891 {CMD_LINE_OPT_NO_NUMA, 0, 0, 0}, 2892 {CMD_LINE_OPT_IPV6, 0, 0, 0}, 2893 {CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0}, 2894 {CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0}, 2895 {CMD_LINE_OPT_NO_LTHREADS, 0, 0, 0}, 2896 {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, 2897 {NULL, 0, 0, 0} 2898 }; 2899 2900 argvopt = argv; 2901 2902 while ((opt = getopt_long(argc, argvopt, "p:P", 2903 lgopts, &option_index)) != EOF) { 2904 2905 switch (opt) { 2906 /* portmask */ 2907 case 'p': 2908 enabled_port_mask = parse_portmask(optarg); 2909 if (enabled_port_mask == 0) { 2910 printf("invalid portmask\n"); 2911 print_usage(prgname); 2912 return -1; 2913 } 2914 break; 2915 case 'P': 2916 printf("Promiscuous mode selected\n"); 2917 promiscuous_on = 1; 2918 break; 2919 2920 /* long options */ 2921 case 0: 2922 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_RX_CONFIG, 2923 sizeof(CMD_LINE_OPT_RX_CONFIG))) { 2924 ret = parse_rx_config(optarg); 2925 if (ret) { 2926 printf("invalid rx-config\n"); 2927 print_usage(prgname); 2928 return -1; 2929 } 2930 } 2931 2932 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_TX_CONFIG, 2933 sizeof(CMD_LINE_OPT_TX_CONFIG))) { 2934 ret = parse_tx_config(optarg); 2935 if (ret) { 2936 printf("invalid tx-config\n"); 2937 
print_usage(prgname); 2938 return -1; 2939 } 2940 } 2941 2942 #if (APP_CPU_LOAD > 0) 2943 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_STAT_LCORE, 2944 sizeof(CMD_LINE_OPT_STAT_LCORE))) { 2945 cpu_load_lcore_id = parse_stat_lcore(optarg); 2946 } 2947 #endif 2948 2949 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ETH_DEST, 2950 sizeof(CMD_LINE_OPT_ETH_DEST))) 2951 parse_eth_dest(optarg); 2952 2953 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_NUMA, 2954 sizeof(CMD_LINE_OPT_NO_NUMA))) { 2955 printf("numa is disabled\n"); 2956 numa_on = 0; 2957 } 2958 2959 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2960 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_IPV6, 2961 sizeof(CMD_LINE_OPT_IPV6))) { 2962 printf("ipv6 is specified\n"); 2963 ipv6 = 1; 2964 } 2965 #endif 2966 2967 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_LTHREADS, 2968 sizeof(CMD_LINE_OPT_NO_LTHREADS))) { 2969 printf("l-threads model is disabled\n"); 2970 lthreads_on = 0; 2971 } 2972 2973 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_PARSE_PTYPE, 2974 sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { 2975 printf("software packet type parsing enabled\n"); 2976 parse_ptype_on = 1; 2977 } 2978 2979 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ENABLE_JUMBO, 2980 sizeof(CMD_LINE_OPT_ENABLE_JUMBO))) { 2981 struct option lenopts = {"max-pkt-len", required_argument, 0, 2982 0}; 2983 2984 printf("jumbo frame is enabled - disabling simple TX path\n"); 2985 port_conf.rxmode.offloads |= 2986 DEV_RX_OFFLOAD_JUMBO_FRAME; 2987 port_conf.txmode.offloads |= 2988 DEV_TX_OFFLOAD_MULTI_SEGS; 2989 2990 /* if no max-pkt-len set, use the default value 2991 * RTE_ETHER_MAX_LEN 2992 */ 2993 if (0 == getopt_long(argc, argvopt, "", &lenopts, 2994 &option_index)) { 2995 2996 ret = parse_max_pkt_len(optarg); 2997 if ((ret < 64) || (ret > MAX_JUMBO_PKT_LEN)) { 2998 printf("invalid packet length\n"); 2999 print_usage(prgname); 3000 return -1; 3001 } 3002 port_conf.rxmode.max_rx_pkt_len = ret; 3003 } 3004 printf("set jumbo frame max packet length to %u\n", 3005 (unsigned int)port_conf.rxmode.max_rx_pkt_len); 3006 } 3007 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 3008 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_HASH_ENTRY_NUM, 3009 sizeof(CMD_LINE_OPT_HASH_ENTRY_NUM))) { 3010 ret = parse_hash_entry_number(optarg); 3011 if ((ret > 0) && (ret <= L3FWD_HASH_ENTRIES)) { 3012 hash_entry_number = ret; 3013 } else { 3014 printf("invalid hash entry number\n"); 3015 print_usage(prgname); 3016 return -1; 3017 } 3018 } 3019 #endif 3020 break; 3021 3022 default: 3023 print_usage(prgname); 3024 return -1; 3025 } 3026 } 3027 3028 if (optind >= 0) 3029 argv[optind-1] = prgname; 3030 3031 ret = optind-1; 3032 optind = 1; /* reset getopt lib */ 3033 return ret; 3034 } 3035 3036 static void 3037 print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr) 3038 { 3039 char buf[RTE_ETHER_ADDR_FMT_SIZE]; 3040 3041 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr); 3042 printf("%s%s", name, buf); 3043 } 3044 3045 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 3046 3047 static void convert_ipv4_5tuple(struct ipv4_5tuple *key1, 3048 union ipv4_5tuple_host *key2) 3049 { 3050 key2->ip_dst = rte_cpu_to_be_32(key1->ip_dst); 3051 key2->ip_src = rte_cpu_to_be_32(key1->ip_src); 3052 key2->port_dst = rte_cpu_to_be_16(key1->port_dst); 3053 key2->port_src = rte_cpu_to_be_16(key1->port_src); 3054 key2->proto = key1->proto; 3055 key2->pad0 = 0; 3056 key2->pad1 = 0; 3057 } 3058 3059 static void 
convert_ipv6_5tuple(struct ipv6_5tuple *key1, 3060 union ipv6_5tuple_host *key2) 3061 { 3062 uint32_t i; 3063 3064 for (i = 0; i < 16; i++) { 3065 key2->ip_dst[i] = key1->ip_dst[i]; 3066 key2->ip_src[i] = key1->ip_src[i]; 3067 } 3068 key2->port_dst = rte_cpu_to_be_16(key1->port_dst); 3069 key2->port_src = rte_cpu_to_be_16(key1->port_src); 3070 key2->proto = key1->proto; 3071 key2->pad0 = 0; 3072 key2->pad1 = 0; 3073 key2->reserve = 0; 3074 } 3075 3076 #define BYTE_VALUE_MAX 256 3077 #define ALL_32_BITS 0xffffffff 3078 #define BIT_8_TO_15 0x0000ff00 3079 static inline void 3080 populate_ipv4_few_flow_into_table(const struct rte_hash *h) 3081 { 3082 uint32_t i; 3083 int32_t ret; 3084 uint32_t array_len = RTE_DIM(ipv4_l3fwd_route_array); 3085 3086 mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15); 3087 for (i = 0; i < array_len; i++) { 3088 struct ipv4_l3fwd_route entry; 3089 union ipv4_5tuple_host newkey; 3090 3091 entry = ipv4_l3fwd_route_array[i]; 3092 convert_ipv4_5tuple(&entry.key, &newkey); 3093 ret = rte_hash_add_key(h, (void *)&newkey); 3094 if (ret < 0) { 3095 rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32 3096 " to the l3fwd hash.\n", i); 3097 } 3098 ipv4_l3fwd_out_if[ret] = entry.if_out; 3099 } 3100 printf("Hash: Adding 0x%" PRIx32 " keys\n", array_len); 3101 } 3102 3103 #define BIT_16_TO_23 0x00ff0000 3104 static inline void 3105 populate_ipv6_few_flow_into_table(const struct rte_hash *h) 3106 { 3107 uint32_t i; 3108 int32_t ret; 3109 uint32_t array_len = RTE_DIM(ipv6_l3fwd_route_array); 3110 3111 mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23); 3112 mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS); 3113 for (i = 0; i < array_len; i++) { 3114 struct ipv6_l3fwd_route entry; 3115 union ipv6_5tuple_host newkey; 3116 3117 entry = ipv6_l3fwd_route_array[i]; 3118 convert_ipv6_5tuple(&entry.key, &newkey); 3119 ret = rte_hash_add_key(h, (void *)&newkey); 3120 if (ret < 0) { 3121 rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32 3122 " to the l3fwd hash.\n", i); 3123 } 3124 ipv6_l3fwd_out_if[ret] = entry.if_out; 3125 } 3126 printf("Hash: Adding 0x%" PRIx32 "keys\n", array_len); 3127 } 3128 3129 #define NUMBER_PORT_USED 4 3130 static inline void 3131 populate_ipv4_many_flow_into_table(const struct rte_hash *h, 3132 unsigned int nr_flow) 3133 { 3134 unsigned i; 3135 3136 mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15); 3137 3138 for (i = 0; i < nr_flow; i++) { 3139 struct ipv4_l3fwd_route entry; 3140 union ipv4_5tuple_host newkey; 3141 uint8_t a = (uint8_t)((i / NUMBER_PORT_USED) % BYTE_VALUE_MAX); 3142 uint8_t b = (uint8_t)(((i / NUMBER_PORT_USED) / BYTE_VALUE_MAX) % 3143 BYTE_VALUE_MAX); 3144 uint8_t c = (uint8_t)((i / NUMBER_PORT_USED) / (BYTE_VALUE_MAX * 3145 BYTE_VALUE_MAX)); 3146 /* Create the ipv4 exact match flow */ 3147 memset(&entry, 0, sizeof(entry)); 3148 switch (i & (NUMBER_PORT_USED - 1)) { 3149 case 0: 3150 entry = ipv4_l3fwd_route_array[0]; 3151 entry.key.ip_dst = RTE_IPV4(101, c, b, a); 3152 break; 3153 case 1: 3154 entry = ipv4_l3fwd_route_array[1]; 3155 entry.key.ip_dst = RTE_IPV4(201, c, b, a); 3156 break; 3157 case 2: 3158 entry = ipv4_l3fwd_route_array[2]; 3159 entry.key.ip_dst = RTE_IPV4(111, c, b, a); 3160 break; 3161 case 3: 3162 entry = ipv4_l3fwd_route_array[3]; 3163 entry.key.ip_dst = RTE_IPV4(211, c, b, a); 3164 break; 3165 }; 3166 convert_ipv4_5tuple(&entry.key, &newkey); 3167 int32_t ret = rte_hash_add_key(h, (void *)&newkey); 3168 3169 if (ret < 0) 3170 
rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i); 3171 3172 ipv4_l3fwd_out_if[ret] = (uint8_t)entry.if_out; 3173 3174 } 3175 printf("Hash: Adding 0x%x keys\n", nr_flow); 3176 } 3177 3178 static inline void 3179 populate_ipv6_many_flow_into_table(const struct rte_hash *h, 3180 unsigned int nr_flow) 3181 { 3182 unsigned i; 3183 3184 mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23); 3185 mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS); 3186 for (i = 0; i < nr_flow; i++) { 3187 struct ipv6_l3fwd_route entry; 3188 union ipv6_5tuple_host newkey; 3189 3190 uint8_t a = (uint8_t) ((i / NUMBER_PORT_USED) % BYTE_VALUE_MAX); 3191 uint8_t b = (uint8_t) (((i / NUMBER_PORT_USED) / BYTE_VALUE_MAX) % 3192 BYTE_VALUE_MAX); 3193 uint8_t c = (uint8_t) ((i / NUMBER_PORT_USED) / (BYTE_VALUE_MAX * 3194 BYTE_VALUE_MAX)); 3195 3196 /* Create the ipv6 exact match flow */ 3197 memset(&entry, 0, sizeof(entry)); 3198 switch (i & (NUMBER_PORT_USED - 1)) { 3199 case 0: 3200 entry = ipv6_l3fwd_route_array[0]; 3201 break; 3202 case 1: 3203 entry = ipv6_l3fwd_route_array[1]; 3204 break; 3205 case 2: 3206 entry = ipv6_l3fwd_route_array[2]; 3207 break; 3208 case 3: 3209 entry = ipv6_l3fwd_route_array[3]; 3210 break; 3211 }; 3212 entry.key.ip_dst[13] = c; 3213 entry.key.ip_dst[14] = b; 3214 entry.key.ip_dst[15] = a; 3215 convert_ipv6_5tuple(&entry.key, &newkey); 3216 int32_t ret = rte_hash_add_key(h, (void *)&newkey); 3217 3218 if (ret < 0) 3219 rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i); 3220 3221 ipv6_l3fwd_out_if[ret] = (uint8_t) entry.if_out; 3222 3223 } 3224 printf("Hash: Adding 0x%x keys\n", nr_flow); 3225 } 3226 3227 static void 3228 setup_hash(int socketid) 3229 { 3230 struct rte_hash_parameters ipv4_l3fwd_hash_params = { 3231 .name = NULL, 3232 .entries = L3FWD_HASH_ENTRIES, 3233 .key_len = sizeof(union ipv4_5tuple_host), 3234 .hash_func = ipv4_hash_crc, 3235 .hash_func_init_val = 0, 3236 }; 3237 3238 struct rte_hash_parameters ipv6_l3fwd_hash_params = { 3239 .name = NULL, 3240 .entries = L3FWD_HASH_ENTRIES, 3241 .key_len = sizeof(union ipv6_5tuple_host), 3242 .hash_func = ipv6_hash_crc, 3243 .hash_func_init_val = 0, 3244 }; 3245 3246 char s[64]; 3247 3248 /* create ipv4 hash */ 3249 snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); 3250 ipv4_l3fwd_hash_params.name = s; 3251 ipv4_l3fwd_hash_params.socket_id = socketid; 3252 ipv4_l3fwd_lookup_struct[socketid] = 3253 rte_hash_create(&ipv4_l3fwd_hash_params); 3254 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 3255 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 3256 "socket %d\n", socketid); 3257 3258 /* create ipv6 hash */ 3259 snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); 3260 ipv6_l3fwd_hash_params.name = s; 3261 ipv6_l3fwd_hash_params.socket_id = socketid; 3262 ipv6_l3fwd_lookup_struct[socketid] = 3263 rte_hash_create(&ipv6_l3fwd_hash_params); 3264 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 3265 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 3266 "socket %d\n", socketid); 3267 3268 if (hash_entry_number != HASH_ENTRY_NUMBER_DEFAULT) { 3269 /* For testing hash matching with a large number of flows we 3270 * generate millions of IP 5-tuples with an incremented dst 3271 * address to initialize the hash table. 
*/ 3272 if (ipv6 == 0) { 3273 /* populate the ipv4 hash */ 3274 populate_ipv4_many_flow_into_table( 3275 ipv4_l3fwd_lookup_struct[socketid], hash_entry_number); 3276 } else { 3277 /* populate the ipv6 hash */ 3278 populate_ipv6_many_flow_into_table( 3279 ipv6_l3fwd_lookup_struct[socketid], hash_entry_number); 3280 } 3281 } else { 3282 /* Use data in ipv4/ipv6 l3fwd lookup table directly to initialize 3283 * the hash table */ 3284 if (ipv6 == 0) { 3285 /* populate the ipv4 hash */ 3286 populate_ipv4_few_flow_into_table( 3287 ipv4_l3fwd_lookup_struct[socketid]); 3288 } else { 3289 /* populate the ipv6 hash */ 3290 populate_ipv6_few_flow_into_table( 3291 ipv6_l3fwd_lookup_struct[socketid]); 3292 } 3293 } 3294 } 3295 #endif 3296 3297 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 3298 static void 3299 setup_lpm(int socketid) 3300 { 3301 struct rte_lpm6_config config; 3302 struct rte_lpm_config lpm_ipv4_config; 3303 unsigned i; 3304 int ret; 3305 char s[64]; 3306 3307 /* create the LPM table */ 3308 snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); 3309 lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; 3310 lpm_ipv4_config.number_tbl8s = 256; 3311 lpm_ipv4_config.flags = 0; 3312 ipv4_l3fwd_lookup_struct[socketid] = 3313 rte_lpm_create(s, socketid, &lpm_ipv4_config); 3314 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 3315 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 3316 " on socket %d\n", socketid); 3317 3318 /* populate the LPM table */ 3319 for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { 3320 3321 /* skip unused ports */ 3322 if ((1 << ipv4_l3fwd_route_array[i].if_out & 3323 enabled_port_mask) == 0) 3324 continue; 3325 3326 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], 3327 ipv4_l3fwd_route_array[i].ip, 3328 ipv4_l3fwd_route_array[i].depth, 3329 ipv4_l3fwd_route_array[i].if_out); 3330 3331 if (ret < 0) { 3332 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 3333 "l3fwd LPM table on socket %d\n", 3334 i, socketid); 3335 } 3336 3337 printf("LPM: Adding route 0x%08x / %d (%d)\n", 3338 (unsigned)ipv4_l3fwd_route_array[i].ip, 3339 ipv4_l3fwd_route_array[i].depth, 3340 ipv4_l3fwd_route_array[i].if_out); 3341 } 3342 3343 /* create the LPM6 table */ 3344 snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid); 3345 3346 config.max_rules = IPV6_L3FWD_LPM_MAX_RULES; 3347 config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S; 3348 config.flags = 0; 3349 ipv6_l3fwd_lookup_struct[socketid] = rte_lpm6_create(s, socketid, 3350 &config); 3351 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 3352 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 3353 " on socket %d\n", socketid); 3354 3355 /* populate the LPM table */ 3356 for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) { 3357 3358 /* skip unused ports */ 3359 if ((1 << ipv6_l3fwd_route_array[i].if_out & 3360 enabled_port_mask) == 0) 3361 continue; 3362 3363 ret = rte_lpm6_add(ipv6_l3fwd_lookup_struct[socketid], 3364 ipv6_l3fwd_route_array[i].ip, 3365 ipv6_l3fwd_route_array[i].depth, 3366 ipv6_l3fwd_route_array[i].if_out); 3367 3368 if (ret < 0) { 3369 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 3370 "l3fwd LPM table on socket %d\n", 3371 i, socketid); 3372 } 3373 3374 printf("LPM: Adding route %s / %d (%d)\n", 3375 "IPV6", 3376 ipv6_l3fwd_route_array[i].depth, 3377 ipv6_l3fwd_route_array[i].if_out); 3378 } 3379 } 3380 #endif 3381 3382 static int 3383 init_mem(unsigned nb_mbuf) 3384 { 3385 struct lcore_conf *qconf; 3386 int socketid; 3387 unsigned lcore_id; 3388 char s[64]; 3389 3390 for (lcore_id = 0; lcore_id 
< RTE_MAX_LCORE; lcore_id++) { 3391 if (rte_lcore_is_enabled(lcore_id) == 0) 3392 continue; 3393 3394 if (numa_on) 3395 socketid = rte_lcore_to_socket_id(lcore_id); 3396 else 3397 socketid = 0; 3398 3399 if (socketid >= NB_SOCKETS) { 3400 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n", 3401 socketid, lcore_id, NB_SOCKETS); 3402 } 3403 if (pktmbuf_pool[socketid] == NULL) { 3404 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 3405 pktmbuf_pool[socketid] = 3406 rte_pktmbuf_pool_create(s, nb_mbuf, 3407 MEMPOOL_CACHE_SIZE, 0, 3408 RTE_MBUF_DEFAULT_BUF_SIZE, socketid); 3409 if (pktmbuf_pool[socketid] == NULL) 3410 rte_exit(EXIT_FAILURE, 3411 "Cannot init mbuf pool on socket %d\n", socketid); 3412 else 3413 printf("Allocated mbuf pool on socket %d\n", socketid); 3414 3415 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 3416 setup_lpm(socketid); 3417 #else 3418 setup_hash(socketid); 3419 #endif 3420 } 3421 qconf = &lcore_conf[lcore_id]; 3422 qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid]; 3423 qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid]; 3424 } 3425 return 0; 3426 } 3427 3428 /* Check the link status of all ports in up to 9s, and print them finally */ 3429 static void 3430 check_all_ports_link_status(uint32_t port_mask) 3431 { 3432 #define CHECK_INTERVAL 100 /* 100ms */ 3433 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 3434 uint16_t portid; 3435 uint8_t count, all_ports_up, print_flag = 0; 3436 struct rte_eth_link link; 3437 int ret; 3438 char link_status_text[RTE_ETH_LINK_MAX_STR_LEN]; 3439 3440 printf("\nChecking link status"); 3441 fflush(stdout); 3442 for (count = 0; count <= MAX_CHECK_TIME; count++) { 3443 all_ports_up = 1; 3444 RTE_ETH_FOREACH_DEV(portid) { 3445 if ((port_mask & (1 << portid)) == 0) 3446 continue; 3447 memset(&link, 0, sizeof(link)); 3448 ret = rte_eth_link_get_nowait(portid, &link); 3449 if (ret < 0) { 3450 all_ports_up = 0; 3451 if (print_flag == 1) 3452 printf("Port %u link get failed: %s\n", 3453 portid, rte_strerror(-ret)); 3454 continue; 3455 } 3456 /* print link status if flag set */ 3457 if (print_flag == 1) { 3458 rte_eth_link_to_str(link_status_text, 3459 sizeof(link_status_text), &link); 3460 printf("Port %d %s\n", portid, 3461 link_status_text); 3462 continue; 3463 } 3464 /* clear all_ports_up flag if any link down */ 3465 if (link.link_status == ETH_LINK_DOWN) { 3466 all_ports_up = 0; 3467 break; 3468 } 3469 } 3470 /* after finally printing all link status, get out */ 3471 if (print_flag == 1) 3472 break; 3473 3474 if (all_ports_up == 0) { 3475 printf("."); 3476 fflush(stdout); 3477 rte_delay_ms(CHECK_INTERVAL); 3478 } 3479 3480 /* set the print_flag if all ports up or timeout */ 3481 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 3482 print_flag = 1; 3483 printf("done\n"); 3484 } 3485 } 3486 } 3487 3488 int 3489 main(int argc, char **argv) 3490 { 3491 struct rte_eth_dev_info dev_info; 3492 struct rte_eth_txconf *txconf; 3493 int ret; 3494 int i; 3495 unsigned nb_ports; 3496 uint16_t queueid, portid; 3497 unsigned lcore_id; 3498 uint32_t n_tx_queue, nb_lcores; 3499 uint8_t nb_rx_queue, queue, socketid; 3500 3501 /* init EAL */ 3502 ret = rte_eal_init(argc, argv); 3503 if (ret < 0) 3504 rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); 3505 argc -= ret; 3506 argv += ret; 3507 3508 ret = rte_timer_subsystem_init(); 3509 if (ret < 0) 3510 rte_exit(EXIT_FAILURE, "Failed to initialize timer subystem\n"); 3511 3512 /* pre-init dst MACs for all ports to 02:00:00:00:00:xx */ 3513 for (portid = 
0; portid < RTE_MAX_ETHPORTS; portid++) { 3514 dest_eth_addr[portid] = RTE_ETHER_LOCAL_ADMIN_ADDR + 3515 ((uint64_t)portid << 40); 3516 *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid]; 3517 } 3518 3519 /* parse application arguments (after the EAL ones) */ 3520 ret = parse_args(argc, argv); 3521 if (ret < 0) 3522 rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); 3523 3524 if (check_lcore_params() < 0) 3525 rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); 3526 3527 printf("Initializing rx-queues...\n"); 3528 ret = init_rx_queues(); 3529 if (ret < 0) 3530 rte_exit(EXIT_FAILURE, "init_rx_queues failed\n"); 3531 3532 printf("Initializing tx-threads...\n"); 3533 ret = init_tx_threads(); 3534 if (ret < 0) 3535 rte_exit(EXIT_FAILURE, "init_tx_threads failed\n"); 3536 3537 printf("Initializing rings...\n"); 3538 ret = init_rx_rings(); 3539 if (ret < 0) 3540 rte_exit(EXIT_FAILURE, "init_rx_rings failed\n"); 3541 3542 nb_ports = rte_eth_dev_count_avail(); 3543 3544 if (check_port_config() < 0) 3545 rte_exit(EXIT_FAILURE, "check_port_config failed\n"); 3546 3547 nb_lcores = rte_lcore_count(); 3548 3549 /* initialize all ports */ 3550 RTE_ETH_FOREACH_DEV(portid) { 3551 struct rte_eth_conf local_port_conf = port_conf; 3552 3553 /* skip ports that are not enabled */ 3554 if ((enabled_port_mask & (1 << portid)) == 0) { 3555 printf("\nSkipping disabled port %d\n", portid); 3556 continue; 3557 } 3558 3559 /* init port */ 3560 printf("Initializing port %d ... ", portid); 3561 fflush(stdout); 3562 3563 nb_rx_queue = get_port_n_rx_queues(portid); 3564 n_tx_queue = nb_lcores; 3565 if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) 3566 n_tx_queue = MAX_TX_QUEUE_PER_PORT; 3567 printf("Creating queues: nb_rxq=%d nb_txq=%u... ", 3568 nb_rx_queue, (unsigned)n_tx_queue); 3569 3570 ret = rte_eth_dev_info_get(portid, &dev_info); 3571 if (ret != 0) 3572 rte_exit(EXIT_FAILURE, 3573 "Error during getting device (port %u) info: %s\n", 3574 portid, strerror(-ret)); 3575 3576 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 3577 local_port_conf.txmode.offloads |= 3578 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 3579 3580 local_port_conf.rx_adv_conf.rss_conf.rss_hf &= 3581 dev_info.flow_type_rss_offloads; 3582 if (local_port_conf.rx_adv_conf.rss_conf.rss_hf != 3583 port_conf.rx_adv_conf.rss_conf.rss_hf) { 3584 printf("Port %u modified RSS hash function based on hardware support," 3585 "requested:%#"PRIx64" configured:%#"PRIx64"\n", 3586 portid, 3587 port_conf.rx_adv_conf.rss_conf.rss_hf, 3588 local_port_conf.rx_adv_conf.rss_conf.rss_hf); 3589 } 3590 3591 ret = rte_eth_dev_configure(portid, nb_rx_queue, 3592 (uint16_t)n_tx_queue, &local_port_conf); 3593 if (ret < 0) 3594 rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n", 3595 ret, portid); 3596 3597 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, 3598 &nb_txd); 3599 if (ret < 0) 3600 rte_exit(EXIT_FAILURE, 3601 "rte_eth_dev_adjust_nb_rx_tx_desc: err=%d, port=%d\n", 3602 ret, portid); 3603 3604 ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 3605 if (ret < 0) 3606 rte_exit(EXIT_FAILURE, 3607 "rte_eth_macaddr_get: err=%d, port=%d\n", 3608 ret, portid); 3609 3610 print_ethaddr(" Address:", &ports_eth_addr[portid]); 3611 printf(", "); 3612 print_ethaddr("Destination:", 3613 (const struct rte_ether_addr *)&dest_eth_addr[portid]); 3614 printf(", "); 3615 3616 /* 3617 * prepare src MACs for each port. 
3618 */ 3619 rte_ether_addr_copy(&ports_eth_addr[portid], 3620 (struct rte_ether_addr *)(val_eth + portid) + 1); 3621 3622 /* init memory */ 3623 ret = init_mem(NB_MBUF); 3624 if (ret < 0) 3625 rte_exit(EXIT_FAILURE, "init_mem failed\n"); 3626 3627 /* init one TX queue per couple (lcore,port) */ 3628 queueid = 0; 3629 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 3630 if (rte_lcore_is_enabled(lcore_id) == 0) 3631 continue; 3632 3633 if (numa_on) 3634 socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id); 3635 else 3636 socketid = 0; 3637 3638 printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); 3639 fflush(stdout); 3640 3641 txconf = &dev_info.default_txconf; 3642 txconf->offloads = local_port_conf.txmode.offloads; 3643 ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, 3644 socketid, txconf); 3645 if (ret < 0) 3646 rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, " 3647 "port=%d\n", ret, portid); 3648 3649 tx_thread[lcore_id].tx_queue_id[portid] = queueid; 3650 queueid++; 3651 } 3652 printf("\n"); 3653 } 3654 3655 for (i = 0; i < n_rx_thread; i++) { 3656 lcore_id = rx_thread[i].conf.lcore_id; 3657 3658 if (rte_lcore_is_enabled(lcore_id) == 0) { 3659 rte_exit(EXIT_FAILURE, 3660 "Cannot start Rx thread on lcore %u: lcore disabled\n", 3661 lcore_id 3662 ); 3663 } 3664 3665 printf("\nInitializing rx queues for Rx thread %d on lcore %u ... ", 3666 i, lcore_id); 3667 fflush(stdout); 3668 3669 /* init RX queues */ 3670 for (queue = 0; queue < rx_thread[i].n_rx_queue; ++queue) { 3671 struct rte_eth_rxconf rxq_conf; 3672 3673 portid = rx_thread[i].rx_queue_list[queue].port_id; 3674 queueid = rx_thread[i].rx_queue_list[queue].queue_id; 3675 3676 if (numa_on) 3677 socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id); 3678 else 3679 socketid = 0; 3680 3681 printf("rxq=%d,%d,%d ", portid, queueid, socketid); 3682 fflush(stdout); 3683 3684 ret = rte_eth_dev_info_get(portid, &dev_info); 3685 if (ret != 0) 3686 rte_exit(EXIT_FAILURE, 3687 "Error during getting device (port %u) info: %s\n", 3688 portid, strerror(-ret)); 3689 3690 rxq_conf = dev_info.default_rxconf; 3691 rxq_conf.offloads = port_conf.rxmode.offloads; 3692 ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, 3693 socketid, 3694 &rxq_conf, 3695 pktmbuf_pool[socketid]); 3696 if (ret < 0) 3697 rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d, " 3698 "port=%d\n", ret, portid); 3699 } 3700 } 3701 3702 printf("\n"); 3703 3704 /* start ports */ 3705 RTE_ETH_FOREACH_DEV(portid) { 3706 if ((enabled_port_mask & (1 << portid)) == 0) 3707 continue; 3708 3709 /* Start device */ 3710 ret = rte_eth_dev_start(portid); 3711 if (ret < 0) 3712 rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n", 3713 ret, portid); 3714 3715 /* 3716 * If enabled, put device in promiscuous mode. 3717 * This allows IO forwarding mode to forward packets 3718 * to itself through 2 cross-connected ports of the 3719 * target machine. 
3720 */ 3721 if (promiscuous_on) { 3722 ret = rte_eth_promiscuous_enable(portid); 3723 if (ret != 0) 3724 rte_exit(EXIT_FAILURE, 3725 "rte_eth_promiscuous_enable: err=%s, port=%u\n", 3726 rte_strerror(-ret), portid); 3727 } 3728 } 3729 3730 for (i = 0; i < n_rx_thread; i++) { 3731 lcore_id = rx_thread[i].conf.lcore_id; 3732 if (rte_lcore_is_enabled(lcore_id) == 0) 3733 continue; 3734 3735 /* check if hw packet type is supported */ 3736 for (queue = 0; queue < rx_thread[i].n_rx_queue; ++queue) { 3737 portid = rx_thread[i].rx_queue_list[queue].port_id; 3738 queueid = rx_thread[i].rx_queue_list[queue].queue_id; 3739 3740 if (parse_ptype_on) { 3741 if (!rte_eth_add_rx_callback(portid, queueid, 3742 cb_parse_ptype, NULL)) 3743 rte_exit(EXIT_FAILURE, 3744 "Failed to add rx callback: " 3745 "port=%d\n", portid); 3746 } else if (!check_ptype(portid)) 3747 rte_exit(EXIT_FAILURE, 3748 "Port %d cannot parse packet type.\n\n" 3749 "Please add --parse-ptype to use sw " 3750 "packet type analyzer.\n\n", 3751 portid); 3752 } 3753 } 3754 3755 check_all_ports_link_status(enabled_port_mask); 3756 3757 if (lthreads_on) { 3758 printf("Starting L-Threading Model\n"); 3759 3760 #if (APP_CPU_LOAD > 0) 3761 if (cpu_load_lcore_id > 0) 3762 /* Use one lcore for cpu load collector */ 3763 nb_lcores--; 3764 #endif 3765 3766 lthread_num_schedulers_set(nb_lcores); 3767 rte_eal_mp_remote_launch(sched_spawner, NULL, SKIP_MAIN); 3768 lthread_main_spawner(NULL); 3769 3770 } else { 3771 printf("Starting P-Threading Model\n"); 3772 /* launch per-lcore init on every lcore */ 3773 rte_eal_mp_remote_launch(pthread_run, NULL, CALL_MAIN); 3774 RTE_LCORE_FOREACH_WORKER(lcore_id) { 3775 if (rte_eal_wait_lcore(lcore_id) < 0) 3776 return -1; 3777 } 3778 } 3779 3780 return 0; 3781 } 3782