1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdint.h> 8 #include <inttypes.h> 9 #include <sys/types.h> 10 #include <string.h> 11 #include <sys/queue.h> 12 #include <stdarg.h> 13 #include <errno.h> 14 #include <getopt.h> 15 16 #include <rte_common.h> 17 #include <rte_vect.h> 18 #include <rte_byteorder.h> 19 #include <rte_log.h> 20 #include <rte_memory.h> 21 #include <rte_memcpy.h> 22 #include <rte_eal.h> 23 #include <rte_launch.h> 24 #include <rte_atomic.h> 25 #include <rte_cycles.h> 26 #include <rte_prefetch.h> 27 #include <rte_lcore.h> 28 #include <rte_per_lcore.h> 29 #include <rte_branch_prediction.h> 30 #include <rte_interrupts.h> 31 #include <rte_random.h> 32 #include <rte_debug.h> 33 #include <rte_ether.h> 34 #include <rte_ethdev.h> 35 #include <rte_ring.h> 36 #include <rte_mempool.h> 37 #include <rte_mbuf.h> 38 #include <rte_ip.h> 39 #include <rte_tcp.h> 40 #include <rte_udp.h> 41 #include <rte_string_fns.h> 42 #include <rte_pause.h> 43 44 #include <cmdline_parse.h> 45 #include <cmdline_parse_etheraddr.h> 46 47 #include <lthread_api.h> 48 49 #define APP_LOOKUP_EXACT_MATCH 0 50 #define APP_LOOKUP_LPM 1 51 #define DO_RFC_1812_CHECKS 52 53 /* Enable cpu-load stats 0-off, 1-on */ 54 #define APP_CPU_LOAD 1 55 56 #ifndef APP_LOOKUP_METHOD 57 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 58 #endif 59 60 #ifndef __GLIBC__ /* sched_getcpu() is glibc specific */ 61 #define sched_getcpu() rte_lcore_id() 62 #endif 63 64 static int 65 check_ptype(int portid) 66 { 67 int i, ret; 68 int ipv4 = 0, ipv6 = 0; 69 70 ret = rte_eth_dev_get_supported_ptypes(portid, RTE_PTYPE_L3_MASK, NULL, 71 0); 72 if (ret <= 0) 73 return 0; 74 75 uint32_t ptypes[ret]; 76 77 ret = rte_eth_dev_get_supported_ptypes(portid, RTE_PTYPE_L3_MASK, 78 ptypes, ret); 79 for (i = 0; i < ret; ++i) { 80 if (ptypes[i] & RTE_PTYPE_L3_IPV4) 81 ipv4 = 1; 82 if (ptypes[i] & RTE_PTYPE_L3_IPV6) 83 ipv6 = 1; 84 } 85 86 if (ipv4 && ipv6) 87 return 1; 88 89 return 0; 90 } 91 92 static inline void 93 parse_ptype(struct rte_mbuf *m) 94 { 95 struct ether_hdr *eth_hdr; 96 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 97 uint16_t ether_type; 98 99 eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); 100 ether_type = eth_hdr->ether_type; 101 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) 102 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 103 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) 104 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 105 106 m->packet_type = packet_type; 107 } 108 109 static uint16_t 110 cb_parse_ptype(__rte_unused uint16_t port, __rte_unused uint16_t queue, 111 struct rte_mbuf *pkts[], uint16_t nb_pkts, 112 __rte_unused uint16_t max_pkts, __rte_unused void *user_param) 113 { 114 unsigned int i; 115 116 for (i = 0; i < nb_pkts; i++) 117 parse_ptype(pkts[i]); 118 119 return nb_pkts; 120 } 121 122 /* 123 * When set to zero, simple forwaring path is eanbled. 124 * When set to one, optimized forwarding path is enabled. 125 * Note that LPM optimisation path uses SSE4.1 instructions. 
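 * (For example, _mm_blend_epi16(), used below in process_packet() and
 * processx4_step3(), is an SSE4.1 intrinsic.)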
126 */ 127 #define ENABLE_MULTI_BUFFER_OPTIMIZE 1 128 129 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 130 #include <rte_hash.h> 131 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 132 #include <rte_lpm.h> 133 #include <rte_lpm6.h> 134 #else 135 #error "APP_LOOKUP_METHOD set to incorrect value" 136 #endif 137 138 #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 139 140 #define MAX_JUMBO_PKT_LEN 9600 141 142 #define IPV6_ADDR_LEN 16 143 144 #define MEMPOOL_CACHE_SIZE 256 145 146 /* 147 * This expression is used to calculate the number of mbufs needed depending on 148 * user input, taking into account memory for rx and tx hardware rings, cache 149 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 150 * NB_MBUF never goes below a minimum value of 8192 151 */ 152 153 #define NB_MBUF RTE_MAX(\ 154 (nb_ports*nb_rx_queue*nb_rxd + \ 155 nb_ports*nb_lcores*MAX_PKT_BURST + \ 156 nb_ports*n_tx_queue*nb_txd + \ 157 nb_lcores*MEMPOOL_CACHE_SIZE), \ 158 (unsigned)8192) 159 160 #define MAX_PKT_BURST 32 161 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 162 163 /* 164 * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send. 165 */ 166 #define MAX_TX_BURST (MAX_PKT_BURST / 2) 167 #define BURST_SIZE MAX_TX_BURST 168 169 #define NB_SOCKETS 8 170 171 /* Configure how many packets ahead to prefetch, when reading packets */ 172 #define PREFETCH_OFFSET 3 173 174 /* Used to mark destination port as 'invalid'. */ 175 #define BAD_PORT ((uint16_t)-1) 176 177 #define FWDSTEP 4 178 179 /* 180 * Configurable number of RX/TX ring descriptors 181 */ 182 #define RTE_TEST_RX_DESC_DEFAULT 1024 183 #define RTE_TEST_TX_DESC_DEFAULT 1024 184 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 185 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 186 187 /* ethernet addresses of ports */ 188 static uint64_t dest_eth_addr[RTE_MAX_ETHPORTS]; 189 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 190 191 static xmm_t val_eth[RTE_MAX_ETHPORTS]; 192 193 /* replace first 12B of the ethernet header. */ 194 #define MASK_ETH 0x3f 195 196 /* mask of enabled ports */ 197 static uint32_t enabled_port_mask; 198 static int promiscuous_on; /**< Set in promiscuous mode off by default. */ 199 static int numa_on = 1; /**< NUMA is enabled by default. */ 200 static int parse_ptype_on; 201 202 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 203 static int ipv6; /**< ipv6 is false by default. 
*/ 204 #endif 205 206 #if (APP_CPU_LOAD == 1) 207 208 #define MAX_CPU RTE_MAX_LCORE 209 #define CPU_LOAD_TIMEOUT_US (5 * 1000 * 1000) /**< Timeout for collecting 5s */ 210 211 #define CPU_PROCESS 0 212 #define CPU_POLL 1 213 #define MAX_CPU_COUNTER 2 214 215 struct cpu_load { 216 uint16_t n_cpu; 217 uint64_t counter; 218 uint64_t hits[MAX_CPU_COUNTER][MAX_CPU]; 219 } __rte_cache_aligned; 220 221 static struct cpu_load cpu_load; 222 static int cpu_load_lcore_id = -1; 223 224 #define SET_CPU_BUSY(thread, counter) \ 225 thread->conf.busy[counter] = 1 226 227 #define SET_CPU_IDLE(thread, counter) \ 228 thread->conf.busy[counter] = 0 229 230 #define IS_CPU_BUSY(thread, counter) \ 231 (thread->conf.busy[counter] > 0) 232 233 #else 234 235 #define SET_CPU_BUSY(thread, counter) 236 #define SET_CPU_IDLE(thread, counter) 237 #define IS_CPU_BUSY(thread, counter) 0 238 239 #endif 240 241 struct mbuf_table { 242 uint16_t len; 243 struct rte_mbuf *m_table[MAX_PKT_BURST]; 244 }; 245 246 struct lcore_rx_queue { 247 uint16_t port_id; 248 uint8_t queue_id; 249 } __rte_cache_aligned; 250 251 #define MAX_RX_QUEUE_PER_LCORE 16 252 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 253 #define MAX_RX_QUEUE_PER_PORT 128 254 255 #define MAX_LCORE_PARAMS 1024 256 struct rx_thread_params { 257 uint16_t port_id; 258 uint8_t queue_id; 259 uint8_t lcore_id; 260 uint8_t thread_id; 261 } __rte_cache_aligned; 262 263 static struct rx_thread_params rx_thread_params_array[MAX_LCORE_PARAMS]; 264 static struct rx_thread_params rx_thread_params_array_default[] = { 265 {0, 0, 2, 0}, 266 {0, 1, 2, 1}, 267 {0, 2, 2, 2}, 268 {1, 0, 2, 3}, 269 {1, 1, 2, 4}, 270 {1, 2, 2, 5}, 271 {2, 0, 2, 6}, 272 {3, 0, 3, 7}, 273 {3, 1, 3, 8}, 274 }; 275 276 static struct rx_thread_params *rx_thread_params = 277 rx_thread_params_array_default; 278 static uint16_t nb_rx_thread_params = RTE_DIM(rx_thread_params_array_default); 279 280 struct tx_thread_params { 281 uint8_t lcore_id; 282 uint8_t thread_id; 283 } __rte_cache_aligned; 284 285 static struct tx_thread_params tx_thread_params_array[MAX_LCORE_PARAMS]; 286 static struct tx_thread_params tx_thread_params_array_default[] = { 287 {4, 0}, 288 {5, 1}, 289 {6, 2}, 290 {7, 3}, 291 {8, 4}, 292 {9, 5}, 293 {10, 6}, 294 {11, 7}, 295 {12, 8}, 296 }; 297 298 static struct tx_thread_params *tx_thread_params = 299 tx_thread_params_array_default; 300 static uint16_t nb_tx_thread_params = RTE_DIM(tx_thread_params_array_default); 301 302 static struct rte_eth_conf port_conf = { 303 .rxmode = { 304 .mq_mode = ETH_MQ_RX_RSS, 305 .max_rx_pkt_len = ETHER_MAX_LEN, 306 .split_hdr_size = 0, 307 .offloads = DEV_RX_OFFLOAD_CHECKSUM, 308 }, 309 .rx_adv_conf = { 310 .rss_conf = { 311 .rss_key = NULL, 312 .rss_hf = ETH_RSS_TCP, 313 }, 314 }, 315 .txmode = { 316 .mq_mode = ETH_MQ_TX_NONE, 317 }, 318 }; 319 320 static struct rte_mempool *pktmbuf_pool[NB_SOCKETS]; 321 322 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 323 324 #include <rte_hash_crc.h> 325 #define DEFAULT_HASH_FUNC rte_hash_crc 326 327 struct ipv4_5tuple { 328 uint32_t ip_dst; 329 uint32_t ip_src; 330 uint16_t port_dst; 331 uint16_t port_src; 332 uint8_t proto; 333 } __attribute__((__packed__)); 334 335 union ipv4_5tuple_host { 336 struct { 337 uint8_t pad0; 338 uint8_t proto; 339 uint16_t pad1; 340 uint32_t ip_src; 341 uint32_t ip_dst; 342 uint16_t port_src; 343 uint16_t port_dst; 344 }; 345 __m128i xmm; 346 }; 347 348 #define XMM_NUM_IN_IPV6_5TUPLE 3 349 350 struct ipv6_5tuple { 351 uint8_t ip_dst[IPV6_ADDR_LEN]; 352 uint8_t ip_src[IPV6_ADDR_LEN]; 353 
uint16_t port_dst; 354 uint16_t port_src; 355 uint8_t proto; 356 } __attribute__((__packed__)); 357 358 union ipv6_5tuple_host { 359 struct { 360 uint16_t pad0; 361 uint8_t proto; 362 uint8_t pad1; 363 uint8_t ip_src[IPV6_ADDR_LEN]; 364 uint8_t ip_dst[IPV6_ADDR_LEN]; 365 uint16_t port_src; 366 uint16_t port_dst; 367 uint64_t reserve; 368 }; 369 __m128i xmm[XMM_NUM_IN_IPV6_5TUPLE]; 370 }; 371 372 struct ipv4_l3fwd_route { 373 struct ipv4_5tuple key; 374 uint8_t if_out; 375 }; 376 377 struct ipv6_l3fwd_route { 378 struct ipv6_5tuple key; 379 uint8_t if_out; 380 }; 381 382 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 383 {{IPv4(101, 0, 0, 0), IPv4(100, 10, 0, 1), 101, 11, IPPROTO_TCP}, 0}, 384 {{IPv4(201, 0, 0, 0), IPv4(200, 20, 0, 1), 102, 12, IPPROTO_TCP}, 1}, 385 {{IPv4(111, 0, 0, 0), IPv4(100, 30, 0, 1), 101, 11, IPPROTO_TCP}, 2}, 386 {{IPv4(211, 0, 0, 0), IPv4(200, 40, 0, 1), 102, 12, IPPROTO_TCP}, 3}, 387 }; 388 389 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 390 {{ 391 {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, 392 {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 393 0x05}, 394 101, 11, IPPROTO_TCP}, 0}, 395 396 {{ 397 {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, 398 {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 399 0x05}, 400 102, 12, IPPROTO_TCP}, 1}, 401 402 {{ 403 {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, 404 {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 405 0x05}, 406 101, 11, IPPROTO_TCP}, 2}, 407 408 {{ 409 {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0}, 410 {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 411 0x05}, 412 102, 12, IPPROTO_TCP}, 3}, 413 }; 414 415 typedef struct rte_hash lookup_struct_t; 416 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 417 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 418 419 #ifdef RTE_ARCH_X86_64 420 /* default to 4 million hash entries (approx) */ 421 #define L3FWD_HASH_ENTRIES (1024*1024*4) 422 #else 423 /* 32-bit has less address-space for hugepage memory, limit to 1M entries */ 424 #define L3FWD_HASH_ENTRIES (1024*1024*1) 425 #endif 426 #define HASH_ENTRY_NUMBER_DEFAULT 4 427 428 static uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT; 429 430 static inline uint32_t 431 ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len, 432 uint32_t init_val) 433 { 434 const union ipv4_5tuple_host *k; 435 uint32_t t; 436 const uint32_t *p; 437 438 k = data; 439 t = k->proto; 440 p = (const uint32_t *)&k->port_src; 441 442 init_val = rte_hash_crc_4byte(t, init_val); 443 init_val = rte_hash_crc_4byte(k->ip_src, init_val); 444 init_val = rte_hash_crc_4byte(k->ip_dst, init_val); 445 init_val = rte_hash_crc_4byte(*p, init_val); 446 return init_val; 447 } 448 449 static inline uint32_t 450 ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len, 451 uint32_t init_val) 452 { 453 const union ipv6_5tuple_host *k; 454 uint32_t t; 455 const uint32_t *p; 456 const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3; 457 const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3; 458 459 k = data; 460 t = k->proto; 461 p = (const uint32_t *)&k->port_src; 462 463 ip_src0 = (const uint32_t *) k->ip_src; 464 ip_src1 = (const uint32_t *)(k->ip_src + 4); 465 ip_src2 = (const uint32_t *)(k->ip_src + 8); 466 ip_src3 = (const uint32_t *)(k->ip_src + 12); 467 ip_dst0 = (const uint32_t *) 
k->ip_dst; 468 ip_dst1 = (const uint32_t *)(k->ip_dst + 4); 469 ip_dst2 = (const uint32_t *)(k->ip_dst + 8); 470 ip_dst3 = (const uint32_t *)(k->ip_dst + 12); 471 init_val = rte_hash_crc_4byte(t, init_val); 472 init_val = rte_hash_crc_4byte(*ip_src0, init_val); 473 init_val = rte_hash_crc_4byte(*ip_src1, init_val); 474 init_val = rte_hash_crc_4byte(*ip_src2, init_val); 475 init_val = rte_hash_crc_4byte(*ip_src3, init_val); 476 init_val = rte_hash_crc_4byte(*ip_dst0, init_val); 477 init_val = rte_hash_crc_4byte(*ip_dst1, init_val); 478 init_val = rte_hash_crc_4byte(*ip_dst2, init_val); 479 init_val = rte_hash_crc_4byte(*ip_dst3, init_val); 480 init_val = rte_hash_crc_4byte(*p, init_val); 481 return init_val; 482 } 483 484 #define IPV4_L3FWD_NUM_ROUTES RTE_DIM(ipv4_l3fwd_route_array) 485 #define IPV6_L3FWD_NUM_ROUTES RTE_DIM(ipv6_l3fwd_route_array) 486 487 static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 488 static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 489 490 #endif 491 492 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 493 struct ipv4_l3fwd_route { 494 uint32_t ip; 495 uint8_t depth; 496 uint8_t if_out; 497 }; 498 499 struct ipv6_l3fwd_route { 500 uint8_t ip[16]; 501 uint8_t depth; 502 uint8_t if_out; 503 }; 504 505 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 506 {IPv4(1, 1, 1, 0), 24, 0}, 507 {IPv4(2, 1, 1, 0), 24, 1}, 508 {IPv4(3, 1, 1, 0), 24, 2}, 509 {IPv4(4, 1, 1, 0), 24, 3}, 510 {IPv4(5, 1, 1, 0), 24, 4}, 511 {IPv4(6, 1, 1, 0), 24, 5}, 512 {IPv4(7, 1, 1, 0), 24, 6}, 513 {IPv4(8, 1, 1, 0), 24, 7}, 514 }; 515 516 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 517 {{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 0}, 518 {{2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 1}, 519 {{3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 2}, 520 {{4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 3}, 521 {{5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 4}, 522 {{6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 5}, 523 {{7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 6}, 524 {{8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 7}, 525 }; 526 527 #define IPV4_L3FWD_NUM_ROUTES RTE_DIM(ipv4_l3fwd_route_array) 528 #define IPV6_L3FWD_NUM_ROUTES RTE_DIM(ipv6_l3fwd_route_array) 529 530 #define IPV4_L3FWD_LPM_MAX_RULES 1024 531 #define IPV6_L3FWD_LPM_MAX_RULES 1024 532 #define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16) 533 534 typedef struct rte_lpm lookup_struct_t; 535 typedef struct rte_lpm6 lookup6_struct_t; 536 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 537 static lookup6_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 538 #endif 539 540 struct lcore_conf { 541 lookup_struct_t *ipv4_lookup_struct; 542 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 543 lookup6_struct_t *ipv6_lookup_struct; 544 #else 545 lookup_struct_t *ipv6_lookup_struct; 546 #endif 547 void *data; 548 } __rte_cache_aligned; 549 550 static struct lcore_conf lcore_conf[RTE_MAX_LCORE]; 551 RTE_DEFINE_PER_LCORE(struct lcore_conf *, lcore_conf); 552 553 #define MAX_RX_QUEUE_PER_THREAD 16 554 #define MAX_TX_PORT_PER_THREAD RTE_MAX_ETHPORTS 555 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 556 #define MAX_RX_QUEUE_PER_PORT 128 557 558 #define MAX_RX_THREAD 1024 559 #define MAX_TX_THREAD 1024 560 #define MAX_THREAD (MAX_RX_THREAD + MAX_TX_THREAD) 561 562 /** 563 * Producers and consumers threads configuration 564 */ 565 static int lthreads_on = 1; /**< Use lthreads for processing*/ 566 567 rte_atomic16_t rx_counter; /**< 
Number of spawned rx threads */ 568 rte_atomic16_t tx_counter; /**< Number of spawned tx threads */ 569 570 struct thread_conf { 571 uint16_t lcore_id; /**< Initial lcore for rx thread */ 572 uint16_t cpu_id; /**< Cpu id for cpu load stats counter */ 573 uint16_t thread_id; /**< Thread ID */ 574 575 #if (APP_CPU_LOAD > 0) 576 int busy[MAX_CPU_COUNTER]; 577 #endif 578 }; 579 580 struct thread_rx_conf { 581 struct thread_conf conf; 582 583 uint16_t n_rx_queue; 584 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 585 586 uint16_t n_ring; /**< Number of output rings */ 587 struct rte_ring *ring[RTE_MAX_LCORE]; 588 struct lthread_cond *ready[RTE_MAX_LCORE]; 589 590 #if (APP_CPU_LOAD > 0) 591 int busy[MAX_CPU_COUNTER]; 592 #endif 593 } __rte_cache_aligned; 594 595 uint16_t n_rx_thread; 596 struct thread_rx_conf rx_thread[MAX_RX_THREAD]; 597 598 struct thread_tx_conf { 599 struct thread_conf conf; 600 601 uint16_t tx_queue_id[RTE_MAX_LCORE]; 602 struct mbuf_table tx_mbufs[RTE_MAX_LCORE]; 603 604 struct rte_ring *ring; 605 struct lthread_cond **ready; 606 607 } __rte_cache_aligned; 608 609 uint16_t n_tx_thread; 610 struct thread_tx_conf tx_thread[MAX_TX_THREAD]; 611 612 /* Send burst of packets on an output interface */ 613 static inline int 614 send_burst(struct thread_tx_conf *qconf, uint16_t n, uint16_t port) 615 { 616 struct rte_mbuf **m_table; 617 int ret; 618 uint16_t queueid; 619 620 queueid = qconf->tx_queue_id[port]; 621 m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; 622 623 ret = rte_eth_tx_burst(port, queueid, m_table, n); 624 if (unlikely(ret < n)) { 625 do { 626 rte_pktmbuf_free(m_table[ret]); 627 } while (++ret < n); 628 } 629 630 return 0; 631 } 632 633 /* Enqueue a single packet, and send burst if queue is filled */ 634 static inline int 635 send_single_packet(struct rte_mbuf *m, uint16_t port) 636 { 637 uint16_t len; 638 struct thread_tx_conf *qconf; 639 640 if (lthreads_on) 641 qconf = (struct thread_tx_conf *)lthread_get_data(); 642 else 643 qconf = (struct thread_tx_conf *)RTE_PER_LCORE(lcore_conf)->data; 644 645 len = qconf->tx_mbufs[port].len; 646 qconf->tx_mbufs[port].m_table[len] = m; 647 len++; 648 649 /* enough pkts to be sent */ 650 if (unlikely(len == MAX_PKT_BURST)) { 651 send_burst(qconf, MAX_PKT_BURST, port); 652 len = 0; 653 } 654 655 qconf->tx_mbufs[port].len = len; 656 return 0; 657 } 658 659 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ 660 (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) 661 static __rte_always_inline void 662 send_packetsx4(uint16_t port, 663 struct rte_mbuf *m[], uint32_t num) 664 { 665 uint32_t len, j, n; 666 struct thread_tx_conf *qconf; 667 668 if (lthreads_on) 669 qconf = (struct thread_tx_conf *)lthread_get_data(); 670 else 671 qconf = (struct thread_tx_conf *)RTE_PER_LCORE(lcore_conf)->data; 672 673 len = qconf->tx_mbufs[port].len; 674 675 /* 676 * If TX buffer for that queue is empty, and we have enough packets, 677 * then send them straightway. 678 */ 679 if (num >= MAX_TX_BURST && len == 0) { 680 n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num); 681 if (unlikely(n < num)) { 682 do { 683 rte_pktmbuf_free(m[n]); 684 } while (++n < num); 685 } 686 return; 687 } 688 689 /* 690 * Put packets into TX buffer for that queue. 691 */ 692 693 n = len + num; 694 n = (n > MAX_PKT_BURST) ? 
MAX_PKT_BURST - len : num; 695 696 j = 0; 697 switch (n % FWDSTEP) { 698 while (j < n) { 699 case 0: 700 qconf->tx_mbufs[port].m_table[len + j] = m[j]; 701 j++; 702 /* fall-through */ 703 case 3: 704 qconf->tx_mbufs[port].m_table[len + j] = m[j]; 705 j++; 706 /* fall-through */ 707 case 2: 708 qconf->tx_mbufs[port].m_table[len + j] = m[j]; 709 j++; 710 /* fall-through */ 711 case 1: 712 qconf->tx_mbufs[port].m_table[len + j] = m[j]; 713 j++; 714 } 715 } 716 717 len += n; 718 719 /* enough pkts to be sent */ 720 if (unlikely(len == MAX_PKT_BURST)) { 721 722 send_burst(qconf, MAX_PKT_BURST, port); 723 724 /* copy rest of the packets into the TX buffer. */ 725 len = num - n; 726 j = 0; 727 switch (len % FWDSTEP) { 728 while (j < len) { 729 case 0: 730 qconf->tx_mbufs[port].m_table[j] = m[n + j]; 731 j++; 732 /* fall-through */ 733 case 3: 734 qconf->tx_mbufs[port].m_table[j] = m[n + j]; 735 j++; 736 /* fall-through */ 737 case 2: 738 qconf->tx_mbufs[port].m_table[j] = m[n + j]; 739 j++; 740 /* fall-through */ 741 case 1: 742 qconf->tx_mbufs[port].m_table[j] = m[n + j]; 743 j++; 744 } 745 } 746 } 747 748 qconf->tx_mbufs[port].len = len; 749 } 750 #endif /* APP_LOOKUP_LPM */ 751 752 #ifdef DO_RFC_1812_CHECKS 753 static inline int 754 is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len) 755 { 756 /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ 757 /* 758 * 1. The packet length reported by the Link Layer must be large 759 * enough to hold the minimum length legal IP datagram (20 bytes). 760 */ 761 if (link_len < sizeof(struct ipv4_hdr)) 762 return -1; 763 764 /* 2. The IP checksum must be correct. */ 765 /* this is checked in H/W */ 766 767 /* 768 * 3. The IP version number must be 4. If the version number is not 4 769 * then the packet may be another version of IP, such as IPng or 770 * ST-II. 771 */ 772 if (((pkt->version_ihl) >> 4) != 4) 773 return -3; 774 /* 775 * 4. The IP header length field must be large enough to hold the 776 * minimum length legal IP datagram (20 bytes = 5 words). 777 */ 778 if ((pkt->version_ihl & 0xf) < 5) 779 return -4; 780 781 /* 782 * 5. The IP total length field must be large enough to hold the IP 783 * datagram header, whose length is specified in the IP header length 784 * field. 785 */ 786 if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr)) 787 return -5; 788 789 return 0; 790 } 791 #endif 792 793 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 794 795 static __m128i mask0; 796 static __m128i mask1; 797 static __m128i mask2; 798 static inline uint16_t 799 get_ipv4_dst_port(void *ipv4_hdr, uint16_t portid, 800 lookup_struct_t *ipv4_l3fwd_lookup_struct) 801 { 802 int ret = 0; 803 union ipv4_5tuple_host key; 804 805 ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct ipv4_hdr, time_to_live); 806 __m128i data = _mm_loadu_si128((__m128i *)(ipv4_hdr)); 807 /* Get 5 tuple: dst port, src port, dst IP address, src IP address and 808 protocol */ 809 key.xmm = _mm_and_si128(data, mask0); 810 /* Find destination port */ 811 ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); 812 return ((ret < 0) ? 
portid : ipv4_l3fwd_out_if[ret]); 813 } 814 815 static inline uint16_t 816 get_ipv6_dst_port(void *ipv6_hdr, uint16_t portid, 817 lookup_struct_t *ipv6_l3fwd_lookup_struct) 818 { 819 int ret = 0; 820 union ipv6_5tuple_host key; 821 822 ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len); 823 __m128i data0 = _mm_loadu_si128((__m128i *)(ipv6_hdr)); 824 __m128i data1 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) + 825 sizeof(__m128i))); 826 __m128i data2 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) + 827 sizeof(__m128i) + sizeof(__m128i))); 828 /* Get part of 5 tuple: src IP address lower 96 bits and protocol */ 829 key.xmm[0] = _mm_and_si128(data0, mask1); 830 /* Get part of 5 tuple: dst IP address lower 96 bits and src IP address 831 higher 32 bits */ 832 key.xmm[1] = data1; 833 /* Get part of 5 tuple: dst port and src port and dst IP address higher 834 32 bits */ 835 key.xmm[2] = _mm_and_si128(data2, mask2); 836 837 /* Find destination port */ 838 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 839 return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); 840 } 841 #endif 842 843 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 844 845 static inline uint16_t 846 get_ipv4_dst_port(void *ipv4_hdr, uint16_t portid, 847 lookup_struct_t *ipv4_l3fwd_lookup_struct) 848 { 849 uint32_t next_hop; 850 851 return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, 852 rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr), 853 &next_hop) == 0) ? next_hop : portid); 854 } 855 856 static inline uint16_t 857 get_ipv6_dst_port(void *ipv6_hdr, uint16_t portid, 858 lookup6_struct_t *ipv6_l3fwd_lookup_struct) 859 { 860 uint32_t next_hop; 861 862 return ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct, 863 ((struct ipv6_hdr *)ipv6_hdr)->dst_addr, &next_hop) == 0) ? 
864 next_hop : portid); 865 } 866 #endif 867 868 static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid) 869 __attribute__((unused)); 870 871 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \ 872 (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) 873 874 #define MASK_ALL_PKTS 0xff 875 #define EXCLUDE_1ST_PKT 0xfe 876 #define EXCLUDE_2ND_PKT 0xfd 877 #define EXCLUDE_3RD_PKT 0xfb 878 #define EXCLUDE_4TH_PKT 0xf7 879 #define EXCLUDE_5TH_PKT 0xef 880 #define EXCLUDE_6TH_PKT 0xdf 881 #define EXCLUDE_7TH_PKT 0xbf 882 #define EXCLUDE_8TH_PKT 0x7f 883 884 static inline void 885 simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint16_t portid) 886 { 887 struct ether_hdr *eth_hdr[8]; 888 struct ipv4_hdr *ipv4_hdr[8]; 889 uint16_t dst_port[8]; 890 int32_t ret[8]; 891 union ipv4_5tuple_host key[8]; 892 __m128i data[8]; 893 894 eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *); 895 eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *); 896 eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *); 897 eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *); 898 eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *); 899 eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *); 900 eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *); 901 eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *); 902 903 /* Handle IPv4 headers.*/ 904 ipv4_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv4_hdr *, 905 sizeof(struct ether_hdr)); 906 ipv4_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv4_hdr *, 907 sizeof(struct ether_hdr)); 908 ipv4_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv4_hdr *, 909 sizeof(struct ether_hdr)); 910 ipv4_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv4_hdr *, 911 sizeof(struct ether_hdr)); 912 ipv4_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv4_hdr *, 913 sizeof(struct ether_hdr)); 914 ipv4_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv4_hdr *, 915 sizeof(struct ether_hdr)); 916 ipv4_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv4_hdr *, 917 sizeof(struct ether_hdr)); 918 ipv4_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv4_hdr *, 919 sizeof(struct ether_hdr)); 920 921 #ifdef DO_RFC_1812_CHECKS 922 /* Check to make sure the packet is valid (RFC1812) */ 923 uint8_t valid_mask = MASK_ALL_PKTS; 924 925 if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt_len) < 0) { 926 rte_pktmbuf_free(m[0]); 927 valid_mask &= EXCLUDE_1ST_PKT; 928 } 929 if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt_len) < 0) { 930 rte_pktmbuf_free(m[1]); 931 valid_mask &= EXCLUDE_2ND_PKT; 932 } 933 if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt_len) < 0) { 934 rte_pktmbuf_free(m[2]); 935 valid_mask &= EXCLUDE_3RD_PKT; 936 } 937 if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt_len) < 0) { 938 rte_pktmbuf_free(m[3]); 939 valid_mask &= EXCLUDE_4TH_PKT; 940 } 941 if (is_valid_ipv4_pkt(ipv4_hdr[4], m[4]->pkt_len) < 0) { 942 rte_pktmbuf_free(m[4]); 943 valid_mask &= EXCLUDE_5TH_PKT; 944 } 945 if (is_valid_ipv4_pkt(ipv4_hdr[5], m[5]->pkt_len) < 0) { 946 rte_pktmbuf_free(m[5]); 947 valid_mask &= EXCLUDE_6TH_PKT; 948 } 949 if (is_valid_ipv4_pkt(ipv4_hdr[6], m[6]->pkt_len) < 0) { 950 rte_pktmbuf_free(m[6]); 951 valid_mask &= EXCLUDE_7TH_PKT; 952 } 953 if (is_valid_ipv4_pkt(ipv4_hdr[7], m[7]->pkt_len) < 0) { 954 rte_pktmbuf_free(m[7]); 955 valid_mask &= EXCLUDE_8TH_PKT; 956 } 957 if (unlikely(valid_mask != MASK_ALL_PKTS)) { 958 if (valid_mask == 0) 959 return; 960 961 uint8_t i = 0; 962 963 for (i = 0; i < 8; i++) 964 if ((0x1 << i) & valid_mask) 965 l3fwd_simple_forward(m[i], portid); 966 } 967 #endif /* End 
of #ifdef DO_RFC_1812_CHECKS */ 968 969 data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *, 970 sizeof(struct ether_hdr) + 971 offsetof(struct ipv4_hdr, time_to_live))); 972 data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *, 973 sizeof(struct ether_hdr) + 974 offsetof(struct ipv4_hdr, time_to_live))); 975 data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *, 976 sizeof(struct ether_hdr) + 977 offsetof(struct ipv4_hdr, time_to_live))); 978 data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *, 979 sizeof(struct ether_hdr) + 980 offsetof(struct ipv4_hdr, time_to_live))); 981 data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], __m128i *, 982 sizeof(struct ether_hdr) + 983 offsetof(struct ipv4_hdr, time_to_live))); 984 data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *, 985 sizeof(struct ether_hdr) + 986 offsetof(struct ipv4_hdr, time_to_live))); 987 data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *, 988 sizeof(struct ether_hdr) + 989 offsetof(struct ipv4_hdr, time_to_live))); 990 data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *, 991 sizeof(struct ether_hdr) + 992 offsetof(struct ipv4_hdr, time_to_live))); 993 994 key[0].xmm = _mm_and_si128(data[0], mask0); 995 key[1].xmm = _mm_and_si128(data[1], mask0); 996 key[2].xmm = _mm_and_si128(data[2], mask0); 997 key[3].xmm = _mm_and_si128(data[3], mask0); 998 key[4].xmm = _mm_and_si128(data[4], mask0); 999 key[5].xmm = _mm_and_si128(data[5], mask0); 1000 key[6].xmm = _mm_and_si128(data[6], mask0); 1001 key[7].xmm = _mm_and_si128(data[7], mask0); 1002 1003 const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3], 1004 &key[4], &key[5], &key[6], &key[7]}; 1005 1006 rte_hash_lookup_bulk(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, 1007 &key_array[0], 8, ret); 1008 dst_port[0] = ((ret[0] < 0) ? portid : ipv4_l3fwd_out_if[ret[0]]); 1009 dst_port[1] = ((ret[1] < 0) ? portid : ipv4_l3fwd_out_if[ret[1]]); 1010 dst_port[2] = ((ret[2] < 0) ? portid : ipv4_l3fwd_out_if[ret[2]]); 1011 dst_port[3] = ((ret[3] < 0) ? portid : ipv4_l3fwd_out_if[ret[3]]); 1012 dst_port[4] = ((ret[4] < 0) ? portid : ipv4_l3fwd_out_if[ret[4]]); 1013 dst_port[5] = ((ret[5] < 0) ? portid : ipv4_l3fwd_out_if[ret[5]]); 1014 dst_port[6] = ((ret[6] < 0) ? portid : ipv4_l3fwd_out_if[ret[6]]); 1015 dst_port[7] = ((ret[7] < 0) ? 
portid : ipv4_l3fwd_out_if[ret[7]]); 1016 1017 if (dst_port[0] >= RTE_MAX_ETHPORTS || 1018 (enabled_port_mask & 1 << dst_port[0]) == 0) 1019 dst_port[0] = portid; 1020 if (dst_port[1] >= RTE_MAX_ETHPORTS || 1021 (enabled_port_mask & 1 << dst_port[1]) == 0) 1022 dst_port[1] = portid; 1023 if (dst_port[2] >= RTE_MAX_ETHPORTS || 1024 (enabled_port_mask & 1 << dst_port[2]) == 0) 1025 dst_port[2] = portid; 1026 if (dst_port[3] >= RTE_MAX_ETHPORTS || 1027 (enabled_port_mask & 1 << dst_port[3]) == 0) 1028 dst_port[3] = portid; 1029 if (dst_port[4] >= RTE_MAX_ETHPORTS || 1030 (enabled_port_mask & 1 << dst_port[4]) == 0) 1031 dst_port[4] = portid; 1032 if (dst_port[5] >= RTE_MAX_ETHPORTS || 1033 (enabled_port_mask & 1 << dst_port[5]) == 0) 1034 dst_port[5] = portid; 1035 if (dst_port[6] >= RTE_MAX_ETHPORTS || 1036 (enabled_port_mask & 1 << dst_port[6]) == 0) 1037 dst_port[6] = portid; 1038 if (dst_port[7] >= RTE_MAX_ETHPORTS || 1039 (enabled_port_mask & 1 << dst_port[7]) == 0) 1040 dst_port[7] = portid; 1041 1042 #ifdef DO_RFC_1812_CHECKS 1043 /* Update time to live and header checksum */ 1044 --(ipv4_hdr[0]->time_to_live); 1045 --(ipv4_hdr[1]->time_to_live); 1046 --(ipv4_hdr[2]->time_to_live); 1047 --(ipv4_hdr[3]->time_to_live); 1048 ++(ipv4_hdr[0]->hdr_checksum); 1049 ++(ipv4_hdr[1]->hdr_checksum); 1050 ++(ipv4_hdr[2]->hdr_checksum); 1051 ++(ipv4_hdr[3]->hdr_checksum); 1052 --(ipv4_hdr[4]->time_to_live); 1053 --(ipv4_hdr[5]->time_to_live); 1054 --(ipv4_hdr[6]->time_to_live); 1055 --(ipv4_hdr[7]->time_to_live); 1056 ++(ipv4_hdr[4]->hdr_checksum); 1057 ++(ipv4_hdr[5]->hdr_checksum); 1058 ++(ipv4_hdr[6]->hdr_checksum); 1059 ++(ipv4_hdr[7]->hdr_checksum); 1060 #endif 1061 1062 /* dst addr */ 1063 *(uint64_t *)ð_hdr[0]->d_addr = dest_eth_addr[dst_port[0]]; 1064 *(uint64_t *)ð_hdr[1]->d_addr = dest_eth_addr[dst_port[1]]; 1065 *(uint64_t *)ð_hdr[2]->d_addr = dest_eth_addr[dst_port[2]]; 1066 *(uint64_t *)ð_hdr[3]->d_addr = dest_eth_addr[dst_port[3]]; 1067 *(uint64_t *)ð_hdr[4]->d_addr = dest_eth_addr[dst_port[4]]; 1068 *(uint64_t *)ð_hdr[5]->d_addr = dest_eth_addr[dst_port[5]]; 1069 *(uint64_t *)ð_hdr[6]->d_addr = dest_eth_addr[dst_port[6]]; 1070 *(uint64_t *)ð_hdr[7]->d_addr = dest_eth_addr[dst_port[7]]; 1071 1072 /* src addr */ 1073 ether_addr_copy(&ports_eth_addr[dst_port[0]], ð_hdr[0]->s_addr); 1074 ether_addr_copy(&ports_eth_addr[dst_port[1]], ð_hdr[1]->s_addr); 1075 ether_addr_copy(&ports_eth_addr[dst_port[2]], ð_hdr[2]->s_addr); 1076 ether_addr_copy(&ports_eth_addr[dst_port[3]], ð_hdr[3]->s_addr); 1077 ether_addr_copy(&ports_eth_addr[dst_port[4]], ð_hdr[4]->s_addr); 1078 ether_addr_copy(&ports_eth_addr[dst_port[5]], ð_hdr[5]->s_addr); 1079 ether_addr_copy(&ports_eth_addr[dst_port[6]], ð_hdr[6]->s_addr); 1080 ether_addr_copy(&ports_eth_addr[dst_port[7]], ð_hdr[7]->s_addr); 1081 1082 send_single_packet(m[0], (uint8_t)dst_port[0]); 1083 send_single_packet(m[1], (uint8_t)dst_port[1]); 1084 send_single_packet(m[2], (uint8_t)dst_port[2]); 1085 send_single_packet(m[3], (uint8_t)dst_port[3]); 1086 send_single_packet(m[4], (uint8_t)dst_port[4]); 1087 send_single_packet(m[5], (uint8_t)dst_port[5]); 1088 send_single_packet(m[6], (uint8_t)dst_port[6]); 1089 send_single_packet(m[7], (uint8_t)dst_port[7]); 1090 1091 } 1092 1093 static inline void get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0, 1094 __m128i mask1, union ipv6_5tuple_host *key) 1095 { 1096 __m128i tmpdata0 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0, 1097 __m128i *, sizeof(struct ether_hdr) + 1098 offsetof(struct ipv6_hdr, payload_len))); 
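	/*
	 * The 5-tuple is gathered with three unaligned 16-byte loads starting
	 * at payload_len: tmpdata0 above covers the protocol and the lower
	 * 96 bits of the source address, the two loads below cover the rest
	 * of the addresses and the L4 ports; the masks zero out the non-key
	 * fields.
	 */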
	__m128i tmpdata1 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
			__m128i *, sizeof(struct ether_hdr) +
			offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i)));
	__m128i tmpdata2 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
			__m128i *, sizeof(struct ether_hdr) +
			offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i) +
			sizeof(__m128i)));
	key->xmm[0] = _mm_and_si128(tmpdata0, mask0);
	key->xmm[1] = tmpdata1;
	key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
}

static inline void
simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint16_t portid)
{
	int32_t ret[8];
	uint16_t dst_port[8];
	struct ether_hdr *eth_hdr[8];
	union ipv6_5tuple_host key[8];

	__attribute__((unused)) struct ipv6_hdr *ipv6_hdr[8];

	eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
	eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
	eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
	eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
	eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
	eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
	eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
	eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);

	/* Handle IPv6 headers.*/
	ipv6_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv6_hdr *,
			sizeof(struct ether_hdr));
	ipv6_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv6_hdr *,
			sizeof(struct ether_hdr));
	ipv6_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv6_hdr *,
			sizeof(struct ether_hdr));
	ipv6_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv6_hdr *,
			sizeof(struct ether_hdr));
	ipv6_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv6_hdr *,
			sizeof(struct ether_hdr));
	ipv6_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv6_hdr *,
			sizeof(struct ether_hdr));
	ipv6_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv6_hdr *,
			sizeof(struct ether_hdr));
	ipv6_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv6_hdr *,
			sizeof(struct ether_hdr));

	get_ipv6_5tuple(m[0], mask1, mask2, &key[0]);
	get_ipv6_5tuple(m[1], mask1, mask2, &key[1]);
	get_ipv6_5tuple(m[2], mask1, mask2, &key[2]);
	get_ipv6_5tuple(m[3], mask1, mask2, &key[3]);
	get_ipv6_5tuple(m[4], mask1, mask2, &key[4]);
	get_ipv6_5tuple(m[5], mask1, mask2, &key[5]);
	get_ipv6_5tuple(m[6], mask1, mask2, &key[6]);
	get_ipv6_5tuple(m[7], mask1, mask2, &key[7]);

	const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
			&key[4], &key[5], &key[6], &key[7]};

	/* Look up all 8 keys in a single bulk call. */
	rte_hash_lookup_bulk(RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
			&key_array[0], 8, ret);
	dst_port[0] = ((ret[0] < 0) ? portid : ipv6_l3fwd_out_if[ret[0]]);
	dst_port[1] = ((ret[1] < 0) ? portid : ipv6_l3fwd_out_if[ret[1]]);
	dst_port[2] = ((ret[2] < 0) ? portid : ipv6_l3fwd_out_if[ret[2]]);
	dst_port[3] = ((ret[3] < 0) ? portid : ipv6_l3fwd_out_if[ret[3]]);
	dst_port[4] = ((ret[4] < 0) ? portid : ipv6_l3fwd_out_if[ret[4]]);
	dst_port[5] = ((ret[5] < 0) ? portid : ipv6_l3fwd_out_if[ret[5]]);
	dst_port[6] = ((ret[6] < 0) ? portid : ipv6_l3fwd_out_if[ret[6]]);
	dst_port[7] = ((ret[7] < 0) ?
portid : ipv6_l3fwd_out_if[ret[7]]); 1170 1171 if (dst_port[0] >= RTE_MAX_ETHPORTS || 1172 (enabled_port_mask & 1 << dst_port[0]) == 0) 1173 dst_port[0] = portid; 1174 if (dst_port[1] >= RTE_MAX_ETHPORTS || 1175 (enabled_port_mask & 1 << dst_port[1]) == 0) 1176 dst_port[1] = portid; 1177 if (dst_port[2] >= RTE_MAX_ETHPORTS || 1178 (enabled_port_mask & 1 << dst_port[2]) == 0) 1179 dst_port[2] = portid; 1180 if (dst_port[3] >= RTE_MAX_ETHPORTS || 1181 (enabled_port_mask & 1 << dst_port[3]) == 0) 1182 dst_port[3] = portid; 1183 if (dst_port[4] >= RTE_MAX_ETHPORTS || 1184 (enabled_port_mask & 1 << dst_port[4]) == 0) 1185 dst_port[4] = portid; 1186 if (dst_port[5] >= RTE_MAX_ETHPORTS || 1187 (enabled_port_mask & 1 << dst_port[5]) == 0) 1188 dst_port[5] = portid; 1189 if (dst_port[6] >= RTE_MAX_ETHPORTS || 1190 (enabled_port_mask & 1 << dst_port[6]) == 0) 1191 dst_port[6] = portid; 1192 if (dst_port[7] >= RTE_MAX_ETHPORTS || 1193 (enabled_port_mask & 1 << dst_port[7]) == 0) 1194 dst_port[7] = portid; 1195 1196 /* dst addr */ 1197 *(uint64_t *)ð_hdr[0]->d_addr = dest_eth_addr[dst_port[0]]; 1198 *(uint64_t *)ð_hdr[1]->d_addr = dest_eth_addr[dst_port[1]]; 1199 *(uint64_t *)ð_hdr[2]->d_addr = dest_eth_addr[dst_port[2]]; 1200 *(uint64_t *)ð_hdr[3]->d_addr = dest_eth_addr[dst_port[3]]; 1201 *(uint64_t *)ð_hdr[4]->d_addr = dest_eth_addr[dst_port[4]]; 1202 *(uint64_t *)ð_hdr[5]->d_addr = dest_eth_addr[dst_port[5]]; 1203 *(uint64_t *)ð_hdr[6]->d_addr = dest_eth_addr[dst_port[6]]; 1204 *(uint64_t *)ð_hdr[7]->d_addr = dest_eth_addr[dst_port[7]]; 1205 1206 /* src addr */ 1207 ether_addr_copy(&ports_eth_addr[dst_port[0]], ð_hdr[0]->s_addr); 1208 ether_addr_copy(&ports_eth_addr[dst_port[1]], ð_hdr[1]->s_addr); 1209 ether_addr_copy(&ports_eth_addr[dst_port[2]], ð_hdr[2]->s_addr); 1210 ether_addr_copy(&ports_eth_addr[dst_port[3]], ð_hdr[3]->s_addr); 1211 ether_addr_copy(&ports_eth_addr[dst_port[4]], ð_hdr[4]->s_addr); 1212 ether_addr_copy(&ports_eth_addr[dst_port[5]], ð_hdr[5]->s_addr); 1213 ether_addr_copy(&ports_eth_addr[dst_port[6]], ð_hdr[6]->s_addr); 1214 ether_addr_copy(&ports_eth_addr[dst_port[7]], ð_hdr[7]->s_addr); 1215 1216 send_single_packet(m[0], dst_port[0]); 1217 send_single_packet(m[1], dst_port[1]); 1218 send_single_packet(m[2], dst_port[2]); 1219 send_single_packet(m[3], dst_port[3]); 1220 send_single_packet(m[4], dst_port[4]); 1221 send_single_packet(m[5], dst_port[5]); 1222 send_single_packet(m[6], dst_port[6]); 1223 send_single_packet(m[7], dst_port[7]); 1224 1225 } 1226 #endif /* APP_LOOKUP_METHOD */ 1227 1228 static __rte_always_inline void 1229 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid) 1230 { 1231 struct ether_hdr *eth_hdr; 1232 struct ipv4_hdr *ipv4_hdr; 1233 uint16_t dst_port; 1234 1235 eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); 1236 1237 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 1238 /* Handle IPv4 headers.*/ 1239 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, 1240 sizeof(struct ether_hdr)); 1241 1242 #ifdef DO_RFC_1812_CHECKS 1243 /* Check to make sure the packet is valid (RFC1812) */ 1244 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { 1245 rte_pktmbuf_free(m); 1246 return; 1247 } 1248 #endif 1249 1250 dst_port = get_ipv4_dst_port(ipv4_hdr, portid, 1251 RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct); 1252 if (dst_port >= RTE_MAX_ETHPORTS || 1253 (enabled_port_mask & 1 << dst_port) == 0) 1254 dst_port = portid; 1255 1256 #ifdef DO_RFC_1812_CHECKS 1257 /* Update time to live and header checksum */ 1258 --(ipv4_hdr->time_to_live); 1259 
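		/*
		 * Simplified incremental checksum update (cf. RFC 1141): the
		 * increment below offsets the TTL decrement; the end-around
		 * carry is not handled.
		 */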
++(ipv4_hdr->hdr_checksum); 1260 #endif 1261 /* dst addr */ 1262 *(uint64_t *)ð_hdr->d_addr = dest_eth_addr[dst_port]; 1263 1264 /* src addr */ 1265 ether_addr_copy(&ports_eth_addr[dst_port], ð_hdr->s_addr); 1266 1267 send_single_packet(m, dst_port); 1268 } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { 1269 /* Handle IPv6 headers.*/ 1270 struct ipv6_hdr *ipv6_hdr; 1271 1272 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, 1273 sizeof(struct ether_hdr)); 1274 1275 dst_port = get_ipv6_dst_port(ipv6_hdr, portid, 1276 RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct); 1277 1278 if (dst_port >= RTE_MAX_ETHPORTS || 1279 (enabled_port_mask & 1 << dst_port) == 0) 1280 dst_port = portid; 1281 1282 /* dst addr */ 1283 *(uint64_t *)ð_hdr->d_addr = dest_eth_addr[dst_port]; 1284 1285 /* src addr */ 1286 ether_addr_copy(&ports_eth_addr[dst_port], ð_hdr->s_addr); 1287 1288 send_single_packet(m, dst_port); 1289 } else 1290 /* Free the mbuf that contains non-IPV4/IPV6 packet */ 1291 rte_pktmbuf_free(m); 1292 } 1293 1294 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ 1295 (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) 1296 #ifdef DO_RFC_1812_CHECKS 1297 1298 #define IPV4_MIN_VER_IHL 0x45 1299 #define IPV4_MAX_VER_IHL 0x4f 1300 #define IPV4_MAX_VER_IHL_DIFF (IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL) 1301 1302 /* Minimum value of IPV4 total length (20B) in network byte order. */ 1303 #define IPV4_MIN_LEN_BE (sizeof(struct ipv4_hdr) << 8) 1304 1305 /* 1306 * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2: 1307 * - The IP version number must be 4. 1308 * - The IP header length field must be large enough to hold the 1309 * minimum length legal IP datagram (20 bytes = 5 words). 1310 * - The IP total length field must be large enough to hold the IP 1311 * datagram header, whose length is specified in the IP header length 1312 * field. 1313 * If we encounter invalid IPV4 packet, then set destination port for it 1314 * to BAD_PORT value. 1315 */ 1316 static __rte_always_inline void 1317 rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) 1318 { 1319 uint8_t ihl; 1320 1321 if (RTE_ETH_IS_IPV4_HDR(ptype)) { 1322 ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL; 1323 1324 ipv4_hdr->time_to_live--; 1325 ipv4_hdr->hdr_checksum++; 1326 1327 if (ihl > IPV4_MAX_VER_IHL_DIFF || 1328 ((uint8_t)ipv4_hdr->total_length == 0 && 1329 ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) { 1330 dp[0] = BAD_PORT; 1331 } 1332 } 1333 } 1334 1335 #else 1336 #define rfc1812_process(mb, dp, ptype) do { } while (0) 1337 #endif /* DO_RFC_1812_CHECKS */ 1338 #endif /* APP_LOOKUP_LPM && ENABLE_MULTI_BUFFER_OPTIMIZE */ 1339 1340 1341 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ 1342 (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) 1343 1344 static __rte_always_inline uint16_t 1345 get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, uint16_t portid) 1346 { 1347 uint32_t next_hop; 1348 struct ipv6_hdr *ipv6_hdr; 1349 struct ether_hdr *eth_hdr; 1350 1351 if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { 1352 return (uint16_t) ((rte_lpm_lookup( 1353 RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dst_ipv4, 1354 &next_hop) == 0) ? next_hop : portid); 1355 1356 } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) { 1357 1358 eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); 1359 ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1); 1360 1361 return (uint16_t) ((rte_lpm6_lookup( 1362 RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct, 1363 ipv6_hdr->dst_addr, &next_hop) == 0) ? 
1364 next_hop : portid); 1365 1366 } 1367 1368 return portid; 1369 } 1370 1371 static inline void 1372 process_packet(struct rte_mbuf *pkt, uint16_t *dst_port, uint16_t portid) 1373 { 1374 struct ether_hdr *eth_hdr; 1375 struct ipv4_hdr *ipv4_hdr; 1376 uint32_t dst_ipv4; 1377 uint16_t dp; 1378 __m128i te, ve; 1379 1380 eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); 1381 ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); 1382 1383 dst_ipv4 = ipv4_hdr->dst_addr; 1384 dst_ipv4 = rte_be_to_cpu_32(dst_ipv4); 1385 dp = get_dst_port(pkt, dst_ipv4, portid); 1386 1387 te = _mm_load_si128((__m128i *)eth_hdr); 1388 ve = val_eth[dp]; 1389 1390 dst_port[0] = dp; 1391 rfc1812_process(ipv4_hdr, dst_port, pkt->packet_type); 1392 1393 te = _mm_blend_epi16(te, ve, MASK_ETH); 1394 _mm_store_si128((__m128i *)eth_hdr, te); 1395 } 1396 1397 /* 1398 * Read packet_type and destination IPV4 addresses from 4 mbufs. 1399 */ 1400 static inline void 1401 processx4_step1(struct rte_mbuf *pkt[FWDSTEP], 1402 __m128i *dip, 1403 uint32_t *ipv4_flag) 1404 { 1405 struct ipv4_hdr *ipv4_hdr; 1406 struct ether_hdr *eth_hdr; 1407 uint32_t x0, x1, x2, x3; 1408 1409 eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *); 1410 ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); 1411 x0 = ipv4_hdr->dst_addr; 1412 ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4; 1413 1414 eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *); 1415 ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); 1416 x1 = ipv4_hdr->dst_addr; 1417 ipv4_flag[0] &= pkt[1]->packet_type; 1418 1419 eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *); 1420 ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); 1421 x2 = ipv4_hdr->dst_addr; 1422 ipv4_flag[0] &= pkt[2]->packet_type; 1423 1424 eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *); 1425 ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); 1426 x3 = ipv4_hdr->dst_addr; 1427 ipv4_flag[0] &= pkt[3]->packet_type; 1428 1429 dip[0] = _mm_set_epi32(x3, x2, x1, x0); 1430 } 1431 1432 /* 1433 * Lookup into LPM for destination port. 1434 * If lookup fails, use incoming port (portid) as destination port. 1435 */ 1436 static inline void 1437 processx4_step2(__m128i dip, 1438 uint32_t ipv4_flag, 1439 uint16_t portid, 1440 struct rte_mbuf *pkt[FWDSTEP], 1441 uint16_t dprt[FWDSTEP]) 1442 { 1443 rte_xmm_t dst; 1444 const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 1445 4, 5, 6, 7, 0, 1, 2, 3); 1446 1447 /* Byte swap 4 IPV4 addresses. */ 1448 dip = _mm_shuffle_epi8(dip, bswap_mask); 1449 1450 /* if all 4 packets are IPV4. */ 1451 if (likely(ipv4_flag)) { 1452 rte_lpm_lookupx4(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dip, 1453 dst.u32, portid); 1454 1455 /* get rid of unused upper 16 bit for each dport. */ 1456 dst.x = _mm_packs_epi32(dst.x, dst.x); 1457 *(uint64_t *)dprt = dst.u64[0]; 1458 } else { 1459 dst.x = dip; 1460 dprt[0] = get_dst_port(pkt[0], dst.u32[0], portid); 1461 dprt[1] = get_dst_port(pkt[1], dst.u32[1], portid); 1462 dprt[2] = get_dst_port(pkt[2], dst.u32[2], portid); 1463 dprt[3] = get_dst_port(pkt[3], dst.u32[3], portid); 1464 } 1465 } 1466 1467 /* 1468 * Update source and destination MAC addresses in the ethernet header. 1469 * Perform RFC1812 checks and updates for IPV4 packets. 
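 * The 16-bit-lane blend with MASK_ETH (0x3f) rewrites only the first 12 bytes
 * of the header (destination and source MAC), leaving the EtherType intact.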
 */
static inline void
processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
{
	__m128i te[FWDSTEP];
	__m128i ve[FWDSTEP];
	__m128i *p[FWDSTEP];

	p[0] = rte_pktmbuf_mtod(pkt[0], __m128i *);
	p[1] = rte_pktmbuf_mtod(pkt[1], __m128i *);
	p[2] = rte_pktmbuf_mtod(pkt[2], __m128i *);
	p[3] = rte_pktmbuf_mtod(pkt[3], __m128i *);

	ve[0] = val_eth[dst_port[0]];
	te[0] = _mm_load_si128(p[0]);

	ve[1] = val_eth[dst_port[1]];
	te[1] = _mm_load_si128(p[1]);

	ve[2] = val_eth[dst_port[2]];
	te[2] = _mm_load_si128(p[2]);

	ve[3] = val_eth[dst_port[3]];
	te[3] = _mm_load_si128(p[3]);

	/* Update first 12 bytes, keep rest bytes intact. */
	te[0] = _mm_blend_epi16(te[0], ve[0], MASK_ETH);
	te[1] = _mm_blend_epi16(te[1], ve[1], MASK_ETH);
	te[2] = _mm_blend_epi16(te[2], ve[2], MASK_ETH);
	te[3] = _mm_blend_epi16(te[3], ve[3], MASK_ETH);

	_mm_store_si128(p[0], te[0]);
	_mm_store_si128(p[1], te[1]);
	_mm_store_si128(p[2], te[2]);
	_mm_store_si128(p[3], te[3]);

	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1),
			&dst_port[0], pkt[0]->packet_type);
	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1),
			&dst_port[1], pkt[1]->packet_type);
	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1),
			&dst_port[2], pkt[2]->packet_type);
	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1),
			&dst_port[3], pkt[3]->packet_type);
}

/*
 * We group consecutive packets with the same destination port into one burst.
 * To avoid extra latency this is done together with some other packet
 * processing, but after we have made a final decision about the packet's
 * destination.
 * To do this we maintain:
 * pnum - array of number of consecutive packets with the same dest port for
 * each packet in the input burst.
 * lp - pointer to the last updated element in the pnum.
 * dlp - dest port value lp corresponds to.
 */

#define GRPSZ	(1 << FWDSTEP)
#define GRPMSK	(GRPSZ - 1)

#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx)	do { \
	if (likely((dlp) == (dcp)[(idx)])) {         \
		(lp)[0]++;                           \
	} else {                                     \
		(dlp) = (dcp)[idx];                  \
		(lp) = (pn) + (idx);                 \
		(lp)[0] = 1;                         \
	}                                            \
} while (0)

/*
 * Group consecutive packets with the same destination port in bursts of 4.
 * Suppose we have an array of destination ports:
 * dst_port[] = {a, b, c, d, e, ... }
 * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
 * We do 4 comparisons at once and the result is a 4-bit mask.
 * This mask is used as an index into a prebuilt array of pnum values.
 */
static inline uint16_t *
port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2)
{
	static const struct {
		uint64_t pnum; /* prebuilt 4 values for pnum[]. */
		int32_t idx; /* index for new last updated element. */
		uint16_t lpv; /* add value to the last updated element.
*/ 1555 } gptbl[GRPSZ] = { 1556 { 1557 /* 0: a != b, b != c, c != d, d != e */ 1558 .pnum = UINT64_C(0x0001000100010001), 1559 .idx = 4, 1560 .lpv = 0, 1561 }, 1562 { 1563 /* 1: a == b, b != c, c != d, d != e */ 1564 .pnum = UINT64_C(0x0001000100010002), 1565 .idx = 4, 1566 .lpv = 1, 1567 }, 1568 { 1569 /* 2: a != b, b == c, c != d, d != e */ 1570 .pnum = UINT64_C(0x0001000100020001), 1571 .idx = 4, 1572 .lpv = 0, 1573 }, 1574 { 1575 /* 3: a == b, b == c, c != d, d != e */ 1576 .pnum = UINT64_C(0x0001000100020003), 1577 .idx = 4, 1578 .lpv = 2, 1579 }, 1580 { 1581 /* 4: a != b, b != c, c == d, d != e */ 1582 .pnum = UINT64_C(0x0001000200010001), 1583 .idx = 4, 1584 .lpv = 0, 1585 }, 1586 { 1587 /* 5: a == b, b != c, c == d, d != e */ 1588 .pnum = UINT64_C(0x0001000200010002), 1589 .idx = 4, 1590 .lpv = 1, 1591 }, 1592 { 1593 /* 6: a != b, b == c, c == d, d != e */ 1594 .pnum = UINT64_C(0x0001000200030001), 1595 .idx = 4, 1596 .lpv = 0, 1597 }, 1598 { 1599 /* 7: a == b, b == c, c == d, d != e */ 1600 .pnum = UINT64_C(0x0001000200030004), 1601 .idx = 4, 1602 .lpv = 3, 1603 }, 1604 { 1605 /* 8: a != b, b != c, c != d, d == e */ 1606 .pnum = UINT64_C(0x0002000100010001), 1607 .idx = 3, 1608 .lpv = 0, 1609 }, 1610 { 1611 /* 9: a == b, b != c, c != d, d == e */ 1612 .pnum = UINT64_C(0x0002000100010002), 1613 .idx = 3, 1614 .lpv = 1, 1615 }, 1616 { 1617 /* 0xa: a != b, b == c, c != d, d == e */ 1618 .pnum = UINT64_C(0x0002000100020001), 1619 .idx = 3, 1620 .lpv = 0, 1621 }, 1622 { 1623 /* 0xb: a == b, b == c, c != d, d == e */ 1624 .pnum = UINT64_C(0x0002000100020003), 1625 .idx = 3, 1626 .lpv = 2, 1627 }, 1628 { 1629 /* 0xc: a != b, b != c, c == d, d == e */ 1630 .pnum = UINT64_C(0x0002000300010001), 1631 .idx = 2, 1632 .lpv = 0, 1633 }, 1634 { 1635 /* 0xd: a == b, b != c, c == d, d == e */ 1636 .pnum = UINT64_C(0x0002000300010002), 1637 .idx = 2, 1638 .lpv = 1, 1639 }, 1640 { 1641 /* 0xe: a != b, b == c, c == d, d == e */ 1642 .pnum = UINT64_C(0x0002000300040001), 1643 .idx = 1, 1644 .lpv = 0, 1645 }, 1646 { 1647 /* 0xf: a == b, b == c, c == d, d == e */ 1648 .pnum = UINT64_C(0x0002000300040005), 1649 .idx = 0, 1650 .lpv = 4, 1651 }, 1652 }; 1653 1654 union { 1655 uint16_t u16[FWDSTEP + 1]; 1656 uint64_t u64; 1657 } *pnum = (void *)pn; 1658 1659 int32_t v; 1660 1661 dp1 = _mm_cmpeq_epi16(dp1, dp2); 1662 dp1 = _mm_unpacklo_epi16(dp1, dp1); 1663 v = _mm_movemask_ps((__m128)dp1); 1664 1665 /* update last port counter. */ 1666 lp[0] += gptbl[v].lpv; 1667 1668 /* if dest port value has changed. */ 1669 if (v != GRPMSK) { 1670 pnum->u64 = gptbl[v].pnum; 1671 pnum->u16[FWDSTEP] = 1; 1672 lp = pnum->u16 + gptbl[v].idx; 1673 } 1674 1675 return lp; 1676 } 1677 1678 #endif /* APP_LOOKUP_METHOD */ 1679 1680 static void 1681 process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx, 1682 uint16_t portid) 1683 { 1684 1685 int j; 1686 1687 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ 1688 (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) 1689 int32_t k; 1690 uint16_t dlp; 1691 uint16_t *lp; 1692 uint16_t dst_port[MAX_PKT_BURST]; 1693 __m128i dip[MAX_PKT_BURST / FWDSTEP]; 1694 uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; 1695 uint16_t pnum[MAX_PKT_BURST + 1]; 1696 #endif 1697 1698 1699 #if (ENABLE_MULTI_BUFFER_OPTIMIZE == 1) 1700 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1701 { 1702 /* 1703 * Send nb_rx - nb_rx%8 packets 1704 * in groups of 8. 
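	 * A common packet type is computed by AND-ing the packet_type of all
	 * 8 mbufs, so a group takes the IPv4 or IPv6 fast path only when
	 * every packet in it matches.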
1705 */ 1706 int32_t n = RTE_ALIGN_FLOOR(nb_rx, 8); 1707 1708 for (j = 0; j < n; j += 8) { 1709 uint32_t pkt_type = 1710 pkts_burst[j]->packet_type & 1711 pkts_burst[j+1]->packet_type & 1712 pkts_burst[j+2]->packet_type & 1713 pkts_burst[j+3]->packet_type & 1714 pkts_burst[j+4]->packet_type & 1715 pkts_burst[j+5]->packet_type & 1716 pkts_burst[j+6]->packet_type & 1717 pkts_burst[j+7]->packet_type; 1718 if (pkt_type & RTE_PTYPE_L3_IPV4) { 1719 simple_ipv4_fwd_8pkts(&pkts_burst[j], portid); 1720 } else if (pkt_type & 1721 RTE_PTYPE_L3_IPV6) { 1722 simple_ipv6_fwd_8pkts(&pkts_burst[j], portid); 1723 } else { 1724 l3fwd_simple_forward(pkts_burst[j], portid); 1725 l3fwd_simple_forward(pkts_burst[j+1], portid); 1726 l3fwd_simple_forward(pkts_burst[j+2], portid); 1727 l3fwd_simple_forward(pkts_burst[j+3], portid); 1728 l3fwd_simple_forward(pkts_burst[j+4], portid); 1729 l3fwd_simple_forward(pkts_burst[j+5], portid); 1730 l3fwd_simple_forward(pkts_burst[j+6], portid); 1731 l3fwd_simple_forward(pkts_burst[j+7], portid); 1732 } 1733 } 1734 for (; j < nb_rx ; j++) 1735 l3fwd_simple_forward(pkts_burst[j], portid); 1736 } 1737 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 1738 1739 k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); 1740 for (j = 0; j != k; j += FWDSTEP) 1741 processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP], 1742 &ipv4_flag[j / FWDSTEP]); 1743 1744 k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); 1745 for (j = 0; j != k; j += FWDSTEP) 1746 processx4_step2(dip[j / FWDSTEP], ipv4_flag[j / FWDSTEP], 1747 portid, &pkts_burst[j], &dst_port[j]); 1748 1749 /* 1750 * Finish packet processing and group consecutive 1751 * packets with the same destination port. 1752 */ 1753 k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); 1754 if (k != 0) { 1755 __m128i dp1, dp2; 1756 1757 lp = pnum; 1758 lp[0] = 1; 1759 1760 processx4_step3(pkts_burst, dst_port); 1761 1762 /* dp1: <d[0], d[1], d[2], d[3], ... > */ 1763 dp1 = _mm_loadu_si128((__m128i *)dst_port); 1764 1765 for (j = FWDSTEP; j != k; j += FWDSTEP) { 1766 processx4_step3(&pkts_burst[j], &dst_port[j]); 1767 1768 /* 1769 * dp2: 1770 * <d[j-3], d[j-2], d[j-1], d[j], ... > 1771 */ 1772 dp2 = _mm_loadu_si128( 1773 (__m128i *)&dst_port[j - FWDSTEP + 1]); 1774 lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); 1775 1776 /* 1777 * dp1: 1778 * <d[j], d[j+1], d[j+2], d[j+3], ... > 1779 */ 1780 dp1 = _mm_srli_si128(dp2, (FWDSTEP - 1) * 1781 sizeof(dst_port[0])); 1782 } 1783 1784 /* 1785 * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... > 1786 */ 1787 dp2 = _mm_shufflelo_epi16(dp1, 0xf9); 1788 lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); 1789 1790 /* 1791 * remove values added by the last repeated 1792 * dst port. 1793 */ 1794 lp[0]--; 1795 dlp = dst_port[j - 1]; 1796 } else { 1797 /* set dlp and lp to the never used values. */ 1798 dlp = BAD_PORT - 1; 1799 lp = pnum + MAX_PKT_BURST; 1800 } 1801 1802 /* Process up to last 3 packets one by one. */ 1803 switch (nb_rx % FWDSTEP) { 1804 case 3: 1805 process_packet(pkts_burst[j], dst_port + j, portid); 1806 GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); 1807 j++; 1808 /* fall-through */ 1809 case 2: 1810 process_packet(pkts_burst[j], dst_port + j, portid); 1811 GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); 1812 j++; 1813 /* fall-through */ 1814 case 1: 1815 process_packet(pkts_burst[j], dst_port + j, portid); 1816 GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); 1817 j++; 1818 } 1819 1820 /* 1821 * Send packets out, through destination port. 1822 * Consecuteve pacekts with the same destination port 1823 * are already grouped together. 
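	 * pnum[j] holds the length of the group that starts at position j,
	 * so the loop below advances j by that many packets at a time.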
1824 * If destination port for the packet equals BAD_PORT, 1825 * then free the packet without sending it out. 1826 */ 1827 for (j = 0; j < nb_rx; j += k) { 1828 1829 int32_t m; 1830 uint16_t pn; 1831 1832 pn = dst_port[j]; 1833 k = pnum[j]; 1834 1835 if (likely(pn != BAD_PORT)) 1836 send_packetsx4(pn, pkts_burst + j, k); 1837 else 1838 for (m = j; m != j + k; m++) 1839 rte_pktmbuf_free(pkts_burst[m]); 1840 1841 } 1842 1843 #endif /* APP_LOOKUP_METHOD */ 1844 #else /* ENABLE_MULTI_BUFFER_OPTIMIZE == 0 */ 1845 1846 /* Prefetch first packets */ 1847 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) 1848 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *)); 1849 1850 /* Prefetch and forward already prefetched packets */ 1851 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1852 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1853 j + PREFETCH_OFFSET], void *)); 1854 l3fwd_simple_forward(pkts_burst[j], portid); 1855 } 1856 1857 /* Forward remaining prefetched packets */ 1858 for (; j < nb_rx; j++) 1859 l3fwd_simple_forward(pkts_burst[j], portid); 1860 1861 #endif /* ENABLE_MULTI_BUFFER_OPTIMIZE */ 1862 1863 } 1864 1865 #if (APP_CPU_LOAD > 0) 1866 1867 /* 1868 * CPU-load stats collector 1869 */ 1870 static int 1871 cpu_load_collector(__rte_unused void *arg) { 1872 unsigned i, j, k; 1873 uint64_t hits; 1874 uint64_t prev_tsc, diff_tsc, cur_tsc; 1875 uint64_t total[MAX_CPU] = { 0 }; 1876 unsigned min_cpu = MAX_CPU; 1877 unsigned max_cpu = 0; 1878 unsigned cpu_id; 1879 int busy_total = 0; 1880 int busy_flag = 0; 1881 1882 unsigned int n_thread_per_cpu[MAX_CPU] = { 0 }; 1883 struct thread_conf *thread_per_cpu[MAX_CPU][MAX_THREAD]; 1884 1885 struct thread_conf *thread_conf; 1886 1887 const uint64_t interval_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 1888 US_PER_S * CPU_LOAD_TIMEOUT_US; 1889 1890 prev_tsc = 0; 1891 /* 1892 * Wait for all threads 1893 */ 1894 1895 printf("Waiting for %d rx threads and %d tx threads\n", n_rx_thread, 1896 n_tx_thread); 1897 1898 while (rte_atomic16_read(&rx_counter) < n_rx_thread) 1899 rte_pause(); 1900 1901 while (rte_atomic16_read(&tx_counter) < n_tx_thread) 1902 rte_pause(); 1903 1904 for (i = 0; i < n_rx_thread; i++) { 1905 1906 thread_conf = &rx_thread[i].conf; 1907 cpu_id = thread_conf->cpu_id; 1908 thread_per_cpu[cpu_id][n_thread_per_cpu[cpu_id]++] = thread_conf; 1909 1910 if (cpu_id > max_cpu) 1911 max_cpu = cpu_id; 1912 if (cpu_id < min_cpu) 1913 min_cpu = cpu_id; 1914 } 1915 for (i = 0; i < n_tx_thread; i++) { 1916 1917 thread_conf = &tx_thread[i].conf; 1918 cpu_id = thread_conf->cpu_id; 1919 thread_per_cpu[cpu_id][n_thread_per_cpu[cpu_id]++] = thread_conf; 1920 1921 if (thread_conf->cpu_id > max_cpu) 1922 max_cpu = thread_conf->cpu_id; 1923 if (thread_conf->cpu_id < min_cpu) 1924 min_cpu = thread_conf->cpu_id; 1925 } 1926 1927 while (1) { 1928 1929 cpu_load.counter++; 1930 for (i = min_cpu; i <= max_cpu; i++) { 1931 for (j = 0; j < MAX_CPU_COUNTER; j++) { 1932 for (k = 0; k < n_thread_per_cpu[i]; k++) 1933 if (thread_per_cpu[i][k]->busy[j]) { 1934 busy_flag = 1; 1935 break; 1936 } 1937 if (busy_flag) { 1938 cpu_load.hits[j][i]++; 1939 busy_total = 1; 1940 busy_flag = 0; 1941 } 1942 } 1943 1944 if (busy_total) { 1945 total[i]++; 1946 busy_total = 0; 1947 } 1948 } 1949 1950 cur_tsc = rte_rdtsc(); 1951 1952 diff_tsc = cur_tsc - prev_tsc; 1953 if (unlikely(diff_tsc > interval_tsc)) { 1954 1955 printf("\033c"); 1956 1957 printf("Cpu usage for %d rx threads and %d tx threads:\n\n", 1958 n_rx_thread, n_tx_thread); 1959 1960 printf("cpu# proc%% poll%% overhead%%\n\n"); 
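		/*
		 * Worked example for the figures printed below: if the collector
		 * looped cpu_load.counter = 1000 times in this interval and a
		 * given CPU was sampled busy in CPU_PROCESS for 300 of those
		 * iterations and in CPU_POLL for 650 (assuming the two never
		 * overlap), its row reads roughly "30 65 5" for proc%, poll%
		 * and overhead%.
		 */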
1961 1962 for (i = min_cpu; i <= max_cpu; i++) { 1963 hits = 0; 1964 printf("CPU %d:", i); 1965 for (j = 0; j < MAX_CPU_COUNTER; j++) { 1966 printf("%7" PRIu64 "", 1967 cpu_load.hits[j][i] * 100 / cpu_load.counter); 1968 hits += cpu_load.hits[j][i]; 1969 cpu_load.hits[j][i] = 0; 1970 } 1971 printf("%7" PRIu64 "\n", 1972 100 - total[i] * 100 / cpu_load.counter); 1973 total[i] = 0; 1974 } 1975 cpu_load.counter = 0; 1976 1977 prev_tsc = cur_tsc; 1978 } 1979 1980 } 1981 } 1982 #endif /* APP_CPU_LOAD */ 1983 1984 /* 1985 * Null processing lthread loop 1986 * 1987 * This loop is used to start empty scheduler on lcore. 1988 */ 1989 static void * 1990 lthread_null(__rte_unused void *args) 1991 { 1992 int lcore_id = rte_lcore_id(); 1993 1994 RTE_LOG(INFO, L3FWD, "Starting scheduler on lcore %d.\n", lcore_id); 1995 lthread_exit(NULL); 1996 return NULL; 1997 } 1998 1999 /* main processing loop */ 2000 static void * 2001 lthread_tx_per_ring(void *dummy) 2002 { 2003 int nb_rx; 2004 uint16_t portid; 2005 struct rte_ring *ring; 2006 struct thread_tx_conf *tx_conf; 2007 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 2008 struct lthread_cond *ready; 2009 2010 tx_conf = (struct thread_tx_conf *)dummy; 2011 ring = tx_conf->ring; 2012 ready = *tx_conf->ready; 2013 2014 lthread_set_data((void *)tx_conf); 2015 2016 /* 2017 * Move this lthread to lcore 2018 */ 2019 lthread_set_affinity(tx_conf->conf.lcore_id); 2020 2021 RTE_LOG(INFO, L3FWD, "entering main tx loop on lcore %u\n", rte_lcore_id()); 2022 2023 nb_rx = 0; 2024 rte_atomic16_inc(&tx_counter); 2025 while (1) { 2026 2027 /* 2028 * Read packet from ring 2029 */ 2030 SET_CPU_BUSY(tx_conf, CPU_POLL); 2031 nb_rx = rte_ring_sc_dequeue_burst(ring, (void **)pkts_burst, 2032 MAX_PKT_BURST, NULL); 2033 SET_CPU_IDLE(tx_conf, CPU_POLL); 2034 2035 if (nb_rx > 0) { 2036 SET_CPU_BUSY(tx_conf, CPU_PROCESS); 2037 portid = pkts_burst[0]->port; 2038 process_burst(pkts_burst, nb_rx, portid); 2039 SET_CPU_IDLE(tx_conf, CPU_PROCESS); 2040 lthread_yield(); 2041 } else 2042 lthread_cond_wait(ready, 0); 2043 2044 } 2045 return NULL; 2046 } 2047 2048 /* 2049 * Main tx-lthreads spawner lthread. 2050 * 2051 * This lthread is used to spawn one new lthread per ring from producers. 
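 *
 * Data path: lthread_rx (producer) --> SP/SC rte_ring -->
 * lthread_tx_per_ring (consumer, spawned below) --> per-port tx buffer,
 * while the loop in this function only wakes up periodically to drain
 * tx buffers that have not filled up to a complete burst.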
2052 * 2053 */ 2054 static void * 2055 lthread_tx(void *args) 2056 { 2057 struct lthread *lt; 2058 2059 unsigned lcore_id; 2060 uint16_t portid; 2061 struct thread_tx_conf *tx_conf; 2062 2063 tx_conf = (struct thread_tx_conf *)args; 2064 lthread_set_data((void *)tx_conf); 2065 2066 /* 2067 * Move this lthread to the selected lcore 2068 */ 2069 lthread_set_affinity(tx_conf->conf.lcore_id); 2070 2071 /* 2072 * Spawn tx readers (one per input ring) 2073 */ 2074 lthread_create(<, tx_conf->conf.lcore_id, lthread_tx_per_ring, 2075 (void *)tx_conf); 2076 2077 lcore_id = rte_lcore_id(); 2078 2079 RTE_LOG(INFO, L3FWD, "Entering Tx main loop on lcore %u\n", lcore_id); 2080 2081 tx_conf->conf.cpu_id = sched_getcpu(); 2082 while (1) { 2083 2084 lthread_sleep(BURST_TX_DRAIN_US * 1000); 2085 2086 /* 2087 * TX burst queue drain 2088 */ 2089 for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { 2090 if (tx_conf->tx_mbufs[portid].len == 0) 2091 continue; 2092 SET_CPU_BUSY(tx_conf, CPU_PROCESS); 2093 send_burst(tx_conf, tx_conf->tx_mbufs[portid].len, portid); 2094 SET_CPU_IDLE(tx_conf, CPU_PROCESS); 2095 tx_conf->tx_mbufs[portid].len = 0; 2096 } 2097 2098 } 2099 return NULL; 2100 } 2101 2102 static void * 2103 lthread_rx(void *dummy) 2104 { 2105 int ret; 2106 uint16_t nb_rx; 2107 int i; 2108 uint16_t portid; 2109 uint8_t queueid; 2110 int worker_id; 2111 int len[RTE_MAX_LCORE] = { 0 }; 2112 int old_len, new_len; 2113 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 2114 struct thread_rx_conf *rx_conf; 2115 2116 rx_conf = (struct thread_rx_conf *)dummy; 2117 lthread_set_data((void *)rx_conf); 2118 2119 /* 2120 * Move this lthread to lcore 2121 */ 2122 lthread_set_affinity(rx_conf->conf.lcore_id); 2123 2124 if (rx_conf->n_rx_queue == 0) { 2125 RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", rte_lcore_id()); 2126 return NULL; 2127 } 2128 2129 RTE_LOG(INFO, L3FWD, "Entering main Rx loop on lcore %u\n", rte_lcore_id()); 2130 2131 for (i = 0; i < rx_conf->n_rx_queue; i++) { 2132 2133 portid = rx_conf->rx_queue_list[i].port_id; 2134 queueid = rx_conf->rx_queue_list[i].queue_id; 2135 RTE_LOG(INFO, L3FWD, 2136 " -- lcoreid=%u portid=%u rxqueueid=%hhu\n", 2137 rte_lcore_id(), portid, queueid); 2138 } 2139 2140 /* 2141 * Init all condition variables (one per rx thread) 2142 */ 2143 for (i = 0; i < rx_conf->n_rx_queue; i++) 2144 lthread_cond_init(NULL, &rx_conf->ready[i], NULL); 2145 2146 worker_id = 0; 2147 2148 rx_conf->conf.cpu_id = sched_getcpu(); 2149 rte_atomic16_inc(&rx_counter); 2150 while (1) { 2151 2152 /* 2153 * Read packet from RX queues 2154 */ 2155 for (i = 0; i < rx_conf->n_rx_queue; ++i) { 2156 portid = rx_conf->rx_queue_list[i].port_id; 2157 queueid = rx_conf->rx_queue_list[i].queue_id; 2158 2159 SET_CPU_BUSY(rx_conf, CPU_POLL); 2160 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 2161 MAX_PKT_BURST); 2162 SET_CPU_IDLE(rx_conf, CPU_POLL); 2163 2164 if (nb_rx != 0) { 2165 worker_id = (worker_id + 1) % rx_conf->n_ring; 2166 old_len = len[worker_id]; 2167 2168 SET_CPU_BUSY(rx_conf, CPU_PROCESS); 2169 ret = rte_ring_sp_enqueue_burst( 2170 rx_conf->ring[worker_id], 2171 (void **) pkts_burst, 2172 nb_rx, NULL); 2173 2174 new_len = old_len + ret; 2175 2176 if (new_len >= BURST_SIZE) { 2177 lthread_cond_signal(rx_conf->ready[worker_id]); 2178 new_len = 0; 2179 } 2180 2181 len[worker_id] = new_len; 2182 2183 if (unlikely(ret < nb_rx)) { 2184 uint32_t k; 2185 2186 for (k = ret; k < nb_rx; k++) { 2187 struct rte_mbuf *m = pkts_burst[k]; 2188 2189 rte_pktmbuf_free(m); 2190 } 2191 } 2192 
SET_CPU_IDLE(rx_conf, CPU_PROCESS); 2193 } 2194 2195 lthread_yield(); 2196 } 2197 } 2198 return NULL; 2199 } 2200 2201 /* 2202 * Start scheduler with initial lthread on lcore 2203 * 2204 * This lthread loop spawns all rx and tx lthreads on master lcore 2205 */ 2206 2207 static void * 2208 lthread_spawner(__rte_unused void *arg) 2209 { 2210 struct lthread *lt[MAX_THREAD]; 2211 int i; 2212 int n_thread = 0; 2213 2214 printf("Entering lthread_spawner\n"); 2215 2216 /* 2217 * Create producers (rx threads) on default lcore 2218 */ 2219 for (i = 0; i < n_rx_thread; i++) { 2220 rx_thread[i].conf.thread_id = i; 2221 lthread_create(&lt[n_thread], -1, lthread_rx, 2222 (void *)&rx_thread[i]); 2223 n_thread++; 2224 } 2225 2226 /* 2227 * Wait for all producers. Since some producers may be started on the same 2228 * scheduler as this lthread, yielding is required to let them run and 2229 * prevent a deadlock here. 2230 */ 2231 while (rte_atomic16_read(&rx_counter) < n_rx_thread) 2232 lthread_sleep(100000); 2233 2234 /* 2235 * Create consumers (tx threads) on default lcore_id 2236 */ 2237 for (i = 0; i < n_tx_thread; i++) { 2238 tx_thread[i].conf.thread_id = i; 2239 lthread_create(&lt[n_thread], -1, lthread_tx, 2240 (void *)&tx_thread[i]); 2241 n_thread++; 2242 } 2243 2244 /* 2245 * Wait for all threads to finish 2246 */ 2247 for (i = 0; i < n_thread; i++) 2248 lthread_join(lt[i], NULL); 2249 2250 return NULL; 2251 } 2252 2253 /* 2254 * Start master scheduler with initial lthread spawning rx and tx lthreads 2255 * (main_lthread_master). 2256 */ 2257 static int 2258 lthread_master_spawner(__rte_unused void *arg) { 2259 struct lthread *lt; 2260 int lcore_id = rte_lcore_id(); 2261 2262 RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id]; 2263 lthread_create(&lt, -1, lthread_spawner, NULL); 2264 lthread_run(); 2265 2266 return 0; 2267 } 2268 2269 /* 2270 * Start scheduler on lcore.
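 *
 * Every worker lcore runs this function: it either becomes the CPU-load
 * collector (when APP_CPU_LOAD is enabled and the lcore matches
 * --stat-lcore) or starts an lthread scheduler seeded with lthread_null().
 * The rx/tx lthreads created by lthread_spawner() on the master lcore
 * later migrate themselves onto these schedulers via lthread_set_affinity().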
2271 */ 2272 static int 2273 sched_spawner(__rte_unused void *arg) { 2274 struct lthread *lt; 2275 int lcore_id = rte_lcore_id(); 2276 2277 #if (APP_CPU_LOAD) 2278 if (lcore_id == cpu_load_lcore_id) { 2279 cpu_load_collector(arg); 2280 return 0; 2281 } 2282 #endif /* APP_CPU_LOAD */ 2283 2284 RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id]; 2285 lthread_create(<, -1, lthread_null, NULL); 2286 lthread_run(); 2287 2288 return 0; 2289 } 2290 2291 /* main processing loop */ 2292 static int 2293 pthread_tx(void *dummy) 2294 { 2295 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 2296 uint64_t prev_tsc, diff_tsc, cur_tsc; 2297 int nb_rx; 2298 uint16_t portid; 2299 struct thread_tx_conf *tx_conf; 2300 2301 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 2302 US_PER_S * BURST_TX_DRAIN_US; 2303 2304 prev_tsc = 0; 2305 2306 tx_conf = (struct thread_tx_conf *)dummy; 2307 2308 RTE_LOG(INFO, L3FWD, "Entering main Tx loop on lcore %u\n", rte_lcore_id()); 2309 2310 tx_conf->conf.cpu_id = sched_getcpu(); 2311 rte_atomic16_inc(&tx_counter); 2312 while (1) { 2313 2314 cur_tsc = rte_rdtsc(); 2315 2316 /* 2317 * TX burst queue drain 2318 */ 2319 diff_tsc = cur_tsc - prev_tsc; 2320 if (unlikely(diff_tsc > drain_tsc)) { 2321 2322 /* 2323 * This could be optimized (use queueid instead of 2324 * portid), but it is not called so often 2325 */ 2326 SET_CPU_BUSY(tx_conf, CPU_PROCESS); 2327 for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { 2328 if (tx_conf->tx_mbufs[portid].len == 0) 2329 continue; 2330 send_burst(tx_conf, tx_conf->tx_mbufs[portid].len, portid); 2331 tx_conf->tx_mbufs[portid].len = 0; 2332 } 2333 SET_CPU_IDLE(tx_conf, CPU_PROCESS); 2334 2335 prev_tsc = cur_tsc; 2336 } 2337 2338 /* 2339 * Read packet from ring 2340 */ 2341 SET_CPU_BUSY(tx_conf, CPU_POLL); 2342 nb_rx = rte_ring_sc_dequeue_burst(tx_conf->ring, 2343 (void **)pkts_burst, MAX_PKT_BURST, NULL); 2344 SET_CPU_IDLE(tx_conf, CPU_POLL); 2345 2346 if (unlikely(nb_rx == 0)) { 2347 sched_yield(); 2348 continue; 2349 } 2350 2351 SET_CPU_BUSY(tx_conf, CPU_PROCESS); 2352 portid = pkts_burst[0]->port; 2353 process_burst(pkts_burst, nb_rx, portid); 2354 SET_CPU_IDLE(tx_conf, CPU_PROCESS); 2355 2356 } 2357 } 2358 2359 static int 2360 pthread_rx(void *dummy) 2361 { 2362 int i; 2363 int worker_id; 2364 uint32_t n; 2365 uint32_t nb_rx; 2366 unsigned lcore_id; 2367 uint8_t queueid; 2368 uint16_t portid; 2369 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 2370 2371 struct thread_rx_conf *rx_conf; 2372 2373 lcore_id = rte_lcore_id(); 2374 rx_conf = (struct thread_rx_conf *)dummy; 2375 2376 if (rx_conf->n_rx_queue == 0) { 2377 RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id); 2378 return 0; 2379 } 2380 2381 RTE_LOG(INFO, L3FWD, "entering main rx loop on lcore %u\n", lcore_id); 2382 2383 for (i = 0; i < rx_conf->n_rx_queue; i++) { 2384 2385 portid = rx_conf->rx_queue_list[i].port_id; 2386 queueid = rx_conf->rx_queue_list[i].queue_id; 2387 RTE_LOG(INFO, L3FWD, 2388 " -- lcoreid=%u portid=%u rxqueueid=%hhu\n", 2389 lcore_id, portid, queueid); 2390 } 2391 2392 worker_id = 0; 2393 rx_conf->conf.cpu_id = sched_getcpu(); 2394 rte_atomic16_inc(&rx_counter); 2395 while (1) { 2396 2397 /* 2398 * Read packet from RX queues 2399 */ 2400 for (i = 0; i < rx_conf->n_rx_queue; ++i) { 2401 portid = rx_conf->rx_queue_list[i].port_id; 2402 queueid = rx_conf->rx_queue_list[i].queue_id; 2403 2404 SET_CPU_BUSY(rx_conf, CPU_POLL); 2405 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 2406 MAX_PKT_BURST); 2407 SET_CPU_IDLE(rx_conf, CPU_POLL); 2408 
2409 if (nb_rx == 0) { 2410 sched_yield(); 2411 continue; 2412 } 2413 2414 SET_CPU_BUSY(rx_conf, CPU_PROCESS); 2415 worker_id = (worker_id + 1) % rx_conf->n_ring; 2416 n = rte_ring_sp_enqueue_burst(rx_conf->ring[worker_id], 2417 (void **)pkts_burst, nb_rx, NULL); 2418 2419 if (unlikely(n != nb_rx)) { 2420 uint32_t k; 2421 2422 for (k = n; k < nb_rx; k++) { 2423 struct rte_mbuf *m = pkts_burst[k]; 2424 2425 rte_pktmbuf_free(m); 2426 } 2427 } 2428 2429 SET_CPU_IDLE(rx_conf, CPU_PROCESS); 2430 2431 } 2432 } 2433 } 2434 2435 /* 2436 * P-Thread spawner. 2437 */ 2438 static int 2439 pthread_run(__rte_unused void *arg) { 2440 int lcore_id = rte_lcore_id(); 2441 int i; 2442 2443 for (i = 0; i < n_rx_thread; i++) 2444 if (rx_thread[i].conf.lcore_id == lcore_id) { 2445 printf("Start rx thread on %d...\n", lcore_id); 2446 RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id]; 2447 RTE_PER_LCORE(lcore_conf)->data = (void *)&rx_thread[i]; 2448 pthread_rx((void *)&rx_thread[i]); 2449 return 0; 2450 } 2451 2452 for (i = 0; i < n_tx_thread; i++) 2453 if (tx_thread[i].conf.lcore_id == lcore_id) { 2454 printf("Start tx thread on %d...\n", lcore_id); 2455 RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id]; 2456 RTE_PER_LCORE(lcore_conf)->data = (void *)&tx_thread[i]; 2457 pthread_tx((void *)&tx_thread[i]); 2458 return 0; 2459 } 2460 2461 #if (APP_CPU_LOAD) 2462 if (lcore_id == cpu_load_lcore_id) 2463 cpu_load_collector(arg); 2464 #endif /* APP_CPU_LOAD */ 2465 2466 return 0; 2467 } 2468 2469 static int 2470 check_lcore_params(void) 2471 { 2472 uint8_t queue, lcore; 2473 uint16_t i; 2474 int socketid; 2475 2476 for (i = 0; i < nb_rx_thread_params; ++i) { 2477 queue = rx_thread_params[i].queue_id; 2478 if (queue >= MAX_RX_QUEUE_PER_PORT) { 2479 printf("invalid queue number: %hhu\n", queue); 2480 return -1; 2481 } 2482 lcore = rx_thread_params[i].lcore_id; 2483 if (!rte_lcore_is_enabled(lcore)) { 2484 printf("error: lcore %hhu is not enabled in lcore mask\n", lcore); 2485 return -1; 2486 } 2487 socketid = rte_lcore_to_socket_id(lcore); 2488 if ((socketid != 0) && (numa_on == 0)) 2489 printf("warning: lcore %hhu is on socket %d with numa off\n", 2490 lcore, socketid); 2491 } 2492 return 0; 2493 } 2494 2495 static int 2496 check_port_config(void) 2497 { 2498 unsigned portid; 2499 uint16_t i; 2500 2501 for (i = 0; i < nb_rx_thread_params; ++i) { 2502 portid = rx_thread_params[i].port_id; 2503 if ((enabled_port_mask & (1 << portid)) == 0) { 2504 printf("port %u is not enabled in port mask\n", portid); 2505 return -1; 2506 } 2507 if (!rte_eth_dev_is_valid_port(portid)) { 2508 printf("port %u is not present on the board\n", portid); 2509 return -1; 2510 } 2511 } 2512 return 0; 2513 } 2514 2515 static uint8_t 2516 get_port_n_rx_queues(const uint16_t port) 2517 { 2518 int queue = -1; 2519 uint16_t i; 2520 2521 for (i = 0; i < nb_rx_thread_params; ++i) 2522 if (rx_thread_params[i].port_id == port && 2523 rx_thread_params[i].queue_id > queue) 2524 queue = rx_thread_params[i].queue_id; 2525 2526 return (uint8_t)(++queue); 2527 } 2528 2529 static int 2530 init_rx_rings(void) 2531 { 2532 unsigned socket_io; 2533 struct thread_rx_conf *rx_conf; 2534 struct thread_tx_conf *tx_conf; 2535 unsigned rx_thread_id, tx_thread_id; 2536 char name[256]; 2537 struct rte_ring *ring = NULL; 2538 2539 for (tx_thread_id = 0; tx_thread_id < n_tx_thread; tx_thread_id++) { 2540 2541 tx_conf = &tx_thread[tx_thread_id]; 2542 2543 printf("Connecting tx-thread %d with rx-thread %d\n", tx_thread_id, 2544 tx_conf->conf.thread_id); 2545 2546 rx_thread_id 
= tx_conf->conf.thread_id; 2547 if (rx_thread_id > n_tx_thread) { 2548 printf("connection from tx-thread %u to rx-thread %u fails " 2549 "(rx-thread not defined)\n", tx_thread_id, rx_thread_id); 2550 return -1; 2551 } 2552 2553 rx_conf = &rx_thread[rx_thread_id]; 2554 socket_io = rte_lcore_to_socket_id(rx_conf->conf.lcore_id); 2555 2556 snprintf(name, sizeof(name), "app_ring_s%u_rx%u_tx%u", 2557 socket_io, rx_thread_id, tx_thread_id); 2558 2559 ring = rte_ring_create(name, 1024 * 4, socket_io, 2560 RING_F_SP_ENQ | RING_F_SC_DEQ); 2561 2562 if (ring == NULL) { 2563 rte_panic("Cannot create ring to connect rx-thread %u " 2564 "with tx-thread %u\n", rx_thread_id, tx_thread_id); 2565 } 2566 2567 rx_conf->ring[rx_conf->n_ring] = ring; 2568 2569 tx_conf->ring = ring; 2570 tx_conf->ready = &rx_conf->ready[rx_conf->n_ring]; 2571 2572 rx_conf->n_ring++; 2573 } 2574 return 0; 2575 } 2576 2577 static int 2578 init_rx_queues(void) 2579 { 2580 uint16_t i, nb_rx_queue; 2581 uint8_t thread; 2582 2583 n_rx_thread = 0; 2584 2585 for (i = 0; i < nb_rx_thread_params; ++i) { 2586 thread = rx_thread_params[i].thread_id; 2587 nb_rx_queue = rx_thread[thread].n_rx_queue; 2588 2589 if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { 2590 printf("error: too many queues (%u) for thread: %u\n", 2591 (unsigned)nb_rx_queue + 1, (unsigned)thread); 2592 return -1; 2593 } 2594 2595 rx_thread[thread].conf.thread_id = thread; 2596 rx_thread[thread].conf.lcore_id = rx_thread_params[i].lcore_id; 2597 rx_thread[thread].rx_queue_list[nb_rx_queue].port_id = 2598 rx_thread_params[i].port_id; 2599 rx_thread[thread].rx_queue_list[nb_rx_queue].queue_id = 2600 rx_thread_params[i].queue_id; 2601 rx_thread[thread].n_rx_queue++; 2602 2603 if (thread >= n_rx_thread) 2604 n_rx_thread = thread + 1; 2605 2606 } 2607 return 0; 2608 } 2609 2610 static int 2611 init_tx_threads(void) 2612 { 2613 int i; 2614 2615 n_tx_thread = 0; 2616 for (i = 0; i < nb_tx_thread_params; ++i) { 2617 tx_thread[n_tx_thread].conf.thread_id = tx_thread_params[i].thread_id; 2618 tx_thread[n_tx_thread].conf.lcore_id = tx_thread_params[i].lcore_id; 2619 n_tx_thread++; 2620 } 2621 return 0; 2622 } 2623 2624 /* display usage */ 2625 static void 2626 print_usage(const char *prgname) 2627 { 2628 printf("%s [EAL options] -- -p PORTMASK -P" 2629 " [--rx (port,queue,lcore,thread)[,(port,queue,lcore,thread]]" 2630 " [--tx (lcore,thread)[,(lcore,thread]]" 2631 " [--enable-jumbo [--max-pkt-len PKTLEN]]\n" 2632 " [--parse-ptype]\n\n" 2633 " -p PORTMASK: hexadecimal bitmask of ports to configure\n" 2634 " -P : enable promiscuous mode\n" 2635 " --rx (port,queue,lcore,thread): rx queues configuration\n" 2636 " --tx (lcore,thread): tx threads configuration\n" 2637 " --stat-lcore LCORE: use lcore for stat collector\n" 2638 " --eth-dest=X,MM:MM:MM:MM:MM:MM: optional, ethernet destination for port X\n" 2639 " --no-numa: optional, disable numa awareness\n" 2640 " --ipv6: optional, specify it if running ipv6 packets\n" 2641 " --enable-jumbo: enable jumbo frame" 2642 " which max packet len is PKTLEN in decimal (64-9600)\n" 2643 " --hash-entry-num: specify the hash entry number in hexadecimal to be setup\n" 2644 " --no-lthreads: turn off lthread model\n" 2645 " --parse-ptype: set to use software to analyze packet type\n\n", 2646 prgname); 2647 } 2648 2649 static int parse_max_pkt_len(const char *pktlen) 2650 { 2651 char *end = NULL; 2652 unsigned long len; 2653 2654 /* parse decimal string */ 2655 len = strtoul(pktlen, &end, 10); 2656 if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0')) 
2657 return -1; 2658 2659 if (len == 0) 2660 return -1; 2661 2662 return len; 2663 } 2664 2665 static int 2666 parse_portmask(const char *portmask) 2667 { 2668 char *end = NULL; 2669 unsigned long pm; 2670 2671 /* parse hexadecimal string */ 2672 pm = strtoul(portmask, &end, 16); 2673 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 2674 return -1; 2675 2676 if (pm == 0) 2677 return -1; 2678 2679 return pm; 2680 } 2681 2682 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2683 static int 2684 parse_hash_entry_number(const char *hash_entry_num) 2685 { 2686 char *end = NULL; 2687 unsigned long hash_en; 2688 2689 /* parse hexadecimal string */ 2690 hash_en = strtoul(hash_entry_num, &end, 16); 2691 if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0')) 2692 return -1; 2693 2694 if (hash_en == 0) 2695 return -1; 2696 2697 return hash_en; 2698 } 2699 #endif 2700 2701 static int 2702 parse_rx_config(const char *q_arg) 2703 { 2704 char s[256]; 2705 const char *p, *p0 = q_arg; 2706 char *end; 2707 enum fieldnames { 2708 FLD_PORT = 0, 2709 FLD_QUEUE, 2710 FLD_LCORE, 2711 FLD_THREAD, 2712 _NUM_FLD 2713 }; 2714 unsigned long int_fld[_NUM_FLD]; 2715 char *str_fld[_NUM_FLD]; 2716 int i; 2717 unsigned size; 2718 2719 nb_rx_thread_params = 0; 2720 2721 while ((p = strchr(p0, '(')) != NULL) { 2722 ++p; 2723 p0 = strchr(p, ')'); 2724 if (p0 == NULL) 2725 return -1; 2726 2727 size = p0 - p; 2728 if (size >= sizeof(s)) 2729 return -1; 2730 2731 snprintf(s, sizeof(s), "%.*s", size, p); 2732 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) 2733 return -1; 2734 for (i = 0; i < _NUM_FLD; i++) { 2735 errno = 0; 2736 int_fld[i] = strtoul(str_fld[i], &end, 0); 2737 if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) 2738 return -1; 2739 } 2740 if (nb_rx_thread_params >= MAX_LCORE_PARAMS) { 2741 printf("exceeded max number of rx params: %hu\n", 2742 nb_rx_thread_params); 2743 return -1; 2744 } 2745 rx_thread_params_array[nb_rx_thread_params].port_id = 2746 int_fld[FLD_PORT]; 2747 rx_thread_params_array[nb_rx_thread_params].queue_id = 2748 (uint8_t)int_fld[FLD_QUEUE]; 2749 rx_thread_params_array[nb_rx_thread_params].lcore_id = 2750 (uint8_t)int_fld[FLD_LCORE]; 2751 rx_thread_params_array[nb_rx_thread_params].thread_id = 2752 (uint8_t)int_fld[FLD_THREAD]; 2753 ++nb_rx_thread_params; 2754 } 2755 rx_thread_params = rx_thread_params_array; 2756 return 0; 2757 } 2758 2759 static int 2760 parse_tx_config(const char *q_arg) 2761 { 2762 char s[256]; 2763 const char *p, *p0 = q_arg; 2764 char *end; 2765 enum fieldnames { 2766 FLD_LCORE = 0, 2767 FLD_THREAD, 2768 _NUM_FLD 2769 }; 2770 unsigned long int_fld[_NUM_FLD]; 2771 char *str_fld[_NUM_FLD]; 2772 int i; 2773 unsigned size; 2774 2775 nb_tx_thread_params = 0; 2776 2777 while ((p = strchr(p0, '(')) != NULL) { 2778 ++p; 2779 p0 = strchr(p, ')'); 2780 if (p0 == NULL) 2781 return -1; 2782 2783 size = p0 - p; 2784 if (size >= sizeof(s)) 2785 return -1; 2786 2787 snprintf(s, sizeof(s), "%.*s", size, p); 2788 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) 2789 return -1; 2790 for (i = 0; i < _NUM_FLD; i++) { 2791 errno = 0; 2792 int_fld[i] = strtoul(str_fld[i], &end, 0); 2793 if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) 2794 return -1; 2795 } 2796 if (nb_tx_thread_params >= MAX_LCORE_PARAMS) { 2797 printf("exceeded max number of tx params: %hu\n", 2798 nb_tx_thread_params); 2799 return -1; 2800 } 2801 tx_thread_params_array[nb_tx_thread_params].lcore_id = 2802 (uint8_t)int_fld[FLD_LCORE]; 2803 
tx_thread_params_array[nb_tx_thread_params].thread_id = 2804 (uint8_t)int_fld[FLD_THREAD]; 2805 ++nb_tx_thread_params; 2806 } 2807 tx_thread_params = tx_thread_params_array; 2808 2809 return 0; 2810 } 2811 2812 #if (APP_CPU_LOAD > 0) 2813 static int 2814 parse_stat_lcore(const char *stat_lcore) 2815 { 2816 char *end = NULL; 2817 unsigned long lcore_id; 2818 2819 lcore_id = strtoul(stat_lcore, &end, 10); 2820 if ((stat_lcore[0] == '\0') || (end == NULL) || (*end != '\0')) 2821 return -1; 2822 2823 return lcore_id; 2824 } 2825 #endif 2826 2827 static void 2828 parse_eth_dest(const char *optarg) 2829 { 2830 uint16_t portid; 2831 char *port_end; 2832 uint8_t c, *dest, peer_addr[6]; 2833 2834 errno = 0; 2835 portid = strtoul(optarg, &port_end, 10); 2836 if (errno != 0 || port_end == optarg || *port_end++ != ',') 2837 rte_exit(EXIT_FAILURE, 2838 "Invalid eth-dest: %s", optarg); 2839 if (portid >= RTE_MAX_ETHPORTS) 2840 rte_exit(EXIT_FAILURE, 2841 "eth-dest: port %d >= RTE_MAX_ETHPORTS(%d)\n", 2842 portid, RTE_MAX_ETHPORTS); 2843 2844 if (cmdline_parse_etheraddr(NULL, port_end, 2845 &peer_addr, sizeof(peer_addr)) < 0) 2846 rte_exit(EXIT_FAILURE, 2847 "Invalid ethernet address: %s\n", 2848 port_end); 2849 dest = (uint8_t *)&dest_eth_addr[portid]; 2850 for (c = 0; c < 6; c++) 2851 dest[c] = peer_addr[c]; 2852 *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid]; 2853 } 2854 2855 #define CMD_LINE_OPT_RX_CONFIG "rx" 2856 #define CMD_LINE_OPT_TX_CONFIG "tx" 2857 #define CMD_LINE_OPT_STAT_LCORE "stat-lcore" 2858 #define CMD_LINE_OPT_ETH_DEST "eth-dest" 2859 #define CMD_LINE_OPT_NO_NUMA "no-numa" 2860 #define CMD_LINE_OPT_IPV6 "ipv6" 2861 #define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo" 2862 #define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num" 2863 #define CMD_LINE_OPT_NO_LTHREADS "no-lthreads" 2864 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" 2865 2866 /* Parse the argument given in the command line of the application */ 2867 static int 2868 parse_args(int argc, char **argv) 2869 { 2870 int opt, ret; 2871 char **argvopt; 2872 int option_index; 2873 char *prgname = argv[0]; 2874 static struct option lgopts[] = { 2875 {CMD_LINE_OPT_RX_CONFIG, 1, 0, 0}, 2876 {CMD_LINE_OPT_TX_CONFIG, 1, 0, 0}, 2877 {CMD_LINE_OPT_STAT_LCORE, 1, 0, 0}, 2878 {CMD_LINE_OPT_ETH_DEST, 1, 0, 0}, 2879 {CMD_LINE_OPT_NO_NUMA, 0, 0, 0}, 2880 {CMD_LINE_OPT_IPV6, 0, 0, 0}, 2881 {CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0}, 2882 {CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0}, 2883 {CMD_LINE_OPT_NO_LTHREADS, 0, 0, 0}, 2884 {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, 2885 {NULL, 0, 0, 0} 2886 }; 2887 2888 argvopt = argv; 2889 2890 while ((opt = getopt_long(argc, argvopt, "p:P", 2891 lgopts, &option_index)) != EOF) { 2892 2893 switch (opt) { 2894 /* portmask */ 2895 case 'p': 2896 enabled_port_mask = parse_portmask(optarg); 2897 if (enabled_port_mask == 0) { 2898 printf("invalid portmask\n"); 2899 print_usage(prgname); 2900 return -1; 2901 } 2902 break; 2903 case 'P': 2904 printf("Promiscuous mode selected\n"); 2905 promiscuous_on = 1; 2906 break; 2907 2908 /* long options */ 2909 case 0: 2910 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_RX_CONFIG, 2911 sizeof(CMD_LINE_OPT_RX_CONFIG))) { 2912 ret = parse_rx_config(optarg); 2913 if (ret) { 2914 printf("invalid rx-config\n"); 2915 print_usage(prgname); 2916 return -1; 2917 } 2918 } 2919 2920 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_TX_CONFIG, 2921 sizeof(CMD_LINE_OPT_TX_CONFIG))) { 2922 ret = parse_tx_config(optarg); 2923 if (ret) { 2924 printf("invalid tx-config\n"); 2925 
print_usage(prgname); 2926 return -1; 2927 } 2928 } 2929 2930 #if (APP_CPU_LOAD > 0) 2931 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_STAT_LCORE, 2932 sizeof(CMD_LINE_OPT_STAT_LCORE))) { 2933 cpu_load_lcore_id = parse_stat_lcore(optarg); 2934 } 2935 #endif 2936 2937 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ETH_DEST, 2938 sizeof(CMD_LINE_OPT_ETH_DEST))) 2939 parse_eth_dest(optarg); 2940 2941 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_NUMA, 2942 sizeof(CMD_LINE_OPT_NO_NUMA))) { 2943 printf("numa is disabled\n"); 2944 numa_on = 0; 2945 } 2946 2947 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2948 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_IPV6, 2949 sizeof(CMD_LINE_OPT_IPV6))) { 2950 printf("ipv6 is specified\n"); 2951 ipv6 = 1; 2952 } 2953 #endif 2954 2955 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_LTHREADS, 2956 sizeof(CMD_LINE_OPT_NO_LTHREADS))) { 2957 printf("l-threads model is disabled\n"); 2958 lthreads_on = 0; 2959 } 2960 2961 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_PARSE_PTYPE, 2962 sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { 2963 printf("software packet type parsing enabled\n"); 2964 parse_ptype_on = 1; 2965 } 2966 2967 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ENABLE_JUMBO, 2968 sizeof(CMD_LINE_OPT_ENABLE_JUMBO))) { 2969 struct option lenopts = {"max-pkt-len", required_argument, 0, 2970 0}; 2971 2972 printf("jumbo frame is enabled - disabling simple TX path\n"); 2973 port_conf.rxmode.offloads |= 2974 DEV_RX_OFFLOAD_JUMBO_FRAME; 2975 port_conf.txmode.offloads |= 2976 DEV_TX_OFFLOAD_MULTI_SEGS; 2977 2978 /* if no max-pkt-len set, use the default value ETHER_MAX_LEN */ 2979 if (0 == getopt_long(argc, argvopt, "", &lenopts, 2980 &option_index)) { 2981 2982 ret = parse_max_pkt_len(optarg); 2983 if ((ret < 64) || (ret > MAX_JUMBO_PKT_LEN)) { 2984 printf("invalid packet length\n"); 2985 print_usage(prgname); 2986 return -1; 2987 } 2988 port_conf.rxmode.max_rx_pkt_len = ret; 2989 } 2990 printf("set jumbo frame max packet length to %u\n", 2991 (unsigned int)port_conf.rxmode.max_rx_pkt_len); 2992 } 2993 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2994 if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_HASH_ENTRY_NUM, 2995 sizeof(CMD_LINE_OPT_HASH_ENTRY_NUM))) { 2996 ret = parse_hash_entry_number(optarg); 2997 if ((ret > 0) && (ret <= L3FWD_HASH_ENTRIES)) { 2998 hash_entry_number = ret; 2999 } else { 3000 printf("invalid hash entry number\n"); 3001 print_usage(prgname); 3002 return -1; 3003 } 3004 } 3005 #endif 3006 break; 3007 3008 default: 3009 print_usage(prgname); 3010 return -1; 3011 } 3012 } 3013 3014 if (optind >= 0) 3015 argv[optind-1] = prgname; 3016 3017 ret = optind-1; 3018 optind = 1; /* reset getopt lib */ 3019 return ret; 3020 } 3021 3022 static void 3023 print_ethaddr(const char *name, const struct ether_addr *eth_addr) 3024 { 3025 char buf[ETHER_ADDR_FMT_SIZE]; 3026 3027 ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); 3028 printf("%s%s", name, buf); 3029 } 3030 3031 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 3032 3033 static void convert_ipv4_5tuple(struct ipv4_5tuple *key1, 3034 union ipv4_5tuple_host *key2) 3035 { 3036 key2->ip_dst = rte_cpu_to_be_32(key1->ip_dst); 3037 key2->ip_src = rte_cpu_to_be_32(key1->ip_src); 3038 key2->port_dst = rte_cpu_to_be_16(key1->port_dst); 3039 key2->port_src = rte_cpu_to_be_16(key1->port_src); 3040 key2->proto = key1->proto; 3041 key2->pad0 = 0; 3042 key2->pad1 = 0; 3043 } 3044 3045 static void convert_ipv6_5tuple(struct ipv6_5tuple *key1, 3046 
union ipv6_5tuple_host *key2) 3047 { 3048 uint32_t i; 3049 3050 for (i = 0; i < 16; i++) { 3051 key2->ip_dst[i] = key1->ip_dst[i]; 3052 key2->ip_src[i] = key1->ip_src[i]; 3053 } 3054 key2->port_dst = rte_cpu_to_be_16(key1->port_dst); 3055 key2->port_src = rte_cpu_to_be_16(key1->port_src); 3056 key2->proto = key1->proto; 3057 key2->pad0 = 0; 3058 key2->pad1 = 0; 3059 key2->reserve = 0; 3060 } 3061 3062 #define BYTE_VALUE_MAX 256 3063 #define ALL_32_BITS 0xffffffff 3064 #define BIT_8_TO_15 0x0000ff00 3065 static inline void 3066 populate_ipv4_few_flow_into_table(const struct rte_hash *h) 3067 { 3068 uint32_t i; 3069 int32_t ret; 3070 uint32_t array_len = RTE_DIM(ipv4_l3fwd_route_array); 3071 3072 mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15); 3073 for (i = 0; i < array_len; i++) { 3074 struct ipv4_l3fwd_route entry; 3075 union ipv4_5tuple_host newkey; 3076 3077 entry = ipv4_l3fwd_route_array[i]; 3078 convert_ipv4_5tuple(&entry.key, &newkey); 3079 ret = rte_hash_add_key(h, (void *)&newkey); 3080 if (ret < 0) { 3081 rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32 3082 " to the l3fwd hash.\n", i); 3083 } 3084 ipv4_l3fwd_out_if[ret] = entry.if_out; 3085 } 3086 printf("Hash: Adding 0x%" PRIx32 " keys\n", array_len); 3087 } 3088 3089 #define BIT_16_TO_23 0x00ff0000 3090 static inline void 3091 populate_ipv6_few_flow_into_table(const struct rte_hash *h) 3092 { 3093 uint32_t i; 3094 int32_t ret; 3095 uint32_t array_len = RTE_DIM(ipv6_l3fwd_route_array); 3096 3097 mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23); 3098 mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS); 3099 for (i = 0; i < array_len; i++) { 3100 struct ipv6_l3fwd_route entry; 3101 union ipv6_5tuple_host newkey; 3102 3103 entry = ipv6_l3fwd_route_array[i]; 3104 convert_ipv6_5tuple(&entry.key, &newkey); 3105 ret = rte_hash_add_key(h, (void *)&newkey); 3106 if (ret < 0) { 3107 rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32 3108 " to the l3fwd hash.\n", i); 3109 } 3110 ipv6_l3fwd_out_if[ret] = entry.if_out; 3111 } 3112 printf("Hash: Adding 0x%" PRIx32 "keys\n", array_len); 3113 } 3114 3115 #define NUMBER_PORT_USED 4 3116 static inline void 3117 populate_ipv4_many_flow_into_table(const struct rte_hash *h, 3118 unsigned int nr_flow) 3119 { 3120 unsigned i; 3121 3122 mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15); 3123 3124 for (i = 0; i < nr_flow; i++) { 3125 struct ipv4_l3fwd_route entry; 3126 union ipv4_5tuple_host newkey; 3127 uint8_t a = (uint8_t)((i / NUMBER_PORT_USED) % BYTE_VALUE_MAX); 3128 uint8_t b = (uint8_t)(((i / NUMBER_PORT_USED) / BYTE_VALUE_MAX) % 3129 BYTE_VALUE_MAX); 3130 uint8_t c = (uint8_t)((i / NUMBER_PORT_USED) / (BYTE_VALUE_MAX * 3131 BYTE_VALUE_MAX)); 3132 /* Create the ipv4 exact match flow */ 3133 memset(&entry, 0, sizeof(entry)); 3134 switch (i & (NUMBER_PORT_USED - 1)) { 3135 case 0: 3136 entry = ipv4_l3fwd_route_array[0]; 3137 entry.key.ip_dst = IPv4(101, c, b, a); 3138 break; 3139 case 1: 3140 entry = ipv4_l3fwd_route_array[1]; 3141 entry.key.ip_dst = IPv4(201, c, b, a); 3142 break; 3143 case 2: 3144 entry = ipv4_l3fwd_route_array[2]; 3145 entry.key.ip_dst = IPv4(111, c, b, a); 3146 break; 3147 case 3: 3148 entry = ipv4_l3fwd_route_array[3]; 3149 entry.key.ip_dst = IPv4(211, c, b, a); 3150 break; 3151 }; 3152 convert_ipv4_5tuple(&entry.key, &newkey); 3153 int32_t ret = rte_hash_add_key(h, (void *)&newkey); 3154 3155 if (ret < 0) 3156 rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i); 3157 3158 
ipv4_l3fwd_out_if[ret] = (uint8_t)entry.if_out; 3159 3160 } 3161 printf("Hash: Adding 0x%x keys\n", nr_flow); 3162 } 3163 3164 static inline void 3165 populate_ipv6_many_flow_into_table(const struct rte_hash *h, 3166 unsigned int nr_flow) 3167 { 3168 unsigned i; 3169 3170 mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23); 3171 mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS); 3172 for (i = 0; i < nr_flow; i++) { 3173 struct ipv6_l3fwd_route entry; 3174 union ipv6_5tuple_host newkey; 3175 3176 uint8_t a = (uint8_t) ((i / NUMBER_PORT_USED) % BYTE_VALUE_MAX); 3177 uint8_t b = (uint8_t) (((i / NUMBER_PORT_USED) / BYTE_VALUE_MAX) % 3178 BYTE_VALUE_MAX); 3179 uint8_t c = (uint8_t) ((i / NUMBER_PORT_USED) / (BYTE_VALUE_MAX * 3180 BYTE_VALUE_MAX)); 3181 3182 /* Create the ipv6 exact match flow */ 3183 memset(&entry, 0, sizeof(entry)); 3184 switch (i & (NUMBER_PORT_USED - 1)) { 3185 case 0: 3186 entry = ipv6_l3fwd_route_array[0]; 3187 break; 3188 case 1: 3189 entry = ipv6_l3fwd_route_array[1]; 3190 break; 3191 case 2: 3192 entry = ipv6_l3fwd_route_array[2]; 3193 break; 3194 case 3: 3195 entry = ipv6_l3fwd_route_array[3]; 3196 break; 3197 }; 3198 entry.key.ip_dst[13] = c; 3199 entry.key.ip_dst[14] = b; 3200 entry.key.ip_dst[15] = a; 3201 convert_ipv6_5tuple(&entry.key, &newkey); 3202 int32_t ret = rte_hash_add_key(h, (void *)&newkey); 3203 3204 if (ret < 0) 3205 rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i); 3206 3207 ipv6_l3fwd_out_if[ret] = (uint8_t) entry.if_out; 3208 3209 } 3210 printf("Hash: Adding 0x%x keys\n", nr_flow); 3211 } 3212 3213 static void 3214 setup_hash(int socketid) 3215 { 3216 struct rte_hash_parameters ipv4_l3fwd_hash_params = { 3217 .name = NULL, 3218 .entries = L3FWD_HASH_ENTRIES, 3219 .key_len = sizeof(union ipv4_5tuple_host), 3220 .hash_func = ipv4_hash_crc, 3221 .hash_func_init_val = 0, 3222 }; 3223 3224 struct rte_hash_parameters ipv6_l3fwd_hash_params = { 3225 .name = NULL, 3226 .entries = L3FWD_HASH_ENTRIES, 3227 .key_len = sizeof(union ipv6_5tuple_host), 3228 .hash_func = ipv6_hash_crc, 3229 .hash_func_init_val = 0, 3230 }; 3231 3232 char s[64]; 3233 3234 /* create ipv4 hash */ 3235 snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); 3236 ipv4_l3fwd_hash_params.name = s; 3237 ipv4_l3fwd_hash_params.socket_id = socketid; 3238 ipv4_l3fwd_lookup_struct[socketid] = 3239 rte_hash_create(&ipv4_l3fwd_hash_params); 3240 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 3241 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 3242 "socket %d\n", socketid); 3243 3244 /* create ipv6 hash */ 3245 snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); 3246 ipv6_l3fwd_hash_params.name = s; 3247 ipv6_l3fwd_hash_params.socket_id = socketid; 3248 ipv6_l3fwd_lookup_struct[socketid] = 3249 rte_hash_create(&ipv6_l3fwd_hash_params); 3250 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 3251 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 3252 "socket %d\n", socketid); 3253 3254 if (hash_entry_number != HASH_ENTRY_NUMBER_DEFAULT) { 3255 /* For testing hash matching with a large number of flows we 3256 * generate millions of IP 5-tuples with an incremented dst 3257 * address to initialize the hash table. 
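 * For example, for flow index i = 1026 the generator in
 * populate_ipv4_many_flow_into_table() computes a = 0, b = 1, c = 0
 * and i & 3 = 2, so the added key reuses ipv4_l3fwd_route_array[2]
 * with ip_dst = IPv4(111, 0, 1, 0).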
*/ 3258 if (ipv6 == 0) { 3259 /* populate the ipv4 hash */ 3260 populate_ipv4_many_flow_into_table( 3261 ipv4_l3fwd_lookup_struct[socketid], hash_entry_number); 3262 } else { 3263 /* populate the ipv6 hash */ 3264 populate_ipv6_many_flow_into_table( 3265 ipv6_l3fwd_lookup_struct[socketid], hash_entry_number); 3266 } 3267 } else { 3268 /* Use data in ipv4/ipv6 l3fwd lookup table directly to initialize 3269 * the hash table */ 3270 if (ipv6 == 0) { 3271 /* populate the ipv4 hash */ 3272 populate_ipv4_few_flow_into_table( 3273 ipv4_l3fwd_lookup_struct[socketid]); 3274 } else { 3275 /* populate the ipv6 hash */ 3276 populate_ipv6_few_flow_into_table( 3277 ipv6_l3fwd_lookup_struct[socketid]); 3278 } 3279 } 3280 } 3281 #endif 3282 3283 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 3284 static void 3285 setup_lpm(int socketid) 3286 { 3287 struct rte_lpm6_config config; 3288 struct rte_lpm_config lpm_ipv4_config; 3289 unsigned i; 3290 int ret; 3291 char s[64]; 3292 3293 /* create the LPM table */ 3294 snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); 3295 lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; 3296 lpm_ipv4_config.number_tbl8s = 256; 3297 lpm_ipv4_config.flags = 0; 3298 ipv4_l3fwd_lookup_struct[socketid] = 3299 rte_lpm_create(s, socketid, &lpm_ipv4_config); 3300 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 3301 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 3302 " on socket %d\n", socketid); 3303 3304 /* populate the LPM table */ 3305 for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { 3306 3307 /* skip unused ports */ 3308 if ((1 << ipv4_l3fwd_route_array[i].if_out & 3309 enabled_port_mask) == 0) 3310 continue; 3311 3312 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], 3313 ipv4_l3fwd_route_array[i].ip, 3314 ipv4_l3fwd_route_array[i].depth, 3315 ipv4_l3fwd_route_array[i].if_out); 3316 3317 if (ret < 0) { 3318 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 3319 "l3fwd LPM table on socket %d\n", 3320 i, socketid); 3321 } 3322 3323 printf("LPM: Adding route 0x%08x / %d (%d)\n", 3324 (unsigned)ipv4_l3fwd_route_array[i].ip, 3325 ipv4_l3fwd_route_array[i].depth, 3326 ipv4_l3fwd_route_array[i].if_out); 3327 } 3328 3329 /* create the LPM6 table */ 3330 snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid); 3331 3332 config.max_rules = IPV6_L3FWD_LPM_MAX_RULES; 3333 config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S; 3334 config.flags = 0; 3335 ipv6_l3fwd_lookup_struct[socketid] = rte_lpm6_create(s, socketid, 3336 &config); 3337 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 3338 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 3339 " on socket %d\n", socketid); 3340 3341 /* populate the LPM table */ 3342 for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) { 3343 3344 /* skip unused ports */ 3345 if ((1 << ipv6_l3fwd_route_array[i].if_out & 3346 enabled_port_mask) == 0) 3347 continue; 3348 3349 ret = rte_lpm6_add(ipv6_l3fwd_lookup_struct[socketid], 3350 ipv6_l3fwd_route_array[i].ip, 3351 ipv6_l3fwd_route_array[i].depth, 3352 ipv6_l3fwd_route_array[i].if_out); 3353 3354 if (ret < 0) { 3355 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 3356 "l3fwd LPM table on socket %d\n", 3357 i, socketid); 3358 } 3359 3360 printf("LPM: Adding route %s / %d (%d)\n", 3361 "IPV6", 3362 ipv6_l3fwd_route_array[i].depth, 3363 ipv6_l3fwd_route_array[i].if_out); 3364 } 3365 } 3366 #endif 3367 3368 static int 3369 init_mem(unsigned nb_mbuf) 3370 { 3371 struct lcore_conf *qconf; 3372 int socketid; 3373 unsigned lcore_id; 3374 char s[64]; 3375 3376 for (lcore_id = 0; lcore_id 
< RTE_MAX_LCORE; lcore_id++) { 3377 if (rte_lcore_is_enabled(lcore_id) == 0) 3378 continue; 3379 3380 if (numa_on) 3381 socketid = rte_lcore_to_socket_id(lcore_id); 3382 else 3383 socketid = 0; 3384 3385 if (socketid >= NB_SOCKETS) { 3386 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n", 3387 socketid, lcore_id, NB_SOCKETS); 3388 } 3389 if (pktmbuf_pool[socketid] == NULL) { 3390 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 3391 pktmbuf_pool[socketid] = 3392 rte_pktmbuf_pool_create(s, nb_mbuf, 3393 MEMPOOL_CACHE_SIZE, 0, 3394 RTE_MBUF_DEFAULT_BUF_SIZE, socketid); 3395 if (pktmbuf_pool[socketid] == NULL) 3396 rte_exit(EXIT_FAILURE, 3397 "Cannot init mbuf pool on socket %d\n", socketid); 3398 else 3399 printf("Allocated mbuf pool on socket %d\n", socketid); 3400 3401 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 3402 setup_lpm(socketid); 3403 #else 3404 setup_hash(socketid); 3405 #endif 3406 } 3407 qconf = &lcore_conf[lcore_id]; 3408 qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid]; 3409 qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid]; 3410 } 3411 return 0; 3412 } 3413 3414 /* Check the link status of all ports in up to 9s, and print them finally */ 3415 static void 3416 check_all_ports_link_status(uint32_t port_mask) 3417 { 3418 #define CHECK_INTERVAL 100 /* 100ms */ 3419 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 3420 uint16_t portid; 3421 uint8_t count, all_ports_up, print_flag = 0; 3422 struct rte_eth_link link; 3423 3424 printf("\nChecking link status"); 3425 fflush(stdout); 3426 for (count = 0; count <= MAX_CHECK_TIME; count++) { 3427 all_ports_up = 1; 3428 RTE_ETH_FOREACH_DEV(portid) { 3429 if ((port_mask & (1 << portid)) == 0) 3430 continue; 3431 memset(&link, 0, sizeof(link)); 3432 rte_eth_link_get_nowait(portid, &link); 3433 /* print link status if flag set */ 3434 if (print_flag == 1) { 3435 if (link.link_status) 3436 printf( 3437 "Port%d Link Up. Speed %u Mbps - %s\n", 3438 portid, link.link_speed, 3439 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
3440 ("full-duplex") : ("half-duplex\n")); 3441 else 3442 printf("Port %d Link Down\n", portid); 3443 continue; 3444 } 3445 /* clear all_ports_up flag if any link down */ 3446 if (link.link_status == ETH_LINK_DOWN) { 3447 all_ports_up = 0; 3448 break; 3449 } 3450 } 3451 /* after finally printing all link status, get out */ 3452 if (print_flag == 1) 3453 break; 3454 3455 if (all_ports_up == 0) { 3456 printf("."); 3457 fflush(stdout); 3458 rte_delay_ms(CHECK_INTERVAL); 3459 } 3460 3461 /* set the print_flag if all ports up or timeout */ 3462 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 3463 print_flag = 1; 3464 printf("done\n"); 3465 } 3466 } 3467 } 3468 3469 int 3470 main(int argc, char **argv) 3471 { 3472 struct rte_eth_dev_info dev_info; 3473 struct rte_eth_txconf *txconf; 3474 int ret; 3475 int i; 3476 unsigned nb_ports; 3477 uint16_t queueid, portid; 3478 unsigned lcore_id; 3479 uint32_t n_tx_queue, nb_lcores; 3480 uint8_t nb_rx_queue, queue, socketid; 3481 3482 /* init EAL */ 3483 ret = rte_eal_init(argc, argv); 3484 if (ret < 0) 3485 rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); 3486 argc -= ret; 3487 argv += ret; 3488 3489 /* pre-init dst MACs for all ports to 02:00:00:00:00:xx */ 3490 for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { 3491 dest_eth_addr[portid] = ETHER_LOCAL_ADMIN_ADDR + 3492 ((uint64_t)portid << 40); 3493 *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid]; 3494 } 3495 3496 /* parse application arguments (after the EAL ones) */ 3497 ret = parse_args(argc, argv); 3498 if (ret < 0) 3499 rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); 3500 3501 if (check_lcore_params() < 0) 3502 rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); 3503 3504 printf("Initializing rx-queues...\n"); 3505 ret = init_rx_queues(); 3506 if (ret < 0) 3507 rte_exit(EXIT_FAILURE, "init_rx_queues failed\n"); 3508 3509 printf("Initializing tx-threads...\n"); 3510 ret = init_tx_threads(); 3511 if (ret < 0) 3512 rte_exit(EXIT_FAILURE, "init_tx_threads failed\n"); 3513 3514 printf("Initializing rings...\n"); 3515 ret = init_rx_rings(); 3516 if (ret < 0) 3517 rte_exit(EXIT_FAILURE, "init_rx_rings failed\n"); 3518 3519 nb_ports = rte_eth_dev_count_avail(); 3520 3521 if (check_port_config() < 0) 3522 rte_exit(EXIT_FAILURE, "check_port_config failed\n"); 3523 3524 nb_lcores = rte_lcore_count(); 3525 3526 /* initialize all ports */ 3527 RTE_ETH_FOREACH_DEV(portid) { 3528 struct rte_eth_conf local_port_conf = port_conf; 3529 3530 /* skip ports that are not enabled */ 3531 if ((enabled_port_mask & (1 << portid)) == 0) { 3532 printf("\nSkipping disabled port %d\n", portid); 3533 continue; 3534 } 3535 3536 /* init port */ 3537 printf("Initializing port %d ... ", portid); 3538 fflush(stdout); 3539 3540 nb_rx_queue = get_port_n_rx_queues(portid); 3541 n_tx_queue = nb_lcores; 3542 if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) 3543 n_tx_queue = MAX_TX_QUEUE_PER_PORT; 3544 printf("Creating queues: nb_rxq=%d nb_txq=%u... 
", 3545 nb_rx_queue, (unsigned)n_tx_queue); 3546 rte_eth_dev_info_get(portid, &dev_info); 3547 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 3548 local_port_conf.txmode.offloads |= 3549 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 3550 3551 local_port_conf.rx_adv_conf.rss_conf.rss_hf &= 3552 dev_info.flow_type_rss_offloads; 3553 if (local_port_conf.rx_adv_conf.rss_conf.rss_hf != 3554 port_conf.rx_adv_conf.rss_conf.rss_hf) { 3555 printf("Port %u modified RSS hash function based on hardware support," 3556 "requested:%#"PRIx64" configured:%#"PRIx64"\n", 3557 portid, 3558 port_conf.rx_adv_conf.rss_conf.rss_hf, 3559 local_port_conf.rx_adv_conf.rss_conf.rss_hf); 3560 } 3561 3562 ret = rte_eth_dev_configure(portid, nb_rx_queue, 3563 (uint16_t)n_tx_queue, &local_port_conf); 3564 if (ret < 0) 3565 rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n", 3566 ret, portid); 3567 3568 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, 3569 &nb_txd); 3570 if (ret < 0) 3571 rte_exit(EXIT_FAILURE, 3572 "rte_eth_dev_adjust_nb_rx_tx_desc: err=%d, port=%d\n", 3573 ret, portid); 3574 3575 rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 3576 print_ethaddr(" Address:", &ports_eth_addr[portid]); 3577 printf(", "); 3578 print_ethaddr("Destination:", 3579 (const struct ether_addr *)&dest_eth_addr[portid]); 3580 printf(", "); 3581 3582 /* 3583 * prepare src MACs for each port. 3584 */ 3585 ether_addr_copy(&ports_eth_addr[portid], 3586 (struct ether_addr *)(val_eth + portid) + 1); 3587 3588 /* init memory */ 3589 ret = init_mem(NB_MBUF); 3590 if (ret < 0) 3591 rte_exit(EXIT_FAILURE, "init_mem failed\n"); 3592 3593 /* init one TX queue per couple (lcore,port) */ 3594 queueid = 0; 3595 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 3596 if (rte_lcore_is_enabled(lcore_id) == 0) 3597 continue; 3598 3599 if (numa_on) 3600 socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id); 3601 else 3602 socketid = 0; 3603 3604 printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); 3605 fflush(stdout); 3606 3607 txconf = &dev_info.default_txconf; 3608 txconf->offloads = local_port_conf.txmode.offloads; 3609 ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, 3610 socketid, txconf); 3611 if (ret < 0) 3612 rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, " 3613 "port=%d\n", ret, portid); 3614 3615 tx_thread[lcore_id].tx_queue_id[portid] = queueid; 3616 queueid++; 3617 } 3618 printf("\n"); 3619 } 3620 3621 for (i = 0; i < n_rx_thread; i++) { 3622 lcore_id = rx_thread[i].conf.lcore_id; 3623 3624 if (rte_lcore_is_enabled(lcore_id) == 0) { 3625 rte_exit(EXIT_FAILURE, 3626 "Cannot start Rx thread on lcore %u: lcore disabled\n", 3627 lcore_id 3628 ); 3629 } 3630 3631 printf("\nInitializing rx queues for Rx thread %d on lcore %u ... 
", 3632 i, lcore_id); 3633 fflush(stdout); 3634 3635 /* init RX queues */ 3636 for (queue = 0; queue < rx_thread[i].n_rx_queue; ++queue) { 3637 struct rte_eth_dev *dev; 3638 struct rte_eth_conf *conf; 3639 struct rte_eth_rxconf rxq_conf; 3640 3641 portid = rx_thread[i].rx_queue_list[queue].port_id; 3642 queueid = rx_thread[i].rx_queue_list[queue].queue_id; 3643 dev = &rte_eth_devices[portid]; 3644 conf = &dev->data->dev_conf; 3645 3646 if (numa_on) 3647 socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id); 3648 else 3649 socketid = 0; 3650 3651 printf("rxq=%d,%d,%d ", portid, queueid, socketid); 3652 fflush(stdout); 3653 3654 rte_eth_dev_info_get(portid, &dev_info); 3655 rxq_conf = dev_info.default_rxconf; 3656 rxq_conf.offloads = conf->rxmode.offloads; 3657 ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, 3658 socketid, 3659 &rxq_conf, 3660 pktmbuf_pool[socketid]); 3661 if (ret < 0) 3662 rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d, " 3663 "port=%d\n", ret, portid); 3664 } 3665 } 3666 3667 printf("\n"); 3668 3669 /* start ports */ 3670 RTE_ETH_FOREACH_DEV(portid) { 3671 if ((enabled_port_mask & (1 << portid)) == 0) 3672 continue; 3673 3674 /* Start device */ 3675 ret = rte_eth_dev_start(portid); 3676 if (ret < 0) 3677 rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n", 3678 ret, portid); 3679 3680 /* 3681 * If enabled, put device in promiscuous mode. 3682 * This allows IO forwarding mode to forward packets 3683 * to itself through 2 cross-connected ports of the 3684 * target machine. 3685 */ 3686 if (promiscuous_on) 3687 rte_eth_promiscuous_enable(portid); 3688 } 3689 3690 for (i = 0; i < n_rx_thread; i++) { 3691 lcore_id = rx_thread[i].conf.lcore_id; 3692 if (rte_lcore_is_enabled(lcore_id) == 0) 3693 continue; 3694 3695 /* check if hw packet type is supported */ 3696 for (queue = 0; queue < rx_thread[i].n_rx_queue; ++queue) { 3697 portid = rx_thread[i].rx_queue_list[queue].port_id; 3698 queueid = rx_thread[i].rx_queue_list[queue].queue_id; 3699 3700 if (parse_ptype_on) { 3701 if (!rte_eth_add_rx_callback(portid, queueid, 3702 cb_parse_ptype, NULL)) 3703 rte_exit(EXIT_FAILURE, 3704 "Failed to add rx callback: " 3705 "port=%d\n", portid); 3706 } else if (!check_ptype(portid)) 3707 rte_exit(EXIT_FAILURE, 3708 "Port %d cannot parse packet type.\n\n" 3709 "Please add --parse-ptype to use sw " 3710 "packet type analyzer.\n\n", 3711 portid); 3712 } 3713 } 3714 3715 check_all_ports_link_status(enabled_port_mask); 3716 3717 if (lthreads_on) { 3718 printf("Starting L-Threading Model\n"); 3719 3720 #if (APP_CPU_LOAD > 0) 3721 if (cpu_load_lcore_id > 0) 3722 /* Use one lcore for cpu load collector */ 3723 nb_lcores--; 3724 #endif 3725 3726 lthread_num_schedulers_set(nb_lcores); 3727 rte_eal_mp_remote_launch(sched_spawner, NULL, SKIP_MASTER); 3728 lthread_master_spawner(NULL); 3729 3730 } else { 3731 printf("Starting P-Threading Model\n"); 3732 /* launch per-lcore init on every lcore */ 3733 rte_eal_mp_remote_launch(pthread_run, NULL, CALL_MASTER); 3734 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 3735 if (rte_eal_wait_lcore(lcore_id) < 0) 3736 return -1; 3737 } 3738 } 3739 3740 return 0; 3741 } 3742