1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #define _GNU_SOURCE
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <stdint.h>
39 #include <inttypes.h>
40 #include <sys/types.h>
41 #include <string.h>
42 #include <sys/queue.h>
43 #include <stdarg.h>
44 #include <errno.h>
45 #include <getopt.h>
46 
47 #include <rte_common.h>
48 #include <rte_vect.h>
49 #include <rte_byteorder.h>
50 #include <rte_log.h>
51 #include <rte_memory.h>
52 #include <rte_memcpy.h>
53 #include <rte_eal.h>
54 #include <rte_launch.h>
55 #include <rte_atomic.h>
56 #include <rte_cycles.h>
57 #include <rte_prefetch.h>
58 #include <rte_lcore.h>
59 #include <rte_per_lcore.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_interrupts.h>
62 #include <rte_random.h>
63 #include <rte_debug.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_ring.h>
67 #include <rte_mempool.h>
68 #include <rte_mbuf.h>
69 #include <rte_ip.h>
70 #include <rte_tcp.h>
71 #include <rte_udp.h>
72 #include <rte_string_fns.h>
73 #include <rte_pause.h>
74 
75 #include <cmdline_parse.h>
76 #include <cmdline_parse_etheraddr.h>
77 
78 #include <lthread_api.h>
79 
80 #define APP_LOOKUP_EXACT_MATCH          0
81 #define APP_LOOKUP_LPM                  1
82 #define DO_RFC_1812_CHECKS
83 
/* Enable CPU-load stats: 0 - off, 1 - on */
85 #define APP_CPU_LOAD                 1
86 
87 #ifndef APP_LOOKUP_METHOD
88 #define APP_LOOKUP_METHOD             APP_LOOKUP_LPM
89 #endif
90 
91 #ifndef __GLIBC__ /* sched_getcpu() is glibc specific */
92 #define sched_getcpu() rte_lcore_id()
93 #endif
94 
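/*
 * Check whether the device on 'portid' can report both IPv4 and IPv6 L3
 * packet types in hardware. Returns 1 when both are supported, 0 otherwise;
 * in the latter case the packet type has to be parsed in software (see
 * parse_ptype() below).
 */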
95 static int
96 check_ptype(int portid)
97 {
98 	int i, ret;
99 	int ipv4 = 0, ipv6 = 0;
100 
101 	ret = rte_eth_dev_get_supported_ptypes(portid, RTE_PTYPE_L3_MASK, NULL,
102 			0);
103 	if (ret <= 0)
104 		return 0;
105 
106 	uint32_t ptypes[ret];
107 
108 	ret = rte_eth_dev_get_supported_ptypes(portid, RTE_PTYPE_L3_MASK,
109 			ptypes, ret);
110 	for (i = 0; i < ret; ++i) {
111 		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
112 			ipv4 = 1;
113 		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
114 			ipv6 = 1;
115 	}
116 
117 	if (ipv4 && ipv6)
118 		return 1;
119 
120 	return 0;
121 }
122 
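/*
 * Software fallback for packet-type classification: look at the outer
 * EtherType and set m->packet_type to the matching L3 ptype, leaving it as
 * RTE_PTYPE_UNKNOWN otherwise. VLAN-tagged frames are not parsed here.
 */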
123 static inline void
124 parse_ptype(struct rte_mbuf *m)
125 {
126 	struct ether_hdr *eth_hdr;
127 	uint32_t packet_type = RTE_PTYPE_UNKNOWN;
128 	uint16_t ether_type;
129 
130 	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
131 	ether_type = eth_hdr->ether_type;
132 	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
133 		packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
134 	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
135 		packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
136 
137 	m->packet_type = packet_type;
138 }
139 
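/*
 * RX burst callback that runs the software ptype parser on every received
 * packet. It is meant to be registered with rte_eth_add_rx_callback() when
 * the PMD cannot classify packet types itself (the --parse-ptype option).
 */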
140 static uint16_t
141 cb_parse_ptype(__rte_unused uint16_t port, __rte_unused uint16_t queue,
142 		struct rte_mbuf *pkts[], uint16_t nb_pkts,
143 		__rte_unused uint16_t max_pkts, __rte_unused void *user_param)
144 {
145 	unsigned int i;
146 
147 	for (i = 0; i < nb_pkts; i++)
148 		parse_ptype(pkts[i]);
149 
150 	return nb_pkts;
151 }
152 
153 /*
 *  When set to zero, the simple forwarding path is enabled.
155  *  When set to one, optimized forwarding path is enabled.
156  *  Note that LPM optimisation path uses SSE4.1 instructions.
157  */
158 #define ENABLE_MULTI_BUFFER_OPTIMIZE	1
159 
160 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
161 #include <rte_hash.h>
162 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
163 #include <rte_lpm.h>
164 #include <rte_lpm6.h>
165 #else
166 #error "APP_LOOKUP_METHOD set to incorrect value"
167 #endif
168 
169 #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1
170 
171 #define MAX_JUMBO_PKT_LEN  9600
172 
173 #define IPV6_ADDR_LEN 16
174 
175 #define MEMPOOL_CACHE_SIZE 256
176 
177 /*
178  * This expression is used to calculate the number of mbufs needed depending on
179  * user input, taking into account memory for rx and tx hardware rings, cache
180  * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that
181  * NB_MBUF never goes below a minimum value of 8192
182  */
183 
184 #define NB_MBUF RTE_MAX(\
185 		(nb_ports*nb_rx_queue*nb_rxd +      \
186 		nb_ports*nb_lcores*MAX_PKT_BURST +  \
187 		nb_ports*n_tx_queue*nb_txd +        \
188 		nb_lcores*MEMPOOL_CACHE_SIZE),      \
189 		(unsigned)8192)
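
/*
 * Illustrative sizing example (hypothetical values): with 2 ports, 2 RX
 * queues per port, 128 RX/TX descriptors, 1 TX queue per port and 4 lcores,
 * the expression gives 2*2*128 + 2*4*32 + 2*1*128 + 4*256 = 2048, so the
 * RTE_MAX() floor of 8192 mbufs is used instead.
 */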
190 
191 #define MAX_PKT_BURST     32
192 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
193 
194 /*
195  * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
196  */
197 #define	MAX_TX_BURST  (MAX_PKT_BURST / 2)
198 #define BURST_SIZE    MAX_TX_BURST
199 
200 #define NB_SOCKETS 8
201 
/* Configure how many packets ahead to prefetch when reading packets */
203 #define PREFETCH_OFFSET	3
204 
205 /* Used to mark destination port as 'invalid'. */
206 #define	BAD_PORT	((uint16_t)-1)
207 
208 #define FWDSTEP	4
209 
210 /*
211  * Configurable number of RX/TX ring descriptors
212  */
213 #define RTE_TEST_RX_DESC_DEFAULT 128
214 #define RTE_TEST_TX_DESC_DEFAULT 128
215 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
216 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
217 
218 /* ethernet addresses of ports */
219 static uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
220 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
221 
222 static xmm_t val_eth[RTE_MAX_ETHPORTS];
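/*
 * dest_eth_addr[port] holds the destination MAC used when forwarding out of
 * 'port' (packed into a uint64_t so it can be written with one store), and
 * val_eth[port] caches the destination + source MAC pair as a 16-byte vector
 * for the SSE forwarding path. Both are presumably filled in during port
 * initialisation in main(), outside this section.
 */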
223 
224 /* replace first 12B of the ethernet header. */
225 #define	MASK_ETH 0x3f
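/*
 * 0x3f selects the six low 16-bit lanes in _mm_blend_epi16(), i.e. the first
 * 12 bytes of the frame: the destination and source MAC addresses.
 */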
226 
227 /* mask of enabled ports */
228 static uint32_t enabled_port_mask;
static int promiscuous_on; /**< Promiscuous mode is off by default. */
230 static int numa_on = 1;    /**< NUMA is enabled by default. */
231 static int parse_ptype_on;
232 
233 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
234 static int ipv6;           /**< ipv6 is false by default. */
235 #endif
236 
237 #if (APP_CPU_LOAD == 1)
238 
239 #define MAX_CPU RTE_MAX_LCORE
#define CPU_LOAD_TIMEOUT_US (5 * 1000 * 1000)  /**< Collect stats over 5 s */
241 
242 #define CPU_PROCESS     0
243 #define CPU_POLL        1
244 #define MAX_CPU_COUNTER 2
245 
246 struct cpu_load {
247 	uint16_t       n_cpu;
248 	uint64_t       counter;
249 	uint64_t       hits[MAX_CPU_COUNTER][MAX_CPU];
250 } __rte_cache_aligned;
251 
252 static struct cpu_load cpu_load;
253 static int cpu_load_lcore_id = -1;
254 
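/*
 * Worker threads mark themselves busy/idle around their poll and processing
 * sections with the macros below; the CPU-load collector thread (defined
 * later in this file) samples these flags to fill the hits[][] table above.
 */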
255 #define SET_CPU_BUSY(thread, counter) \
256 		thread->conf.busy[counter] = 1
257 
258 #define SET_CPU_IDLE(thread, counter) \
259 		thread->conf.busy[counter] = 0
260 
261 #define IS_CPU_BUSY(thread, counter) \
262 		(thread->conf.busy[counter] > 0)
263 
264 #else
265 
266 #define SET_CPU_BUSY(thread, counter)
267 #define SET_CPU_IDLE(thread, counter)
268 #define IS_CPU_BUSY(thread, counter) 0
269 
270 #endif
271 
272 struct mbuf_table {
273 	uint16_t len;
274 	struct rte_mbuf *m_table[MAX_PKT_BURST];
275 };
276 
277 struct lcore_rx_queue {
278 	uint16_t port_id;
279 	uint8_t queue_id;
280 } __rte_cache_aligned;
281 
282 #define MAX_RX_QUEUE_PER_LCORE 16
283 #define MAX_TX_QUEUE_PER_PORT  RTE_MAX_ETHPORTS
284 #define MAX_RX_QUEUE_PER_PORT  128
285 
286 #define MAX_LCORE_PARAMS       1024
287 struct rx_thread_params {
288 	uint16_t port_id;
289 	uint8_t queue_id;
290 	uint8_t lcore_id;
291 	uint8_t thread_id;
292 } __rte_cache_aligned;
293 
294 static struct rx_thread_params rx_thread_params_array[MAX_LCORE_PARAMS];
295 static struct rx_thread_params rx_thread_params_array_default[] = {
296 	{0, 0, 2, 0},
297 	{0, 1, 2, 1},
298 	{0, 2, 2, 2},
299 	{1, 0, 2, 3},
300 	{1, 1, 2, 4},
301 	{1, 2, 2, 5},
302 	{2, 0, 2, 6},
303 	{3, 0, 3, 7},
304 	{3, 1, 3, 8},
305 };
306 
307 static struct rx_thread_params *rx_thread_params =
308 		rx_thread_params_array_default;
309 static uint16_t nb_rx_thread_params = RTE_DIM(rx_thread_params_array_default);
310 
311 struct tx_thread_params {
312 	uint8_t lcore_id;
313 	uint8_t thread_id;
314 } __rte_cache_aligned;
315 
316 static struct tx_thread_params tx_thread_params_array[MAX_LCORE_PARAMS];
317 static struct tx_thread_params tx_thread_params_array_default[] = {
318 	{4, 0},
319 	{5, 1},
320 	{6, 2},
321 	{7, 3},
322 	{8, 4},
323 	{9, 5},
324 	{10, 6},
325 	{11, 7},
326 	{12, 8},
327 };
328 
329 static struct tx_thread_params *tx_thread_params =
330 		tx_thread_params_array_default;
331 static uint16_t nb_tx_thread_params = RTE_DIM(tx_thread_params_array_default);
332 
333 static struct rte_eth_conf port_conf = {
334 	.rxmode = {
335 		.mq_mode = ETH_MQ_RX_RSS,
336 		.max_rx_pkt_len = ETHER_MAX_LEN,
337 		.split_hdr_size = 0,
338 		.header_split   = 0, /**< Header Split disabled */
339 		.hw_ip_checksum = 1, /**< IP checksum offload enabled */
340 		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
341 		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
342 		.hw_strip_crc   = 1, /**< CRC stripped by hardware */
343 	},
344 	.rx_adv_conf = {
345 		.rss_conf = {
346 			.rss_key = NULL,
347 			.rss_hf = ETH_RSS_TCP,
348 		},
349 	},
350 	.txmode = {
351 		.mq_mode = ETH_MQ_TX_NONE,
352 	},
353 };
354 
355 static struct rte_mempool *pktmbuf_pool[NB_SOCKETS];
356 
357 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
358 
359 #include <rte_hash_crc.h>
360 #define DEFAULT_HASH_FUNC       rte_hash_crc
361 
362 struct ipv4_5tuple {
363 	uint32_t ip_dst;
364 	uint32_t ip_src;
365 	uint16_t port_dst;
366 	uint16_t port_src;
367 	uint8_t  proto;
368 } __attribute__((__packed__));
369 
370 union ipv4_5tuple_host {
371 	struct {
372 		uint8_t  pad0;
373 		uint8_t  proto;
374 		uint16_t pad1;
375 		uint32_t ip_src;
376 		uint32_t ip_dst;
377 		uint16_t port_src;
378 		uint16_t port_dst;
379 	};
380 	__m128i xmm;
381 };
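/*
 * The padding above makes this union line up with a 16-byte load taken at
 * offsetof(struct ipv4_hdr, time_to_live): pad0/proto/pad1 cover the TTL,
 * protocol and checksum bytes, followed by the addresses and L4 ports, so a
 * single masked SSE load yields the whole 5-tuple key (see
 * get_ipv4_dst_port()).
 */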
382 
383 #define XMM_NUM_IN_IPV6_5TUPLE 3
384 
385 struct ipv6_5tuple {
386 	uint8_t  ip_dst[IPV6_ADDR_LEN];
387 	uint8_t  ip_src[IPV6_ADDR_LEN];
388 	uint16_t port_dst;
389 	uint16_t port_src;
390 	uint8_t  proto;
391 } __attribute__((__packed__));
392 
393 union ipv6_5tuple_host {
394 	struct {
395 		uint16_t pad0;
396 		uint8_t  proto;
397 		uint8_t  pad1;
398 		uint8_t  ip_src[IPV6_ADDR_LEN];
399 		uint8_t  ip_dst[IPV6_ADDR_LEN];
400 		uint16_t port_src;
401 		uint16_t port_dst;
402 		uint64_t reserve;
403 	};
404 	__m128i xmm[XMM_NUM_IN_IPV6_5TUPLE];
405 };
406 
407 struct ipv4_l3fwd_route {
408 	struct ipv4_5tuple key;
409 	uint8_t if_out;
410 };
411 
412 struct ipv6_l3fwd_route {
413 	struct ipv6_5tuple key;
414 	uint8_t if_out;
415 };
416 
417 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
418 	{{IPv4(101, 0, 0, 0), IPv4(100, 10, 0, 1),  101, 11, IPPROTO_TCP}, 0},
419 	{{IPv4(201, 0, 0, 0), IPv4(200, 20, 0, 1),  102, 12, IPPROTO_TCP}, 1},
420 	{{IPv4(111, 0, 0, 0), IPv4(100, 30, 0, 1),  101, 11, IPPROTO_TCP}, 2},
421 	{{IPv4(211, 0, 0, 0), IPv4(200, 40, 0, 1),  102, 12, IPPROTO_TCP}, 3},
422 };
423 
424 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
425 	{{
426 	{0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
427 	{0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38,
428 			0x05},
429 	101, 11, IPPROTO_TCP}, 0},
430 
431 	{{
432 	{0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
433 	{0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38,
434 			0x05},
435 	102, 12, IPPROTO_TCP}, 1},
436 
437 	{{
438 	{0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
439 	{0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38,
440 			0x05},
441 	101, 11, IPPROTO_TCP}, 2},
442 
443 	{{
444 	{0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
445 	{0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38,
446 			0x05},
447 	102, 12, IPPROTO_TCP}, 3},
448 };
449 
450 typedef struct rte_hash lookup_struct_t;
451 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
452 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
453 
454 #ifdef RTE_ARCH_X86_64
455 /* default to 4 million hash entries (approx) */
456 #define L3FWD_HASH_ENTRIES (1024*1024*4)
457 #else
458 /* 32-bit has less address-space for hugepage memory, limit to 1M entries */
459 #define L3FWD_HASH_ENTRIES (1024*1024*1)
460 #endif
461 #define HASH_ENTRY_NUMBER_DEFAULT 4
462 
463 static uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT;
464 
465 static inline uint32_t
466 ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len,
467 		uint32_t init_val)
468 {
469 	const union ipv4_5tuple_host *k;
470 	uint32_t t;
471 	const uint32_t *p;
472 
473 	k = data;
474 	t = k->proto;
475 	p = (const uint32_t *)&k->port_src;
476 
477 	init_val = rte_hash_crc_4byte(t, init_val);
478 	init_val = rte_hash_crc_4byte(k->ip_src, init_val);
479 	init_val = rte_hash_crc_4byte(k->ip_dst, init_val);
480 	init_val = rte_hash_crc_4byte(*p, init_val);
481 	return init_val;
482 }
483 
484 static inline uint32_t
485 ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len,
486 		uint32_t init_val)
487 {
488 	const union ipv6_5tuple_host *k;
489 	uint32_t t;
490 	const uint32_t *p;
491 	const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3;
492 	const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3;
493 
494 	k = data;
495 	t = k->proto;
496 	p = (const uint32_t *)&k->port_src;
497 
498 	ip_src0 = (const uint32_t *) k->ip_src;
499 	ip_src1 = (const uint32_t *)(k->ip_src + 4);
500 	ip_src2 = (const uint32_t *)(k->ip_src + 8);
501 	ip_src3 = (const uint32_t *)(k->ip_src + 12);
502 	ip_dst0 = (const uint32_t *) k->ip_dst;
503 	ip_dst1 = (const uint32_t *)(k->ip_dst + 4);
504 	ip_dst2 = (const uint32_t *)(k->ip_dst + 8);
505 	ip_dst3 = (const uint32_t *)(k->ip_dst + 12);
506 	init_val = rte_hash_crc_4byte(t, init_val);
507 	init_val = rte_hash_crc_4byte(*ip_src0, init_val);
508 	init_val = rte_hash_crc_4byte(*ip_src1, init_val);
509 	init_val = rte_hash_crc_4byte(*ip_src2, init_val);
510 	init_val = rte_hash_crc_4byte(*ip_src3, init_val);
511 	init_val = rte_hash_crc_4byte(*ip_dst0, init_val);
512 	init_val = rte_hash_crc_4byte(*ip_dst1, init_val);
513 	init_val = rte_hash_crc_4byte(*ip_dst2, init_val);
514 	init_val = rte_hash_crc_4byte(*ip_dst3, init_val);
515 	init_val = rte_hash_crc_4byte(*p, init_val);
516 	return init_val;
517 }
518 
519 #define IPV4_L3FWD_NUM_ROUTES RTE_DIM(ipv4_l3fwd_route_array)
520 #define IPV6_L3FWD_NUM_ROUTES RTE_DIM(ipv6_l3fwd_route_array)
521 
522 static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
523 static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
524 
525 #endif
526 
527 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
528 struct ipv4_l3fwd_route {
529 	uint32_t ip;
530 	uint8_t  depth;
531 	uint8_t  if_out;
532 };
533 
534 struct ipv6_l3fwd_route {
535 	uint8_t ip[16];
536 	uint8_t depth;
537 	uint8_t if_out;
538 };
539 
540 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
541 	{IPv4(1, 1, 1, 0), 24, 0},
542 	{IPv4(2, 1, 1, 0), 24, 1},
543 	{IPv4(3, 1, 1, 0), 24, 2},
544 	{IPv4(4, 1, 1, 0), 24, 3},
545 	{IPv4(5, 1, 1, 0), 24, 4},
546 	{IPv4(6, 1, 1, 0), 24, 5},
547 	{IPv4(7, 1, 1, 0), 24, 6},
548 	{IPv4(8, 1, 1, 0), 24, 7},
549 };
550 
551 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
552 	{{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 0},
553 	{{2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 1},
554 	{{3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 2},
555 	{{4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 3},
556 	{{5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 4},
557 	{{6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 5},
558 	{{7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 6},
559 	{{8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 7},
560 };
561 
562 #define IPV4_L3FWD_NUM_ROUTES RTE_DIM(ipv4_l3fwd_route_array)
563 #define IPV6_L3FWD_NUM_ROUTES RTE_DIM(ipv6_l3fwd_route_array)
564 
565 #define IPV4_L3FWD_LPM_MAX_RULES         1024
566 #define IPV6_L3FWD_LPM_MAX_RULES         1024
567 #define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16)
568 
569 typedef struct rte_lpm lookup_struct_t;
570 typedef struct rte_lpm6 lookup6_struct_t;
571 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
572 static lookup6_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
573 #endif
574 
575 struct lcore_conf {
576 	lookup_struct_t *ipv4_lookup_struct;
577 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
578 	lookup6_struct_t *ipv6_lookup_struct;
579 #else
580 	lookup_struct_t *ipv6_lookup_struct;
581 #endif
582 	void *data;
583 } __rte_cache_aligned;
584 
585 static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
586 RTE_DEFINE_PER_LCORE(struct lcore_conf *, lcore_conf);
587 
588 #define MAX_RX_QUEUE_PER_THREAD 16
589 #define MAX_TX_PORT_PER_THREAD  RTE_MAX_ETHPORTS
590 #define MAX_TX_QUEUE_PER_PORT   RTE_MAX_ETHPORTS
591 #define MAX_RX_QUEUE_PER_PORT   128
592 
593 #define MAX_RX_THREAD 1024
594 #define MAX_TX_THREAD 1024
595 #define MAX_THREAD    (MAX_RX_THREAD + MAX_TX_THREAD)
596 
597 /**
 * Producer and consumer thread configuration
599  */
static int lthreads_on = 1; /**< Use lthreads for processing */
601 
602 rte_atomic16_t rx_counter;  /**< Number of spawned rx threads */
603 rte_atomic16_t tx_counter;  /**< Number of spawned tx threads */
604 
605 struct thread_conf {
606 	uint16_t lcore_id;      /**< Initial lcore for rx thread */
607 	uint16_t cpu_id;        /**< Cpu id for cpu load stats counter */
608 	uint16_t thread_id;     /**< Thread ID */
609 
610 #if (APP_CPU_LOAD > 0)
611 	int busy[MAX_CPU_COUNTER];
612 #endif
613 };
614 
615 struct thread_rx_conf {
616 	struct thread_conf conf;
617 
618 	uint16_t n_rx_queue;
619 	struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
620 
621 	uint16_t n_ring;        /**< Number of output rings */
622 	struct rte_ring *ring[RTE_MAX_LCORE];
623 	struct lthread_cond *ready[RTE_MAX_LCORE];
624 
625 #if (APP_CPU_LOAD > 0)
626 	int busy[MAX_CPU_COUNTER];
627 #endif
628 } __rte_cache_aligned;
629 
630 uint16_t n_rx_thread;
631 struct thread_rx_conf rx_thread[MAX_RX_THREAD];
632 
633 struct thread_tx_conf {
634 	struct thread_conf conf;
635 
636 	uint16_t tx_queue_id[RTE_MAX_LCORE];
637 	struct mbuf_table tx_mbufs[RTE_MAX_LCORE];
638 
639 	struct rte_ring *ring;
640 	struct lthread_cond **ready;
641 
642 } __rte_cache_aligned;
643 
644 uint16_t n_tx_thread;
645 struct thread_tx_conf tx_thread[MAX_TX_THREAD];
646 
647 /* Send burst of packets on an output interface */
648 static inline int
649 send_burst(struct thread_tx_conf *qconf, uint16_t n, uint16_t port)
650 {
651 	struct rte_mbuf **m_table;
652 	int ret;
653 	uint16_t queueid;
654 
655 	queueid = qconf->tx_queue_id[port];
656 	m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
657 
658 	ret = rte_eth_tx_burst(port, queueid, m_table, n);
659 	if (unlikely(ret < n)) {
660 		do {
661 			rte_pktmbuf_free(m_table[ret]);
662 		} while (++ret < n);
663 	}
664 
665 	return 0;
666 }
667 
668 /* Enqueue a single packet, and send burst if queue is filled */
669 static inline int
670 send_single_packet(struct rte_mbuf *m, uint16_t port)
671 {
672 	uint16_t len;
673 	struct thread_tx_conf *qconf;
674 
675 	if (lthreads_on)
676 		qconf = (struct thread_tx_conf *)lthread_get_data();
677 	else
678 		qconf = (struct thread_tx_conf *)RTE_PER_LCORE(lcore_conf)->data;
679 
680 	len = qconf->tx_mbufs[port].len;
681 	qconf->tx_mbufs[port].m_table[len] = m;
682 	len++;
683 
684 	/* enough pkts to be sent */
685 	if (unlikely(len == MAX_PKT_BURST)) {
686 		send_burst(qconf, MAX_PKT_BURST, port);
687 		len = 0;
688 	}
689 
690 	qconf->tx_mbufs[port].len = len;
691 	return 0;
692 }
693 
694 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
695 	(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
696 static __rte_always_inline void
697 send_packetsx4(uint16_t port,
698 	struct rte_mbuf *m[], uint32_t num)
699 {
700 	uint32_t len, j, n;
701 	struct thread_tx_conf *qconf;
702 
703 	if (lthreads_on)
704 		qconf = (struct thread_tx_conf *)lthread_get_data();
705 	else
706 		qconf = (struct thread_tx_conf *)RTE_PER_LCORE(lcore_conf)->data;
707 
708 	len = qconf->tx_mbufs[port].len;
709 
710 	/*
	 * If the TX buffer for that queue is empty and we have enough packets,
	 * then send them straight away.
713 	 */
714 	if (num >= MAX_TX_BURST && len == 0) {
715 		n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
716 		if (unlikely(n < num)) {
717 			do {
718 				rte_pktmbuf_free(m[n]);
719 			} while (++n < num);
720 		}
721 		return;
722 	}
723 
724 	/*
725 	 * Put packets into TX buffer for that queue.
726 	 */
727 
728 	n = len + num;
729 	n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num;
730 
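	/*
	 * Copy up to 'n' mbuf pointers into the TX buffer using a
	 * Duff's-device style unrolled loop: the switch jumps into the loop
	 * to handle the n % FWDSTEP remainder first, then each pass copies
	 * FWDSTEP entries.
	 */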
731 	j = 0;
732 	switch (n % FWDSTEP) {
733 	while (j < n) {
734 	case 0:
735 		qconf->tx_mbufs[port].m_table[len + j] = m[j];
736 		j++;
737 		/* fall-through */
738 	case 3:
739 		qconf->tx_mbufs[port].m_table[len + j] = m[j];
740 		j++;
741 		/* fall-through */
742 	case 2:
743 		qconf->tx_mbufs[port].m_table[len + j] = m[j];
744 		j++;
745 		/* fall-through */
746 	case 1:
747 		qconf->tx_mbufs[port].m_table[len + j] = m[j];
748 		j++;
749 	}
750 	}
751 
752 	len += n;
753 
754 	/* enough pkts to be sent */
755 	if (unlikely(len == MAX_PKT_BURST)) {
756 
757 		send_burst(qconf, MAX_PKT_BURST, port);
758 
759 		/* copy rest of the packets into the TX buffer. */
760 		len = num - n;
761 		j = 0;
762 		switch (len % FWDSTEP) {
763 		while (j < len) {
764 		case 0:
765 			qconf->tx_mbufs[port].m_table[j] = m[n + j];
766 			j++;
767 			/* fall-through */
768 		case 3:
769 			qconf->tx_mbufs[port].m_table[j] = m[n + j];
770 			j++;
771 			/* fall-through */
772 		case 2:
773 			qconf->tx_mbufs[port].m_table[j] = m[n + j];
774 			j++;
775 			/* fall-through */
776 		case 1:
777 			qconf->tx_mbufs[port].m_table[j] = m[n + j];
778 			j++;
779 		}
780 		}
781 	}
782 
783 	qconf->tx_mbufs[port].len = len;
784 }
785 #endif /* APP_LOOKUP_LPM */
786 
787 #ifdef DO_RFC_1812_CHECKS
788 static inline int
789 is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
790 {
791 	/* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
792 	/*
793 	 * 1. The packet length reported by the Link Layer must be large
794 	 * enough to hold the minimum length legal IP datagram (20 bytes).
795 	 */
796 	if (link_len < sizeof(struct ipv4_hdr))
797 		return -1;
798 
799 	/* 2. The IP checksum must be correct. */
800 	/* this is checked in H/W */
801 
802 	/*
803 	 * 3. The IP version number must be 4. If the version number is not 4
804 	 * then the packet may be another version of IP, such as IPng or
805 	 * ST-II.
806 	 */
807 	if (((pkt->version_ihl) >> 4) != 4)
808 		return -3;
809 	/*
810 	 * 4. The IP header length field must be large enough to hold the
811 	 * minimum length legal IP datagram (20 bytes = 5 words).
812 	 */
813 	if ((pkt->version_ihl & 0xf) < 5)
814 		return -4;
815 
816 	/*
817 	 * 5. The IP total length field must be large enough to hold the IP
818 	 * datagram header, whose length is specified in the IP header length
819 	 * field.
820 	 */
821 	if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
822 		return -5;
823 
824 	return 0;
825 }
826 #endif
827 
828 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
829 
830 static __m128i mask0;
831 static __m128i mask1;
832 static __m128i mask2;
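/*
 * mask0 selects the IPv4 5-tuple fields, and mask1/mask2 the IPv6 ones, from
 * the unaligned SSE header loads below. They are presumably initialised by
 * the hash setup code later in this file (outside this section).
 */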
833 static inline uint16_t
834 get_ipv4_dst_port(void *ipv4_hdr, uint16_t portid,
835 		lookup_struct_t *ipv4_l3fwd_lookup_struct)
836 {
837 	int ret = 0;
838 	union ipv4_5tuple_host key;
839 
840 	ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct ipv4_hdr, time_to_live);
841 	__m128i data = _mm_loadu_si128((__m128i *)(ipv4_hdr));
842 	/* Get 5 tuple: dst port, src port, dst IP address, src IP address and
843 	   protocol */
844 	key.xmm = _mm_and_si128(data, mask0);
845 	/* Find destination port */
846 	ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
847 	return ((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]);
848 }
849 
850 static inline uint16_t
851 get_ipv6_dst_port(void *ipv6_hdr, uint16_t portid,
852 		lookup_struct_t *ipv6_l3fwd_lookup_struct)
853 {
854 	int ret = 0;
855 	union ipv6_5tuple_host key;
856 
857 	ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len);
858 	__m128i data0 = _mm_loadu_si128((__m128i *)(ipv6_hdr));
859 	__m128i data1 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) +
860 			sizeof(__m128i)));
861 	__m128i data2 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) +
862 			sizeof(__m128i) + sizeof(__m128i)));
863 	/* Get part of 5 tuple: src IP address lower 96 bits and protocol */
864 	key.xmm[0] = _mm_and_si128(data0, mask1);
865 	/* Get part of 5 tuple: dst IP address lower 96 bits and src IP address
866 	   higher 32 bits */
867 	key.xmm[1] = data1;
868 	/* Get part of 5 tuple: dst port and src port and dst IP address higher
869 	   32 bits */
870 	key.xmm[2] = _mm_and_si128(data2, mask2);
871 
872 	/* Find destination port */
873 	ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
874 	return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
875 }
876 #endif
877 
878 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
879 
880 static inline uint16_t
881 get_ipv4_dst_port(void *ipv4_hdr, uint16_t portid,
882 		lookup_struct_t *ipv4_l3fwd_lookup_struct)
883 {
884 	uint32_t next_hop;
885 
886 	return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
887 		rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr),
888 		&next_hop) == 0) ? next_hop : portid);
889 }
890 
891 static inline uint16_t
892 get_ipv6_dst_port(void *ipv6_hdr,  uint16_t portid,
893 		lookup6_struct_t *ipv6_l3fwd_lookup_struct)
894 {
895 	uint32_t next_hop;
896 
897 	return ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct,
898 			((struct ipv6_hdr *)ipv6_hdr)->dst_addr, &next_hop) == 0) ?
899 			next_hop : portid);
900 }
901 #endif
902 
903 static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid)
904 		__attribute__((unused));
905 
906 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \
907 	(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
908 
909 #define MASK_ALL_PKTS   0xff
910 #define EXCLUDE_1ST_PKT 0xfe
911 #define EXCLUDE_2ND_PKT 0xfd
912 #define EXCLUDE_3RD_PKT 0xfb
913 #define EXCLUDE_4TH_PKT 0xf7
914 #define EXCLUDE_5TH_PKT 0xef
915 #define EXCLUDE_6TH_PKT 0xdf
916 #define EXCLUDE_7TH_PKT 0xbf
917 #define EXCLUDE_8TH_PKT 0x7f
918 
919 static inline void
920 simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint16_t portid)
921 {
922 	struct ether_hdr *eth_hdr[8];
923 	struct ipv4_hdr *ipv4_hdr[8];
924 	uint16_t dst_port[8];
925 	int32_t ret[8];
926 	union ipv4_5tuple_host key[8];
927 	__m128i data[8];
928 
929 	eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
930 	eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
931 	eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
932 	eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
933 	eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
934 	eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
935 	eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
936 	eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);
937 
938 	/* Handle IPv4 headers.*/
939 	ipv4_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv4_hdr *,
940 			sizeof(struct ether_hdr));
941 	ipv4_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv4_hdr *,
942 			sizeof(struct ether_hdr));
943 	ipv4_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv4_hdr *,
944 			sizeof(struct ether_hdr));
945 	ipv4_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv4_hdr *,
946 			sizeof(struct ether_hdr));
947 	ipv4_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv4_hdr *,
948 			sizeof(struct ether_hdr));
949 	ipv4_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv4_hdr *,
950 			sizeof(struct ether_hdr));
951 	ipv4_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv4_hdr *,
952 			sizeof(struct ether_hdr));
953 	ipv4_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv4_hdr *,
954 			sizeof(struct ether_hdr));
955 
956 #ifdef DO_RFC_1812_CHECKS
957 	/* Check to make sure the packet is valid (RFC1812) */
958 	uint8_t valid_mask = MASK_ALL_PKTS;
959 
960 	if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt_len) < 0) {
961 		rte_pktmbuf_free(m[0]);
962 		valid_mask &= EXCLUDE_1ST_PKT;
963 	}
964 	if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt_len) < 0) {
965 		rte_pktmbuf_free(m[1]);
966 		valid_mask &= EXCLUDE_2ND_PKT;
967 	}
968 	if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt_len) < 0) {
969 		rte_pktmbuf_free(m[2]);
970 		valid_mask &= EXCLUDE_3RD_PKT;
971 	}
972 	if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt_len) < 0) {
973 		rte_pktmbuf_free(m[3]);
974 		valid_mask &= EXCLUDE_4TH_PKT;
975 	}
976 	if (is_valid_ipv4_pkt(ipv4_hdr[4], m[4]->pkt_len) < 0) {
977 		rte_pktmbuf_free(m[4]);
978 		valid_mask &= EXCLUDE_5TH_PKT;
979 	}
980 	if (is_valid_ipv4_pkt(ipv4_hdr[5], m[5]->pkt_len) < 0) {
981 		rte_pktmbuf_free(m[5]);
982 		valid_mask &= EXCLUDE_6TH_PKT;
983 	}
984 	if (is_valid_ipv4_pkt(ipv4_hdr[6], m[6]->pkt_len) < 0) {
985 		rte_pktmbuf_free(m[6]);
986 		valid_mask &= EXCLUDE_7TH_PKT;
987 	}
988 	if (is_valid_ipv4_pkt(ipv4_hdr[7], m[7]->pkt_len) < 0) {
989 		rte_pktmbuf_free(m[7]);
990 		valid_mask &= EXCLUDE_8TH_PKT;
991 	}
992 	if (unlikely(valid_mask != MASK_ALL_PKTS)) {
993 		if (valid_mask == 0)
994 			return;
995 
996 		uint8_t i = 0;
997 
998 		for (i = 0; i < 8; i++)
999 			if ((0x1 << i) & valid_mask)
				l3fwd_simple_forward(m[i], portid);

		/*
		 * All valid packets have been forwarded above and the invalid
		 * ones freed, so this burst must not be processed any further.
		 */
		return;
	}
1002 #endif /* End of #ifdef DO_RFC_1812_CHECKS */
1003 
1004 	data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *,
1005 			sizeof(struct ether_hdr) +
1006 			offsetof(struct ipv4_hdr, time_to_live)));
1007 	data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *,
1008 			sizeof(struct ether_hdr) +
1009 			offsetof(struct ipv4_hdr, time_to_live)));
1010 	data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *,
1011 			sizeof(struct ether_hdr) +
1012 			offsetof(struct ipv4_hdr, time_to_live)));
1013 	data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *,
1014 			sizeof(struct ether_hdr) +
1015 			offsetof(struct ipv4_hdr, time_to_live)));
1016 	data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], __m128i *,
1017 			sizeof(struct ether_hdr) +
1018 			offsetof(struct ipv4_hdr, time_to_live)));
1019 	data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *,
1020 			sizeof(struct ether_hdr) +
1021 			offsetof(struct ipv4_hdr, time_to_live)));
1022 	data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *,
1023 			sizeof(struct ether_hdr) +
1024 			offsetof(struct ipv4_hdr, time_to_live)));
1025 	data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *,
1026 			sizeof(struct ether_hdr) +
1027 			offsetof(struct ipv4_hdr, time_to_live)));
1028 
1029 	key[0].xmm = _mm_and_si128(data[0], mask0);
1030 	key[1].xmm = _mm_and_si128(data[1], mask0);
1031 	key[2].xmm = _mm_and_si128(data[2], mask0);
1032 	key[3].xmm = _mm_and_si128(data[3], mask0);
1033 	key[4].xmm = _mm_and_si128(data[4], mask0);
1034 	key[5].xmm = _mm_and_si128(data[5], mask0);
1035 	key[6].xmm = _mm_and_si128(data[6], mask0);
1036 	key[7].xmm = _mm_and_si128(data[7], mask0);
1037 
1038 	const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
1039 			&key[4], &key[5], &key[6], &key[7]};
1040 
1041 	rte_hash_lookup_bulk(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct,
1042 			&key_array[0], 8, ret);
1043 	dst_port[0] = ((ret[0] < 0) ? portid : ipv4_l3fwd_out_if[ret[0]]);
1044 	dst_port[1] = ((ret[1] < 0) ? portid : ipv4_l3fwd_out_if[ret[1]]);
1045 	dst_port[2] = ((ret[2] < 0) ? portid : ipv4_l3fwd_out_if[ret[2]]);
1046 	dst_port[3] = ((ret[3] < 0) ? portid : ipv4_l3fwd_out_if[ret[3]]);
1047 	dst_port[4] = ((ret[4] < 0) ? portid : ipv4_l3fwd_out_if[ret[4]]);
1048 	dst_port[5] = ((ret[5] < 0) ? portid : ipv4_l3fwd_out_if[ret[5]]);
1049 	dst_port[6] = ((ret[6] < 0) ? portid : ipv4_l3fwd_out_if[ret[6]]);
1050 	dst_port[7] = ((ret[7] < 0) ? portid : ipv4_l3fwd_out_if[ret[7]]);
1051 
1052 	if (dst_port[0] >= RTE_MAX_ETHPORTS ||
1053 			(enabled_port_mask & 1 << dst_port[0]) == 0)
1054 		dst_port[0] = portid;
1055 	if (dst_port[1] >= RTE_MAX_ETHPORTS ||
1056 			(enabled_port_mask & 1 << dst_port[1]) == 0)
1057 		dst_port[1] = portid;
1058 	if (dst_port[2] >= RTE_MAX_ETHPORTS ||
1059 			(enabled_port_mask & 1 << dst_port[2]) == 0)
1060 		dst_port[2] = portid;
1061 	if (dst_port[3] >= RTE_MAX_ETHPORTS ||
1062 			(enabled_port_mask & 1 << dst_port[3]) == 0)
1063 		dst_port[3] = portid;
1064 	if (dst_port[4] >= RTE_MAX_ETHPORTS ||
1065 			(enabled_port_mask & 1 << dst_port[4]) == 0)
1066 		dst_port[4] = portid;
1067 	if (dst_port[5] >= RTE_MAX_ETHPORTS ||
1068 			(enabled_port_mask & 1 << dst_port[5]) == 0)
1069 		dst_port[5] = portid;
1070 	if (dst_port[6] >= RTE_MAX_ETHPORTS ||
1071 			(enabled_port_mask & 1 << dst_port[6]) == 0)
1072 		dst_port[6] = portid;
1073 	if (dst_port[7] >= RTE_MAX_ETHPORTS ||
1074 			(enabled_port_mask & 1 << dst_port[7]) == 0)
1075 		dst_port[7] = portid;
1076 
1077 #ifdef DO_RFC_1812_CHECKS
1078 	/* Update time to live and header checksum */
1079 	--(ipv4_hdr[0]->time_to_live);
1080 	--(ipv4_hdr[1]->time_to_live);
1081 	--(ipv4_hdr[2]->time_to_live);
1082 	--(ipv4_hdr[3]->time_to_live);
1083 	++(ipv4_hdr[0]->hdr_checksum);
1084 	++(ipv4_hdr[1]->hdr_checksum);
1085 	++(ipv4_hdr[2]->hdr_checksum);
1086 	++(ipv4_hdr[3]->hdr_checksum);
1087 	--(ipv4_hdr[4]->time_to_live);
1088 	--(ipv4_hdr[5]->time_to_live);
1089 	--(ipv4_hdr[6]->time_to_live);
1090 	--(ipv4_hdr[7]->time_to_live);
1091 	++(ipv4_hdr[4]->hdr_checksum);
1092 	++(ipv4_hdr[5]->hdr_checksum);
1093 	++(ipv4_hdr[6]->hdr_checksum);
1094 	++(ipv4_hdr[7]->hdr_checksum);
1095 #endif
1096 
1097 	/* dst addr */
1098 	*(uint64_t *)&eth_hdr[0]->d_addr = dest_eth_addr[dst_port[0]];
1099 	*(uint64_t *)&eth_hdr[1]->d_addr = dest_eth_addr[dst_port[1]];
1100 	*(uint64_t *)&eth_hdr[2]->d_addr = dest_eth_addr[dst_port[2]];
1101 	*(uint64_t *)&eth_hdr[3]->d_addr = dest_eth_addr[dst_port[3]];
1102 	*(uint64_t *)&eth_hdr[4]->d_addr = dest_eth_addr[dst_port[4]];
1103 	*(uint64_t *)&eth_hdr[5]->d_addr = dest_eth_addr[dst_port[5]];
1104 	*(uint64_t *)&eth_hdr[6]->d_addr = dest_eth_addr[dst_port[6]];
1105 	*(uint64_t *)&eth_hdr[7]->d_addr = dest_eth_addr[dst_port[7]];
1106 
1107 	/* src addr */
1108 	ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
1109 	ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
1110 	ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
1111 	ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
1112 	ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr);
1113 	ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr);
1114 	ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr);
1115 	ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr);
1116 
1117 	send_single_packet(m[0], (uint8_t)dst_port[0]);
1118 	send_single_packet(m[1], (uint8_t)dst_port[1]);
1119 	send_single_packet(m[2], (uint8_t)dst_port[2]);
1120 	send_single_packet(m[3], (uint8_t)dst_port[3]);
1121 	send_single_packet(m[4], (uint8_t)dst_port[4]);
1122 	send_single_packet(m[5], (uint8_t)dst_port[5]);
1123 	send_single_packet(m[6], (uint8_t)dst_port[6]);
1124 	send_single_packet(m[7], (uint8_t)dst_port[7]);
1125 
1126 }
1127 
1128 static inline void get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
1129 		__m128i mask1, union ipv6_5tuple_host *key)
1130 {
1131 	__m128i tmpdata0 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
1132 			__m128i *, sizeof(struct ether_hdr) +
1133 			offsetof(struct ipv6_hdr, payload_len)));
1134 	__m128i tmpdata1 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
1135 			__m128i *, sizeof(struct ether_hdr) +
1136 			offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i)));
1137 	__m128i tmpdata2 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
1138 			__m128i *, sizeof(struct ether_hdr) +
1139 			offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i) +
1140 			sizeof(__m128i)));
1141 	key->xmm[0] = _mm_and_si128(tmpdata0, mask0);
1142 	key->xmm[1] = tmpdata1;
1143 	key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
1144 }
1145 
1146 static inline void
1147 simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint16_t portid)
1148 {
1149 	int32_t ret[8];
1150 	uint16_t dst_port[8];
1151 	struct ether_hdr *eth_hdr[8];
1152 	union ipv6_5tuple_host key[8];
1153 
1154 	__attribute__((unused)) struct ipv6_hdr *ipv6_hdr[8];
1155 
1156 	eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
1157 	eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
1158 	eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
1159 	eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
1160 	eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
1161 	eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
1162 	eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
1163 	eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);
1164 
1165 	/* Handle IPv6 headers.*/
1166 	ipv6_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv6_hdr *,
1167 			sizeof(struct ether_hdr));
1168 	ipv6_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv6_hdr *,
1169 			sizeof(struct ether_hdr));
1170 	ipv6_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv6_hdr *,
1171 			sizeof(struct ether_hdr));
1172 	ipv6_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv6_hdr *,
1173 			sizeof(struct ether_hdr));
1174 	ipv6_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv6_hdr *,
1175 			sizeof(struct ether_hdr));
1176 	ipv6_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv6_hdr *,
1177 			sizeof(struct ether_hdr));
1178 	ipv6_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv6_hdr *,
1179 			sizeof(struct ether_hdr));
1180 	ipv6_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv6_hdr *,
1181 			sizeof(struct ether_hdr));
1182 
1183 	get_ipv6_5tuple(m[0], mask1, mask2, &key[0]);
1184 	get_ipv6_5tuple(m[1], mask1, mask2, &key[1]);
1185 	get_ipv6_5tuple(m[2], mask1, mask2, &key[2]);
1186 	get_ipv6_5tuple(m[3], mask1, mask2, &key[3]);
1187 	get_ipv6_5tuple(m[4], mask1, mask2, &key[4]);
1188 	get_ipv6_5tuple(m[5], mask1, mask2, &key[5]);
1189 	get_ipv6_5tuple(m[6], mask1, mask2, &key[6]);
1190 	get_ipv6_5tuple(m[7], mask1, mask2, &key[7]);
1191 
1192 	const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
1193 			&key[4], &key[5], &key[6], &key[7]};
1194 
1195 	rte_hash_lookup_bulk(RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
			&key_array[0], 8, ret);
1197 	dst_port[0] = ((ret[0] < 0) ? portid : ipv6_l3fwd_out_if[ret[0]]);
1198 	dst_port[1] = ((ret[1] < 0) ? portid : ipv6_l3fwd_out_if[ret[1]]);
1199 	dst_port[2] = ((ret[2] < 0) ? portid : ipv6_l3fwd_out_if[ret[2]]);
1200 	dst_port[3] = ((ret[3] < 0) ? portid : ipv6_l3fwd_out_if[ret[3]]);
1201 	dst_port[4] = ((ret[4] < 0) ? portid : ipv6_l3fwd_out_if[ret[4]]);
1202 	dst_port[5] = ((ret[5] < 0) ? portid : ipv6_l3fwd_out_if[ret[5]]);
1203 	dst_port[6] = ((ret[6] < 0) ? portid : ipv6_l3fwd_out_if[ret[6]]);
1204 	dst_port[7] = ((ret[7] < 0) ? portid : ipv6_l3fwd_out_if[ret[7]]);
1205 
1206 	if (dst_port[0] >= RTE_MAX_ETHPORTS ||
1207 			(enabled_port_mask & 1 << dst_port[0]) == 0)
1208 		dst_port[0] = portid;
1209 	if (dst_port[1] >= RTE_MAX_ETHPORTS ||
1210 			(enabled_port_mask & 1 << dst_port[1]) == 0)
1211 		dst_port[1] = portid;
1212 	if (dst_port[2] >= RTE_MAX_ETHPORTS ||
1213 			(enabled_port_mask & 1 << dst_port[2]) == 0)
1214 		dst_port[2] = portid;
1215 	if (dst_port[3] >= RTE_MAX_ETHPORTS ||
1216 			(enabled_port_mask & 1 << dst_port[3]) == 0)
1217 		dst_port[3] = portid;
1218 	if (dst_port[4] >= RTE_MAX_ETHPORTS ||
1219 			(enabled_port_mask & 1 << dst_port[4]) == 0)
1220 		dst_port[4] = portid;
1221 	if (dst_port[5] >= RTE_MAX_ETHPORTS ||
1222 			(enabled_port_mask & 1 << dst_port[5]) == 0)
1223 		dst_port[5] = portid;
1224 	if (dst_port[6] >= RTE_MAX_ETHPORTS ||
1225 			(enabled_port_mask & 1 << dst_port[6]) == 0)
1226 		dst_port[6] = portid;
1227 	if (dst_port[7] >= RTE_MAX_ETHPORTS ||
1228 			(enabled_port_mask & 1 << dst_port[7]) == 0)
1229 		dst_port[7] = portid;
1230 
1231 	/* dst addr */
1232 	*(uint64_t *)&eth_hdr[0]->d_addr = dest_eth_addr[dst_port[0]];
1233 	*(uint64_t *)&eth_hdr[1]->d_addr = dest_eth_addr[dst_port[1]];
1234 	*(uint64_t *)&eth_hdr[2]->d_addr = dest_eth_addr[dst_port[2]];
1235 	*(uint64_t *)&eth_hdr[3]->d_addr = dest_eth_addr[dst_port[3]];
1236 	*(uint64_t *)&eth_hdr[4]->d_addr = dest_eth_addr[dst_port[4]];
1237 	*(uint64_t *)&eth_hdr[5]->d_addr = dest_eth_addr[dst_port[5]];
1238 	*(uint64_t *)&eth_hdr[6]->d_addr = dest_eth_addr[dst_port[6]];
1239 	*(uint64_t *)&eth_hdr[7]->d_addr = dest_eth_addr[dst_port[7]];
1240 
1241 	/* src addr */
1242 	ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
1243 	ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
1244 	ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
1245 	ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
1246 	ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr);
1247 	ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr);
1248 	ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr);
1249 	ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr);
1250 
1251 	send_single_packet(m[0], dst_port[0]);
1252 	send_single_packet(m[1], dst_port[1]);
1253 	send_single_packet(m[2], dst_port[2]);
1254 	send_single_packet(m[3], dst_port[3]);
1255 	send_single_packet(m[4], dst_port[4]);
1256 	send_single_packet(m[5], dst_port[5]);
1257 	send_single_packet(m[6], dst_port[6]);
1258 	send_single_packet(m[7], dst_port[7]);
1259 
1260 }
1261 #endif /* APP_LOOKUP_METHOD */
1262 
1263 static __rte_always_inline void
1264 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid)
1265 {
1266 	struct ether_hdr *eth_hdr;
1267 	struct ipv4_hdr *ipv4_hdr;
1268 	uint16_t dst_port;
1269 
1270 	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
1271 
1272 	if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
1273 		/* Handle IPv4 headers.*/
1274 		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
1275 				sizeof(struct ether_hdr));
1276 
1277 #ifdef DO_RFC_1812_CHECKS
1278 		/* Check to make sure the packet is valid (RFC1812) */
1279 		if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
1280 			rte_pktmbuf_free(m);
1281 			return;
1282 		}
1283 #endif
1284 
1285 		 dst_port = get_ipv4_dst_port(ipv4_hdr, portid,
1286 			RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct);
1287 		if (dst_port >= RTE_MAX_ETHPORTS ||
1288 				(enabled_port_mask & 1 << dst_port) == 0)
1289 			dst_port = portid;
1290 
1291 #ifdef DO_RFC_1812_CHECKS
1292 		/* Update time to live and header checksum */
1293 		--(ipv4_hdr->time_to_live);
1294 		++(ipv4_hdr->hdr_checksum);
1295 #endif
1296 		/* dst addr */
1297 		*(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
1298 
1299 		/* src addr */
1300 		ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
1301 
1302 		send_single_packet(m, dst_port);
1303 	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
1304 		/* Handle IPv6 headers.*/
1305 		struct ipv6_hdr *ipv6_hdr;
1306 
1307 		ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
1308 				sizeof(struct ether_hdr));
1309 
1310 		dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
1311 				RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct);
1312 
1313 		if (dst_port >= RTE_MAX_ETHPORTS ||
1314 				(enabled_port_mask & 1 << dst_port) == 0)
1315 			dst_port = portid;
1316 
1317 		/* dst addr */
1318 		*(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
1319 
1320 		/* src addr */
1321 		ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
1322 
1323 		send_single_packet(m, dst_port);
1324 	} else
1325 		/* Free the mbuf that contains non-IPV4/IPV6 packet */
1326 		rte_pktmbuf_free(m);
1327 }
1328 
1329 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
1330 	(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
1331 #ifdef DO_RFC_1812_CHECKS
1332 
1333 #define	IPV4_MIN_VER_IHL	0x45
1334 #define	IPV4_MAX_VER_IHL	0x4f
1335 #define	IPV4_MAX_VER_IHL_DIFF	(IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL)
1336 
1337 /* Minimum value of IPV4 total length (20B) in network byte order. */
1338 #define	IPV4_MIN_LEN_BE	(sizeof(struct ipv4_hdr) << 8)
1339 
1340 /*
1341  * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2:
1342  * - The IP version number must be 4.
1343  * - The IP header length field must be large enough to hold the
1344  *    minimum length legal IP datagram (20 bytes = 5 words).
1345  * - The IP total length field must be large enough to hold the IP
1346  *   datagram header, whose length is specified in the IP header length
1347  *   field.
 * If we encounter an invalid IPV4 packet, then set its destination port
 * to the BAD_PORT value.
1350  */
1351 static __rte_always_inline void
1352 rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
1353 {
1354 	uint8_t ihl;
1355 
1356 	if (RTE_ETH_IS_IPV4_HDR(ptype)) {
1357 		ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL;
1358 
1359 		ipv4_hdr->time_to_live--;
1360 		ipv4_hdr->hdr_checksum++;
1361 
1362 		if (ihl > IPV4_MAX_VER_IHL_DIFF ||
1363 				((uint8_t)ipv4_hdr->total_length == 0 &&
1364 				ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) {
1365 			dp[0] = BAD_PORT;
1366 		}
1367 	}
1368 }
1369 
1370 #else
1371 #define	rfc1812_process(mb, dp, ptype)	do { } while (0)
1372 #endif /* DO_RFC_1812_CHECKS */
1373 #endif /* APP_LOOKUP_LPM && ENABLE_MULTI_BUFFER_OPTIMIZE */
1374 
1375 
1376 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
1377 	(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
1378 
1379 static __rte_always_inline uint16_t
1380 get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, uint16_t portid)
1381 {
1382 	uint32_t next_hop;
1383 	struct ipv6_hdr *ipv6_hdr;
1384 	struct ether_hdr *eth_hdr;
1385 
1386 	if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
1387 		return (uint16_t) ((rte_lpm_lookup(
1388 				RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dst_ipv4,
1389 				&next_hop) == 0) ? next_hop : portid);
1390 
1391 	} else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
1392 
1393 		eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
1394 		ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
1395 
1396 		return (uint16_t) ((rte_lpm6_lookup(
1397 				RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
1398 				ipv6_hdr->dst_addr, &next_hop) == 0) ?
1399 				next_hop : portid);
1400 
1401 	}
1402 
1403 	return portid;
1404 }
1405 
1406 static inline void
1407 process_packet(struct rte_mbuf *pkt, uint16_t *dst_port, uint16_t portid)
1408 {
1409 	struct ether_hdr *eth_hdr;
1410 	struct ipv4_hdr *ipv4_hdr;
1411 	uint32_t dst_ipv4;
1412 	uint16_t dp;
1413 	__m128i te, ve;
1414 
1415 	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
1416 	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
1417 
1418 	dst_ipv4 = ipv4_hdr->dst_addr;
1419 	dst_ipv4 = rte_be_to_cpu_32(dst_ipv4);
1420 	dp = get_dst_port(pkt, dst_ipv4, portid);
1421 
1422 	te = _mm_load_si128((__m128i *)eth_hdr);
1423 	ve = val_eth[dp];
1424 
1425 	dst_port[0] = dp;
1426 	rfc1812_process(ipv4_hdr, dst_port, pkt->packet_type);
1427 
1428 	te =  _mm_blend_epi16(te, ve, MASK_ETH);
1429 	_mm_store_si128((__m128i *)eth_hdr, te);
1430 }
1431 
1432 /*
1433  * Read packet_type and destination IPV4 addresses from 4 mbufs.
1434  */
1435 static inline void
1436 processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
1437 		__m128i *dip,
1438 		uint32_t *ipv4_flag)
1439 {
1440 	struct ipv4_hdr *ipv4_hdr;
1441 	struct ether_hdr *eth_hdr;
1442 	uint32_t x0, x1, x2, x3;
1443 
1444 	eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *);
1445 	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
1446 	x0 = ipv4_hdr->dst_addr;
1447 	ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4;
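	/*
	 * ANDing the remaining packets' packet_type into ipv4_flag keeps the
	 * RTE_PTYPE_L3_IPV4 bit set only if all four packets are IPv4.
	 */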
1448 
1449 	eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *);
1450 	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
1451 	x1 = ipv4_hdr->dst_addr;
1452 	ipv4_flag[0] &= pkt[1]->packet_type;
1453 
1454 	eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *);
1455 	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
1456 	x2 = ipv4_hdr->dst_addr;
1457 	ipv4_flag[0] &= pkt[2]->packet_type;
1458 
1459 	eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *);
1460 	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
1461 	x3 = ipv4_hdr->dst_addr;
1462 	ipv4_flag[0] &= pkt[3]->packet_type;
1463 
1464 	dip[0] = _mm_set_epi32(x3, x2, x1, x0);
1465 }
1466 
1467 /*
1468  * Lookup into LPM for destination port.
1469  * If lookup fails, use incoming port (portid) as destination port.
1470  */
1471 static inline void
1472 processx4_step2(__m128i dip,
1473 		uint32_t ipv4_flag,
1474 		uint16_t portid,
1475 		struct rte_mbuf *pkt[FWDSTEP],
1476 		uint16_t dprt[FWDSTEP])
1477 {
1478 	rte_xmm_t dst;
1479 	const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
1480 			4, 5, 6, 7, 0, 1, 2, 3);
1481 
1482 	/* Byte swap 4 IPV4 addresses. */
1483 	dip = _mm_shuffle_epi8(dip, bswap_mask);
1484 
1485 	/* if all 4 packets are IPV4. */
1486 	if (likely(ipv4_flag)) {
1487 		rte_lpm_lookupx4(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dip,
1488 				dst.u32, portid);
1489 
1490 		/* get rid of unused upper 16 bit for each dport. */
1491 		dst.x = _mm_packs_epi32(dst.x, dst.x);
1492 		*(uint64_t *)dprt = dst.u64[0];
1493 	} else {
1494 		dst.x = dip;
1495 		dprt[0] = get_dst_port(pkt[0], dst.u32[0], portid);
1496 		dprt[1] = get_dst_port(pkt[1], dst.u32[1], portid);
1497 		dprt[2] = get_dst_port(pkt[2], dst.u32[2], portid);
1498 		dprt[3] = get_dst_port(pkt[3], dst.u32[3], portid);
1499 	}
1500 }
1501 
1502 /*
1503  * Update source and destination MAC addresses in the ethernet header.
1504  * Perform RFC1812 checks and updates for IPV4 packets.
1505  */
1506 static inline void
1507 processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
1508 {
1509 	__m128i te[FWDSTEP];
1510 	__m128i ve[FWDSTEP];
1511 	__m128i *p[FWDSTEP];
1512 
1513 	p[0] = rte_pktmbuf_mtod(pkt[0], __m128i *);
1514 	p[1] = rte_pktmbuf_mtod(pkt[1], __m128i *);
1515 	p[2] = rte_pktmbuf_mtod(pkt[2], __m128i *);
1516 	p[3] = rte_pktmbuf_mtod(pkt[3], __m128i *);
1517 
1518 	ve[0] = val_eth[dst_port[0]];
1519 	te[0] = _mm_load_si128(p[0]);
1520 
1521 	ve[1] = val_eth[dst_port[1]];
1522 	te[1] = _mm_load_si128(p[1]);
1523 
1524 	ve[2] = val_eth[dst_port[2]];
1525 	te[2] = _mm_load_si128(p[2]);
1526 
1527 	ve[3] = val_eth[dst_port[3]];
1528 	te[3] = _mm_load_si128(p[3]);
1529 
1530 	/* Update first 12 bytes, keep rest bytes intact. */
1531 	te[0] =  _mm_blend_epi16(te[0], ve[0], MASK_ETH);
1532 	te[1] =  _mm_blend_epi16(te[1], ve[1], MASK_ETH);
1533 	te[2] =  _mm_blend_epi16(te[2], ve[2], MASK_ETH);
1534 	te[3] =  _mm_blend_epi16(te[3], ve[3], MASK_ETH);
1535 
1536 	_mm_store_si128(p[0], te[0]);
1537 	_mm_store_si128(p[1], te[1]);
1538 	_mm_store_si128(p[2], te[2]);
1539 	_mm_store_si128(p[3], te[3]);
1540 
1541 	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1),
1542 			&dst_port[0], pkt[0]->packet_type);
1543 	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1),
1544 			&dst_port[1], pkt[1]->packet_type);
1545 	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1),
1546 			&dst_port[2], pkt[2]->packet_type);
1547 	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1),
1548 			&dst_port[3], pkt[3]->packet_type);
1549 }
1550 
1551 /*
 * We group consecutive packets with the same destination port into one burst.
 * To avoid extra latency this is done together with some other packet
 * processing, but after we have made a final decision about the packet's
 * destination. To do this we maintain:
1556  * pnum - array of number of consecutive packets with the same dest port for
1557  * each packet in the input burst.
1558  * lp - pointer to the last updated element in the pnum.
1559  * dlp - dest port value lp corresponds to.
1560  */
1561 
1562 #define	GRPSZ	(1 << FWDSTEP)
1563 #define	GRPMSK	(GRPSZ - 1)
1564 
1565 #define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx)	do { \
1566 	if (likely((dlp) == (dcp)[(idx)])) {         \
1567 		(lp)[0]++;                           \
1568 	} else {                                     \
1569 		(dlp) = (dcp)[idx];                  \
1570 		(lp) = (pn) + (idx);                 \
1571 		(lp)[0] = 1;                         \
1572 	}                                            \
1573 } while (0)
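
/*
 * GROUP_PORT_STEP sketch: while scanning dst_port[], if dst_port[idx] equals
 * the port of the group currently being built (dlp), the group length pointed
 * to by lp is extended; otherwise a new group of length 1 is started at
 * pnum[idx] and dlp/lp are moved to it.
 */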
1574 
1575 /*
1576  * Group consecutive packets with the same destination port in bursts of 4.
 * Suppose we have an array of destination ports:
 * dst_port[] = {a, b, c, d, e, ... }
 * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
 * We do 4 comparisons at once and the result is a 4-bit mask.
 * This mask is used as an index into a prebuilt array of pnum values.
1582  */
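/*
 * Worked example (hypothetical port values): for dst_port[] = {1, 1, 7, 7, 7}
 * the comparison mask is v = 0xd, so gptbl[0xd] applies: lpv = 1 extends the
 * group tracked by the caller's current lp, pnum[0..4] for this window is
 * written as {2, 1, 3, 2, 1}, and lp is returned pointing at pnum[2], the
 * run of 7s that may still grow in the next window.
 */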
1583 static inline uint16_t *
1584 port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2)
1585 {
1586 	static const struct {
		uint64_t pnum; /* prebuilt 4 values for pnum[]. */
		int32_t  idx;  /* index of the new last updated element. */
		uint16_t lpv;  /* add value to the last updated element. */
1590 	} gptbl[GRPSZ] = {
1591 	{
1592 		/* 0: a != b, b != c, c != d, d != e */
1593 		.pnum = UINT64_C(0x0001000100010001),
1594 		.idx = 4,
1595 		.lpv = 0,
1596 	},
1597 	{
1598 		/* 1: a == b, b != c, c != d, d != e */
1599 		.pnum = UINT64_C(0x0001000100010002),
1600 		.idx = 4,
1601 		.lpv = 1,
1602 	},
1603 	{
1604 		/* 2: a != b, b == c, c != d, d != e */
1605 		.pnum = UINT64_C(0x0001000100020001),
1606 		.idx = 4,
1607 		.lpv = 0,
1608 	},
1609 	{
1610 		/* 3: a == b, b == c, c != d, d != e */
1611 		.pnum = UINT64_C(0x0001000100020003),
1612 		.idx = 4,
1613 		.lpv = 2,
1614 	},
1615 	{
1616 		/* 4: a != b, b != c, c == d, d != e */
1617 		.pnum = UINT64_C(0x0001000200010001),
1618 		.idx = 4,
1619 		.lpv = 0,
1620 	},
1621 	{
1622 		/* 5: a == b, b != c, c == d, d != e */
1623 		.pnum = UINT64_C(0x0001000200010002),
1624 		.idx = 4,
1625 		.lpv = 1,
1626 	},
1627 	{
1628 		/* 6: a != b, b == c, c == d, d != e */
1629 		.pnum = UINT64_C(0x0001000200030001),
1630 		.idx = 4,
1631 		.lpv = 0,
1632 	},
1633 	{
1634 		/* 7: a == b, b == c, c == d, d != e */
1635 		.pnum = UINT64_C(0x0001000200030004),
1636 		.idx = 4,
1637 		.lpv = 3,
1638 	},
1639 	{
1640 		/* 8: a != b, b != c, c != d, d == e */
1641 		.pnum = UINT64_C(0x0002000100010001),
1642 		.idx = 3,
1643 		.lpv = 0,
1644 	},
1645 	{
1646 		/* 9: a == b, b != c, c != d, d == e */
1647 		.pnum = UINT64_C(0x0002000100010002),
1648 		.idx = 3,
1649 		.lpv = 1,
1650 	},
1651 	{
1652 		/* 0xa: a != b, b == c, c != d, d == e */
1653 		.pnum = UINT64_C(0x0002000100020001),
1654 		.idx = 3,
1655 		.lpv = 0,
1656 	},
1657 	{
1658 		/* 0xb: a == b, b == c, c != d, d == e */
1659 		.pnum = UINT64_C(0x0002000100020003),
1660 		.idx = 3,
1661 		.lpv = 2,
1662 	},
1663 	{
1664 		/* 0xc: a != b, b != c, c == d, d == e */
1665 		.pnum = UINT64_C(0x0002000300010001),
1666 		.idx = 2,
1667 		.lpv = 0,
1668 	},
1669 	{
1670 		/* 0xd: a == b, b != c, c == d, d == e */
1671 		.pnum = UINT64_C(0x0002000300010002),
1672 		.idx = 2,
1673 		.lpv = 1,
1674 	},
1675 	{
1676 		/* 0xe: a != b, b == c, c == d, d == e */
1677 		.pnum = UINT64_C(0x0002000300040001),
1678 		.idx = 1,
1679 		.lpv = 0,
1680 	},
1681 	{
1682 		/* 0xf: a == b, b == c, c == d, d == e */
1683 		.pnum = UINT64_C(0x0002000300040005),
1684 		.idx = 0,
1685 		.lpv = 4,
1686 	},
1687 	};
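
	/*
	 * For a given 4-bit mask, .pnum holds the four 16-bit pnum[] counters
	 * written with a single 64-bit store, .lpv is the number of leading
	 * packets of this window that extend the previous group, and .idx
	 * selects which of the freshly written counters becomes the new
	 * "last updated" element.
	 */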
1688 
1689 	union {
1690 		uint16_t u16[FWDSTEP + 1];
1691 		uint64_t u64;
1692 	} *pnum = (void *)pn;
1693 
1694 	int32_t v;
1695 
	/* lane-wise compare d[i] with d[i + 1]: 0xffff where they are equal */
	dp1 = _mm_cmpeq_epi16(dp1, dp2);
	/* widen the four low 16-bit results into 32-bit lanes */
	dp1 = _mm_unpacklo_epi16(dp1, dp1);
	/* gather the four sign bits into the group mask used to index gptbl */
	v = _mm_movemask_ps((__m128)dp1);
1699 
1700 	/* update last port counter. */
1701 	lp[0] += gptbl[v].lpv;
1702 
1703 	/* if dest port value has changed. */
1704 	if (v != GRPMSK) {
1705 		pnum->u64 = gptbl[v].pnum;
1706 		pnum->u16[FWDSTEP] = 1;
1707 		lp = pnum->u16 + gptbl[v].idx;
1708 	}
1709 
1710 	return lp;
1711 }
1712 
1713 #endif /* APP_LOOKUP_METHOD */
1714 
1715 static void
1716 process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx,
1717 		uint16_t portid)
1718 {
1719 
1720 	int j;
1721 
1722 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
1723 	(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
1724 	int32_t k;
1725 	uint16_t dlp;
1726 	uint16_t *lp;
1727 	uint16_t dst_port[MAX_PKT_BURST];
1728 	__m128i dip[MAX_PKT_BURST / FWDSTEP];
1729 	uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
1730 	uint16_t pnum[MAX_PKT_BURST + 1];
1731 #endif
1732 
1733 
1734 #if (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)
1735 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
1736 	{
1737 		/*
1738 		 * Send nb_rx - nb_rx%8 packets
1739 		 * in groups of 8.
1740 		 */
1741 		int32_t n = RTE_ALIGN_FLOOR(nb_rx, 8);
1742 
1743 		for (j = 0; j < n; j += 8) {
1744 			uint32_t pkt_type =
1745 				pkts_burst[j]->packet_type &
1746 				pkts_burst[j+1]->packet_type &
1747 				pkts_burst[j+2]->packet_type &
1748 				pkts_burst[j+3]->packet_type &
1749 				pkts_burst[j+4]->packet_type &
1750 				pkts_burst[j+5]->packet_type &
1751 				pkts_burst[j+6]->packet_type &
1752 				pkts_burst[j+7]->packet_type;
1753 			if (pkt_type & RTE_PTYPE_L3_IPV4) {
1754 				simple_ipv4_fwd_8pkts(&pkts_burst[j], portid);
1755 			} else if (pkt_type &
1756 				RTE_PTYPE_L3_IPV6) {
1757 				simple_ipv6_fwd_8pkts(&pkts_burst[j], portid);
1758 			} else {
1759 				l3fwd_simple_forward(pkts_burst[j], portid);
1760 				l3fwd_simple_forward(pkts_burst[j+1], portid);
1761 				l3fwd_simple_forward(pkts_burst[j+2], portid);
1762 				l3fwd_simple_forward(pkts_burst[j+3], portid);
1763 				l3fwd_simple_forward(pkts_burst[j+4], portid);
1764 				l3fwd_simple_forward(pkts_burst[j+5], portid);
1765 				l3fwd_simple_forward(pkts_burst[j+6], portid);
1766 				l3fwd_simple_forward(pkts_burst[j+7], portid);
1767 			}
1768 		}
1769 		for (; j < nb_rx ; j++)
1770 			l3fwd_simple_forward(pkts_burst[j], portid);
1771 	}
1772 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
1773 
1774 	k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
1775 	for (j = 0; j != k; j += FWDSTEP)
1776 		processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP],
1777 				&ipv4_flag[j / FWDSTEP]);
1778 
1779 	k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
1780 	for (j = 0; j != k; j += FWDSTEP)
1781 		processx4_step2(dip[j / FWDSTEP], ipv4_flag[j / FWDSTEP],
1782 				portid, &pkts_burst[j], &dst_port[j]);
1783 
1784 	/*
1785 	 * Finish packet processing and group consecutive
1786 	 * packets with the same destination port.
1787 	 */
1788 	k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
1789 	if (k != 0) {
1790 		__m128i dp1, dp2;
1791 
1792 		lp = pnum;
1793 		lp[0] = 1;
1794 
1795 		processx4_step3(pkts_burst, dst_port);
1796 
1797 		/* dp1: <d[0], d[1], d[2], d[3], ... > */
1798 		dp1 = _mm_loadu_si128((__m128i *)dst_port);
1799 
1800 		for (j = FWDSTEP; j != k; j += FWDSTEP) {
1801 			processx4_step3(&pkts_burst[j], &dst_port[j]);
1802 
1803 			/*
1804 			 * dp2:
1805 			 * <d[j-3], d[j-2], d[j-1], d[j], ... >
1806 			 */
1807 			dp2 = _mm_loadu_si128(
1808 					(__m128i *)&dst_port[j - FWDSTEP + 1]);
1809 			lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
1810 
1811 			/*
1812 			 * dp1:
1813 			 * <d[j], d[j+1], d[j+2], d[j+3], ... >
1814 			 */
1815 			dp1 = _mm_srli_si128(dp2, (FWDSTEP - 1) *
1816 					sizeof(dst_port[0]));
1817 		}
1818 
1819 		/*
1820 		 * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
1821 		 */
1822 		dp2 = _mm_shufflelo_epi16(dp1, 0xf9);
1823 		lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
1824 
1825 		/*
1826 		 * remove values added by the last repeated
1827 		 * dst port.
1828 		 */
1829 		lp[0]--;
1830 		dlp = dst_port[j - 1];
1831 	} else {
		/* set dlp and lp to values that are never used. */
1833 		dlp = BAD_PORT - 1;
1834 		lp = pnum + MAX_PKT_BURST;
1835 	}
1836 
1837 	/* Process up to last 3 packets one by one. */
1838 	switch (nb_rx % FWDSTEP) {
1839 	case 3:
1840 		process_packet(pkts_burst[j], dst_port + j, portid);
1841 		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
1842 		j++;
1843 		/* fall-through */
1844 	case 2:
1845 		process_packet(pkts_burst[j], dst_port + j, portid);
1846 		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
1847 		j++;
1848 		/* fall-through */
1849 	case 1:
1850 		process_packet(pkts_burst[j], dst_port + j, portid);
1851 		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
1852 		j++;
1853 	}
1854 
1855 	/*
	 * Send packets out through their destination port.
	 * Consecutive packets with the same destination port
	 * are already grouped together.
	 * If the destination port for a packet equals BAD_PORT,
1860 	 * then free the packet without sending it out.
1861 	 */
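	/*
	 * For example (illustrative), with dst_port[] = {1, 1, 2} the groups
	 * are pnum[0] = 2 and pnum[2] = 1, so two packets are sent to port 1
	 * and one to port 2.
	 */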
1862 	for (j = 0; j < nb_rx; j += k) {
1863 
1864 		int32_t m;
1865 		uint16_t pn;
1866 
1867 		pn = dst_port[j];
1868 		k = pnum[j];
1869 
1870 		if (likely(pn != BAD_PORT))
1871 			send_packetsx4(pn, pkts_burst + j, k);
1872 		else
1873 			for (m = j; m != j + k; m++)
1874 				rte_pktmbuf_free(pkts_burst[m]);
1875 
1876 	}
1877 
1878 #endif /* APP_LOOKUP_METHOD */
1879 #else /* ENABLE_MULTI_BUFFER_OPTIMIZE == 0 */
1880 
1881 	/* Prefetch first packets */
1882 	for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++)
1883 		rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *));
1884 
1885 	/* Prefetch and forward already prefetched packets */
1886 	for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
1887 		rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
1888 				j + PREFETCH_OFFSET], void *));
1889 		l3fwd_simple_forward(pkts_burst[j], portid);
1890 	}
1891 
1892 	/* Forward remaining prefetched packets */
1893 	for (; j < nb_rx; j++)
1894 		l3fwd_simple_forward(pkts_burst[j], portid);
1895 
1896 #endif /* ENABLE_MULTI_BUFFER_OPTIMIZE */
1897 
1898 }
1899 
1900 #if (APP_CPU_LOAD > 0)
1901 
1902 /*
1903  * CPU-load stats collector
1904  */
1905 static int
1906 cpu_load_collector(__rte_unused void *arg) {
1907 	unsigned i, j, k;
1908 	uint64_t hits;
1909 	uint64_t prev_tsc, diff_tsc, cur_tsc;
1910 	uint64_t total[MAX_CPU] = { 0 };
1911 	unsigned min_cpu = MAX_CPU;
1912 	unsigned max_cpu = 0;
1913 	unsigned cpu_id;
1914 	int busy_total = 0;
1915 	int busy_flag = 0;
1916 
1917 	unsigned int n_thread_per_cpu[MAX_CPU] = { 0 };
1918 	struct thread_conf *thread_per_cpu[MAX_CPU][MAX_THREAD];
1919 
1920 	struct thread_conf *thread_conf;
1921 
1922 	const uint64_t interval_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
1923 		US_PER_S * CPU_LOAD_TIMEOUT_US;
1924 
1925 	prev_tsc = 0;
1926 	/*
1927 	 * Wait for all threads
1928 	 */
1929 
1930 	printf("Waiting for %d rx threads and %d tx threads\n", n_rx_thread,
1931 			n_tx_thread);
1932 
1933 	while (rte_atomic16_read(&rx_counter) < n_rx_thread)
1934 		rte_pause();
1935 
1936 	while (rte_atomic16_read(&tx_counter) < n_tx_thread)
1937 		rte_pause();
1938 
1939 	for (i = 0; i < n_rx_thread; i++) {
1940 
1941 		thread_conf = &rx_thread[i].conf;
1942 		cpu_id = thread_conf->cpu_id;
1943 		thread_per_cpu[cpu_id][n_thread_per_cpu[cpu_id]++] = thread_conf;
1944 
1945 		if (cpu_id > max_cpu)
1946 			max_cpu = cpu_id;
1947 		if (cpu_id < min_cpu)
1948 			min_cpu = cpu_id;
1949 	}
1950 	for (i = 0; i < n_tx_thread; i++) {
1951 
1952 		thread_conf = &tx_thread[i].conf;
1953 		cpu_id = thread_conf->cpu_id;
1954 		thread_per_cpu[cpu_id][n_thread_per_cpu[cpu_id]++] = thread_conf;
1955 
		if (cpu_id > max_cpu)
			max_cpu = cpu_id;
		if (cpu_id < min_cpu)
			min_cpu = cpu_id;
1960 	}
1961 
1962 	while (1) {
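		/*
		 * On each pass, count a hit for cpu i and counter j if any of
		 * its threads is busy in state j; the accumulated hit ratios
		 * are printed as percentages roughly every CPU_LOAD_TIMEOUT_US
		 * microseconds.
		 */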
1963 
1964 		cpu_load.counter++;
1965 		for (i = min_cpu; i <= max_cpu; i++) {
1966 			for (j = 0; j < MAX_CPU_COUNTER; j++) {
1967 				for (k = 0; k < n_thread_per_cpu[i]; k++)
1968 					if (thread_per_cpu[i][k]->busy[j]) {
1969 						busy_flag = 1;
1970 						break;
1971 					}
1972 				if (busy_flag) {
1973 					cpu_load.hits[j][i]++;
1974 					busy_total = 1;
1975 					busy_flag = 0;
1976 				}
1977 			}
1978 
1979 			if (busy_total) {
1980 				total[i]++;
1981 				busy_total = 0;
1982 			}
1983 		}
1984 
1985 		cur_tsc = rte_rdtsc();
1986 
1987 		diff_tsc = cur_tsc - prev_tsc;
1988 		if (unlikely(diff_tsc > interval_tsc)) {
1989 
1990 			printf("\033c");
1991 
1992 			printf("Cpu usage for %d rx threads and %d tx threads:\n\n",
1993 					n_rx_thread, n_tx_thread);
1994 
1995 			printf("cpu#     proc%%  poll%%  overhead%%\n\n");
1996 
1997 			for (i = min_cpu; i <= max_cpu; i++) {
1998 				hits = 0;
1999 				printf("CPU %d:", i);
2000 				for (j = 0; j < MAX_CPU_COUNTER; j++) {
2001 					printf("%7" PRIu64 "",
2002 							cpu_load.hits[j][i] * 100 / cpu_load.counter);
2003 					hits += cpu_load.hits[j][i];
2004 					cpu_load.hits[j][i] = 0;
2005 				}
2006 				printf("%7" PRIu64 "\n",
2007 						100 - total[i] * 100 / cpu_load.counter);
2008 				total[i] = 0;
2009 			}
2010 			cpu_load.counter = 0;
2011 
2012 			prev_tsc = cur_tsc;
2013 		}
2014 
2015 	}
2016 }
2017 #endif /* APP_CPU_LOAD */
2018 
2019 /*
2020  * Null processing lthread loop
2021  *
 * This loop is used to start an empty scheduler on an lcore.
2023  */
2024 static void
2025 lthread_null(__rte_unused void *args)
2026 {
2027 	int lcore_id = rte_lcore_id();
2028 
2029 	RTE_LOG(INFO, L3FWD, "Starting scheduler on lcore %d.\n", lcore_id);
2030 	lthread_exit(NULL);
2031 }
2032 
2033 /* main processing loop */
2034 static void
2035 lthread_tx_per_ring(void *dummy)
2036 {
2037 	int nb_rx;
2038 	uint16_t portid;
2039 	struct rte_ring *ring;
2040 	struct thread_tx_conf *tx_conf;
2041 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2042 	struct lthread_cond *ready;
2043 
2044 	tx_conf = (struct thread_tx_conf *)dummy;
2045 	ring = tx_conf->ring;
2046 	ready = *tx_conf->ready;
2047 
2048 	lthread_set_data((void *)tx_conf);
2049 
2050 	/*
2051 	 * Move this lthread to lcore
2052 	 */
2053 	lthread_set_affinity(tx_conf->conf.lcore_id);
2054 
2055 	RTE_LOG(INFO, L3FWD, "entering main tx loop on lcore %u\n", rte_lcore_id());
2056 
2057 	nb_rx = 0;
2058 	rte_atomic16_inc(&tx_counter);
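
	/*
	 * Each pass dequeues up to MAX_PKT_BURST packets from the worker ring;
	 * when the ring is empty the lthread blocks on the "ready" condition
	 * variable until the rx lthread signals that packets are available.
	 */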
2059 	while (1) {
2060 
2061 		/*
2062 		 * Read packet from ring
2063 		 */
2064 		SET_CPU_BUSY(tx_conf, CPU_POLL);
2065 		nb_rx = rte_ring_sc_dequeue_burst(ring, (void **)pkts_burst,
2066 				MAX_PKT_BURST, NULL);
2067 		SET_CPU_IDLE(tx_conf, CPU_POLL);
2068 
2069 		if (nb_rx > 0) {
2070 			SET_CPU_BUSY(tx_conf, CPU_PROCESS);
2071 			portid = pkts_burst[0]->port;
2072 			process_burst(pkts_burst, nb_rx, portid);
2073 			SET_CPU_IDLE(tx_conf, CPU_PROCESS);
2074 			lthread_yield();
2075 		} else
2076 			lthread_cond_wait(ready, 0);
2077 
2078 	}
2079 }
2080 
2081 /*
2082  * Main tx-lthreads spawner lthread.
2083  *
 * This lthread spawns one new lthread per input ring from the producers.
2085  *
2086  */
2087 static void
2088 lthread_tx(void *args)
2089 {
2090 	struct lthread *lt;
2091 
2092 	unsigned lcore_id;
2093 	uint16_t portid;
2094 	struct thread_tx_conf *tx_conf;
2095 
2096 	tx_conf = (struct thread_tx_conf *)args;
2097 	lthread_set_data((void *)tx_conf);
2098 
2099 	/*
2100 	 * Move this lthread to the selected lcore
2101 	 */
2102 	lthread_set_affinity(tx_conf->conf.lcore_id);
2103 
2104 	/*
2105 	 * Spawn tx readers (one per input ring)
2106 	 */
2107 	lthread_create(&lt, tx_conf->conf.lcore_id, lthread_tx_per_ring,
2108 			(void *)tx_conf);
2109 
2110 	lcore_id = rte_lcore_id();
2111 
2112 	RTE_LOG(INFO, L3FWD, "Entering Tx main loop on lcore %u\n", lcore_id);
2113 
2114 	tx_conf->conf.cpu_id = sched_getcpu();
2115 	while (1) {
2116 
2117 		lthread_sleep(BURST_TX_DRAIN_US * 1000);
2118 
		/*
		 * TX burst queue drain: flush partially filled per-port tx
		 * buffers so that packets do not linger when traffic is light.
		 */
2122 		for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
2123 			if (tx_conf->tx_mbufs[portid].len == 0)
2124 				continue;
2125 			SET_CPU_BUSY(tx_conf, CPU_PROCESS);
2126 			send_burst(tx_conf, tx_conf->tx_mbufs[portid].len, portid);
2127 			SET_CPU_IDLE(tx_conf, CPU_PROCESS);
2128 			tx_conf->tx_mbufs[portid].len = 0;
2129 		}
2130 
2131 	}
2132 }
2133 
2134 static void
2135 lthread_rx(void *dummy)
2136 {
2137 	int ret;
2138 	uint16_t nb_rx;
2139 	int i;
2140 	uint16_t portid;
2141 	uint8_t queueid;
2142 	int worker_id;
2143 	int len[RTE_MAX_LCORE] = { 0 };
2144 	int old_len, new_len;
2145 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2146 	struct thread_rx_conf *rx_conf;
2147 
2148 	rx_conf = (struct thread_rx_conf *)dummy;
2149 	lthread_set_data((void *)rx_conf);
2150 
2151 	/*
2152 	 * Move this lthread to lcore
2153 	 */
2154 	lthread_set_affinity(rx_conf->conf.lcore_id);
2155 
2156 	if (rx_conf->n_rx_queue == 0) {
2157 		RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", rte_lcore_id());
2158 		return;
2159 	}
2160 
2161 	RTE_LOG(INFO, L3FWD, "Entering main Rx loop on lcore %u\n", rte_lcore_id());
2162 
2163 	for (i = 0; i < rx_conf->n_rx_queue; i++) {
2164 
2165 		portid = rx_conf->rx_queue_list[i].port_id;
2166 		queueid = rx_conf->rx_queue_list[i].queue_id;
2167 		RTE_LOG(INFO, L3FWD,
2168 			" -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
2169 				rte_lcore_id(), portid, queueid);
2170 	}
2171 
2172 	/*
	 * Init all condition variables (one per rx queue)
2174 	 */
2175 	for (i = 0; i < rx_conf->n_rx_queue; i++)
2176 		lthread_cond_init(NULL, &rx_conf->ready[i], NULL);
2177 
2178 	worker_id = 0;
2179 
2180 	rx_conf->conf.cpu_id = sched_getcpu();
2181 	rte_atomic16_inc(&rx_counter);
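
	/*
	 * Main loop: packets received on each rx queue are enqueued
	 * round-robin onto the worker rings; a tx lthread is signalled only
	 * once roughly BURST_SIZE packets have accumulated on its ring, to
	 * amortize the wakeup cost. Packets that do not fit on a ring are
	 * freed.
	 */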
2182 	while (1) {
2183 
2184 		/*
2185 		 * Read packet from RX queues
2186 		 */
2187 		for (i = 0; i < rx_conf->n_rx_queue; ++i) {
2188 			portid = rx_conf->rx_queue_list[i].port_id;
2189 			queueid = rx_conf->rx_queue_list[i].queue_id;
2190 
2191 			SET_CPU_BUSY(rx_conf, CPU_POLL);
2192 			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
2193 				MAX_PKT_BURST);
2194 			SET_CPU_IDLE(rx_conf, CPU_POLL);
2195 
2196 			if (nb_rx != 0) {
2197 				worker_id = (worker_id + 1) % rx_conf->n_ring;
2198 				old_len = len[worker_id];
2199 
2200 				SET_CPU_BUSY(rx_conf, CPU_PROCESS);
2201 				ret = rte_ring_sp_enqueue_burst(
2202 						rx_conf->ring[worker_id],
2203 						(void **) pkts_burst,
2204 						nb_rx, NULL);
2205 
2206 				new_len = old_len + ret;
2207 
2208 				if (new_len >= BURST_SIZE) {
2209 					lthread_cond_signal(rx_conf->ready[worker_id]);
2210 					new_len = 0;
2211 				}
2212 
2213 				len[worker_id] = new_len;
2214 
2215 				if (unlikely(ret < nb_rx)) {
2216 					uint32_t k;
2217 
2218 					for (k = ret; k < nb_rx; k++) {
2219 						struct rte_mbuf *m = pkts_burst[k];
2220 
2221 						rte_pktmbuf_free(m);
2222 					}
2223 				}
2224 				SET_CPU_IDLE(rx_conf, CPU_PROCESS);
2225 			}
2226 
2227 			lthread_yield();
2228 		}
2229 	}
2230 }
2231 
2232 /*
 * Start scheduler with an initial lthread on the lcore.
 *
 * This lthread loop spawns all rx and tx lthreads on the master lcore.
2236  */
2237 
2238 static void
2239 lthread_spawner(__rte_unused void *arg) {
2240 	struct lthread *lt[MAX_THREAD];
2241 	int i;
2242 	int n_thread = 0;
2243 
2244 	printf("Entering lthread_spawner\n");
2245 
2246 	/*
2247 	 * Create producers (rx threads) on default lcore
2248 	 */
2249 	for (i = 0; i < n_rx_thread; i++) {
2250 		rx_thread[i].conf.thread_id = i;
2251 		lthread_create(&lt[n_thread], -1, lthread_rx,
2252 				(void *)&rx_thread[i]);
2253 		n_thread++;
2254 	}
2255 
2256 	/*
	 * Wait for all producers. Since some producers may be started on the
	 * same scheduler as this lthread, sleeping (yielding) is required to
	 * let them run and to prevent deadlock here.
2260 	 */
2261 	while (rte_atomic16_read(&rx_counter) < n_rx_thread)
2262 		lthread_sleep(100000);
2263 
2264 	/*
2265 	 * Create consumers (tx threads) on default lcore_id
2266 	 */
2267 	for (i = 0; i < n_tx_thread; i++) {
2268 		tx_thread[i].conf.thread_id = i;
2269 		lthread_create(&lt[n_thread], -1, lthread_tx,
2270 				(void *)&tx_thread[i]);
2271 		n_thread++;
2272 	}
2273 
2274 	/*
2275 	 * Wait for all threads finished
2276 	 */
2277 	for (i = 0; i < n_thread; i++)
2278 		lthread_join(lt[i], NULL);
2279 
2280 }
2281 
2282 /*
2283  * Start master scheduler with initial lthread spawning rx and tx lthreads
2284  * (main_lthread_master).
2285  */
2286 static int
2287 lthread_master_spawner(__rte_unused void *arg) {
2288 	struct lthread *lt;
2289 	int lcore_id = rte_lcore_id();
2290 
2291 	RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
2292 	lthread_create(&lt, -1, lthread_spawner, NULL);
2293 	lthread_run();
2294 
2295 	return 0;
2296 }
2297 
2298 /*
2299  * Start scheduler on lcore.
2300  */
2301 static int
2302 sched_spawner(__rte_unused void *arg) {
2303 	struct lthread *lt;
2304 	int lcore_id = rte_lcore_id();
2305 
2306 #if (APP_CPU_LOAD)
2307 	if (lcore_id == cpu_load_lcore_id) {
2308 		cpu_load_collector(arg);
2309 		return 0;
2310 	}
2311 #endif /* APP_CPU_LOAD */
2312 
2313 	RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
2314 	lthread_create(&lt, -1, lthread_null, NULL);
2315 	lthread_run();
2316 
2317 	return 0;
2318 }
2319 
2320 /* main processing loop */
2321 static int
2322 pthread_tx(void *dummy)
2323 {
2324 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2325 	uint64_t prev_tsc, diff_tsc, cur_tsc;
2326 	int nb_rx;
2327 	uint16_t portid;
2328 	struct thread_tx_conf *tx_conf;
2329 
2330 	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
2331 		US_PER_S * BURST_TX_DRAIN_US;
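	/*
	 * For example (assumed values): with a 2 GHz TSC and BURST_TX_DRAIN_US
	 * of 100 us, drain_tsc is roughly 200,000 cycles.
	 */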
2332 
2333 	prev_tsc = 0;
2334 
2335 	tx_conf = (struct thread_tx_conf *)dummy;
2336 
2337 	RTE_LOG(INFO, L3FWD, "Entering main Tx loop on lcore %u\n", rte_lcore_id());
2338 
2339 	tx_conf->conf.cpu_id = sched_getcpu();
2340 	rte_atomic16_inc(&tx_counter);
2341 	while (1) {
2342 
2343 		cur_tsc = rte_rdtsc();
2344 
2345 		/*
2346 		 * TX burst queue drain
2347 		 */
2348 		diff_tsc = cur_tsc - prev_tsc;
2349 		if (unlikely(diff_tsc > drain_tsc)) {
2350 
2351 			/*
2352 			 * This could be optimized (use queueid instead of
2353 			 * portid), but it is not called so often
2354 			 */
2355 			SET_CPU_BUSY(tx_conf, CPU_PROCESS);
2356 			for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
2357 				if (tx_conf->tx_mbufs[portid].len == 0)
2358 					continue;
2359 				send_burst(tx_conf, tx_conf->tx_mbufs[portid].len, portid);
2360 				tx_conf->tx_mbufs[portid].len = 0;
2361 			}
2362 			SET_CPU_IDLE(tx_conf, CPU_PROCESS);
2363 
2364 			prev_tsc = cur_tsc;
2365 		}
2366 
2367 		/*
2368 		 * Read packet from ring
2369 		 */
2370 		SET_CPU_BUSY(tx_conf, CPU_POLL);
2371 		nb_rx = rte_ring_sc_dequeue_burst(tx_conf->ring,
2372 				(void **)pkts_burst, MAX_PKT_BURST, NULL);
2373 		SET_CPU_IDLE(tx_conf, CPU_POLL);
2374 
2375 		if (unlikely(nb_rx == 0)) {
2376 			sched_yield();
2377 			continue;
2378 		}
2379 
2380 		SET_CPU_BUSY(tx_conf, CPU_PROCESS);
2381 		portid = pkts_burst[0]->port;
2382 		process_burst(pkts_burst, nb_rx, portid);
2383 		SET_CPU_IDLE(tx_conf, CPU_PROCESS);
2384 
2385 	}
2386 }
2387 
2388 static int
2389 pthread_rx(void *dummy)
2390 {
2391 	int i;
2392 	int worker_id;
2393 	uint32_t n;
2394 	uint32_t nb_rx;
2395 	unsigned lcore_id;
2396 	uint8_t queueid;
2397 	uint16_t portid;
2398 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2399 
2400 	struct thread_rx_conf *rx_conf;
2401 
2402 	lcore_id = rte_lcore_id();
2403 	rx_conf = (struct thread_rx_conf *)dummy;
2404 
2405 	if (rx_conf->n_rx_queue == 0) {
2406 		RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
2407 		return 0;
2408 	}
2409 
2410 	RTE_LOG(INFO, L3FWD, "entering main rx loop on lcore %u\n", lcore_id);
2411 
2412 	for (i = 0; i < rx_conf->n_rx_queue; i++) {
2413 
2414 		portid = rx_conf->rx_queue_list[i].port_id;
2415 		queueid = rx_conf->rx_queue_list[i].queue_id;
2416 		RTE_LOG(INFO, L3FWD,
2417 			" -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
2418 				lcore_id, portid, queueid);
2419 	}
2420 
2421 	worker_id = 0;
2422 	rx_conf->conf.cpu_id = sched_getcpu();
2423 	rte_atomic16_inc(&rx_counter);
2424 	while (1) {
2425 
2426 		/*
2427 		 * Read packet from RX queues
2428 		 */
2429 		for (i = 0; i < rx_conf->n_rx_queue; ++i) {
2430 			portid = rx_conf->rx_queue_list[i].port_id;
2431 			queueid = rx_conf->rx_queue_list[i].queue_id;
2432 
2433 			SET_CPU_BUSY(rx_conf, CPU_POLL);
2434 			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
2435 				MAX_PKT_BURST);
2436 			SET_CPU_IDLE(rx_conf, CPU_POLL);
2437 
2438 			if (nb_rx == 0) {
2439 				sched_yield();
2440 				continue;
2441 			}
2442 
2443 			SET_CPU_BUSY(rx_conf, CPU_PROCESS);
2444 			worker_id = (worker_id + 1) % rx_conf->n_ring;
2445 			n = rte_ring_sp_enqueue_burst(rx_conf->ring[worker_id],
2446 					(void **)pkts_burst, nb_rx, NULL);
2447 
2448 			if (unlikely(n != nb_rx)) {
2449 				uint32_t k;
2450 
2451 				for (k = n; k < nb_rx; k++) {
2452 					struct rte_mbuf *m = pkts_burst[k];
2453 
2454 					rte_pktmbuf_free(m);
2455 				}
2456 			}
2457 
2458 			SET_CPU_IDLE(rx_conf, CPU_PROCESS);
2459 
2460 		}
2461 	}
2462 }
2463 
2464 /*
2465  * P-Thread spawner.
2466  */
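/*
 * Runs pthread_rx() or pthread_tx() if an rx/tx thread is configured for
 * the calling lcore, otherwise the lcore may run the cpu-load collector.
 */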
2467 static int
2468 pthread_run(__rte_unused void *arg) {
2469 	int lcore_id = rte_lcore_id();
2470 	int i;
2471 
2472 	for (i = 0; i < n_rx_thread; i++)
2473 		if (rx_thread[i].conf.lcore_id == lcore_id) {
2474 			printf("Start rx thread on %d...\n", lcore_id);
2475 			RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
2476 			RTE_PER_LCORE(lcore_conf)->data = (void *)&rx_thread[i];
2477 			pthread_rx((void *)&rx_thread[i]);
2478 			return 0;
2479 		}
2480 
2481 	for (i = 0; i < n_tx_thread; i++)
2482 		if (tx_thread[i].conf.lcore_id == lcore_id) {
2483 			printf("Start tx thread on %d...\n", lcore_id);
2484 			RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
2485 			RTE_PER_LCORE(lcore_conf)->data = (void *)&tx_thread[i];
2486 			pthread_tx((void *)&tx_thread[i]);
2487 			return 0;
2488 		}
2489 
2490 #if (APP_CPU_LOAD)
2491 	if (lcore_id == cpu_load_lcore_id)
2492 		cpu_load_collector(arg);
2493 #endif /* APP_CPU_LOAD */
2494 
2495 	return 0;
2496 }
2497 
2498 static int
2499 check_lcore_params(void)
2500 {
2501 	uint8_t queue, lcore;
2502 	uint16_t i;
2503 	int socketid;
2504 
2505 	for (i = 0; i < nb_rx_thread_params; ++i) {
2506 		queue = rx_thread_params[i].queue_id;
2507 		if (queue >= MAX_RX_QUEUE_PER_PORT) {
2508 			printf("invalid queue number: %hhu\n", queue);
2509 			return -1;
2510 		}
2511 		lcore = rx_thread_params[i].lcore_id;
2512 		if (!rte_lcore_is_enabled(lcore)) {
2513 			printf("error: lcore %hhu is not enabled in lcore mask\n", lcore);
2514 			return -1;
2515 		}
2516 		socketid = rte_lcore_to_socket_id(lcore);
2517 		if ((socketid != 0) && (numa_on == 0))
2518 			printf("warning: lcore %hhu is on socket %d with numa off\n",
2519 				lcore, socketid);
2520 	}
2521 	return 0;
2522 }
2523 
2524 static int
2525 check_port_config(const unsigned nb_ports)
2526 {
2527 	unsigned portid;
2528 	uint16_t i;
2529 
2530 	for (i = 0; i < nb_rx_thread_params; ++i) {
2531 		portid = rx_thread_params[i].port_id;
2532 		if ((enabled_port_mask & (1 << portid)) == 0) {
2533 			printf("port %u is not enabled in port mask\n", portid);
2534 			return -1;
2535 		}
2536 		if (portid >= nb_ports) {
2537 			printf("port %u is not present on the board\n", portid);
2538 			return -1;
2539 		}
2540 	}
2541 	return 0;
2542 }
2543 
2544 static uint8_t
2545 get_port_n_rx_queues(const uint16_t port)
2546 {
2547 	int queue = -1;
2548 	uint16_t i;
2549 
2550 	for (i = 0; i < nb_rx_thread_params; ++i)
2551 		if (rx_thread_params[i].port_id == port &&
2552 				rx_thread_params[i].queue_id > queue)
2553 			queue = rx_thread_params[i].queue_id;
2554 
2555 	return (uint8_t)(++queue);
2556 }
2557 
2558 static int
2559 init_rx_rings(void)
2560 {
2561 	unsigned socket_io;
2562 	struct thread_rx_conf *rx_conf;
2563 	struct thread_tx_conf *tx_conf;
2564 	unsigned rx_thread_id, tx_thread_id;
2565 	char name[256];
2566 	struct rte_ring *ring = NULL;
2567 
2568 	for (tx_thread_id = 0; tx_thread_id < n_tx_thread; tx_thread_id++) {
2569 
2570 		tx_conf = &tx_thread[tx_thread_id];
2571 
2572 		printf("Connecting tx-thread %d with rx-thread %d\n", tx_thread_id,
2573 				tx_conf->conf.thread_id);
2574 
2575 		rx_thread_id = tx_conf->conf.thread_id;
		if (rx_thread_id >= n_rx_thread) {
2577 			printf("connection from tx-thread %u to rx-thread %u fails "
2578 					"(rx-thread not defined)\n", tx_thread_id, rx_thread_id);
2579 			return -1;
2580 		}
2581 
2582 		rx_conf = &rx_thread[rx_thread_id];
2583 		socket_io = rte_lcore_to_socket_id(rx_conf->conf.lcore_id);
2584 
2585 		snprintf(name, sizeof(name), "app_ring_s%u_rx%u_tx%u",
2586 				socket_io, rx_thread_id, tx_thread_id);
2587 
2588 		ring = rte_ring_create(name, 1024 * 4, socket_io,
2589 				RING_F_SP_ENQ | RING_F_SC_DEQ);
2590 
2591 		if (ring == NULL) {
2592 			rte_panic("Cannot create ring to connect rx-thread %u "
2593 					"with tx-thread %u\n", rx_thread_id, tx_thread_id);
2594 		}
2595 
2596 		rx_conf->ring[rx_conf->n_ring] = ring;
2597 
2598 		tx_conf->ring = ring;
2599 		tx_conf->ready = &rx_conf->ready[rx_conf->n_ring];
2600 
2601 		rx_conf->n_ring++;
2602 	}
2603 	return 0;
2604 }
2605 
2606 static int
2607 init_rx_queues(void)
2608 {
2609 	uint16_t i, nb_rx_queue;
2610 	uint8_t thread;
2611 
2612 	n_rx_thread = 0;
2613 
2614 	for (i = 0; i < nb_rx_thread_params; ++i) {
2615 		thread = rx_thread_params[i].thread_id;
2616 		nb_rx_queue = rx_thread[thread].n_rx_queue;
2617 
2618 		if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
2619 			printf("error: too many queues (%u) for thread: %u\n",
2620 				(unsigned)nb_rx_queue + 1, (unsigned)thread);
2621 			return -1;
2622 		}
2623 
2624 		rx_thread[thread].conf.thread_id = thread;
2625 		rx_thread[thread].conf.lcore_id = rx_thread_params[i].lcore_id;
2626 		rx_thread[thread].rx_queue_list[nb_rx_queue].port_id =
2627 			rx_thread_params[i].port_id;
2628 		rx_thread[thread].rx_queue_list[nb_rx_queue].queue_id =
2629 			rx_thread_params[i].queue_id;
2630 		rx_thread[thread].n_rx_queue++;
2631 
2632 		if (thread >= n_rx_thread)
2633 			n_rx_thread = thread + 1;
2634 
2635 	}
2636 	return 0;
2637 }
2638 
2639 static int
2640 init_tx_threads(void)
2641 {
2642 	int i;
2643 
2644 	n_tx_thread = 0;
2645 	for (i = 0; i < nb_tx_thread_params; ++i) {
2646 		tx_thread[n_tx_thread].conf.thread_id = tx_thread_params[i].thread_id;
2647 		tx_thread[n_tx_thread].conf.lcore_id = tx_thread_params[i].lcore_id;
2648 		n_tx_thread++;
2649 	}
2650 	return 0;
2651 }
2652 
2653 /* display usage */
2654 static void
2655 print_usage(const char *prgname)
2656 {
2657 	printf("%s [EAL options] -- -p PORTMASK -P"
		"  [--rx (port,queue,lcore,thread)[,(port,queue,lcore,thread)]]"
		"  [--tx (lcore,thread)[,(lcore,thread)]]"
2660 		"  [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
2661 		"  [--parse-ptype]\n\n"
2662 		"  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
2663 		"  -P : enable promiscuous mode\n"
2664 		"  --rx (port,queue,lcore,thread): rx queues configuration\n"
2665 		"  --tx (lcore,thread): tx threads configuration\n"
2666 		"  --stat-lcore LCORE: use lcore for stat collector\n"
2667 		"  --eth-dest=X,MM:MM:MM:MM:MM:MM: optional, ethernet destination for port X\n"
2668 		"  --no-numa: optional, disable numa awareness\n"
2669 		"  --ipv6: optional, specify it if running ipv6 packets\n"
		"  --enable-jumbo: enable jumbo frames;"
		" the maximum packet length is PKTLEN, in decimal (64-9600)\n"
		"  --hash-entry-num: specify the number of hash entries (in hexadecimal) to be set up\n"
2673 		"  --no-lthreads: turn off lthread model\n"
2674 		"  --parse-ptype: set to use software to analyze packet type\n\n",
2675 		prgname);
2676 }
2677 
2678 static int parse_max_pkt_len(const char *pktlen)
2679 {
2680 	char *end = NULL;
2681 	unsigned long len;
2682 
2683 	/* parse decimal string */
2684 	len = strtoul(pktlen, &end, 10);
2685 	if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
2686 		return -1;
2687 
2688 	if (len == 0)
2689 		return -1;
2690 
2691 	return len;
2692 }
2693 
2694 static int
2695 parse_portmask(const char *portmask)
2696 {
2697 	char *end = NULL;
2698 	unsigned long pm;
2699 
2700 	/* parse hexadecimal string */
2701 	pm = strtoul(portmask, &end, 16);
2702 	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
2703 		return -1;
2704 
2705 	if (pm == 0)
2706 		return -1;
2707 
2708 	return pm;
2709 }
2710 
2711 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
2712 static int
2713 parse_hash_entry_number(const char *hash_entry_num)
2714 {
2715 	char *end = NULL;
2716 	unsigned long hash_en;
2717 
2718 	/* parse hexadecimal string */
2719 	hash_en = strtoul(hash_entry_num, &end, 16);
2720 	if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0'))
2721 		return -1;
2722 
2723 	if (hash_en == 0)
2724 		return -1;
2725 
2726 	return hash_en;
2727 }
2728 #endif
2729 
2730 static int
2731 parse_rx_config(const char *q_arg)
2732 {
2733 	char s[256];
2734 	const char *p, *p0 = q_arg;
2735 	char *end;
2736 	enum fieldnames {
2737 		FLD_PORT = 0,
2738 		FLD_QUEUE,
2739 		FLD_LCORE,
2740 		FLD_THREAD,
2741 		_NUM_FLD
2742 	};
2743 	unsigned long int_fld[_NUM_FLD];
2744 	char *str_fld[_NUM_FLD];
2745 	int i;
2746 	unsigned size;
2747 
2748 	nb_rx_thread_params = 0;
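
	/*
	 * q_arg has the form "(port,queue,lcore,thread)[,(port,queue,lcore,thread)...]",
	 * e.g. "(0,0,0,0),(0,1,1,1)" (illustrative values); each tuple fills
	 * one rx_thread_params_array[] entry.
	 */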
2749 
2750 	while ((p = strchr(p0, '(')) != NULL) {
2751 		++p;
2752 		p0 = strchr(p, ')');
2753 		if (p0 == NULL)
2754 			return -1;
2755 
2756 		size = p0 - p;
2757 		if (size >= sizeof(s))
2758 			return -1;
2759 
2760 		snprintf(s, sizeof(s), "%.*s", size, p);
2761 		if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
2762 			return -1;
2763 		for (i = 0; i < _NUM_FLD; i++) {
2764 			errno = 0;
2765 			int_fld[i] = strtoul(str_fld[i], &end, 0);
2766 			if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
2767 				return -1;
2768 		}
2769 		if (nb_rx_thread_params >= MAX_LCORE_PARAMS) {
2770 			printf("exceeded max number of rx params: %hu\n",
2771 					nb_rx_thread_params);
2772 			return -1;
2773 		}
2774 		rx_thread_params_array[nb_rx_thread_params].port_id =
2775 				int_fld[FLD_PORT];
2776 		rx_thread_params_array[nb_rx_thread_params].queue_id =
2777 				(uint8_t)int_fld[FLD_QUEUE];
2778 		rx_thread_params_array[nb_rx_thread_params].lcore_id =
2779 				(uint8_t)int_fld[FLD_LCORE];
2780 		rx_thread_params_array[nb_rx_thread_params].thread_id =
2781 				(uint8_t)int_fld[FLD_THREAD];
2782 		++nb_rx_thread_params;
2783 	}
2784 	rx_thread_params = rx_thread_params_array;
2785 	return 0;
2786 }
2787 
2788 static int
2789 parse_tx_config(const char *q_arg)
2790 {
2791 	char s[256];
2792 	const char *p, *p0 = q_arg;
2793 	char *end;
2794 	enum fieldnames {
2795 		FLD_LCORE = 0,
2796 		FLD_THREAD,
2797 		_NUM_FLD
2798 	};
2799 	unsigned long int_fld[_NUM_FLD];
2800 	char *str_fld[_NUM_FLD];
2801 	int i;
2802 	unsigned size;
2803 
2804 	nb_tx_thread_params = 0;
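
	/*
	 * q_arg has the form "(lcore,thread)[,(lcore,thread)...]", e.g.
	 * "(1,0),(2,1)" (illustrative values); each tuple defines one
	 * tx thread.
	 */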
2805 
2806 	while ((p = strchr(p0, '(')) != NULL) {
2807 		++p;
2808 		p0 = strchr(p, ')');
2809 		if (p0 == NULL)
2810 			return -1;
2811 
2812 		size = p0 - p;
2813 		if (size >= sizeof(s))
2814 			return -1;
2815 
2816 		snprintf(s, sizeof(s), "%.*s", size, p);
2817 		if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
2818 			return -1;
2819 		for (i = 0; i < _NUM_FLD; i++) {
2820 			errno = 0;
2821 			int_fld[i] = strtoul(str_fld[i], &end, 0);
2822 			if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
2823 				return -1;
2824 		}
2825 		if (nb_tx_thread_params >= MAX_LCORE_PARAMS) {
2826 			printf("exceeded max number of tx params: %hu\n",
2827 				nb_tx_thread_params);
2828 			return -1;
2829 		}
2830 		tx_thread_params_array[nb_tx_thread_params].lcore_id =
2831 				(uint8_t)int_fld[FLD_LCORE];
2832 		tx_thread_params_array[nb_tx_thread_params].thread_id =
2833 				(uint8_t)int_fld[FLD_THREAD];
2834 		++nb_tx_thread_params;
2835 	}
2836 	tx_thread_params = tx_thread_params_array;
2837 
2838 	return 0;
2839 }
2840 
2841 #if (APP_CPU_LOAD > 0)
2842 static int
2843 parse_stat_lcore(const char *stat_lcore)
2844 {
2845 	char *end = NULL;
2846 	unsigned long lcore_id;
2847 
2848 	lcore_id = strtoul(stat_lcore, &end, 10);
2849 	if ((stat_lcore[0] == '\0') || (end == NULL) || (*end != '\0'))
2850 		return -1;
2851 
2852 	return lcore_id;
2853 }
2854 #endif
2855 
2856 static void
2857 parse_eth_dest(const char *optarg)
2858 {
2859 	uint16_t portid;
2860 	char *port_end;
2861 	uint8_t c, *dest, peer_addr[6];
2862 
2863 	errno = 0;
2864 	portid = strtoul(optarg, &port_end, 10);
2865 	if (errno != 0 || port_end == optarg || *port_end++ != ',')
2866 		rte_exit(EXIT_FAILURE,
2867 		"Invalid eth-dest: %s", optarg);
2868 	if (portid >= RTE_MAX_ETHPORTS)
2869 		rte_exit(EXIT_FAILURE,
2870 		"eth-dest: port %d >= RTE_MAX_ETHPORTS(%d)\n",
2871 		portid, RTE_MAX_ETHPORTS);
2872 
2873 	if (cmdline_parse_etheraddr(NULL, port_end,
2874 		&peer_addr, sizeof(peer_addr)) < 0)
2875 		rte_exit(EXIT_FAILURE,
2876 		"Invalid ethernet address: %s\n",
2877 		port_end);
2878 	dest = (uint8_t *)&dest_eth_addr[portid];
2879 	for (c = 0; c < 6; c++)
2880 		dest[c] = peer_addr[c];
2881 	*(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
2882 }
2883 
2884 #define CMD_LINE_OPT_RX_CONFIG "rx"
2885 #define CMD_LINE_OPT_TX_CONFIG "tx"
2886 #define CMD_LINE_OPT_STAT_LCORE "stat-lcore"
2887 #define CMD_LINE_OPT_ETH_DEST "eth-dest"
2888 #define CMD_LINE_OPT_NO_NUMA "no-numa"
2889 #define CMD_LINE_OPT_IPV6 "ipv6"
2890 #define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
2891 #define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
2892 #define CMD_LINE_OPT_NO_LTHREADS "no-lthreads"
2893 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
2894 
2895 /* Parse the argument given in the command line of the application */
2896 static int
2897 parse_args(int argc, char **argv)
2898 {
2899 	int opt, ret;
2900 	char **argvopt;
2901 	int option_index;
2902 	char *prgname = argv[0];
2903 	static struct option lgopts[] = {
2904 		{CMD_LINE_OPT_RX_CONFIG, 1, 0, 0},
2905 		{CMD_LINE_OPT_TX_CONFIG, 1, 0, 0},
2906 		{CMD_LINE_OPT_STAT_LCORE, 1, 0, 0},
2907 		{CMD_LINE_OPT_ETH_DEST, 1, 0, 0},
2908 		{CMD_LINE_OPT_NO_NUMA, 0, 0, 0},
2909 		{CMD_LINE_OPT_IPV6, 0, 0, 0},
2910 		{CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0},
2911 		{CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0},
2912 		{CMD_LINE_OPT_NO_LTHREADS, 0, 0, 0},
2913 		{CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
2914 		{NULL, 0, 0, 0}
2915 	};
2916 
2917 	argvopt = argv;
2918 
2919 	while ((opt = getopt_long(argc, argvopt, "p:P",
2920 				lgopts, &option_index)) != EOF) {
2921 
2922 		switch (opt) {
2923 		/* portmask */
2924 		case 'p':
2925 			enabled_port_mask = parse_portmask(optarg);
2926 			if (enabled_port_mask == 0) {
2927 				printf("invalid portmask\n");
2928 				print_usage(prgname);
2929 				return -1;
2930 			}
2931 			break;
2932 		case 'P':
2933 			printf("Promiscuous mode selected\n");
2934 			promiscuous_on = 1;
2935 			break;
2936 
2937 		/* long options */
2938 		case 0:
2939 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_RX_CONFIG,
2940 				sizeof(CMD_LINE_OPT_RX_CONFIG))) {
2941 				ret = parse_rx_config(optarg);
2942 				if (ret) {
2943 					printf("invalid rx-config\n");
2944 					print_usage(prgname);
2945 					return -1;
2946 				}
2947 			}
2948 
2949 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_TX_CONFIG,
2950 				sizeof(CMD_LINE_OPT_TX_CONFIG))) {
2951 				ret = parse_tx_config(optarg);
2952 				if (ret) {
2953 					printf("invalid tx-config\n");
2954 					print_usage(prgname);
2955 					return -1;
2956 				}
2957 			}
2958 
2959 #if (APP_CPU_LOAD > 0)
2960 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_STAT_LCORE,
2961 					sizeof(CMD_LINE_OPT_STAT_LCORE))) {
2962 				cpu_load_lcore_id = parse_stat_lcore(optarg);
2963 			}
2964 #endif
2965 
2966 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ETH_DEST,
2967 				sizeof(CMD_LINE_OPT_ETH_DEST)))
2968 					parse_eth_dest(optarg);
2969 
2970 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_NUMA,
2971 				sizeof(CMD_LINE_OPT_NO_NUMA))) {
2972 				printf("numa is disabled\n");
2973 				numa_on = 0;
2974 			}
2975 
2976 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
2977 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_IPV6,
2978 				sizeof(CMD_LINE_OPT_IPV6))) {
2979 				printf("ipv6 is specified\n");
2980 				ipv6 = 1;
2981 			}
2982 #endif
2983 
2984 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_LTHREADS,
2985 					sizeof(CMD_LINE_OPT_NO_LTHREADS))) {
2986 				printf("l-threads model is disabled\n");
2987 				lthreads_on = 0;
2988 			}
2989 
2990 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_PARSE_PTYPE,
2991 					sizeof(CMD_LINE_OPT_PARSE_PTYPE))) {
2992 				printf("software packet type parsing enabled\n");
2993 				parse_ptype_on = 1;
2994 			}
2995 
2996 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ENABLE_JUMBO,
2997 				sizeof(CMD_LINE_OPT_ENABLE_JUMBO))) {
2998 				struct option lenopts = {"max-pkt-len", required_argument, 0,
2999 						0};
3000 
3001 				printf("jumbo frame is enabled - disabling simple TX path\n");
3002 				port_conf.rxmode.jumbo_frame = 1;
3003 
3004 				/* if no max-pkt-len set, use the default value ETHER_MAX_LEN */
3005 				if (0 == getopt_long(argc, argvopt, "", &lenopts,
3006 						&option_index)) {
3007 
3008 					ret = parse_max_pkt_len(optarg);
3009 					if ((ret < 64) || (ret > MAX_JUMBO_PKT_LEN)) {
3010 						printf("invalid packet length\n");
3011 						print_usage(prgname);
3012 						return -1;
3013 					}
3014 					port_conf.rxmode.max_rx_pkt_len = ret;
3015 				}
3016 				printf("set jumbo frame max packet length to %u\n",
3017 						(unsigned int)port_conf.rxmode.max_rx_pkt_len);
3018 			}
3019 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
3020 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_HASH_ENTRY_NUM,
3021 				sizeof(CMD_LINE_OPT_HASH_ENTRY_NUM))) {
3022 				ret = parse_hash_entry_number(optarg);
3023 				if ((ret > 0) && (ret <= L3FWD_HASH_ENTRIES)) {
3024 					hash_entry_number = ret;
3025 				} else {
3026 					printf("invalid hash entry number\n");
3027 					print_usage(prgname);
3028 					return -1;
3029 				}
3030 			}
3031 #endif
3032 			break;
3033 
3034 		default:
3035 			print_usage(prgname);
3036 			return -1;
3037 		}
3038 	}
3039 
3040 	if (optind >= 0)
3041 		argv[optind-1] = prgname;
3042 
3043 	ret = optind-1;
3044 	optind = 1; /* reset getopt lib */
3045 	return ret;
3046 }
3047 
3048 static void
3049 print_ethaddr(const char *name, const struct ether_addr *eth_addr)
3050 {
3051 	char buf[ETHER_ADDR_FMT_SIZE];
3052 
3053 	ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
3054 	printf("%s%s", name, buf);
3055 }
3056 
3057 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
3058 
3059 static void convert_ipv4_5tuple(struct ipv4_5tuple *key1,
3060 		union ipv4_5tuple_host *key2)
3061 {
3062 	key2->ip_dst = rte_cpu_to_be_32(key1->ip_dst);
3063 	key2->ip_src = rte_cpu_to_be_32(key1->ip_src);
3064 	key2->port_dst = rte_cpu_to_be_16(key1->port_dst);
3065 	key2->port_src = rte_cpu_to_be_16(key1->port_src);
3066 	key2->proto = key1->proto;
3067 	key2->pad0 = 0;
3068 	key2->pad1 = 0;
3069 }
3070 
3071 static void convert_ipv6_5tuple(struct ipv6_5tuple *key1,
3072 		union ipv6_5tuple_host *key2)
3073 {
3074 	uint32_t i;
3075 
3076 	for (i = 0; i < 16; i++) {
3077 		key2->ip_dst[i] = key1->ip_dst[i];
3078 		key2->ip_src[i] = key1->ip_src[i];
3079 	}
3080 	key2->port_dst = rte_cpu_to_be_16(key1->port_dst);
3081 	key2->port_src = rte_cpu_to_be_16(key1->port_src);
3082 	key2->proto = key1->proto;
3083 	key2->pad0 = 0;
3084 	key2->pad1 = 0;
3085 	key2->reserve = 0;
3086 }
3087 
3088 #define BYTE_VALUE_MAX 256
3089 #define ALL_32_BITS 0xffffffff
3090 #define BIT_8_TO_15 0x0000ff00
3091 static inline void
3092 populate_ipv4_few_flow_into_table(const struct rte_hash *h)
3093 {
3094 	uint32_t i;
3095 	int32_t ret;
3096 	uint32_t array_len = RTE_DIM(ipv4_l3fwd_route_array);
3097 
3098 	mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15);
3099 	for (i = 0; i < array_len; i++) {
3100 		struct ipv4_l3fwd_route  entry;
3101 		union ipv4_5tuple_host newkey;
3102 
3103 		entry = ipv4_l3fwd_route_array[i];
3104 		convert_ipv4_5tuple(&entry.key, &newkey);
3105 		ret = rte_hash_add_key(h, (void *)&newkey);
3106 		if (ret < 0) {
3107 			rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32
3108 				" to the l3fwd hash.\n", i);
3109 		}
3110 		ipv4_l3fwd_out_if[ret] = entry.if_out;
3111 	}
3112 	printf("Hash: Adding 0x%" PRIx32 " keys\n", array_len);
3113 }
3114 
3115 #define BIT_16_TO_23 0x00ff0000
3116 static inline void
3117 populate_ipv6_few_flow_into_table(const struct rte_hash *h)
3118 {
3119 	uint32_t i;
3120 	int32_t ret;
3121 	uint32_t array_len = RTE_DIM(ipv6_l3fwd_route_array);
3122 
3123 	mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23);
3124 	mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS);
3125 	for (i = 0; i < array_len; i++) {
3126 		struct ipv6_l3fwd_route entry;
3127 		union ipv6_5tuple_host newkey;
3128 
3129 		entry = ipv6_l3fwd_route_array[i];
3130 		convert_ipv6_5tuple(&entry.key, &newkey);
3131 		ret = rte_hash_add_key(h, (void *)&newkey);
3132 		if (ret < 0) {
3133 			rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32
3134 				" to the l3fwd hash.\n", i);
3135 		}
3136 		ipv6_l3fwd_out_if[ret] = entry.if_out;
3137 	}
	printf("Hash: Adding 0x%" PRIx32 " keys\n", array_len);
3139 }
3140 
3141 #define NUMBER_PORT_USED 4
3142 static inline void
3143 populate_ipv4_many_flow_into_table(const struct rte_hash *h,
3144 		unsigned int nr_flow)
3145 {
3146 	unsigned i;
3147 
3148 	mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15);
3149 
3150 	for (i = 0; i < nr_flow; i++) {
3151 		struct ipv4_l3fwd_route entry;
3152 		union ipv4_5tuple_host newkey;
3153 		uint8_t a = (uint8_t)((i / NUMBER_PORT_USED) % BYTE_VALUE_MAX);
3154 		uint8_t b = (uint8_t)(((i / NUMBER_PORT_USED) / BYTE_VALUE_MAX) %
3155 				BYTE_VALUE_MAX);
3156 		uint8_t c = (uint8_t)((i / NUMBER_PORT_USED) / (BYTE_VALUE_MAX *
3157 				BYTE_VALUE_MAX));
3158 		/* Create the ipv4 exact match flow */
3159 		memset(&entry, 0, sizeof(entry));
3160 		switch (i & (NUMBER_PORT_USED - 1)) {
3161 		case 0:
3162 			entry = ipv4_l3fwd_route_array[0];
3163 			entry.key.ip_dst = IPv4(101, c, b, a);
3164 			break;
3165 		case 1:
3166 			entry = ipv4_l3fwd_route_array[1];
3167 			entry.key.ip_dst = IPv4(201, c, b, a);
3168 			break;
3169 		case 2:
3170 			entry = ipv4_l3fwd_route_array[2];
3171 			entry.key.ip_dst = IPv4(111, c, b, a);
3172 			break;
3173 		case 3:
3174 			entry = ipv4_l3fwd_route_array[3];
3175 			entry.key.ip_dst = IPv4(211, c, b, a);
3176 			break;
3177 		};
3178 		convert_ipv4_5tuple(&entry.key, &newkey);
3179 		int32_t ret = rte_hash_add_key(h, (void *)&newkey);
3180 
3181 		if (ret < 0)
3182 			rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i);
3183 
3184 		ipv4_l3fwd_out_if[ret] = (uint8_t)entry.if_out;
3185 
3186 	}
3187 	printf("Hash: Adding 0x%x keys\n", nr_flow);
3188 }
3189 
3190 static inline void
3191 populate_ipv6_many_flow_into_table(const struct rte_hash *h,
3192 		unsigned int nr_flow)
3193 {
3194 	unsigned i;
3195 
3196 	mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23);
3197 	mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS);
3198 	for (i = 0; i < nr_flow; i++) {
3199 		struct ipv6_l3fwd_route entry;
3200 		union ipv6_5tuple_host newkey;
3201 
3202 		uint8_t a = (uint8_t) ((i / NUMBER_PORT_USED) % BYTE_VALUE_MAX);
3203 		uint8_t b = (uint8_t) (((i / NUMBER_PORT_USED) / BYTE_VALUE_MAX) %
3204 				BYTE_VALUE_MAX);
3205 		uint8_t c = (uint8_t) ((i / NUMBER_PORT_USED) / (BYTE_VALUE_MAX *
3206 				BYTE_VALUE_MAX));
3207 
3208 		/* Create the ipv6 exact match flow */
3209 		memset(&entry, 0, sizeof(entry));
3210 		switch (i & (NUMBER_PORT_USED - 1)) {
3211 		case 0:
3212 			entry = ipv6_l3fwd_route_array[0];
3213 			break;
3214 		case 1:
3215 			entry = ipv6_l3fwd_route_array[1];
3216 			break;
3217 		case 2:
3218 			entry = ipv6_l3fwd_route_array[2];
3219 			break;
3220 		case 3:
3221 			entry = ipv6_l3fwd_route_array[3];
3222 			break;
3223 		};
3224 		entry.key.ip_dst[13] = c;
3225 		entry.key.ip_dst[14] = b;
3226 		entry.key.ip_dst[15] = a;
3227 		convert_ipv6_5tuple(&entry.key, &newkey);
3228 		int32_t ret = rte_hash_add_key(h, (void *)&newkey);
3229 
3230 		if (ret < 0)
3231 			rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i);
3232 
3233 		ipv6_l3fwd_out_if[ret] = (uint8_t) entry.if_out;
3234 
3235 	}
3236 	printf("Hash: Adding 0x%x keys\n", nr_flow);
3237 }
3238 
3239 static void
3240 setup_hash(int socketid)
3241 {
3242 	struct rte_hash_parameters ipv4_l3fwd_hash_params = {
3243 		.name = NULL,
3244 		.entries = L3FWD_HASH_ENTRIES,
3245 		.key_len = sizeof(union ipv4_5tuple_host),
3246 		.hash_func = ipv4_hash_crc,
3247 		.hash_func_init_val = 0,
3248 	};
3249 
3250 	struct rte_hash_parameters ipv6_l3fwd_hash_params = {
3251 		.name = NULL,
3252 		.entries = L3FWD_HASH_ENTRIES,
3253 		.key_len = sizeof(union ipv6_5tuple_host),
3254 		.hash_func = ipv6_hash_crc,
3255 		.hash_func_init_val = 0,
3256 	};
3257 
3258 	char s[64];
3259 
3260 	/* create ipv4 hash */
3261 	snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
3262 	ipv4_l3fwd_hash_params.name = s;
3263 	ipv4_l3fwd_hash_params.socket_id = socketid;
3264 	ipv4_l3fwd_lookup_struct[socketid] =
3265 			rte_hash_create(&ipv4_l3fwd_hash_params);
3266 	if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
3267 		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
3268 				"socket %d\n", socketid);
3269 
3270 	/* create ipv6 hash */
3271 	snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
3272 	ipv6_l3fwd_hash_params.name = s;
3273 	ipv6_l3fwd_hash_params.socket_id = socketid;
3274 	ipv6_l3fwd_lookup_struct[socketid] =
3275 			rte_hash_create(&ipv6_l3fwd_hash_params);
3276 	if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
3277 		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
3278 				"socket %d\n", socketid);
3279 
3280 	if (hash_entry_number != HASH_ENTRY_NUMBER_DEFAULT) {
3281 		/* For testing hash matching with a large number of flows we
3282 		 * generate millions of IP 5-tuples with an incremented dst
3283 		 * address to initialize the hash table. */
3284 		if (ipv6 == 0) {
3285 			/* populate the ipv4 hash */
3286 			populate_ipv4_many_flow_into_table(
3287 				ipv4_l3fwd_lookup_struct[socketid], hash_entry_number);
3288 		} else {
3289 			/* populate the ipv6 hash */
3290 			populate_ipv6_many_flow_into_table(
3291 				ipv6_l3fwd_lookup_struct[socketid], hash_entry_number);
3292 		}
3293 	} else {
3294 		/* Use data in ipv4/ipv6 l3fwd lookup table directly to initialize
3295 		 * the hash table */
3296 		if (ipv6 == 0) {
3297 			/* populate the ipv4 hash */
3298 			populate_ipv4_few_flow_into_table(
3299 					ipv4_l3fwd_lookup_struct[socketid]);
3300 		} else {
3301 			/* populate the ipv6 hash */
3302 			populate_ipv6_few_flow_into_table(
3303 					ipv6_l3fwd_lookup_struct[socketid]);
3304 		}
3305 	}
3306 }
3307 #endif
3308 
3309 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
3310 static void
3311 setup_lpm(int socketid)
3312 {
3313 	struct rte_lpm6_config config;
3314 	struct rte_lpm_config lpm_ipv4_config;
3315 	unsigned i;
3316 	int ret;
3317 	char s[64];
3318 
3319 	/* create the LPM table */
3320 	snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
3321 	lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
3322 	lpm_ipv4_config.number_tbl8s = 256;
3323 	lpm_ipv4_config.flags = 0;
3324 	ipv4_l3fwd_lookup_struct[socketid] =
3325 			rte_lpm_create(s, socketid, &lpm_ipv4_config);
3326 	if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
3327 		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
3328 				" on socket %d\n", socketid);
3329 
3330 	/* populate the LPM table */
3331 	for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
3332 
3333 		/* skip unused ports */
3334 		if ((1 << ipv4_l3fwd_route_array[i].if_out &
3335 				enabled_port_mask) == 0)
3336 			continue;
3337 
3338 		ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
3339 			ipv4_l3fwd_route_array[i].ip,
3340 			ipv4_l3fwd_route_array[i].depth,
3341 			ipv4_l3fwd_route_array[i].if_out);
3342 
3343 		if (ret < 0) {
3344 			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
3345 				"l3fwd LPM table on socket %d\n",
3346 				i, socketid);
3347 		}
3348 
3349 		printf("LPM: Adding route 0x%08x / %d (%d)\n",
3350 			(unsigned)ipv4_l3fwd_route_array[i].ip,
3351 			ipv4_l3fwd_route_array[i].depth,
3352 			ipv4_l3fwd_route_array[i].if_out);
3353 	}
3354 
3355 	/* create the LPM6 table */
3356 	snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid);
3357 
3358 	config.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
3359 	config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
3360 	config.flags = 0;
3361 	ipv6_l3fwd_lookup_struct[socketid] = rte_lpm6_create(s, socketid,
3362 				&config);
3363 	if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
3364 		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
3365 				" on socket %d\n", socketid);
3366 
	/* populate the LPM6 table */
3368 	for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) {
3369 
3370 		/* skip unused ports */
3371 		if ((1 << ipv6_l3fwd_route_array[i].if_out &
3372 				enabled_port_mask) == 0)
3373 			continue;
3374 
3375 		ret = rte_lpm6_add(ipv6_l3fwd_lookup_struct[socketid],
3376 			ipv6_l3fwd_route_array[i].ip,
3377 			ipv6_l3fwd_route_array[i].depth,
3378 			ipv6_l3fwd_route_array[i].if_out);
3379 
3380 		if (ret < 0) {
3381 			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
3382 				"l3fwd LPM table on socket %d\n",
3383 				i, socketid);
3384 		}
3385 
3386 		printf("LPM: Adding route %s / %d (%d)\n",
3387 			"IPV6",
3388 			ipv6_l3fwd_route_array[i].depth,
3389 			ipv6_l3fwd_route_array[i].if_out);
3390 	}
3391 }
3392 #endif
3393 
3394 static int
3395 init_mem(unsigned nb_mbuf)
3396 {
3397 	struct lcore_conf *qconf;
3398 	int socketid;
3399 	unsigned lcore_id;
3400 	char s[64];
3401 
3402 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
3403 		if (rte_lcore_is_enabled(lcore_id) == 0)
3404 			continue;
3405 
3406 		if (numa_on)
3407 			socketid = rte_lcore_to_socket_id(lcore_id);
3408 		else
3409 			socketid = 0;
3410 
3411 		if (socketid >= NB_SOCKETS) {
3412 			rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
3413 				socketid, lcore_id, NB_SOCKETS);
3414 		}
3415 		if (pktmbuf_pool[socketid] == NULL) {
3416 			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
3417 			pktmbuf_pool[socketid] =
3418 				rte_pktmbuf_pool_create(s, nb_mbuf,
3419 					MEMPOOL_CACHE_SIZE, 0,
3420 					RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
3421 			if (pktmbuf_pool[socketid] == NULL)
3422 				rte_exit(EXIT_FAILURE,
3423 						"Cannot init mbuf pool on socket %d\n", socketid);
3424 			else
3425 				printf("Allocated mbuf pool on socket %d\n", socketid);
3426 
3427 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
3428 			setup_lpm(socketid);
3429 #else
3430 			setup_hash(socketid);
3431 #endif
3432 		}
3433 		qconf = &lcore_conf[lcore_id];
3434 		qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
3435 		qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
3436 	}
3437 	return 0;
3438 }
3439 
/* Check the link status of all ports in up to 9 seconds, and print the final status */
3441 static void
3442 check_all_ports_link_status(uint16_t port_num, uint32_t port_mask)
3443 {
3444 #define CHECK_INTERVAL 100 /* 100ms */
3445 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3446 	uint16_t portid;
3447 	uint8_t count, all_ports_up, print_flag = 0;
3448 	struct rte_eth_link link;
3449 
3450 	printf("\nChecking link status");
3451 	fflush(stdout);
3452 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3453 		all_ports_up = 1;
3454 		for (portid = 0; portid < port_num; portid++) {
3455 			if ((port_mask & (1 << portid)) == 0)
3456 				continue;
3457 			memset(&link, 0, sizeof(link));
3458 			rte_eth_link_get_nowait(portid, &link);
3459 			/* print link status if flag set */
3460 			if (print_flag == 1) {
				if (link.link_status)
					printf("Port %d Link Up. Speed %u Mbps - %s\n",
						portid, link.link_speed,
						(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
						("full-duplex") : ("half-duplex"));
3467 				else
3468 					printf("Port %d Link Down\n", portid);
3469 				continue;
3470 			}
3471 			/* clear all_ports_up flag if any link down */
3472 			if (link.link_status == ETH_LINK_DOWN) {
3473 				all_ports_up = 0;
3474 				break;
3475 			}
3476 		}
3477 		/* after finally printing all link status, get out */
3478 		if (print_flag == 1)
3479 			break;
3480 
3481 		if (all_ports_up == 0) {
3482 			printf(".");
3483 			fflush(stdout);
3484 			rte_delay_ms(CHECK_INTERVAL);
3485 		}
3486 
3487 		/* set the print_flag if all ports up or timeout */
3488 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3489 			print_flag = 1;
3490 			printf("done\n");
3491 		}
3492 	}
3493 }
3494 
3495 int
3496 main(int argc, char **argv)
3497 {
3498 	struct rte_eth_dev_info dev_info;
3499 	struct rte_eth_txconf *txconf;
3500 	int ret;
3501 	int i;
3502 	unsigned nb_ports;
3503 	uint16_t queueid, portid;
3504 	unsigned lcore_id;
3505 	uint32_t n_tx_queue, nb_lcores;
3506 	uint8_t nb_rx_queue, queue, socketid;
3507 
3508 	/* init EAL */
3509 	ret = rte_eal_init(argc, argv);
3510 	if (ret < 0)
3511 		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
3512 	argc -= ret;
3513 	argv += ret;
3514 
3515 	/* pre-init dst MACs for all ports to 02:00:00:00:00:xx */
3516 	for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
3517 		dest_eth_addr[portid] = ETHER_LOCAL_ADMIN_ADDR +
3518 				((uint64_t)portid << 40);
3519 		*(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
3520 	}
3521 
3522 	/* parse application arguments (after the EAL ones) */
3523 	ret = parse_args(argc, argv);
3524 	if (ret < 0)
3525 		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
3526 
3527 	if (check_lcore_params() < 0)
3528 		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
3529 
3530 	printf("Initializing rx-queues...\n");
3531 	ret = init_rx_queues();
3532 	if (ret < 0)
3533 		rte_exit(EXIT_FAILURE, "init_rx_queues failed\n");
3534 
3535 	printf("Initializing tx-threads...\n");
3536 	ret = init_tx_threads();
3537 	if (ret < 0)
3538 		rte_exit(EXIT_FAILURE, "init_tx_threads failed\n");
3539 
3540 	printf("Initializing rings...\n");
3541 	ret = init_rx_rings();
3542 	if (ret < 0)
3543 		rte_exit(EXIT_FAILURE, "init_rx_rings failed\n");
3544 
3545 	nb_ports = rte_eth_dev_count();
3546 
3547 	if (check_port_config(nb_ports) < 0)
3548 		rte_exit(EXIT_FAILURE, "check_port_config failed\n");
3549 
3550 	nb_lcores = rte_lcore_count();
3551 
3552 	/* initialize all ports */
3553 	for (portid = 0; portid < nb_ports; portid++) {
3554 		/* skip ports that are not enabled */
3555 		if ((enabled_port_mask & (1 << portid)) == 0) {
3556 			printf("\nSkipping disabled port %d\n", portid);
3557 			continue;
3558 		}
3559 
3560 		/* init port */
3561 		printf("Initializing port %d ... ", portid);
3562 		fflush(stdout);
3563 
3564 		nb_rx_queue = get_port_n_rx_queues(portid);
3565 		n_tx_queue = nb_lcores;
3566 		if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
3567 			n_tx_queue = MAX_TX_QUEUE_PER_PORT;
3568 		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
3569 			nb_rx_queue, (unsigned)n_tx_queue);
3570 		ret = rte_eth_dev_configure(portid, nb_rx_queue,
3571 					(uint16_t)n_tx_queue, &port_conf);
3572 		if (ret < 0)
3573 			rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
3574 				ret, portid);
3575 
		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						       &nb_txd);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				 "rte_eth_dev_adjust_nb_rx_tx_desc: err=%d, port=%d\n",
				 ret, portid);

		rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf(", ");
		print_ethaddr("Destination:",
			(const struct ether_addr *)&dest_eth_addr[portid]);
		printf(", ");

		/*
		 * prepare the source MAC address for each port.
		 */
		ether_addr_copy(&ports_eth_addr[portid],
			(struct ether_addr *)(val_eth + portid) + 1);

		/* init memory */
		ret = init_mem(NB_MBUF);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "init_mem failed\n");

		/* init one TX queue per (lcore, port) pair */
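		/*
		 * Every enabled lcore is given its own TX queue on this
		 * port, so the transmit path does not need to share or lock
		 * a queue between threads.
		 */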
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			if (numa_on)
				socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
			fflush(stdout);

			rte_eth_dev_info_get(portid, &dev_info);
			txconf = &dev_info.default_txconf;
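			/*
			 * With jumbo frames enabled, clear the driver's
			 * default txq_flags; the defaults are assumed to
			 * disable optional TX features (e.g. multi-segment
			 * mbufs) that large frames may require.
			 */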
			if (port_conf.rxmode.jumbo_frame)
				txconf->txq_flags = 0;
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
						     socketid, txconf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
					"port=%d\n", ret, portid);

			tx_thread[lcore_id].tx_queue_id[portid] = queueid;
			queueid++;
		}
		printf("\n");
	}

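	/*
	 * Set up the RX queues owned by each RX thread.  With NUMA enabled,
	 * a queue is created on the socket of the lcore that will poll it
	 * and is fed from that socket's mbuf pool, keeping packet buffers
	 * local to the polling core.
	 */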
	for (i = 0; i < n_rx_thread; i++) {
		lcore_id = rx_thread[i].conf.lcore_id;

		if (rte_lcore_is_enabled(lcore_id) == 0) {
			rte_exit(EXIT_FAILURE,
					"Cannot start Rx thread on lcore %u: lcore disabled\n",
					lcore_id
				);
		}

		printf("\nInitializing rx queues for Rx thread %d on lcore %u ... ",
				i, lcore_id);
		fflush(stdout);

		/* init RX queues */
		for (queue = 0; queue < rx_thread[i].n_rx_queue; ++queue) {
			portid = rx_thread[i].rx_queue_list[queue].port_id;
			queueid = rx_thread[i].rx_queue_list[queue].queue_id;

			if (numa_on)
				socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("rxq=%d,%d,%d ", portid, queueid, socketid);
			fflush(stdout);

			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
					socketid,
					NULL,
					pktmbuf_pool[socketid]);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d, "
						"port=%d\n", ret, portid);
		}
	}

	printf("\n");

	/* start ports */
	for (portid = 0; portid < nb_ports; portid++) {
		if ((enabled_port_mask & (1 << portid)) == 0)
			continue;

		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
				ret, portid);

		/*
		 * If enabled, put the device in promiscuous mode.
		 * This allows the IO forwarding mode to forward packets
		 * to itself through two cross-connected ports of the
		 * target machine.
		 */
		if (promiscuous_on)
			rte_eth_promiscuous_enable(portid);
	}

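	/*
	 * Make sure packet types can be determined on every RX queue: with
	 * --parse-ptype, a software RX callback (cb_parse_ptype) classifies
	 * each packet; otherwise the NIC itself must report IPv4/IPv6 packet
	 * types, which check_ptype() verifies.
	 */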
	for (i = 0; i < n_rx_thread; i++) {
		lcore_id = rx_thread[i].conf.lcore_id;
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		/* check if hw packet type is supported */
		for (queue = 0; queue < rx_thread[i].n_rx_queue; ++queue) {
			portid = rx_thread[i].rx_queue_list[queue].port_id;
			queueid = rx_thread[i].rx_queue_list[queue].queue_id;

			if (parse_ptype_on) {
				if (!rte_eth_add_rx_callback(portid, queueid,
						cb_parse_ptype, NULL))
					rte_exit(EXIT_FAILURE,
						"Failed to add rx callback: "
						"port=%d\n", portid);
			} else if (!check_ptype(portid))
				rte_exit(EXIT_FAILURE,
					"Port %d cannot parse packet type.\n\n"
					"Please add --parse-ptype to use sw "
					"packet type analyzer.\n\n",
					portid);
		}
	}

	check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);

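	/*
	 * Two execution models: with L-threads, each EAL lcore runs a
	 * cooperative scheduler and the RX/TX workers are lightweight
	 * threads multiplexed on top of it; otherwise the classic model
	 * runs one EAL (pthread) worker per lcore.
	 */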
	if (lthreads_on) {
		printf("Starting L-Threading Model\n");

#if (APP_CPU_LOAD > 0)
		if (cpu_load_lcore_id > 0)
			/* Reserve one lcore for the CPU load collector */
			nb_lcores--;
#endif

		lthread_num_schedulers_set(nb_lcores);
		rte_eal_mp_remote_launch(sched_spawner, NULL, SKIP_MASTER);
		lthread_master_spawner(NULL);

	} else {
		printf("Starting P-Threading Model\n");
		/* launch per-lcore init on every lcore */
		rte_eal_mp_remote_launch(pthread_run, NULL, CALL_MASTER);
		RTE_LCORE_FOREACH_SLAVE(lcore_id) {
			if (rte_eal_wait_lcore(lcore_id) < 0)
				return -1;
		}
	}

	return 0;
}